diff --git a/libs/common/_distutils_hack/__init__.py b/libs/common/_distutils_hack/__init__.py new file mode 100644 index 00000000..f987a536 --- /dev/null +++ b/libs/common/_distutils_hack/__init__.py @@ -0,0 +1,222 @@ +# don't import any costly modules +import sys +import os + + +is_pypy = '__pypy__' in sys.builtin_module_names + + +def warn_distutils_present(): + if 'distutils' not in sys.modules: + return + if is_pypy and sys.version_info < (3, 7): + # PyPy for 3.6 unconditionally imports distutils, so bypass the warning + # https://foss.heptapod.net/pypy/pypy/-/blob/be829135bc0d758997b3566062999ee8b23872b4/lib-python/3/site.py#L250 + return + import warnings + + warnings.warn( + "Distutils was imported before Setuptools, but importing Setuptools " + "also replaces the `distutils` module in `sys.modules`. This may lead " + "to undesirable behaviors or errors. To avoid these issues, avoid " + "using distutils directly, ensure that setuptools is installed in the " + "traditional way (e.g. not an editable install), and/or make sure " + "that setuptools is always imported before distutils." + ) + + +def clear_distutils(): + if 'distutils' not in sys.modules: + return + import warnings + + warnings.warn("Setuptools is replacing distutils.") + mods = [ + name + for name in sys.modules + if name == "distutils" or name.startswith("distutils.") + ] + for name in mods: + del sys.modules[name] + + +def enabled(): + """ + Allow selection of distutils by environment variable. + """ + which = os.environ.get('SETUPTOOLS_USE_DISTUTILS', 'local') + return which == 'local' + + +def ensure_local_distutils(): + import importlib + + clear_distutils() + + # With the DistutilsMetaFinder in place, + # perform an import to cause distutils to be + # loaded from setuptools._distutils. Ref #2906. + with shim(): + importlib.import_module('distutils') + + # check that submodules load as expected + core = importlib.import_module('distutils.core') + assert '_distutils' in core.__file__, core.__file__ + assert 'setuptools._distutils.log' not in sys.modules + + +def do_override(): + """ + Ensure that the local copy of distutils is preferred over stdlib. + + See https://github.com/pypa/setuptools/issues/417#issuecomment-392298401 + for more motivation. + """ + if enabled(): + warn_distutils_present() + ensure_local_distutils() + + +class _TrivialRe: + def __init__(self, *patterns): + self._patterns = patterns + + def match(self, string): + return all(pat in string for pat in self._patterns) + + +class DistutilsMetaFinder: + def find_spec(self, fullname, path, target=None): + # optimization: only consider top level modules and those + # found in the CPython test suite. + if path is not None and not fullname.startswith('test.'): + return + + method_name = 'spec_for_{fullname}'.format(**locals()) + method = getattr(self, method_name, lambda: None) + return method() + + def spec_for_distutils(self): + if self.is_cpython(): + return + + import importlib + import importlib.abc + import importlib.util + + try: + mod = importlib.import_module('setuptools._distutils') + except Exception: + # There are a couple of cases where setuptools._distutils + # may not be present: + # - An older Setuptools without a local distutils is + # taking precedence. Ref #2957. + # - Path manipulation during sitecustomize removes + # setuptools from the path but only after the hook + # has been loaded. Ref #2980. + # In either case, fall back to stdlib behavior. + return + + class DistutilsLoader(importlib.abc.Loader): + def create_module(self, spec): + mod.__name__ = 'distutils' + return mod + + def exec_module(self, module): + pass + + return importlib.util.spec_from_loader( + 'distutils', DistutilsLoader(), origin=mod.__file__ + ) + + @staticmethod + def is_cpython(): + """ + Suppress supplying distutils for CPython (build and tests). + Ref #2965 and #3007. + """ + return os.path.isfile('pybuilddir.txt') + + def spec_for_pip(self): + """ + Ensure stdlib distutils when running under pip. + See pypa/pip#8761 for rationale. + """ + if self.pip_imported_during_build(): + return + clear_distutils() + self.spec_for_distutils = lambda: None + + @classmethod + def pip_imported_during_build(cls): + """ + Detect if pip is being imported in a build script. Ref #2355. + """ + import traceback + + return any( + cls.frame_file_is_setup(frame) for frame, line in traceback.walk_stack(None) + ) + + @staticmethod + def frame_file_is_setup(frame): + """ + Return True if the indicated frame suggests a setup.py file. + """ + # some frames may not have __file__ (#2940) + return frame.f_globals.get('__file__', '').endswith('setup.py') + + def spec_for_sensitive_tests(self): + """ + Ensure stdlib distutils when running select tests under CPython. + + python/cpython#91169 + """ + clear_distutils() + self.spec_for_distutils = lambda: None + + sensitive_tests = ( + [ + 'test.test_distutils', + 'test.test_peg_generator', + 'test.test_importlib', + ] + if sys.version_info < (3, 10) + else [ + 'test.test_distutils', + ] + ) + + +for name in DistutilsMetaFinder.sensitive_tests: + setattr( + DistutilsMetaFinder, + f'spec_for_{name}', + DistutilsMetaFinder.spec_for_sensitive_tests, + ) + + +DISTUTILS_FINDER = DistutilsMetaFinder() + + +def add_shim(): + DISTUTILS_FINDER in sys.meta_path or insert_shim() + + +class shim: + def __enter__(self): + insert_shim() + + def __exit__(self, exc, value, tb): + remove_shim() + + +def insert_shim(): + sys.meta_path.insert(0, DISTUTILS_FINDER) + + +def remove_shim(): + try: + sys.meta_path.remove(DISTUTILS_FINDER) + except ValueError: + pass diff --git a/libs/common/_distutils_hack/override.py b/libs/common/_distutils_hack/override.py new file mode 100644 index 00000000..2cc433a4 --- /dev/null +++ b/libs/common/_distutils_hack/override.py @@ -0,0 +1 @@ +__import__('_distutils_hack').do_override() diff --git a/libs/common/bin/beet.exe b/libs/common/bin/beet.exe index a4e9eb61..0d2b2c80 100644 Binary files a/libs/common/bin/beet.exe and b/libs/common/bin/beet.exe differ diff --git a/libs/common/bin/chardetect.exe b/libs/common/bin/chardetect.exe index cbee2d0a..5d6a3d83 100644 Binary files a/libs/common/bin/chardetect.exe and b/libs/common/bin/chardetect.exe differ diff --git a/libs/common/bin/easy_install.exe b/libs/common/bin/easy_install.exe deleted file mode 100644 index ca60fc91..00000000 Binary files a/libs/common/bin/easy_install.exe and /dev/null differ diff --git a/libs/common/bin/guessit.exe b/libs/common/bin/guessit.exe index 3f453ac0..f863b990 100644 Binary files a/libs/common/bin/guessit.exe and b/libs/common/bin/guessit.exe differ diff --git a/libs/common/bin/mid3cp.exe b/libs/common/bin/mid3cp.exe index 82640c70..c5ea133c 100644 Binary files a/libs/common/bin/mid3cp.exe and b/libs/common/bin/mid3cp.exe differ diff --git a/libs/common/bin/mid3iconv.exe b/libs/common/bin/mid3iconv.exe index 95fc1797..130e8932 100644 Binary files a/libs/common/bin/mid3iconv.exe and b/libs/common/bin/mid3iconv.exe differ diff --git a/libs/common/bin/mid3v2.exe b/libs/common/bin/mid3v2.exe index 2c7a099a..a43345f1 100644 Binary files a/libs/common/bin/mid3v2.exe and b/libs/common/bin/mid3v2.exe differ diff --git a/libs/common/bin/moggsplit.exe b/libs/common/bin/moggsplit.exe index 1160cbce..cddf2944 100644 Binary files a/libs/common/bin/moggsplit.exe and b/libs/common/bin/moggsplit.exe differ diff --git a/libs/common/bin/mutagen-inspect.exe b/libs/common/bin/mutagen-inspect.exe index 4f587d6b..ccb0a08b 100644 Binary files a/libs/common/bin/mutagen-inspect.exe and b/libs/common/bin/mutagen-inspect.exe differ diff --git a/libs/common/bin/mutagen-pony.exe b/libs/common/bin/mutagen-pony.exe index eacd1950..ad2a4958 100644 Binary files a/libs/common/bin/mutagen-pony.exe and b/libs/common/bin/mutagen-pony.exe differ diff --git a/libs/common/bin/easy_install-3.7.exe b/libs/common/bin/normalizer.exe similarity index 99% rename from libs/common/bin/easy_install-3.7.exe rename to libs/common/bin/normalizer.exe index ca60fc91..f2c9b9b0 100644 Binary files a/libs/common/bin/easy_install-3.7.exe and b/libs/common/bin/normalizer.exe differ diff --git a/libs/common/bin/pbr.exe b/libs/common/bin/pbr.exe index 3c2f8c28..192f7f4c 100644 Binary files a/libs/common/bin/pbr.exe and b/libs/common/bin/pbr.exe differ diff --git a/libs/common/bin/srt.exe b/libs/common/bin/srt.exe index a7dae560..435116c8 100644 Binary files a/libs/common/bin/srt.exe and b/libs/common/bin/srt.exe differ diff --git a/libs/common/bin/subliminal.exe b/libs/common/bin/subliminal.exe index 114c64d9..1cdf55b7 100644 Binary files a/libs/common/bin/subliminal.exe and b/libs/common/bin/subliminal.exe differ diff --git a/libs/common/bin/unidecode.exe b/libs/common/bin/unidecode.exe index 9b1c0df9..5d44abe7 100644 Binary files a/libs/common/bin/unidecode.exe and b/libs/common/bin/unidecode.exe differ diff --git a/libs/common/certifi/__init__.py b/libs/common/certifi/__init__.py index eebdf888..af4bcc15 100644 --- a/libs/common/certifi/__init__.py +++ b/libs/common/certifi/__init__.py @@ -1,3 +1,4 @@ from .core import contents, where -__version__ = "2021.05.30" +__all__ = ["contents", "where"] +__version__ = "2022.09.24" diff --git a/libs/common/certifi/cacert.pem b/libs/common/certifi/cacert.pem index 96e2fc65..40051551 100644 --- a/libs/common/certifi/cacert.pem +++ b/libs/common/certifi/cacert.pem @@ -28,36 +28,6 @@ DKqC5JlR3XC321Y9YeRq4VzW9v493kHMB65jUr9TU/Qr6cf9tveCX4XSQRjbgbME HMUfpIBvFSDJ3gyICh3WZlXi/EjJKSZp4A== -----END CERTIFICATE----- -# Issuer: CN=GlobalSign O=GlobalSign OU=GlobalSign Root CA - R2 -# Subject: CN=GlobalSign O=GlobalSign OU=GlobalSign Root CA - R2 -# Label: "GlobalSign Root CA - R2" -# Serial: 4835703278459682885658125 -# MD5 Fingerprint: 94:14:77:7e:3e:5e:fd:8f:30:bd:41:b0:cf:e7:d0:30 -# SHA1 Fingerprint: 75:e0:ab:b6:13:85:12:27:1c:04:f8:5f:dd:de:38:e4:b7:24:2e:fe -# SHA256 Fingerprint: ca:42:dd:41:74:5f:d0:b8:1e:b9:02:36:2c:f9:d8:bf:71:9d:a1:bd:1b:1e:fc:94:6f:5b:4c:99:f4:2c:1b:9e ------BEGIN CERTIFICATE----- -MIIDujCCAqKgAwIBAgILBAAAAAABD4Ym5g0wDQYJKoZIhvcNAQEFBQAwTDEgMB4G -A1UECxMXR2xvYmFsU2lnbiBSb290IENBIC0gUjIxEzARBgNVBAoTCkdsb2JhbFNp -Z24xEzARBgNVBAMTCkdsb2JhbFNpZ24wHhcNMDYxMjE1MDgwMDAwWhcNMjExMjE1 -MDgwMDAwWjBMMSAwHgYDVQQLExdHbG9iYWxTaWduIFJvb3QgQ0EgLSBSMjETMBEG -A1UEChMKR2xvYmFsU2lnbjETMBEGA1UEAxMKR2xvYmFsU2lnbjCCASIwDQYJKoZI -hvcNAQEBBQADggEPADCCAQoCggEBAKbPJA6+Lm8omUVCxKs+IVSbC9N/hHD6ErPL -v4dfxn+G07IwXNb9rfF73OX4YJYJkhD10FPe+3t+c4isUoh7SqbKSaZeqKeMWhG8 -eoLrvozps6yWJQeXSpkqBy+0Hne/ig+1AnwblrjFuTosvNYSuetZfeLQBoZfXklq -tTleiDTsvHgMCJiEbKjNS7SgfQx5TfC4LcshytVsW33hoCmEofnTlEnLJGKRILzd -C9XZzPnqJworc5HGnRusyMvo4KD0L5CLTfuwNhv2GXqF4G3yYROIXJ/gkwpRl4pa -zq+r1feqCapgvdzZX99yqWATXgAByUr6P6TqBwMhAo6CygPCm48CAwEAAaOBnDCB -mTAOBgNVHQ8BAf8EBAMCAQYwDwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4EFgQUm+IH -V2ccHsBqBt5ZtJot39wZhi4wNgYDVR0fBC8wLTAroCmgJ4YlaHR0cDovL2NybC5n -bG9iYWxzaWduLm5ldC9yb290LXIyLmNybDAfBgNVHSMEGDAWgBSb4gdXZxwewGoG -3lm0mi3f3BmGLjANBgkqhkiG9w0BAQUFAAOCAQEAmYFThxxol4aR7OBKuEQLq4Gs -J0/WwbgcQ3izDJr86iw8bmEbTUsp9Z8FHSbBuOmDAGJFtqkIk7mpM0sYmsL4h4hO -291xNBrBVNpGP+DTKqttVCL1OmLNIG+6KYnX3ZHu01yiPqFbQfXf5WRDLenVOavS -ot+3i9DAgBkcRcAtjOj4LaR0VknFBbVPFd5uRHg5h6h+u/N5GJG79G+dwfCMNYxd -AfvDbbnvRG15RjF+Cv6pgsH/76tuIMRQyV+dTZsXjAzlAcmgQWpzU/qlULRuJQ/7 -TBj0/VLZjmmx6BEP3ojY+x1J96relc8geMJgEtslQIxq/H5COEBkEveegeGTLg== ------END CERTIFICATE----- - # Issuer: CN=Entrust.net Certification Authority (2048) O=Entrust.net OU=www.entrust.net/CPS_2048 incorp. by ref. (limits liab.)/(c) 1999 Entrust.net Limited # Subject: CN=Entrust.net Certification Authority (2048) O=Entrust.net OU=www.entrust.net/CPS_2048 incorp. by ref. (limits liab.)/(c) 1999 Entrust.net Limited # Label: "Entrust.net Premium 2048 Secure Server CA" @@ -491,34 +461,6 @@ vEsXCS+0yx5DaMkHJ8HSXPfqIbloEpw8nL+e/IBcm2PN7EeqJSdnoDfzAIJ9VNep +OkuE6N36B9K -----END CERTIFICATE----- -# Issuer: CN=DST Root CA X3 O=Digital Signature Trust Co. -# Subject: CN=DST Root CA X3 O=Digital Signature Trust Co. -# Label: "DST Root CA X3" -# Serial: 91299735575339953335919266965803778155 -# MD5 Fingerprint: 41:03:52:dc:0f:f7:50:1b:16:f0:02:8e:ba:6f:45:c5 -# SHA1 Fingerprint: da:c9:02:4f:54:d8:f6:df:94:93:5f:b1:73:26:38:ca:6a:d7:7c:13 -# SHA256 Fingerprint: 06:87:26:03:31:a7:24:03:d9:09:f1:05:e6:9b:cf:0d:32:e1:bd:24:93:ff:c6:d9:20:6d:11:bc:d6:77:07:39 ------BEGIN CERTIFICATE----- -MIIDSjCCAjKgAwIBAgIQRK+wgNajJ7qJMDmGLvhAazANBgkqhkiG9w0BAQUFADA/ -MSQwIgYDVQQKExtEaWdpdGFsIFNpZ25hdHVyZSBUcnVzdCBDby4xFzAVBgNVBAMT -DkRTVCBSb290IENBIFgzMB4XDTAwMDkzMDIxMTIxOVoXDTIxMDkzMDE0MDExNVow -PzEkMCIGA1UEChMbRGlnaXRhbCBTaWduYXR1cmUgVHJ1c3QgQ28uMRcwFQYDVQQD -Ew5EU1QgUm9vdCBDQSBYMzCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEB -AN+v6ZdQCINXtMxiZfaQguzH0yxrMMpb7NnDfcdAwRgUi+DoM3ZJKuM/IUmTrE4O -rz5Iy2Xu/NMhD2XSKtkyj4zl93ewEnu1lcCJo6m67XMuegwGMoOifooUMM0RoOEq -OLl5CjH9UL2AZd+3UWODyOKIYepLYYHsUmu5ouJLGiifSKOeDNoJjj4XLh7dIN9b -xiqKqy69cK3FCxolkHRyxXtqqzTWMIn/5WgTe1QLyNau7Fqckh49ZLOMxt+/yUFw -7BZy1SbsOFU5Q9D8/RhcQPGX69Wam40dutolucbY38EVAjqr2m7xPi71XAicPNaD -aeQQmxkqtilX4+U9m5/wAl0CAwEAAaNCMEAwDwYDVR0TAQH/BAUwAwEB/zAOBgNV -HQ8BAf8EBAMCAQYwHQYDVR0OBBYEFMSnsaR7LHH62+FLkHX/xBVghYkQMA0GCSqG -SIb3DQEBBQUAA4IBAQCjGiybFwBcqR7uKGY3Or+Dxz9LwwmglSBd49lZRNI+DT69 -ikugdB/OEIKcdBodfpga3csTS7MgROSR6cz8faXbauX+5v3gTt23ADq1cEmv8uXr -AvHRAosZy5Q6XkjEGB5YGV8eAlrwDPGxrancWYaLbumR9YbK+rlmM6pZW87ipxZz -R8srzJmwN0jP41ZL9c8PDHIyh8bwRLtTcm1D9SZImlJnt1ir/md2cXjbDaJWFBM5 -JDGFoqgCWjBH4d1QB7wCCZAA62RjYJsWvIjJEubSfZGL+T0yjWW06XyxV3bqxbYo -Ob8VZRzI9neWagqNdwvYkQsEjgfbKbYK7p2CNTUQ ------END CERTIFICATE----- - # Issuer: CN=SwissSign Gold CA - G2 O=SwissSign AG # Subject: CN=SwissSign Gold CA - G2 O=SwissSign AG # Label: "SwissSign Gold CA - G2" @@ -779,36 +721,6 @@ t0QmwCbAr1UwnjvVNioZBPRcHv/PLLf/0P2HQBHVESO7SMAhqaQoLf0V+LBOK/Qw WyH8EZE0vkHve52Xdf+XlcCWWC/qu0bXu+TZLg== -----END CERTIFICATE----- -# Issuer: CN=Cybertrust Global Root O=Cybertrust, Inc -# Subject: CN=Cybertrust Global Root O=Cybertrust, Inc -# Label: "Cybertrust Global Root" -# Serial: 4835703278459682877484360 -# MD5 Fingerprint: 72:e4:4a:87:e3:69:40:80:77:ea:bc:e3:f4:ff:f0:e1 -# SHA1 Fingerprint: 5f:43:e5:b1:bf:f8:78:8c:ac:1c:c7:ca:4a:9a:c6:22:2b:cc:34:c6 -# SHA256 Fingerprint: 96:0a:df:00:63:e9:63:56:75:0c:29:65:dd:0a:08:67:da:0b:9c:bd:6e:77:71:4a:ea:fb:23:49:ab:39:3d:a3 ------BEGIN CERTIFICATE----- -MIIDoTCCAomgAwIBAgILBAAAAAABD4WqLUgwDQYJKoZIhvcNAQEFBQAwOzEYMBYG -A1UEChMPQ3liZXJ0cnVzdCwgSW5jMR8wHQYDVQQDExZDeWJlcnRydXN0IEdsb2Jh -bCBSb290MB4XDTA2MTIxNTA4MDAwMFoXDTIxMTIxNTA4MDAwMFowOzEYMBYGA1UE -ChMPQ3liZXJ0cnVzdCwgSW5jMR8wHQYDVQQDExZDeWJlcnRydXN0IEdsb2JhbCBS -b290MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA+Mi8vRRQZhP/8NN5 -7CPytxrHjoXxEnOmGaoQ25yiZXRadz5RfVb23CO21O1fWLE3TdVJDm71aofW0ozS -J8bi/zafmGWgE07GKmSb1ZASzxQG9Dvj1Ci+6A74q05IlG2OlTEQXO2iLb3VOm2y -HLtgwEZLAfVJrn5GitB0jaEMAs7u/OePuGtm839EAL9mJRQr3RAwHQeWP032a7iP -t3sMpTjr3kfb1V05/Iin89cqdPHoWqI7n1C6poxFNcJQZZXcY4Lv3b93TZxiyWNz -FtApD0mpSPCzqrdsxacwOUBdrsTiXSZT8M4cIwhhqJQZugRiQOwfOHB3EgZxpzAY -XSUnpQIDAQABo4GlMIGiMA4GA1UdDwEB/wQEAwIBBjAPBgNVHRMBAf8EBTADAQH/ -MB0GA1UdDgQWBBS2CHsNesysIEyGVjJez6tuhS1wVzA/BgNVHR8EODA2MDSgMqAw -hi5odHRwOi8vd3d3Mi5wdWJsaWMtdHJ1c3QuY29tL2NybC9jdC9jdHJvb3QuY3Js -MB8GA1UdIwQYMBaAFLYIew16zKwgTIZWMl7Pq26FLXBXMA0GCSqGSIb3DQEBBQUA -A4IBAQBW7wojoFROlZfJ+InaRcHUowAl9B8Tq7ejhVhpwjCt2BWKLePJzYFa+HMj -Wqd8BfP9IjsO0QbE2zZMcwSO5bAi5MXzLqXZI+O4Tkogp24CJJ8iYGd7ix1yCcUx -XOl5n4BHPa2hCwcUPUf/A2kaDAtE52Mlp3+yybh2hO0j9n0Hq0V+09+zv+mKts2o -omcrUtW3ZfA5TGOgkXmTUg9U3YO7n9GPp1Nzw8v/MOx8BLjYRB+TX3EJIrduPuoc -A06dGiBh+4E37F78CkWr1+cXVdCg6mCbpvbjjFspwgZgFJ0tl0ypkxWdYcQBX0jW -WL1WMRJOEcgh4LMRkWXbtKaIOM5V ------END CERTIFICATE----- - # Issuer: O=Chunghwa Telecom Co., Ltd. OU=ePKI Root Certification Authority # Subject: O=Chunghwa Telecom Co., Ltd. OU=ePKI Root Certification Authority # Label: "ePKI Root Certification Authority" @@ -1411,78 +1323,6 @@ t/2jioSgrGK+KwmHNPBqAbubKVY8/gA3zyNs8U6qtnRGEmyR7jTV7JqR50S+kDFy SjnRBUkLp7Y3gaVdjKozXoEofKd9J+sAro03 -----END CERTIFICATE----- -# Issuer: CN=EC-ACC O=Agencia Catalana de Certificacio (NIF Q-0801176-I) OU=Serveis Publics de Certificacio/Vegeu https://www.catcert.net/verarrel (c)03/Jerarquia Entitats de Certificacio Catalanes -# Subject: CN=EC-ACC O=Agencia Catalana de Certificacio (NIF Q-0801176-I) OU=Serveis Publics de Certificacio/Vegeu https://www.catcert.net/verarrel (c)03/Jerarquia Entitats de Certificacio Catalanes -# Label: "EC-ACC" -# Serial: -23701579247955709139626555126524820479 -# MD5 Fingerprint: eb:f5:9d:29:0d:61:f9:42:1f:7c:c2:ba:6d:e3:15:09 -# SHA1 Fingerprint: 28:90:3a:63:5b:52:80:fa:e6:77:4c:0b:6d:a7:d6:ba:a6:4a:f2:e8 -# SHA256 Fingerprint: 88:49:7f:01:60:2f:31:54:24:6a:e2:8c:4d:5a:ef:10:f1:d8:7e:bb:76:62:6f:4a:e0:b7:f9:5b:a7:96:87:99 ------BEGIN CERTIFICATE----- -MIIFVjCCBD6gAwIBAgIQ7is969Qh3hSoYqwE893EATANBgkqhkiG9w0BAQUFADCB -8zELMAkGA1UEBhMCRVMxOzA5BgNVBAoTMkFnZW5jaWEgQ2F0YWxhbmEgZGUgQ2Vy -dGlmaWNhY2lvIChOSUYgUS0wODAxMTc2LUkpMSgwJgYDVQQLEx9TZXJ2ZWlzIFB1 -YmxpY3MgZGUgQ2VydGlmaWNhY2lvMTUwMwYDVQQLEyxWZWdldSBodHRwczovL3d3 -dy5jYXRjZXJ0Lm5ldC92ZXJhcnJlbCAoYykwMzE1MDMGA1UECxMsSmVyYXJxdWlh -IEVudGl0YXRzIGRlIENlcnRpZmljYWNpbyBDYXRhbGFuZXMxDzANBgNVBAMTBkVD -LUFDQzAeFw0wMzAxMDcyMzAwMDBaFw0zMTAxMDcyMjU5NTlaMIHzMQswCQYDVQQG -EwJFUzE7MDkGA1UEChMyQWdlbmNpYSBDYXRhbGFuYSBkZSBDZXJ0aWZpY2FjaW8g -KE5JRiBRLTA4MDExNzYtSSkxKDAmBgNVBAsTH1NlcnZlaXMgUHVibGljcyBkZSBD -ZXJ0aWZpY2FjaW8xNTAzBgNVBAsTLFZlZ2V1IGh0dHBzOi8vd3d3LmNhdGNlcnQu -bmV0L3ZlcmFycmVsIChjKTAzMTUwMwYDVQQLEyxKZXJhcnF1aWEgRW50aXRhdHMg -ZGUgQ2VydGlmaWNhY2lvIENhdGFsYW5lczEPMA0GA1UEAxMGRUMtQUNDMIIBIjAN -BgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAsyLHT+KXQpWIR4NA9h0X84NzJB5R -85iKw5K4/0CQBXCHYMkAqbWUZRkiFRfCQ2xmRJoNBD45b6VLeqpjt4pEndljkYRm -4CgPukLjbo73FCeTae6RDqNfDrHrZqJyTxIThmV6PttPB/SnCWDaOkKZx7J/sxaV -HMf5NLWUhdWZXqBIoH7nF2W4onW4HvPlQn2v7fOKSGRdghST2MDk/7NQcvJ29rNd -QlB50JQ+awwAvthrDk4q7D7SzIKiGGUzE3eeml0aE9jD2z3Il3rucO2n5nzbcc8t -lGLfbdb1OL4/pYUKGbio2Al1QnDE6u/LDsg0qBIimAy4E5S2S+zw0JDnJwIDAQAB -o4HjMIHgMB0GA1UdEQQWMBSBEmVjX2FjY0BjYXRjZXJ0Lm5ldDAPBgNVHRMBAf8E -BTADAQH/MA4GA1UdDwEB/wQEAwIBBjAdBgNVHQ4EFgQUoMOLRKo3pUW/l4Ba0fF4 -opvpXY0wfwYDVR0gBHgwdjB0BgsrBgEEAfV4AQMBCjBlMCwGCCsGAQUFBwIBFiBo -dHRwczovL3d3dy5jYXRjZXJ0Lm5ldC92ZXJhcnJlbDA1BggrBgEFBQcCAjApGidW -ZWdldSBodHRwczovL3d3dy5jYXRjZXJ0Lm5ldC92ZXJhcnJlbCAwDQYJKoZIhvcN -AQEFBQADggEBAKBIW4IB9k1IuDlVNZyAelOZ1Vr/sXE7zDkJlF7W2u++AVtd0x7Y -/X1PzaBB4DSTv8vihpw3kpBWHNzrKQXlxJ7HNd+KDM3FIUPpqojlNcAZQmNaAl6k -SBg6hW/cnbw/nZzBh7h6YQjpdwt/cKt63dmXLGQehb+8dJahw3oS7AwaboMMPOhy -Rp/7SNVel+axofjk70YllJyJ22k4vuxcDlbHZVHlUIiIv0LVKz3l+bqeLrPK9HOS -Agu+TGbrIP65y7WZf+a2E/rKS03Z7lNGBjvGTq2TWoF+bCpLagVFjPIhpDGQh2xl -nJ2lYJU6Un/10asIbvPuW/mIPX64b24D5EI= ------END CERTIFICATE----- - -# Issuer: CN=Hellenic Academic and Research Institutions RootCA 2011 O=Hellenic Academic and Research Institutions Cert. Authority -# Subject: CN=Hellenic Academic and Research Institutions RootCA 2011 O=Hellenic Academic and Research Institutions Cert. Authority -# Label: "Hellenic Academic and Research Institutions RootCA 2011" -# Serial: 0 -# MD5 Fingerprint: 73:9f:4c:4b:73:5b:79:e9:fa:ba:1c:ef:6e:cb:d5:c9 -# SHA1 Fingerprint: fe:45:65:9b:79:03:5b:98:a1:61:b5:51:2e:ac:da:58:09:48:22:4d -# SHA256 Fingerprint: bc:10:4f:15:a4:8b:e7:09:dc:a5:42:a7:e1:d4:b9:df:6f:05:45:27:e8:02:ea:a9:2d:59:54:44:25:8a:fe:71 ------BEGIN CERTIFICATE----- -MIIEMTCCAxmgAwIBAgIBADANBgkqhkiG9w0BAQUFADCBlTELMAkGA1UEBhMCR1Ix -RDBCBgNVBAoTO0hlbGxlbmljIEFjYWRlbWljIGFuZCBSZXNlYXJjaCBJbnN0aXR1 -dGlvbnMgQ2VydC4gQXV0aG9yaXR5MUAwPgYDVQQDEzdIZWxsZW5pYyBBY2FkZW1p -YyBhbmQgUmVzZWFyY2ggSW5zdGl0dXRpb25zIFJvb3RDQSAyMDExMB4XDTExMTIw -NjEzNDk1MloXDTMxMTIwMTEzNDk1MlowgZUxCzAJBgNVBAYTAkdSMUQwQgYDVQQK -EztIZWxsZW5pYyBBY2FkZW1pYyBhbmQgUmVzZWFyY2ggSW5zdGl0dXRpb25zIENl -cnQuIEF1dGhvcml0eTFAMD4GA1UEAxM3SGVsbGVuaWMgQWNhZGVtaWMgYW5kIFJl -c2VhcmNoIEluc3RpdHV0aW9ucyBSb290Q0EgMjAxMTCCASIwDQYJKoZIhvcNAQEB -BQADggEPADCCAQoCggEBAKlTAOMupvaO+mDYLZU++CwqVE7NuYRhlFhPjz2L5EPz -dYmNUeTDN9KKiE15HrcS3UN4SoqS5tdI1Q+kOilENbgH9mgdVc04UfCMJDGFr4PJ -fel3r+0ae50X+bOdOFAPplp5kYCvN66m0zH7tSYJnTxa71HFK9+WXesyHgLacEns -bgzImjeN9/E2YEsmLIKe0HjzDQ9jpFEw4fkrJxIH2Oq9GGKYsFk3fb7u8yBRQlqD -75O6aRXxYp2fmTmCobd0LovUxQt7L/DICto9eQqakxylKHJzkUOap9FNhYS5qXSP -FEDH3N6sQWRstBmbAmNtJGSPRLIl6s5ddAxjMlyNh+UCAwEAAaOBiTCBhjAPBgNV -HRMBAf8EBTADAQH/MAsGA1UdDwQEAwIBBjAdBgNVHQ4EFgQUppFC/RNhSiOeCKQp -5dgTBCPuQSUwRwYDVR0eBEAwPqA8MAWCAy5ncjAFggMuZXUwBoIELmVkdTAGggQu -b3JnMAWBAy5ncjAFgQMuZXUwBoEELmVkdTAGgQQub3JnMA0GCSqGSIb3DQEBBQUA -A4IBAQAf73lB4XtuP7KMhjdCSk4cNx6NZrokgclPEg8hwAOXhiVtXdMiKahsog2p -6z0GW5k6x8zDmjR/qw7IThzh+uTczQ2+vyT+bOdrwg3IBp5OjWEopmr95fZi6hg8 -TqBTnbI6nOulnJEWtk2C4AwFSKls9cz4y51JtPACpf1wA+2KIaWuE4ZJwzNzvoc7 -dIsXRSZMFpGD/md9zU1jZ/rzAxKWeAaNsWftjj++n08C9bMJL/NMh98qy5V8Acys -Nnq/onN694/BtZqhFLKPM58N7yLcZnuEvUUXBj08yrl3NI/K6s8/MT7jiOOASSXI -l7WdmplNsDz4SgCbZN2fOUvRJ9e4 ------END CERTIFICATE----- - # Issuer: CN=Actalis Authentication Root CA O=Actalis S.p.A./03358520967 # Subject: CN=Actalis Authentication Root CA O=Actalis S.p.A./03358520967 # Label: "Actalis Authentication Root CA" @@ -2342,27 +2182,6 @@ zzuqQhFkoJ2UOQIReVx7Hfpkue4WQrO/isIJxOzksU0CMQDpKmFHjFJKS04YcPbW RNZu9YO6bVi9JNlWSOrvxKJGgYhqOkbRqZtNyWHa0V1Xahg= -----END CERTIFICATE----- -# Issuer: CN=GlobalSign O=GlobalSign OU=GlobalSign ECC Root CA - R4 -# Subject: CN=GlobalSign O=GlobalSign OU=GlobalSign ECC Root CA - R4 -# Label: "GlobalSign ECC Root CA - R4" -# Serial: 14367148294922964480859022125800977897474 -# MD5 Fingerprint: 20:f0:27:68:d1:7e:a0:9d:0e:e6:2a:ca:df:5c:89:8e -# SHA1 Fingerprint: 69:69:56:2e:40:80:f4:24:a1:e7:19:9f:14:ba:f3:ee:58:ab:6a:bb -# SHA256 Fingerprint: be:c9:49:11:c2:95:56:76:db:6c:0a:55:09:86:d7:6e:3b:a0:05:66:7c:44:2c:97:62:b4:fb:b7:73:de:22:8c ------BEGIN CERTIFICATE----- -MIIB4TCCAYegAwIBAgIRKjikHJYKBN5CsiilC+g0mAIwCgYIKoZIzj0EAwIwUDEk -MCIGA1UECxMbR2xvYmFsU2lnbiBFQ0MgUm9vdCBDQSAtIFI0MRMwEQYDVQQKEwpH -bG9iYWxTaWduMRMwEQYDVQQDEwpHbG9iYWxTaWduMB4XDTEyMTExMzAwMDAwMFoX -DTM4MDExOTAzMTQwN1owUDEkMCIGA1UECxMbR2xvYmFsU2lnbiBFQ0MgUm9vdCBD -QSAtIFI0MRMwEQYDVQQKEwpHbG9iYWxTaWduMRMwEQYDVQQDEwpHbG9iYWxTaWdu -MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEuMZ5049sJQ6fLjkZHAOkrprlOQcJ -FspjsbmG+IpXwVfOQvpzofdlQv8ewQCybnMO/8ch5RikqtlxP6jUuc6MHaNCMEAw -DgYDVR0PAQH/BAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8wHQYDVR0OBBYEFFSwe61F -uOJAf/sKbvu+M8k8o4TVMAoGCCqGSM49BAMCA0gAMEUCIQDckqGgE6bPA7DmxCGX -kPoUVy0D7O48027KqGx2vKLeuwIgJ6iFJzWbVsaj8kfSt24bAgAXqmemFZHe+pTs -ewv4n4Q= ------END CERTIFICATE----- - # Issuer: CN=GlobalSign O=GlobalSign OU=GlobalSign ECC Root CA - R5 # Subject: CN=GlobalSign O=GlobalSign OU=GlobalSign ECC Root CA - R5 # Label: "GlobalSign ECC Root CA - R5" @@ -3337,126 +3156,6 @@ rYy0UGYwEAYJKwYBBAGCNxUBBAMCAQAwCgYIKoZIzj0EAwMDaAAwZQIwJsdpW9zV Mgj/mkkCtojeFK9dbJlxjRo/i9fgojaGHAeCOnZT/cKi7e97sIBPWA9LUzm9 -----END CERTIFICATE----- -# Issuer: CN=GTS Root R1 O=Google Trust Services LLC -# Subject: CN=GTS Root R1 O=Google Trust Services LLC -# Label: "GTS Root R1" -# Serial: 146587175971765017618439757810265552097 -# MD5 Fingerprint: 82:1a:ef:d4:d2:4a:f2:9f:e2:3d:97:06:14:70:72:85 -# SHA1 Fingerprint: e1:c9:50:e6:ef:22:f8:4c:56:45:72:8b:92:20:60:d7:d5:a7:a3:e8 -# SHA256 Fingerprint: 2a:57:54:71:e3:13:40:bc:21:58:1c:bd:2c:f1:3e:15:84:63:20:3e:ce:94:bc:f9:d3:cc:19:6b:f0:9a:54:72 ------BEGIN CERTIFICATE----- -MIIFWjCCA0KgAwIBAgIQbkepxUtHDA3sM9CJuRz04TANBgkqhkiG9w0BAQwFADBH -MQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExM -QzEUMBIGA1UEAxMLR1RTIFJvb3QgUjEwHhcNMTYwNjIyMDAwMDAwWhcNMzYwNjIy -MDAwMDAwWjBHMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNl -cnZpY2VzIExMQzEUMBIGA1UEAxMLR1RTIFJvb3QgUjEwggIiMA0GCSqGSIb3DQEB -AQUAA4ICDwAwggIKAoICAQC2EQKLHuOhd5s73L+UPreVp0A8of2C+X0yBoJx9vaM -f/vo27xqLpeXo4xL+Sv2sfnOhB2x+cWX3u+58qPpvBKJXqeqUqv4IyfLpLGcY9vX -mX7wCl7raKb0xlpHDU0QM+NOsROjyBhsS+z8CZDfnWQpJSMHobTSPS5g4M/SCYe7 -zUjwTcLCeoiKu7rPWRnWr4+wB7CeMfGCwcDfLqZtbBkOtdh+JhpFAz2weaSUKK0P -fyblqAj+lug8aJRT7oM6iCsVlgmy4HqMLnXWnOunVmSPlk9orj2XwoSPwLxAwAtc -vfaHszVsrBhQf4TgTM2S0yDpM7xSma8ytSmzJSq0SPly4cpk9+aCEI3oncKKiPo4 -Zor8Y/kB+Xj9e1x3+naH+uzfsQ55lVe0vSbv1gHR6xYKu44LtcXFilWr06zqkUsp -zBmkMiVOKvFlRNACzqrOSbTqn3yDsEB750Orp2yjj32JgfpMpf/VjsPOS+C12LOO -Rc92wO1AK/1TD7Cn1TsNsYqiA94xrcx36m97PtbfkSIS5r762DL8EGMUUXLeXdYW -k70paDPvOmbsB4om3xPXV2V4J95eSRQAogB/mqghtqmxlbCluQ0WEdrHbEg8QOB+ -DVrNVjzRlwW5y0vtOUucxD/SVRNuJLDWcfr0wbrM7Rv1/oFB2ACYPTrIrnqYNxgF -lQIDAQABo0IwQDAOBgNVHQ8BAf8EBAMCAQYwDwYDVR0TAQH/BAUwAwEB/zAdBgNV -HQ4EFgQU5K8rJnEaK0gnhS9SZizv8IkTcT4wDQYJKoZIhvcNAQEMBQADggIBADiW -Cu49tJYeX++dnAsznyvgyv3SjgofQXSlfKqE1OXyHuY3UjKcC9FhHb8owbZEKTV1 -d5iyfNm9dKyKaOOpMQkpAWBz40d8U6iQSifvS9efk+eCNs6aaAyC58/UEBZvXw6Z -XPYfcX3v73svfuo21pdwCxXu11xWajOl40k4DLh9+42FpLFZXvRq4d2h9mREruZR -gyFmxhE+885H7pwoHyXa/6xmld01D1zvICxi/ZG6qcz8WpyTgYMpl0p8WnK0OdC3 -d8t5/Wk6kjftbjhlRn7pYL15iJdfOBL07q9bgsiG1eGZbYwE8na6SfZu6W0eX6Dv -J4J2QPim01hcDyxC2kLGe4g0x8HYRZvBPsVhHdljUEn2NIVq4BjFbkerQUIpm/Zg -DdIx02OYI5NaAIFItO/Nis3Jz5nu2Z6qNuFoS3FJFDYoOj0dzpqPJeaAcWErtXvM -+SUWgeExX6GjfhaknBZqlxi9dnKlC54dNuYvoS++cJEPqOba+MSSQGwlfnuzCdyy -F62ARPBopY+Udf90WuioAnwMCeKpSwughQtiue+hMZL77/ZRBIls6Kl0obsXs7X9 -SQ98POyDGCBDTtWTurQ0sR8WNh8M5mQ5Fkzc4P4dyKliPUDqysU0ArSuiYgzNdws -E3PYJ/HQcu51OyLemGhmW/HGY0dVHLqlCFF1pkgl ------END CERTIFICATE----- - -# Issuer: CN=GTS Root R2 O=Google Trust Services LLC -# Subject: CN=GTS Root R2 O=Google Trust Services LLC -# Label: "GTS Root R2" -# Serial: 146587176055767053814479386953112547951 -# MD5 Fingerprint: 44:ed:9a:0e:a4:09:3b:00:f2:ae:4c:a3:c6:61:b0:8b -# SHA1 Fingerprint: d2:73:96:2a:2a:5e:39:9f:73:3f:e1:c7:1e:64:3f:03:38:34:fc:4d -# SHA256 Fingerprint: c4:5d:7b:b0:8e:6d:67:e6:2e:42:35:11:0b:56:4e:5f:78:fd:92:ef:05:8c:84:0a:ea:4e:64:55:d7:58:5c:60 ------BEGIN CERTIFICATE----- -MIIFWjCCA0KgAwIBAgIQbkepxlqz5yDFMJo/aFLybzANBgkqhkiG9w0BAQwFADBH -MQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExM -QzEUMBIGA1UEAxMLR1RTIFJvb3QgUjIwHhcNMTYwNjIyMDAwMDAwWhcNMzYwNjIy -MDAwMDAwWjBHMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNl -cnZpY2VzIExMQzEUMBIGA1UEAxMLR1RTIFJvb3QgUjIwggIiMA0GCSqGSIb3DQEB -AQUAA4ICDwAwggIKAoICAQDO3v2m++zsFDQ8BwZabFn3GTXd98GdVarTzTukk3Lv -CvptnfbwhYBboUhSnznFt+4orO/LdmgUud+tAWyZH8QiHZ/+cnfgLFuv5AS/T3Kg -GjSY6Dlo7JUle3ah5mm5hRm9iYz+re026nO8/4Piy33B0s5Ks40FnotJk9/BW9Bu -XvAuMC6C/Pq8tBcKSOWIm8Wba96wyrQD8Nr0kLhlZPdcTK3ofmZemde4wj7I0BOd -re7kRXuJVfeKH2JShBKzwkCX44ofR5GmdFrS+LFjKBC4swm4VndAoiaYecb+3yXu -PuWgf9RhD1FLPD+M2uFwdNjCaKH5wQzpoeJ/u1U8dgbuak7MkogwTZq9TwtImoS1 -mKPV+3PBV2HdKFZ1E66HjucMUQkQdYhMvI35ezzUIkgfKtzra7tEscszcTJGr61K -8YzodDqs5xoic4DSMPclQsciOzsSrZYuxsN2B6ogtzVJV+mSSeh2FnIxZyuWfoqj -x5RWIr9qS34BIbIjMt/kmkRtWVtd9QCgHJvGeJeNkP+byKq0rxFROV7Z+2et1VsR -nTKaG73VululycslaVNVJ1zgyjbLiGH7HrfQy+4W+9OmTN6SpdTi3/UGVN4unUu0 -kzCqgc7dGtxRcw1PcOnlthYhGXmy5okLdWTK1au8CcEYof/UVKGFPP0UJAOyh9Ok -twIDAQABo0IwQDAOBgNVHQ8BAf8EBAMCAQYwDwYDVR0TAQH/BAUwAwEB/zAdBgNV -HQ4EFgQUu//KjiOfT5nK2+JopqUVJxce2Q4wDQYJKoZIhvcNAQEMBQADggIBALZp -8KZ3/p7uC4Gt4cCpx/k1HUCCq+YEtN/L9x0Pg/B+E02NjO7jMyLDOfxA325BS0JT -vhaI8dI4XsRomRyYUpOM52jtG2pzegVATX9lO9ZY8c6DR2Dj/5epnGB3GFW1fgiT -z9D2PGcDFWEJ+YF59exTpJ/JjwGLc8R3dtyDovUMSRqodt6Sm2T4syzFJ9MHwAiA -pJiS4wGWAqoC7o87xdFtCjMwc3i5T1QWvwsHoaRc5svJXISPD+AVdyx+Jn7axEvb -pxZ3B7DNdehyQtaVhJ2Gg/LkkM0JR9SLA3DaWsYDQvTtN6LwG1BUSw7YhN4ZKJmB -R64JGz9I0cNv4rBgF/XuIwKl2gBbbZCr7qLpGzvpx0QnRY5rn/WkhLx3+WuXrD5R -RaIRpsyF7gpo8j5QOHokYh4XIDdtak23CZvJ/KRY9bb7nE4Yu5UC56GtmwfuNmsk -0jmGwZODUNKBRqhfYlcsu2xkiAhu7xNUX90txGdj08+JN7+dIPT7eoOboB6BAFDC -5AwiWVIQ7UNWhwD4FFKnHYuTjKJNRn8nxnGbJN7k2oaLDX5rIMHAnuFl2GqjpuiF -izoHCBy69Y9Vmhh1fuXsgWbRIXOhNUQLgD1bnF5vKheW0YMjiGZt5obicDIvUiLn -yOd/xCxgXS/Dr55FBcOEArf9LAhST4Ldo/DUhgkC ------END CERTIFICATE----- - -# Issuer: CN=GTS Root R3 O=Google Trust Services LLC -# Subject: CN=GTS Root R3 O=Google Trust Services LLC -# Label: "GTS Root R3" -# Serial: 146587176140553309517047991083707763997 -# MD5 Fingerprint: 1a:79:5b:6b:04:52:9c:5d:c7:74:33:1b:25:9a:f9:25 -# SHA1 Fingerprint: 30:d4:24:6f:07:ff:db:91:89:8a:0b:e9:49:66:11:eb:8c:5e:46:e5 -# SHA256 Fingerprint: 15:d5:b8:77:46:19:ea:7d:54:ce:1c:a6:d0:b0:c4:03:e0:37:a9:17:f1:31:e8:a0:4e:1e:6b:7a:71:ba:bc:e5 ------BEGIN CERTIFICATE----- -MIICDDCCAZGgAwIBAgIQbkepx2ypcyRAiQ8DVd2NHTAKBggqhkjOPQQDAzBHMQsw -CQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExMQzEU -MBIGA1UEAxMLR1RTIFJvb3QgUjMwHhcNMTYwNjIyMDAwMDAwWhcNMzYwNjIyMDAw -MDAwWjBHMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZp -Y2VzIExMQzEUMBIGA1UEAxMLR1RTIFJvb3QgUjMwdjAQBgcqhkjOPQIBBgUrgQQA -IgNiAAQfTzOHMymKoYTey8chWEGJ6ladK0uFxh1MJ7x/JlFyb+Kf1qPKzEUURout -736GjOyxfi//qXGdGIRFBEFVbivqJn+7kAHjSxm65FSWRQmx1WyRRK2EE46ajA2A -DDL24CejQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNVHRMBAf8EBTADAQH/MB0GA1Ud -DgQWBBTB8Sa6oC2uhYHP0/EqEr24Cmf9vDAKBggqhkjOPQQDAwNpADBmAjEAgFuk -fCPAlaUs3L6JbyO5o91lAFJekazInXJ0glMLfalAvWhgxeG4VDvBNhcl2MG9AjEA -njWSdIUlUfUk7GRSJFClH9voy8l27OyCbvWFGFPouOOaKaqW04MjyaR7YbPMAuhd ------END CERTIFICATE----- - -# Issuer: CN=GTS Root R4 O=Google Trust Services LLC -# Subject: CN=GTS Root R4 O=Google Trust Services LLC -# Label: "GTS Root R4" -# Serial: 146587176229350439916519468929765261721 -# MD5 Fingerprint: 5d:b6:6a:c4:60:17:24:6a:1a:99:a8:4b:ee:5e:b4:26 -# SHA1 Fingerprint: 2a:1d:60:27:d9:4a:b1:0a:1c:4d:91:5c:cd:33:a0:cb:3e:2d:54:cb -# SHA256 Fingerprint: 71:cc:a5:39:1f:9e:79:4b:04:80:25:30:b3:63:e1:21:da:8a:30:43:bb:26:66:2f:ea:4d:ca:7f:c9:51:a4:bd ------BEGIN CERTIFICATE----- -MIICCjCCAZGgAwIBAgIQbkepyIuUtui7OyrYorLBmTAKBggqhkjOPQQDAzBHMQsw -CQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExMQzEU -MBIGA1UEAxMLR1RTIFJvb3QgUjQwHhcNMTYwNjIyMDAwMDAwWhcNMzYwNjIyMDAw -MDAwWjBHMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZp -Y2VzIExMQzEUMBIGA1UEAxMLR1RTIFJvb3QgUjQwdjAQBgcqhkjOPQIBBgUrgQQA -IgNiAATzdHOnaItgrkO4NcWBMHtLSZ37wWHO5t5GvWvVYRg1rkDdc/eJkTBa6zzu -hXyiQHY7qca4R9gq55KRanPpsXI5nymfopjTX15YhmUPoYRlBtHci8nHc8iMai/l -xKvRHYqjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNVHRMBAf8EBTADAQH/MB0GA1Ud -DgQWBBSATNbrdP9JNqPV2Py1PsVq8JQdjDAKBggqhkjOPQQDAwNnADBkAjBqUFJ0 -CMRw3J5QdCHojXohw0+WbhXRIjVhLfoIN+4Zba3bssx9BzT1YBkstTTZbyACMANx -sbqjYAuG7ZoIapVon+Kz4ZNkfF6Tpt95LY2F45TPI11xzPKwTdb+mciUqXWi4w== ------END CERTIFICATE----- - # Issuer: CN=UCA Global G2 Root O=UniTrust # Subject: CN=UCA Global G2 Root O=UniTrust # Label: "UCA Global G2 Root" @@ -4255,3 +3954,755 @@ qJZ9ZPskWkoDbGs4xugDQ5r3V7mzKWmTOPQD8rv7gmsHINFSH5pkAnuYZttcTVoP 0ISVoDwUQwbKytu4QTbaakRnh6+v40URFWkIsr4WOZckbxJF0WddCajJFdr60qZf E2Efv4WstK2tBZQIgx51F9NxO5NQI1mg7TyRVJ12AMXDuDjb -----END CERTIFICATE----- + +# Issuer: CN=TunTrust Root CA O=Agence Nationale de Certification Electronique +# Subject: CN=TunTrust Root CA O=Agence Nationale de Certification Electronique +# Label: "TunTrust Root CA" +# Serial: 108534058042236574382096126452369648152337120275 +# MD5 Fingerprint: 85:13:b9:90:5b:36:5c:b6:5e:b8:5a:f8:e0:31:57:b4 +# SHA1 Fingerprint: cf:e9:70:84:0f:e0:73:0f:9d:f6:0c:7f:2c:4b:ee:20:46:34:9c:bb +# SHA256 Fingerprint: 2e:44:10:2a:b5:8c:b8:54:19:45:1c:8e:19:d9:ac:f3:66:2c:af:bc:61:4b:6a:53:96:0a:30:f7:d0:e2:eb:41 +-----BEGIN CERTIFICATE----- +MIIFszCCA5ugAwIBAgIUEwLV4kBMkkaGFmddtLu7sms+/BMwDQYJKoZIhvcNAQEL +BQAwYTELMAkGA1UEBhMCVE4xNzA1BgNVBAoMLkFnZW5jZSBOYXRpb25hbGUgZGUg +Q2VydGlmaWNhdGlvbiBFbGVjdHJvbmlxdWUxGTAXBgNVBAMMEFR1blRydXN0IFJv +b3QgQ0EwHhcNMTkwNDI2MDg1NzU2WhcNNDQwNDI2MDg1NzU2WjBhMQswCQYDVQQG +EwJUTjE3MDUGA1UECgwuQWdlbmNlIE5hdGlvbmFsZSBkZSBDZXJ0aWZpY2F0aW9u +IEVsZWN0cm9uaXF1ZTEZMBcGA1UEAwwQVHVuVHJ1c3QgUm9vdCBDQTCCAiIwDQYJ +KoZIhvcNAQEBBQADggIPADCCAgoCggIBAMPN0/y9BFPdDCA61YguBUtB9YOCfvdZ +n56eY+hz2vYGqU8ftPkLHzmMmiDQfgbU7DTZhrx1W4eI8NLZ1KMKsmwb60ksPqxd +2JQDoOw05TDENX37Jk0bbjBU2PWARZw5rZzJJQRNmpA+TkBuimvNKWfGzC3gdOgF +VwpIUPp6Q9p+7FuaDmJ2/uqdHYVy7BG7NegfJ7/Boce7SBbdVtfMTqDhuazb1YMZ +GoXRlJfXyqNlC/M4+QKu3fZnz8k/9YosRxqZbwUN/dAdgjH8KcwAWJeRTIAAHDOF +li/LQcKLEITDCSSJH7UP2dl3RxiSlGBcx5kDPP73lad9UKGAwqmDrViWVSHbhlnU +r8a83YFuB9tgYv7sEG7aaAH0gxupPqJbI9dkxt/con3YS7qC0lH4Zr8GRuR5KiY2 +eY8fTpkdso8MDhz/yV3A/ZAQprE38806JG60hZC/gLkMjNWb1sjxVj8agIl6qeIb +MlEsPvLfe/ZdeikZjuXIvTZxi11Mwh0/rViizz1wTaZQmCXcI/m4WEEIcb9PuISg +jwBUFfyRbVinljvrS5YnzWuioYasDXxU5mZMZl+QviGaAkYt5IPCgLnPSz7ofzwB +7I9ezX/SKEIBlYrilz0QIX32nRzFNKHsLA4KUiwSVXAkPcvCFDVDXSdOvsC9qnyW +5/yeYa1E0wCXAgMBAAGjYzBhMB0GA1UdDgQWBBQGmpsfU33x9aTI04Y+oXNZtPdE +ITAPBgNVHRMBAf8EBTADAQH/MB8GA1UdIwQYMBaAFAaamx9TffH1pMjThj6hc1m0 +90QhMA4GA1UdDwEB/wQEAwIBBjANBgkqhkiG9w0BAQsFAAOCAgEAqgVutt0Vyb+z +xiD2BkewhpMl0425yAA/l/VSJ4hxyXT968pk21vvHl26v9Hr7lxpuhbI87mP0zYu +QEkHDVneixCwSQXi/5E/S7fdAo74gShczNxtr18UnH1YeA32gAm56Q6XKRm4t+v4 +FstVEuTGfbvE7Pi1HE4+Z7/FXxttbUcoqgRYYdZ2vyJ/0Adqp2RT8JeNnYA/u8EH +22Wv5psymsNUk8QcCMNE+3tjEUPRahphanltkE8pjkcFwRJpadbGNjHh/PqAulxP +xOu3Mqz4dWEX1xAZufHSCe96Qp1bWgvUxpVOKs7/B9dPfhgGiPEZtdmYu65xxBzn +dFlY7wyJz4sfdZMaBBSSSFCp61cpABbjNhzI+L/wM9VBD8TMPN3pM0MBkRArHtG5 +Xc0yGYuPjCB31yLEQtyEFpslbei0VXF/sHyz03FJuc9SpAQ/3D2gu68zngowYI7b +nV2UqL1g52KAdoGDDIzMMEZJ4gzSqK/rYXHv5yJiqfdcZGyfFoxnNidF9Ql7v/YQ +CvGwjVRDjAS6oz/v4jXH+XTgbzRB0L9zZVcg+ZtnemZoJE6AZb0QmQZZ8mWvuMZH +u/2QeItBcy6vVR/cO5JyboTT0GFMDcx2V+IthSIVNg3rAZ3r2OvEhJn7wAzMMujj +d9qDRIueVSjAi1jTkD5OGwDxFa2DK5o= +-----END CERTIFICATE----- + +# Issuer: CN=HARICA TLS RSA Root CA 2021 O=Hellenic Academic and Research Institutions CA +# Subject: CN=HARICA TLS RSA Root CA 2021 O=Hellenic Academic and Research Institutions CA +# Label: "HARICA TLS RSA Root CA 2021" +# Serial: 76817823531813593706434026085292783742 +# MD5 Fingerprint: 65:47:9b:58:86:dd:2c:f0:fc:a2:84:1f:1e:96:c4:91 +# SHA1 Fingerprint: 02:2d:05:82:fa:88:ce:14:0c:06:79:de:7f:14:10:e9:45:d7:a5:6d +# SHA256 Fingerprint: d9:5d:0e:8e:da:79:52:5b:f9:be:b1:1b:14:d2:10:0d:32:94:98:5f:0c:62:d9:fa:bd:9c:d9:99:ec:cb:7b:1d +-----BEGIN CERTIFICATE----- +MIIFpDCCA4ygAwIBAgIQOcqTHO9D88aOk8f0ZIk4fjANBgkqhkiG9w0BAQsFADBs +MQswCQYDVQQGEwJHUjE3MDUGA1UECgwuSGVsbGVuaWMgQWNhZGVtaWMgYW5kIFJl +c2VhcmNoIEluc3RpdHV0aW9ucyBDQTEkMCIGA1UEAwwbSEFSSUNBIFRMUyBSU0Eg +Um9vdCBDQSAyMDIxMB4XDTIxMDIxOTEwNTUzOFoXDTQ1MDIxMzEwNTUzN1owbDEL +MAkGA1UEBhMCR1IxNzA1BgNVBAoMLkhlbGxlbmljIEFjYWRlbWljIGFuZCBSZXNl +YXJjaCBJbnN0aXR1dGlvbnMgQ0ExJDAiBgNVBAMMG0hBUklDQSBUTFMgUlNBIFJv +b3QgQ0EgMjAyMTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAIvC569l +mwVnlskNJLnQDmT8zuIkGCyEf3dRywQRNrhe7Wlxp57kJQmXZ8FHws+RFjZiPTgE +4VGC/6zStGndLuwRo0Xua2s7TL+MjaQenRG56Tj5eg4MmOIjHdFOY9TnuEFE+2uv +a9of08WRiFukiZLRgeaMOVig1mlDqa2YUlhu2wr7a89o+uOkXjpFc5gH6l8Cct4M +pbOfrqkdtx2z/IpZ525yZa31MJQjB/OCFks1mJxTuy/K5FrZx40d/JiZ+yykgmvw +Kh+OC19xXFyuQnspiYHLA6OZyoieC0AJQTPb5lh6/a6ZcMBaD9YThnEvdmn8kN3b +LW7R8pv1GmuebxWMevBLKKAiOIAkbDakO/IwkfN4E8/BPzWr8R0RI7VDIp4BkrcY +AuUR0YLbFQDMYTfBKnya4dC6s1BG7oKsnTH4+yPiAwBIcKMJJnkVU2DzOFytOOqB +AGMUuTNe3QvboEUHGjMJ+E20pwKmafTCWQWIZYVWrkvL4N48fS0ayOn7H6NhStYq +E613TBoYm5EPWNgGVMWX+Ko/IIqmhaZ39qb8HOLubpQzKoNQhArlT4b4UEV4AIHr +W2jjJo3Me1xR9BQsQL4aYB16cmEdH2MtiKrOokWQCPxrvrNQKlr9qEgYRtaQQJKQ +CoReaDH46+0N0x3GfZkYVVYnZS6NRcUk7M7jAgMBAAGjQjBAMA8GA1UdEwEB/wQF +MAMBAf8wHQYDVR0OBBYEFApII6ZgpJIKM+qTW8VX6iVNvRLuMA4GA1UdDwEB/wQE +AwIBhjANBgkqhkiG9w0BAQsFAAOCAgEAPpBIqm5iFSVmewzVjIuJndftTgfvnNAU +X15QvWiWkKQUEapobQk1OUAJ2vQJLDSle1mESSmXdMgHHkdt8s4cUCbjnj1AUz/3 +f5Z2EMVGpdAgS1D0NTsY9FVqQRtHBmg8uwkIYtlfVUKqrFOFrJVWNlar5AWMxaja +H6NpvVMPxP/cyuN+8kyIhkdGGvMA9YCRotxDQpSbIPDRzbLrLFPCU3hKTwSUQZqP +JzLB5UkZv/HywouoCjkxKLR9YjYsTewfM7Z+d21+UPCfDtcRj88YxeMn/ibvBZ3P +zzfF0HvaO7AWhAw6k9a+F9sPPg4ZeAnHqQJyIkv3N3a6dcSFA1pj1bF1BcK5vZSt +jBWZp5N99sXzqnTPBIWUmAD04vnKJGW/4GKvyMX6ssmeVkjaef2WdhW+o45WxLM0 +/L5H9MG0qPzVMIho7suuyWPEdr6sOBjhXlzPrjoiUevRi7PzKzMHVIf6tLITe7pT +BGIBnfHAT+7hOtSLIBD6Alfm78ELt5BGnBkpjNxvoEppaZS3JGWg/6w/zgH7IS79 +aPib8qXPMThcFarmlwDB31qlpzmq6YR/PFGoOtmUW4y/Twhx5duoXNTSpv4Ao8YW +xw/ogM4cKGR0GQjTQuPOAF1/sdwTsOEFy9EgqoZ0njnnkf3/W9b3raYvAwtt41dU +63ZTGI0RmLo= +-----END CERTIFICATE----- + +# Issuer: CN=HARICA TLS ECC Root CA 2021 O=Hellenic Academic and Research Institutions CA +# Subject: CN=HARICA TLS ECC Root CA 2021 O=Hellenic Academic and Research Institutions CA +# Label: "HARICA TLS ECC Root CA 2021" +# Serial: 137515985548005187474074462014555733966 +# MD5 Fingerprint: ae:f7:4c:e5:66:35:d1:b7:9b:8c:22:93:74:d3:4b:b0 +# SHA1 Fingerprint: bc:b0:c1:9d:e9:98:92:70:19:38:57:e9:8d:a7:b4:5d:6e:ee:01:48 +# SHA256 Fingerprint: 3f:99:cc:47:4a:cf:ce:4d:fe:d5:87:94:66:5e:47:8d:15:47:73:9f:2e:78:0f:1b:b4:ca:9b:13:30:97:d4:01 +-----BEGIN CERTIFICATE----- +MIICVDCCAdugAwIBAgIQZ3SdjXfYO2rbIvT/WeK/zjAKBggqhkjOPQQDAzBsMQsw +CQYDVQQGEwJHUjE3MDUGA1UECgwuSGVsbGVuaWMgQWNhZGVtaWMgYW5kIFJlc2Vh +cmNoIEluc3RpdHV0aW9ucyBDQTEkMCIGA1UEAwwbSEFSSUNBIFRMUyBFQ0MgUm9v +dCBDQSAyMDIxMB4XDTIxMDIxOTExMDExMFoXDTQ1MDIxMzExMDEwOVowbDELMAkG +A1UEBhMCR1IxNzA1BgNVBAoMLkhlbGxlbmljIEFjYWRlbWljIGFuZCBSZXNlYXJj +aCBJbnN0aXR1dGlvbnMgQ0ExJDAiBgNVBAMMG0hBUklDQSBUTFMgRUNDIFJvb3Qg +Q0EgMjAyMTB2MBAGByqGSM49AgEGBSuBBAAiA2IABDgI/rGgltJ6rK9JOtDA4MM7 +KKrxcm1lAEeIhPyaJmuqS7psBAqIXhfyVYf8MLA04jRYVxqEU+kw2anylnTDUR9Y +STHMmE5gEYd103KUkE+bECUqqHgtvpBBWJAVcqeht6NCMEAwDwYDVR0TAQH/BAUw +AwEB/zAdBgNVHQ4EFgQUyRtTgRL+BNUW0aq8mm+3oJUZbsowDgYDVR0PAQH/BAQD +AgGGMAoGCCqGSM49BAMDA2cAMGQCMBHervjcToiwqfAircJRQO9gcS3ujwLEXQNw +SaSS6sUUiHCm0w2wqsosQJz76YJumgIwK0eaB8bRwoF8yguWGEEbo/QwCZ61IygN +nxS2PFOiTAZpffpskcYqSUXm7LcT4Tps +-----END CERTIFICATE----- + +# Issuer: CN=Autoridad de Certificacion Firmaprofesional CIF A62634068 +# Subject: CN=Autoridad de Certificacion Firmaprofesional CIF A62634068 +# Label: "Autoridad de Certificacion Firmaprofesional CIF A62634068" +# Serial: 1977337328857672817 +# MD5 Fingerprint: 4e:6e:9b:54:4c:ca:b7:fa:48:e4:90:b1:15:4b:1c:a3 +# SHA1 Fingerprint: 0b:be:c2:27:22:49:cb:39:aa:db:35:5c:53:e3:8c:ae:78:ff:b6:fe +# SHA256 Fingerprint: 57:de:05:83:ef:d2:b2:6e:03:61:da:99:da:9d:f4:64:8d:ef:7e:e8:44:1c:3b:72:8a:fa:9b:cd:e0:f9:b2:6a +-----BEGIN CERTIFICATE----- +MIIGFDCCA/ygAwIBAgIIG3Dp0v+ubHEwDQYJKoZIhvcNAQELBQAwUTELMAkGA1UE +BhMCRVMxQjBABgNVBAMMOUF1dG9yaWRhZCBkZSBDZXJ0aWZpY2FjaW9uIEZpcm1h +cHJvZmVzaW9uYWwgQ0lGIEE2MjYzNDA2ODAeFw0xNDA5MjMxNTIyMDdaFw0zNjA1 +MDUxNTIyMDdaMFExCzAJBgNVBAYTAkVTMUIwQAYDVQQDDDlBdXRvcmlkYWQgZGUg +Q2VydGlmaWNhY2lvbiBGaXJtYXByb2Zlc2lvbmFsIENJRiBBNjI2MzQwNjgwggIi +MA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQDKlmuO6vj78aI14H9M2uDDUtd9 +thDIAl6zQyrET2qyyhxdKJp4ERppWVevtSBC5IsP5t9bpgOSL/UR5GLXMnE42QQM +cas9UX4PB99jBVzpv5RvwSmCwLTaUbDBPLutN0pcyvFLNg4kq7/DhHf9qFD0sefG +L9ItWY16Ck6WaVICqjaY7Pz6FIMMNx/Jkjd/14Et5cS54D40/mf0PmbR0/RAz15i +NA9wBj4gGFrO93IbJWyTdBSTo3OxDqqHECNZXyAFGUftaI6SEspd/NYrspI8IM/h +X68gvqB2f3bl7BqGYTM+53u0P6APjqK5am+5hyZvQWyIplD9amML9ZMWGxmPsu2b +m8mQ9QEM3xk9Dz44I8kvjwzRAv4bVdZO0I08r0+k8/6vKtMFnXkIoctXMbScyJCy +Z/QYFpM6/EfY0XiWMR+6KwxfXZmtY4laJCB22N/9q06mIqqdXuYnin1oKaPnirja +EbsXLZmdEyRG98Xi2J+Of8ePdG1asuhy9azuJBCtLxTa/y2aRnFHvkLfuwHb9H/T +KI8xWVvTyQKmtFLKbpf7Q8UIJm+K9Lv9nyiqDdVF8xM6HdjAeI9BZzwelGSuewvF +6NkBiDkal4ZkQdU7hwxu+g/GvUgUvzlN1J5Bto+WHWOWk9mVBngxaJ43BjuAiUVh +OSPHG0SjFeUc+JIwuwIDAQABo4HvMIHsMB0GA1UdDgQWBBRlzeurNR4APn7VdMAc +tHNHDhpkLzASBgNVHRMBAf8ECDAGAQH/AgEBMIGmBgNVHSAEgZ4wgZswgZgGBFUd +IAAwgY8wLwYIKwYBBQUHAgEWI2h0dHA6Ly93d3cuZmlybWFwcm9mZXNpb25hbC5j +b20vY3BzMFwGCCsGAQUFBwICMFAeTgBQAGEAcwBlAG8AIABkAGUAIABsAGEAIABC +AG8AbgBhAG4AbwB2AGEAIAA0ADcAIABCAGEAcgBjAGUAbABvAG4AYQAgADAAOAAw +ADEANzAOBgNVHQ8BAf8EBAMCAQYwDQYJKoZIhvcNAQELBQADggIBAHSHKAIrdx9m +iWTtj3QuRhy7qPj4Cx2Dtjqn6EWKB7fgPiDL4QjbEwj4KKE1soCzC1HA01aajTNF +Sa9J8OA9B3pFE1r/yJfY0xgsfZb43aJlQ3CTkBW6kN/oGbDbLIpgD7dvlAceHabJ +hfa9NPhAeGIQcDq+fUs5gakQ1JZBu/hfHAsdCPKxsIl68veg4MSPi3i1O1ilI45P +Vf42O+AMt8oqMEEgtIDNrvx2ZnOorm7hfNoD6JQg5iKj0B+QXSBTFCZX2lSX3xZE +EAEeiGaPcjiT3SC3NL7X8e5jjkd5KAb881lFJWAiMxujX6i6KtoaPc1A6ozuBRWV +1aUsIC+nmCjuRfzxuIgALI9C2lHVnOUTaHFFQ4ueCyE8S1wF3BqfmI7avSKecs2t +CsvMo2ebKHTEm9caPARYpoKdrcd7b/+Alun4jWq9GJAd/0kakFI3ky88Al2CdgtR +5xbHV/g4+afNmyJU72OwFW1TZQNKXkqgsqeOSQBZONXH9IBk9W6VULgRfhVwOEqw +f9DEMnDAGf/JOC0ULGb0QkTmVXYbgBVX/8Cnp6o5qtjTcNAuuuuUavpfNIbnYrX9 +ivAwhZTJryQCL2/W3Wf+47BVTwSYT6RBVuKT0Gro1vP7ZeDOdcQxWQzugsgMYDNK +GbqEZycPvEJdvSRUDewdcAZfpLz6IHxV +-----END CERTIFICATE----- + +# Issuer: CN=vTrus ECC Root CA O=iTrusChina Co.,Ltd. +# Subject: CN=vTrus ECC Root CA O=iTrusChina Co.,Ltd. +# Label: "vTrus ECC Root CA" +# Serial: 630369271402956006249506845124680065938238527194 +# MD5 Fingerprint: de:4b:c1:f5:52:8c:9b:43:e1:3e:8f:55:54:17:8d:85 +# SHA1 Fingerprint: f6:9c:db:b0:fc:f6:02:13:b6:52:32:a6:a3:91:3f:16:70:da:c3:e1 +# SHA256 Fingerprint: 30:fb:ba:2c:32:23:8e:2a:98:54:7a:f9:79:31:e5:50:42:8b:9b:3f:1c:8e:eb:66:33:dc:fa:86:c5:b2:7d:d3 +-----BEGIN CERTIFICATE----- +MIICDzCCAZWgAwIBAgIUbmq8WapTvpg5Z6LSa6Q75m0c1towCgYIKoZIzj0EAwMw +RzELMAkGA1UEBhMCQ04xHDAaBgNVBAoTE2lUcnVzQ2hpbmEgQ28uLEx0ZC4xGjAY +BgNVBAMTEXZUcnVzIEVDQyBSb290IENBMB4XDTE4MDczMTA3MjY0NFoXDTQzMDcz +MTA3MjY0NFowRzELMAkGA1UEBhMCQ04xHDAaBgNVBAoTE2lUcnVzQ2hpbmEgQ28u +LEx0ZC4xGjAYBgNVBAMTEXZUcnVzIEVDQyBSb290IENBMHYwEAYHKoZIzj0CAQYF +K4EEACIDYgAEZVBKrox5lkqqHAjDo6LN/llWQXf9JpRCux3NCNtzslt188+cToL0 +v/hhJoVs1oVbcnDS/dtitN9Ti72xRFhiQgnH+n9bEOf+QP3A2MMrMudwpremIFUd +e4BdS49nTPEQo0IwQDAdBgNVHQ4EFgQUmDnNvtiyjPeyq+GtJK97fKHbH88wDwYD +VR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMCAQYwCgYIKoZIzj0EAwMDaAAwZQIw +V53dVvHH4+m4SVBrm2nDb+zDfSXkV5UTQJtS0zvzQBm8JsctBp61ezaf9SXUY2sA +AjEA6dPGnlaaKsyh2j/IZivTWJwghfqrkYpwcBE4YGQLYgmRWAD5Tfs0aNoJrSEG +GJTO +-----END CERTIFICATE----- + +# Issuer: CN=vTrus Root CA O=iTrusChina Co.,Ltd. +# Subject: CN=vTrus Root CA O=iTrusChina Co.,Ltd. +# Label: "vTrus Root CA" +# Serial: 387574501246983434957692974888460947164905180485 +# MD5 Fingerprint: b8:c9:37:df:fa:6b:31:84:64:c5:ea:11:6a:1b:75:fc +# SHA1 Fingerprint: 84:1a:69:fb:f5:cd:1a:25:34:13:3d:e3:f8:fc:b8:99:d0:c9:14:b7 +# SHA256 Fingerprint: 8a:71:de:65:59:33:6f:42:6c:26:e5:38:80:d0:0d:88:a1:8d:a4:c6:a9:1f:0d:cb:61:94:e2:06:c5:c9:63:87 +-----BEGIN CERTIFICATE----- +MIIFVjCCAz6gAwIBAgIUQ+NxE9izWRRdt86M/TX9b7wFjUUwDQYJKoZIhvcNAQEL +BQAwQzELMAkGA1UEBhMCQ04xHDAaBgNVBAoTE2lUcnVzQ2hpbmEgQ28uLEx0ZC4x +FjAUBgNVBAMTDXZUcnVzIFJvb3QgQ0EwHhcNMTgwNzMxMDcyNDA1WhcNNDMwNzMx +MDcyNDA1WjBDMQswCQYDVQQGEwJDTjEcMBoGA1UEChMTaVRydXNDaGluYSBDby4s +THRkLjEWMBQGA1UEAxMNdlRydXMgUm9vdCBDQTCCAiIwDQYJKoZIhvcNAQEBBQAD +ggIPADCCAgoCggIBAL1VfGHTuB0EYgWgrmy3cLRB6ksDXhA/kFocizuwZotsSKYc +IrrVQJLuM7IjWcmOvFjai57QGfIvWcaMY1q6n6MLsLOaXLoRuBLpDLvPbmyAhykU +AyyNJJrIZIO1aqwTLDPxn9wsYTwaP3BVm60AUn/PBLn+NvqcwBauYv6WTEN+VRS+ +GrPSbcKvdmaVayqwlHeFXgQPYh1jdfdr58tbmnDsPmcF8P4HCIDPKNsFxhQnL4Z9 +8Cfe/+Z+M0jnCx5Y0ScrUw5XSmXX+6KAYPxMvDVTAWqXcoKv8R1w6Jz1717CbMdH +flqUhSZNO7rrTOiwCcJlwp2dCZtOtZcFrPUGoPc2BX70kLJrxLT5ZOrpGgrIDajt +J8nU57O5q4IikCc9Kuh8kO+8T/3iCiSn3mUkpF3qwHYw03dQ+A0Em5Q2AXPKBlim +0zvc+gRGE1WKyURHuFE5Gi7oNOJ5y1lKCn+8pu8fA2dqWSslYpPZUxlmPCdiKYZN +pGvu/9ROutW04o5IWgAZCfEF2c6Rsffr6TlP9m8EQ5pV9T4FFL2/s1m02I4zhKOQ +UqqzApVg+QxMaPnu1RcN+HFXtSXkKe5lXa/R7jwXC1pDxaWG6iSe4gUH3DRCEpHW +OXSuTEGC2/KmSNGzm/MzqvOmwMVO9fSddmPmAsYiS8GVP1BkLFTltvA8Kc9XAgMB +AAGjQjBAMB0GA1UdDgQWBBRUYnBj8XWEQ1iO0RYgscasGrz2iTAPBgNVHRMBAf8E +BTADAQH/MA4GA1UdDwEB/wQEAwIBBjANBgkqhkiG9w0BAQsFAAOCAgEAKbqSSaet +8PFww+SX8J+pJdVrnjT+5hpk9jprUrIQeBqfTNqK2uwcN1LgQkv7bHbKJAs5EhWd +nxEt/Hlk3ODg9d3gV8mlsnZwUKT+twpw1aA08XXXTUm6EdGz2OyC/+sOxL9kLX1j +bhd47F18iMjrjld22VkE+rxSH0Ws8HqA7Oxvdq6R2xCOBNyS36D25q5J08FsEhvM +Kar5CKXiNxTKsbhm7xqC5PD48acWabfbqWE8n/Uxy+QARsIvdLGx14HuqCaVvIiv +TDUHKgLKeBRtRytAVunLKmChZwOgzoy8sHJnxDHO2zTlJQNgJXtxmOTAGytfdELS +S8VZCAeHvsXDf+eW2eHcKJfWjwXj9ZtOyh1QRwVTsMo554WgicEFOwE30z9J4nfr +I8iIZjs9OXYhRvHsXyO466JmdXTBQPfYaJqT4i2pLr0cox7IdMakLXogqzu4sEb9 +b91fUlV1YvCXoHzXOP0l382gmxDPi7g4Xl7FtKYCNqEeXxzP4padKar9mK5S4fNB +UvupLnKWnyfjqnN9+BojZns7q2WwMgFLFT49ok8MKzWixtlnEjUwzXYuFrOZnk1P +Ti07NEPhmg4NpGaXutIcSkwsKouLgU9xGqndXHt7CMUADTdA43x7VF8vhV929ven +sBxXVsFy6K2ir40zSbofitzmdHxghm+Hl3s= +-----END CERTIFICATE----- + +# Issuer: CN=ISRG Root X2 O=Internet Security Research Group +# Subject: CN=ISRG Root X2 O=Internet Security Research Group +# Label: "ISRG Root X2" +# Serial: 87493402998870891108772069816698636114 +# MD5 Fingerprint: d3:9e:c4:1e:23:3c:a6:df:cf:a3:7e:6d:e0:14:e6:e5 +# SHA1 Fingerprint: bd:b1:b9:3c:d5:97:8d:45:c6:26:14:55:f8:db:95:c7:5a:d1:53:af +# SHA256 Fingerprint: 69:72:9b:8e:15:a8:6e:fc:17:7a:57:af:b7:17:1d:fc:64:ad:d2:8c:2f:ca:8c:f1:50:7e:34:45:3c:cb:14:70 +-----BEGIN CERTIFICATE----- +MIICGzCCAaGgAwIBAgIQQdKd0XLq7qeAwSxs6S+HUjAKBggqhkjOPQQDAzBPMQsw +CQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJuZXQgU2VjdXJpdHkgUmVzZWFyY2gg +R3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBYMjAeFw0yMDA5MDQwMDAwMDBaFw00 +MDA5MTcxNjAwMDBaME8xCzAJBgNVBAYTAlVTMSkwJwYDVQQKEyBJbnRlcm5ldCBT +ZWN1cml0eSBSZXNlYXJjaCBHcm91cDEVMBMGA1UEAxMMSVNSRyBSb290IFgyMHYw +EAYHKoZIzj0CAQYFK4EEACIDYgAEzZvVn4CDCuwJSvMWSj5cz3es3mcFDR0HttwW ++1qLFNvicWDEukWVEYmO6gbf9yoWHKS5xcUy4APgHoIYOIvXRdgKam7mAHf7AlF9 +ItgKbppbd9/w+kHsOdx1ymgHDB/qo0IwQDAOBgNVHQ8BAf8EBAMCAQYwDwYDVR0T +AQH/BAUwAwEB/zAdBgNVHQ4EFgQUfEKWrt5LSDv6kviejM9ti6lyN5UwCgYIKoZI +zj0EAwMDaAAwZQIwe3lORlCEwkSHRhtFcP9Ymd70/aTSVaYgLXTWNLxBo1BfASdW +tL4ndQavEi51mI38AjEAi/V3bNTIZargCyzuFJ0nN6T5U6VR5CmD1/iQMVtCnwr1 +/q4AaOeMSQ+2b1tbFfLn +-----END CERTIFICATE----- + +# Issuer: CN=HiPKI Root CA - G1 O=Chunghwa Telecom Co., Ltd. +# Subject: CN=HiPKI Root CA - G1 O=Chunghwa Telecom Co., Ltd. +# Label: "HiPKI Root CA - G1" +# Serial: 60966262342023497858655262305426234976 +# MD5 Fingerprint: 69:45:df:16:65:4b:e8:68:9a:8f:76:5f:ff:80:9e:d3 +# SHA1 Fingerprint: 6a:92:e4:a8:ee:1b:ec:96:45:37:e3:29:57:49:cd:96:e3:e5:d2:60 +# SHA256 Fingerprint: f0:15:ce:3c:c2:39:bf:ef:06:4b:e9:f1:d2:c4:17:e1:a0:26:4a:0a:94:be:1f:0c:8d:12:18:64:eb:69:49:cc +-----BEGIN CERTIFICATE----- +MIIFajCCA1KgAwIBAgIQLd2szmKXlKFD6LDNdmpeYDANBgkqhkiG9w0BAQsFADBP +MQswCQYDVQQGEwJUVzEjMCEGA1UECgwaQ2h1bmdod2EgVGVsZWNvbSBDby4sIEx0 +ZC4xGzAZBgNVBAMMEkhpUEtJIFJvb3QgQ0EgLSBHMTAeFw0xOTAyMjIwOTQ2MDRa +Fw0zNzEyMzExNTU5NTlaME8xCzAJBgNVBAYTAlRXMSMwIQYDVQQKDBpDaHVuZ2h3 +YSBUZWxlY29tIENvLiwgTHRkLjEbMBkGA1UEAwwSSGlQS0kgUm9vdCBDQSAtIEcx +MIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEA9B5/UnMyDHPkvRN0o9Qw +qNCuS9i233VHZvR85zkEHmpwINJaR3JnVfSl6J3VHiGh8Ge6zCFovkRTv4354twv +Vcg3Px+kwJyz5HdcoEb+d/oaoDjq7Zpy3iu9lFc6uux55199QmQ5eiY29yTw1S+6 +lZgRZq2XNdZ1AYDgr/SEYYwNHl98h5ZeQa/rh+r4XfEuiAU+TCK72h8q3VJGZDnz +Qs7ZngyzsHeXZJzA9KMuH5UHsBffMNsAGJZMoYFL3QRtU6M9/Aes1MU3guvklQgZ +KILSQjqj2FPseYlgSGDIcpJQ3AOPgz+yQlda22rpEZfdhSi8MEyr48KxRURHH+CK +FgeW0iEPU8DtqX7UTuybCeyvQqww1r/REEXgphaypcXTT3OUM3ECoWqj1jOXTyFj +HluP2cFeRXF3D4FdXyGarYPM+l7WjSNfGz1BryB1ZlpK9p/7qxj3ccC2HTHsOyDr +y+K49a6SsvfhhEvyovKTmiKe0xRvNlS9H15ZFblzqMF8b3ti6RZsR1pl8w4Rm0bZ +/W3c1pzAtH2lsN0/Vm+h+fbkEkj9Bn8SV7apI09bA8PgcSojt/ewsTu8mL3WmKgM +a/aOEmem8rJY5AIJEzypuxC00jBF8ez3ABHfZfjcK0NVvxaXxA/VLGGEqnKG/uY6 +fsI/fe78LxQ+5oXdUG+3Se0CAwEAAaNCMEAwDwYDVR0TAQH/BAUwAwEB/zAdBgNV +HQ4EFgQU8ncX+l6o/vY9cdVouslGDDjYr7AwDgYDVR0PAQH/BAQDAgGGMA0GCSqG +SIb3DQEBCwUAA4ICAQBQUfB13HAE4/+qddRxosuej6ip0691x1TPOhwEmSKsxBHi +7zNKpiMdDg1H2DfHb680f0+BazVP6XKlMeJ45/dOlBhbQH3PayFUhuaVevvGyuqc +SE5XCV0vrPSltJczWNWseanMX/mF+lLFjfiRFOs6DRfQUsJ748JzjkZ4Bjgs6Fza +ZsT0pPBWGTMpWmWSBUdGSquEwx4noR8RkpkndZMPvDY7l1ePJlsMu5wP1G4wB9Tc +XzZoZjmDlicmisjEOf6aIW/Vcobpf2Lll07QJNBAsNB1CI69aO4I1258EHBGG3zg +iLKecoaZAeO/n0kZtCW+VmWuF2PlHt/o/0elv+EmBYTksMCv5wiZqAxeJoBF1Pho +L5aPruJKHJwWDBNvOIf2u8g0X5IDUXlwpt/L9ZlNec1OvFefQ05rLisY+GpzjLrF +Ne85akEez3GoorKGB1s6yeHvP2UEgEcyRHCVTjFnanRbEEV16rCf0OY1/k6fi8wr +kkVbbiVghUbN0aqwdmaTd5a+g744tiROJgvM7XpWGuDpWsZkrUx6AEhEL7lAuxM+ +vhV4nYWBSipX3tUZQ9rbyltHhoMLP7YNdnhzeSJesYAfz77RP1YQmCuVh6EfnWQU +YDksswBVLuT1sw5XxJFBAJw/6KXf6vb/yPCtbVKoF6ubYfwSUTXkJf2vqmqGOQ== +-----END CERTIFICATE----- + +# Issuer: CN=GlobalSign O=GlobalSign OU=GlobalSign ECC Root CA - R4 +# Subject: CN=GlobalSign O=GlobalSign OU=GlobalSign ECC Root CA - R4 +# Label: "GlobalSign ECC Root CA - R4" +# Serial: 159662223612894884239637590694 +# MD5 Fingerprint: 26:29:f8:6d:e1:88:bf:a2:65:7f:aa:c4:cd:0f:7f:fc +# SHA1 Fingerprint: 6b:a0:b0:98:e1:71:ef:5a:ad:fe:48:15:80:77:10:f4:bd:6f:0b:28 +# SHA256 Fingerprint: b0:85:d7:0b:96:4f:19:1a:73:e4:af:0d:54:ae:7a:0e:07:aa:fd:af:9b:71:dd:08:62:13:8a:b7:32:5a:24:a2 +-----BEGIN CERTIFICATE----- +MIIB3DCCAYOgAwIBAgINAgPlfvU/k/2lCSGypjAKBggqhkjOPQQDAjBQMSQwIgYD +VQQLExtHbG9iYWxTaWduIEVDQyBSb290IENBIC0gUjQxEzARBgNVBAoTCkdsb2Jh +bFNpZ24xEzARBgNVBAMTCkdsb2JhbFNpZ24wHhcNMTIxMTEzMDAwMDAwWhcNMzgw +MTE5MDMxNDA3WjBQMSQwIgYDVQQLExtHbG9iYWxTaWduIEVDQyBSb290IENBIC0g +UjQxEzARBgNVBAoTCkdsb2JhbFNpZ24xEzARBgNVBAMTCkdsb2JhbFNpZ24wWTAT +BgcqhkjOPQIBBggqhkjOPQMBBwNCAAS4xnnTj2wlDp8uORkcA6SumuU5BwkWymOx +uYb4ilfBV85C+nOh92VC/x7BALJucw7/xyHlGKSq2XE/qNS5zowdo0IwQDAOBgNV +HQ8BAf8EBAMCAYYwDwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4EFgQUVLB7rUW44kB/ ++wpu+74zyTyjhNUwCgYIKoZIzj0EAwIDRwAwRAIgIk90crlgr/HmnKAWBVBfw147 +bmF0774BxL4YSFlhgjICICadVGNA3jdgUM/I2O2dgq43mLyjj0xMqTQrbO/7lZsm +-----END CERTIFICATE----- + +# Issuer: CN=GTS Root R1 O=Google Trust Services LLC +# Subject: CN=GTS Root R1 O=Google Trust Services LLC +# Label: "GTS Root R1" +# Serial: 159662320309726417404178440727 +# MD5 Fingerprint: 05:fe:d0:bf:71:a8:a3:76:63:da:01:e0:d8:52:dc:40 +# SHA1 Fingerprint: e5:8c:1c:c4:91:3b:38:63:4b:e9:10:6e:e3:ad:8e:6b:9d:d9:81:4a +# SHA256 Fingerprint: d9:47:43:2a:bd:e7:b7:fa:90:fc:2e:6b:59:10:1b:12:80:e0:e1:c7:e4:e4:0f:a3:c6:88:7f:ff:57:a7:f4:cf +-----BEGIN CERTIFICATE----- +MIIFVzCCAz+gAwIBAgINAgPlk28xsBNJiGuiFzANBgkqhkiG9w0BAQwFADBHMQsw +CQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExMQzEU +MBIGA1UEAxMLR1RTIFJvb3QgUjEwHhcNMTYwNjIyMDAwMDAwWhcNMzYwNjIyMDAw +MDAwWjBHMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZp +Y2VzIExMQzEUMBIGA1UEAxMLR1RTIFJvb3QgUjEwggIiMA0GCSqGSIb3DQEBAQUA +A4ICDwAwggIKAoICAQC2EQKLHuOhd5s73L+UPreVp0A8of2C+X0yBoJx9vaMf/vo +27xqLpeXo4xL+Sv2sfnOhB2x+cWX3u+58qPpvBKJXqeqUqv4IyfLpLGcY9vXmX7w +Cl7raKb0xlpHDU0QM+NOsROjyBhsS+z8CZDfnWQpJSMHobTSPS5g4M/SCYe7zUjw +TcLCeoiKu7rPWRnWr4+wB7CeMfGCwcDfLqZtbBkOtdh+JhpFAz2weaSUKK0Pfybl +qAj+lug8aJRT7oM6iCsVlgmy4HqMLnXWnOunVmSPlk9orj2XwoSPwLxAwAtcvfaH +szVsrBhQf4TgTM2S0yDpM7xSma8ytSmzJSq0SPly4cpk9+aCEI3oncKKiPo4Zor8 +Y/kB+Xj9e1x3+naH+uzfsQ55lVe0vSbv1gHR6xYKu44LtcXFilWr06zqkUspzBmk +MiVOKvFlRNACzqrOSbTqn3yDsEB750Orp2yjj32JgfpMpf/VjsPOS+C12LOORc92 +wO1AK/1TD7Cn1TsNsYqiA94xrcx36m97PtbfkSIS5r762DL8EGMUUXLeXdYWk70p +aDPvOmbsB4om3xPXV2V4J95eSRQAogB/mqghtqmxlbCluQ0WEdrHbEg8QOB+DVrN +VjzRlwW5y0vtOUucxD/SVRNuJLDWcfr0wbrM7Rv1/oFB2ACYPTrIrnqYNxgFlQID +AQABo0IwQDAOBgNVHQ8BAf8EBAMCAYYwDwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4E +FgQU5K8rJnEaK0gnhS9SZizv8IkTcT4wDQYJKoZIhvcNAQEMBQADggIBAJ+qQibb +C5u+/x6Wki4+omVKapi6Ist9wTrYggoGxval3sBOh2Z5ofmmWJyq+bXmYOfg6LEe +QkEzCzc9zolwFcq1JKjPa7XSQCGYzyI0zzvFIoTgxQ6KfF2I5DUkzps+GlQebtuy +h6f88/qBVRRiClmpIgUxPoLW7ttXNLwzldMXG+gnoot7TiYaelpkttGsN/H9oPM4 +7HLwEXWdyzRSjeZ2axfG34arJ45JK3VmgRAhpuo+9K4l/3wV3s6MJT/KYnAK9y8J +ZgfIPxz88NtFMN9iiMG1D53Dn0reWVlHxYciNuaCp+0KueIHoI17eko8cdLiA6Ef +MgfdG+RCzgwARWGAtQsgWSl4vflVy2PFPEz0tv/bal8xa5meLMFrUKTX5hgUvYU/ +Z6tGn6D/Qqc6f1zLXbBwHSs09dR2CQzreExZBfMzQsNhFRAbd03OIozUhfJFfbdT +6u9AWpQKXCBfTkBdYiJ23//OYb2MI3jSNwLgjt7RETeJ9r/tSQdirpLsQBqvFAnZ +0E6yove+7u7Y/9waLd64NnHi/Hm3lCXRSHNboTXns5lndcEZOitHTtNCjv0xyBZm +2tIMPNuzjsmhDYAPexZ3FL//2wmUspO8IFgV6dtxQ/PeEMMA3KgqlbbC1j+Qa3bb +bP6MvPJwNQzcmRk13NfIRmPVNnGuV/u3gm3c +-----END CERTIFICATE----- + +# Issuer: CN=GTS Root R2 O=Google Trust Services LLC +# Subject: CN=GTS Root R2 O=Google Trust Services LLC +# Label: "GTS Root R2" +# Serial: 159662449406622349769042896298 +# MD5 Fingerprint: 1e:39:c0:53:e6:1e:29:82:0b:ca:52:55:36:5d:57:dc +# SHA1 Fingerprint: 9a:44:49:76:32:db:de:fa:d0:bc:fb:5a:7b:17:bd:9e:56:09:24:94 +# SHA256 Fingerprint: 8d:25:cd:97:22:9d:bf:70:35:6b:da:4e:b3:cc:73:40:31:e2:4c:f0:0f:af:cf:d3:2d:c7:6e:b5:84:1c:7e:a8 +-----BEGIN CERTIFICATE----- +MIIFVzCCAz+gAwIBAgINAgPlrsWNBCUaqxElqjANBgkqhkiG9w0BAQwFADBHMQsw +CQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExMQzEU +MBIGA1UEAxMLR1RTIFJvb3QgUjIwHhcNMTYwNjIyMDAwMDAwWhcNMzYwNjIyMDAw +MDAwWjBHMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZp +Y2VzIExMQzEUMBIGA1UEAxMLR1RTIFJvb3QgUjIwggIiMA0GCSqGSIb3DQEBAQUA +A4ICDwAwggIKAoICAQDO3v2m++zsFDQ8BwZabFn3GTXd98GdVarTzTukk3LvCvpt +nfbwhYBboUhSnznFt+4orO/LdmgUud+tAWyZH8QiHZ/+cnfgLFuv5AS/T3KgGjSY +6Dlo7JUle3ah5mm5hRm9iYz+re026nO8/4Piy33B0s5Ks40FnotJk9/BW9BuXvAu +MC6C/Pq8tBcKSOWIm8Wba96wyrQD8Nr0kLhlZPdcTK3ofmZemde4wj7I0BOdre7k +RXuJVfeKH2JShBKzwkCX44ofR5GmdFrS+LFjKBC4swm4VndAoiaYecb+3yXuPuWg +f9RhD1FLPD+M2uFwdNjCaKH5wQzpoeJ/u1U8dgbuak7MkogwTZq9TwtImoS1mKPV ++3PBV2HdKFZ1E66HjucMUQkQdYhMvI35ezzUIkgfKtzra7tEscszcTJGr61K8Yzo +dDqs5xoic4DSMPclQsciOzsSrZYuxsN2B6ogtzVJV+mSSeh2FnIxZyuWfoqjx5RW +Ir9qS34BIbIjMt/kmkRtWVtd9QCgHJvGeJeNkP+byKq0rxFROV7Z+2et1VsRnTKa +G73VululycslaVNVJ1zgyjbLiGH7HrfQy+4W+9OmTN6SpdTi3/UGVN4unUu0kzCq +gc7dGtxRcw1PcOnlthYhGXmy5okLdWTK1au8CcEYof/UVKGFPP0UJAOyh9OktwID +AQABo0IwQDAOBgNVHQ8BAf8EBAMCAYYwDwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4E +FgQUu//KjiOfT5nK2+JopqUVJxce2Q4wDQYJKoZIhvcNAQEMBQADggIBAB/Kzt3H +vqGf2SdMC9wXmBFqiN495nFWcrKeGk6c1SuYJF2ba3uwM4IJvd8lRuqYnrYb/oM8 +0mJhwQTtzuDFycgTE1XnqGOtjHsB/ncw4c5omwX4Eu55MaBBRTUoCnGkJE+M3DyC +B19m3H0Q/gxhswWV7uGugQ+o+MePTagjAiZrHYNSVc61LwDKgEDg4XSsYPWHgJ2u +NmSRXbBoGOqKYcl3qJfEycel/FVL8/B/uWU9J2jQzGv6U53hkRrJXRqWbTKH7QMg +yALOWr7Z6v2yTcQvG99fevX4i8buMTolUVVnjWQye+mew4K6Ki3pHrTgSAai/Gev +HyICc/sgCq+dVEuhzf9gR7A/Xe8bVr2XIZYtCtFenTgCR2y59PYjJbigapordwj6 +xLEokCZYCDzifqrXPW+6MYgKBesntaFJ7qBFVHvmJ2WZICGoo7z7GJa7Um8M7YNR +TOlZ4iBgxcJlkoKM8xAfDoqXvneCbT+PHV28SSe9zE8P4c52hgQjxcCMElv924Sg +JPFI/2R80L5cFtHvma3AH/vLrrw4IgYmZNralw4/KBVEqE8AyvCazM90arQ+POuV +7LXTWtiBmelDGDfrs7vRWGJB82bSj6p4lVQgw1oudCvV0b4YacCs1aTPObpRhANl +6WLAYv7YTVWW4tAR+kg0Eeye7QUd5MjWHYbL +-----END CERTIFICATE----- + +# Issuer: CN=GTS Root R3 O=Google Trust Services LLC +# Subject: CN=GTS Root R3 O=Google Trust Services LLC +# Label: "GTS Root R3" +# Serial: 159662495401136852707857743206 +# MD5 Fingerprint: 3e:e7:9d:58:02:94:46:51:94:e5:e0:22:4a:8b:e7:73 +# SHA1 Fingerprint: ed:e5:71:80:2b:c8:92:b9:5b:83:3c:d2:32:68:3f:09:cd:a0:1e:46 +# SHA256 Fingerprint: 34:d8:a7:3e:e2:08:d9:bc:db:0d:95:65:20:93:4b:4e:40:e6:94:82:59:6e:8b:6f:73:c8:42:6b:01:0a:6f:48 +-----BEGIN CERTIFICATE----- +MIICCTCCAY6gAwIBAgINAgPluILrIPglJ209ZjAKBggqhkjOPQQDAzBHMQswCQYD +VQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExMQzEUMBIG +A1UEAxMLR1RTIFJvb3QgUjMwHhcNMTYwNjIyMDAwMDAwWhcNMzYwNjIyMDAwMDAw +WjBHMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2Vz +IExMQzEUMBIGA1UEAxMLR1RTIFJvb3QgUjMwdjAQBgcqhkjOPQIBBgUrgQQAIgNi +AAQfTzOHMymKoYTey8chWEGJ6ladK0uFxh1MJ7x/JlFyb+Kf1qPKzEUURout736G +jOyxfi//qXGdGIRFBEFVbivqJn+7kAHjSxm65FSWRQmx1WyRRK2EE46ajA2ADDL2 +4CejQjBAMA4GA1UdDwEB/wQEAwIBhjAPBgNVHRMBAf8EBTADAQH/MB0GA1UdDgQW +BBTB8Sa6oC2uhYHP0/EqEr24Cmf9vDAKBggqhkjOPQQDAwNpADBmAjEA9uEglRR7 +VKOQFhG/hMjqb2sXnh5GmCCbn9MN2azTL818+FsuVbu/3ZL3pAzcMeGiAjEA/Jdm +ZuVDFhOD3cffL74UOO0BzrEXGhF16b0DjyZ+hOXJYKaV11RZt+cRLInUue4X +-----END CERTIFICATE----- + +# Issuer: CN=GTS Root R4 O=Google Trust Services LLC +# Subject: CN=GTS Root R4 O=Google Trust Services LLC +# Label: "GTS Root R4" +# Serial: 159662532700760215368942768210 +# MD5 Fingerprint: 43:96:83:77:19:4d:76:b3:9d:65:52:e4:1d:22:a5:e8 +# SHA1 Fingerprint: 77:d3:03:67:b5:e0:0c:15:f6:0c:38:61:df:7c:e1:3b:92:46:4d:47 +# SHA256 Fingerprint: 34:9d:fa:40:58:c5:e2:63:12:3b:39:8a:e7:95:57:3c:4e:13:13:c8:3f:e6:8f:93:55:6c:d5:e8:03:1b:3c:7d +-----BEGIN CERTIFICATE----- +MIICCTCCAY6gAwIBAgINAgPlwGjvYxqccpBQUjAKBggqhkjOPQQDAzBHMQswCQYD +VQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExMQzEUMBIG +A1UEAxMLR1RTIFJvb3QgUjQwHhcNMTYwNjIyMDAwMDAwWhcNMzYwNjIyMDAwMDAw +WjBHMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2Vz +IExMQzEUMBIGA1UEAxMLR1RTIFJvb3QgUjQwdjAQBgcqhkjOPQIBBgUrgQQAIgNi +AATzdHOnaItgrkO4NcWBMHtLSZ37wWHO5t5GvWvVYRg1rkDdc/eJkTBa6zzuhXyi +QHY7qca4R9gq55KRanPpsXI5nymfopjTX15YhmUPoYRlBtHci8nHc8iMai/lxKvR +HYqjQjBAMA4GA1UdDwEB/wQEAwIBhjAPBgNVHRMBAf8EBTADAQH/MB0GA1UdDgQW +BBSATNbrdP9JNqPV2Py1PsVq8JQdjDAKBggqhkjOPQQDAwNpADBmAjEA6ED/g94D +9J+uHXqnLrmvT/aDHQ4thQEd0dlq7A/Cr8deVl5c1RxYIigL9zC2L7F8AjEA8GE8 +p/SgguMh1YQdc4acLa/KNJvxn7kjNuK8YAOdgLOaVsjh4rsUecrNIdSUtUlD +-----END CERTIFICATE----- + +# Issuer: CN=Telia Root CA v2 O=Telia Finland Oyj +# Subject: CN=Telia Root CA v2 O=Telia Finland Oyj +# Label: "Telia Root CA v2" +# Serial: 7288924052977061235122729490515358 +# MD5 Fingerprint: 0e:8f:ac:aa:82:df:85:b1:f4:dc:10:1c:fc:99:d9:48 +# SHA1 Fingerprint: b9:99:cd:d1:73:50:8a:c4:47:05:08:9c:8c:88:fb:be:a0:2b:40:cd +# SHA256 Fingerprint: 24:2b:69:74:2f:cb:1e:5b:2a:bf:98:89:8b:94:57:21:87:54:4e:5b:4d:99:11:78:65:73:62:1f:6a:74:b8:2c +-----BEGIN CERTIFICATE----- +MIIFdDCCA1ygAwIBAgIPAWdfJ9b+euPkrL4JWwWeMA0GCSqGSIb3DQEBCwUAMEQx +CzAJBgNVBAYTAkZJMRowGAYDVQQKDBFUZWxpYSBGaW5sYW5kIE95ajEZMBcGA1UE +AwwQVGVsaWEgUm9vdCBDQSB2MjAeFw0xODExMjkxMTU1NTRaFw00MzExMjkxMTU1 +NTRaMEQxCzAJBgNVBAYTAkZJMRowGAYDVQQKDBFUZWxpYSBGaW5sYW5kIE95ajEZ +MBcGA1UEAwwQVGVsaWEgUm9vdCBDQSB2MjCCAiIwDQYJKoZIhvcNAQEBBQADggIP +ADCCAgoCggIBALLQPwe84nvQa5n44ndp586dpAO8gm2h/oFlH0wnrI4AuhZ76zBq +AMCzdGh+sq/H1WKzej9Qyow2RCRj0jbpDIX2Q3bVTKFgcmfiKDOlyzG4OiIjNLh9 +vVYiQJ3q9HsDrWj8soFPmNB06o3lfc1jw6P23pLCWBnglrvFxKk9pXSW/q/5iaq9 +lRdU2HhE8Qx3FZLgmEKnpNaqIJLNwaCzlrI6hEKNfdWV5Nbb6WLEWLN5xYzTNTOD +n3WhUidhOPFZPY5Q4L15POdslv5e2QJltI5c0BE0312/UqeBAMN/mUWZFdUXyApT +7GPzmX3MaRKGwhfwAZ6/hLzRUssbkmbOpFPlob/E2wnW5olWK8jjfN7j/4nlNW4o +6GwLI1GpJQXrSPjdscr6bAhR77cYbETKJuFzxokGgeWKrLDiKca5JLNrRBH0pUPC +TEPlcDaMtjNXepUugqD0XBCzYYP2AgWGLnwtbNwDRm41k9V6lS/eINhbfpSQBGq6 +WT0EBXWdN6IOLj3rwaRSg/7Qa9RmjtzG6RJOHSpXqhC8fF6CfaamyfItufUXJ63R +DolUK5X6wK0dmBR4M0KGCqlztft0DbcbMBnEWg4cJ7faGND/isgFuvGqHKI3t+ZI +pEYslOqodmJHixBTB0hXbOKSTbauBcvcwUpej6w9GU7C7WB1K9vBykLVAgMBAAGj +YzBhMB8GA1UdIwQYMBaAFHKs5DN5qkWH9v2sHZ7Wxy+G2CQ5MB0GA1UdDgQWBBRy +rOQzeapFh/b9rB2e1scvhtgkOTAOBgNVHQ8BAf8EBAMCAQYwDwYDVR0TAQH/BAUw +AwEB/zANBgkqhkiG9w0BAQsFAAOCAgEAoDtZpwmUPjaE0n4vOaWWl/oRrfxn83EJ +8rKJhGdEr7nv7ZbsnGTbMjBvZ5qsfl+yqwE2foH65IRe0qw24GtixX1LDoJt0nZi +0f6X+J8wfBj5tFJ3gh1229MdqfDBmgC9bXXYfef6xzijnHDoRnkDry5023X4blMM +A8iZGok1GTzTyVR8qPAs5m4HeW9q4ebqkYJpCh3DflminmtGFZhb069GHWLIzoBS +SRE/yQQSwxN8PzuKlts8oB4KtItUsiRnDe+Cy748fdHif64W1lZYudogsYMVoe+K +TTJvQS8TUoKU1xrBeKJR3Stwbbca+few4GeXVtt8YVMJAygCQMez2P2ccGrGKMOF +6eLtGpOg3kuYooQ+BXcBlj37tCAPnHICehIv1aO6UXivKitEZU61/Qrowc15h2Er +3oBXRb9n8ZuRXqWk7FlIEA04x7D6w0RtBPV4UBySllva9bguulvP5fBqnUsvWHMt +Ty3EHD70sz+rFQ47GUGKpMFXEmZxTPpT41frYpUJnlTd0cI8Vzy9OK2YZLe4A5pT +VmBds9hCG1xLEooc6+t9xnppxyd/pPiL8uSUZodL6ZQHCRJ5irLrdATczvREWeAW +ysUsWNc8e89ihmpQfTU2Zqf7N+cox9jQraVplI/owd8k+BsHMYeB2F326CjYSlKA +rBPuUBQemMc= +-----END CERTIFICATE----- + +# Issuer: CN=D-TRUST BR Root CA 1 2020 O=D-Trust GmbH +# Subject: CN=D-TRUST BR Root CA 1 2020 O=D-Trust GmbH +# Label: "D-TRUST BR Root CA 1 2020" +# Serial: 165870826978392376648679885835942448534 +# MD5 Fingerprint: b5:aa:4b:d5:ed:f7:e3:55:2e:8f:72:0a:f3:75:b8:ed +# SHA1 Fingerprint: 1f:5b:98:f0:e3:b5:f7:74:3c:ed:e6:b0:36:7d:32:cd:f4:09:41:67 +# SHA256 Fingerprint: e5:9a:aa:81:60:09:c2:2b:ff:5b:25:ba:d3:7d:f3:06:f0:49:79:7c:1f:81:d8:5a:b0:89:e6:57:bd:8f:00:44 +-----BEGIN CERTIFICATE----- +MIIC2zCCAmCgAwIBAgIQfMmPK4TX3+oPyWWa00tNljAKBggqhkjOPQQDAzBIMQsw +CQYDVQQGEwJERTEVMBMGA1UEChMMRC1UcnVzdCBHbWJIMSIwIAYDVQQDExlELVRS +VVNUIEJSIFJvb3QgQ0EgMSAyMDIwMB4XDTIwMDIxMTA5NDUwMFoXDTM1MDIxMTA5 +NDQ1OVowSDELMAkGA1UEBhMCREUxFTATBgNVBAoTDEQtVHJ1c3QgR21iSDEiMCAG +A1UEAxMZRC1UUlVTVCBCUiBSb290IENBIDEgMjAyMDB2MBAGByqGSM49AgEGBSuB +BAAiA2IABMbLxyjR+4T1mu9CFCDhQ2tuda38KwOE1HaTJddZO0Flax7mNCq7dPYS +zuht56vkPE4/RAiLzRZxy7+SmfSk1zxQVFKQhYN4lGdnoxwJGT11NIXe7WB9xwy0 +QVK5buXuQqOCAQ0wggEJMA8GA1UdEwEB/wQFMAMBAf8wHQYDVR0OBBYEFHOREKv/ +VbNafAkl1bK6CKBrqx9tMA4GA1UdDwEB/wQEAwIBBjCBxgYDVR0fBIG+MIG7MD6g +PKA6hjhodHRwOi8vY3JsLmQtdHJ1c3QubmV0L2NybC9kLXRydXN0X2JyX3Jvb3Rf +Y2FfMV8yMDIwLmNybDB5oHegdYZzbGRhcDovL2RpcmVjdG9yeS5kLXRydXN0Lm5l +dC9DTj1ELVRSVVNUJTIwQlIlMjBSb290JTIwQ0ElMjAxJTIwMjAyMCxPPUQtVHJ1 +c3QlMjBHbWJILEM9REU/Y2VydGlmaWNhdGVyZXZvY2F0aW9ubGlzdDAKBggqhkjO +PQQDAwNpADBmAjEAlJAtE/rhY/hhY+ithXhUkZy4kzg+GkHaQBZTQgjKL47xPoFW +wKrY7RjEsK70PvomAjEA8yjixtsrmfu3Ubgko6SUeho/5jbiA1czijDLgsfWFBHV +dWNbFJWcHwHP2NVypw87 +-----END CERTIFICATE----- + +# Issuer: CN=D-TRUST EV Root CA 1 2020 O=D-Trust GmbH +# Subject: CN=D-TRUST EV Root CA 1 2020 O=D-Trust GmbH +# Label: "D-TRUST EV Root CA 1 2020" +# Serial: 126288379621884218666039612629459926992 +# MD5 Fingerprint: 8c:2d:9d:70:9f:48:99:11:06:11:fb:e9:cb:30:c0:6e +# SHA1 Fingerprint: 61:db:8c:21:59:69:03:90:d8:7c:9c:12:86:54:cf:9d:3d:f4:dd:07 +# SHA256 Fingerprint: 08:17:0d:1a:a3:64:53:90:1a:2f:95:92:45:e3:47:db:0c:8d:37:ab:aa:bc:56:b8:1a:a1:00:dc:95:89:70:db +-----BEGIN CERTIFICATE----- +MIIC2zCCAmCgAwIBAgIQXwJB13qHfEwDo6yWjfv/0DAKBggqhkjOPQQDAzBIMQsw +CQYDVQQGEwJERTEVMBMGA1UEChMMRC1UcnVzdCBHbWJIMSIwIAYDVQQDExlELVRS +VVNUIEVWIFJvb3QgQ0EgMSAyMDIwMB4XDTIwMDIxMTEwMDAwMFoXDTM1MDIxMTA5 +NTk1OVowSDELMAkGA1UEBhMCREUxFTATBgNVBAoTDEQtVHJ1c3QgR21iSDEiMCAG +A1UEAxMZRC1UUlVTVCBFViBSb290IENBIDEgMjAyMDB2MBAGByqGSM49AgEGBSuB +BAAiA2IABPEL3YZDIBnfl4XoIkqbz52Yv7QFJsnL46bSj8WeeHsxiamJrSc8ZRCC +/N/DnU7wMyPE0jL1HLDfMxddxfCxivnvubcUyilKwg+pf3VlSSowZ/Rk99Yad9rD +wpdhQntJraOCAQ0wggEJMA8GA1UdEwEB/wQFMAMBAf8wHQYDVR0OBBYEFH8QARY3 +OqQo5FD4pPfsazK2/umLMA4GA1UdDwEB/wQEAwIBBjCBxgYDVR0fBIG+MIG7MD6g +PKA6hjhodHRwOi8vY3JsLmQtdHJ1c3QubmV0L2NybC9kLXRydXN0X2V2X3Jvb3Rf +Y2FfMV8yMDIwLmNybDB5oHegdYZzbGRhcDovL2RpcmVjdG9yeS5kLXRydXN0Lm5l +dC9DTj1ELVRSVVNUJTIwRVYlMjBSb290JTIwQ0ElMjAxJTIwMjAyMCxPPUQtVHJ1 +c3QlMjBHbWJILEM9REU/Y2VydGlmaWNhdGVyZXZvY2F0aW9ubGlzdDAKBggqhkjO +PQQDAwNpADBmAjEAyjzGKnXCXnViOTYAYFqLwZOZzNnbQTs7h5kXO9XMT8oi96CA +y/m0sRtW9XLS/BnRAjEAkfcwkz8QRitxpNA7RJvAKQIFskF3UfN5Wp6OFKBOQtJb +gfM0agPnIjhQW+0ZT0MW +-----END CERTIFICATE----- + +# Issuer: CN=DigiCert TLS ECC P384 Root G5 O=DigiCert, Inc. +# Subject: CN=DigiCert TLS ECC P384 Root G5 O=DigiCert, Inc. +# Label: "DigiCert TLS ECC P384 Root G5" +# Serial: 13129116028163249804115411775095713523 +# MD5 Fingerprint: d3:71:04:6a:43:1c:db:a6:59:e1:a8:a3:aa:c5:71:ed +# SHA1 Fingerprint: 17:f3:de:5e:9f:0f:19:e9:8e:f6:1f:32:26:6e:20:c4:07:ae:30:ee +# SHA256 Fingerprint: 01:8e:13:f0:77:25:32:cf:80:9b:d1:b1:72:81:86:72:83:fc:48:c6:e1:3b:e9:c6:98:12:85:4a:49:0c:1b:05 +-----BEGIN CERTIFICATE----- +MIICGTCCAZ+gAwIBAgIQCeCTZaz32ci5PhwLBCou8zAKBggqhkjOPQQDAzBOMQsw +CQYDVQQGEwJVUzEXMBUGA1UEChMORGlnaUNlcnQsIEluYy4xJjAkBgNVBAMTHURp +Z2lDZXJ0IFRMUyBFQ0MgUDM4NCBSb290IEc1MB4XDTIxMDExNTAwMDAwMFoXDTQ2 +MDExNDIzNTk1OVowTjELMAkGA1UEBhMCVVMxFzAVBgNVBAoTDkRpZ2lDZXJ0LCBJ +bmMuMSYwJAYDVQQDEx1EaWdpQ2VydCBUTFMgRUNDIFAzODQgUm9vdCBHNTB2MBAG +ByqGSM49AgEGBSuBBAAiA2IABMFEoc8Rl1Ca3iOCNQfN0MsYndLxf3c1TzvdlHJS +7cI7+Oz6e2tYIOyZrsn8aLN1udsJ7MgT9U7GCh1mMEy7H0cKPGEQQil8pQgO4CLp +0zVozptjn4S1mU1YoI71VOeVyaNCMEAwHQYDVR0OBBYEFMFRRVBZqz7nLFr6ICIS +B4CIfBFqMA4GA1UdDwEB/wQEAwIBhjAPBgNVHRMBAf8EBTADAQH/MAoGCCqGSM49 +BAMDA2gAMGUCMQCJao1H5+z8blUD2WdsJk6Dxv3J+ysTvLd6jLRl0mlpYxNjOyZQ +LgGheQaRnUi/wr4CMEfDFXuxoJGZSZOoPHzoRgaLLPIxAJSdYsiJvRmEFOml+wG4 +DXZDjC5Ty3zfDBeWUA== +-----END CERTIFICATE----- + +# Issuer: CN=DigiCert TLS RSA4096 Root G5 O=DigiCert, Inc. +# Subject: CN=DigiCert TLS RSA4096 Root G5 O=DigiCert, Inc. +# Label: "DigiCert TLS RSA4096 Root G5" +# Serial: 11930366277458970227240571539258396554 +# MD5 Fingerprint: ac:fe:f7:34:96:a9:f2:b3:b4:12:4b:e4:27:41:6f:e1 +# SHA1 Fingerprint: a7:88:49:dc:5d:7c:75:8c:8c:de:39:98:56:b3:aa:d0:b2:a5:71:35 +# SHA256 Fingerprint: 37:1a:00:dc:05:33:b3:72:1a:7e:eb:40:e8:41:9e:70:79:9d:2b:0a:0f:2c:1d:80:69:31:65:f7:ce:c4:ad:75 +-----BEGIN CERTIFICATE----- +MIIFZjCCA06gAwIBAgIQCPm0eKj6ftpqMzeJ3nzPijANBgkqhkiG9w0BAQwFADBN +MQswCQYDVQQGEwJVUzEXMBUGA1UEChMORGlnaUNlcnQsIEluYy4xJTAjBgNVBAMT +HERpZ2lDZXJ0IFRMUyBSU0E0MDk2IFJvb3QgRzUwHhcNMjEwMTE1MDAwMDAwWhcN +NDYwMTE0MjM1OTU5WjBNMQswCQYDVQQGEwJVUzEXMBUGA1UEChMORGlnaUNlcnQs +IEluYy4xJTAjBgNVBAMTHERpZ2lDZXJ0IFRMUyBSU0E0MDk2IFJvb3QgRzUwggIi +MA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQCz0PTJeRGd/fxmgefM1eS87IE+ +ajWOLrfn3q/5B03PMJ3qCQuZvWxX2hhKuHisOjmopkisLnLlvevxGs3npAOpPxG0 +2C+JFvuUAT27L/gTBaF4HI4o4EXgg/RZG5Wzrn4DReW+wkL+7vI8toUTmDKdFqgp +wgscONyfMXdcvyej/Cestyu9dJsXLfKB2l2w4SMXPohKEiPQ6s+d3gMXsUJKoBZM +pG2T6T867jp8nVid9E6P/DsjyG244gXazOvswzH016cpVIDPRFtMbzCe88zdH5RD +nU1/cHAN1DrRN/BsnZvAFJNY781BOHW8EwOVfH/jXOnVDdXifBBiqmvwPXbzP6Po +sMH976pXTayGpxi0KcEsDr9kvimM2AItzVwv8n/vFfQMFawKsPHTDU9qTXeXAaDx +Zre3zu/O7Oyldcqs4+Fj97ihBMi8ez9dLRYiVu1ISf6nL3kwJZu6ay0/nTvEF+cd +Lvvyz6b84xQslpghjLSR6Rlgg/IwKwZzUNWYOwbpx4oMYIwo+FKbbuH2TbsGJJvX +KyY//SovcfXWJL5/MZ4PbeiPT02jP/816t9JXkGPhvnxd3lLG7SjXi/7RgLQZhNe +XoVPzthwiHvOAbWWl9fNff2C+MIkwcoBOU+NosEUQB+cZtUMCUbW8tDRSHZWOkPL +tgoRObqME2wGtZ7P6wIDAQABo0IwQDAdBgNVHQ4EFgQUUTMc7TZArxfTJc1paPKv +TiM+s0EwDgYDVR0PAQH/BAQDAgGGMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcN +AQEMBQADggIBAGCmr1tfV9qJ20tQqcQjNSH/0GEwhJG3PxDPJY7Jv0Y02cEhJhxw +GXIeo8mH/qlDZJY6yFMECrZBu8RHANmfGBg7sg7zNOok992vIGCukihfNudd5N7H +PNtQOa27PShNlnx2xlv0wdsUpasZYgcYQF+Xkdycx6u1UQ3maVNVzDl92sURVXLF +O4uJ+DQtpBflF+aZfTCIITfNMBc9uPK8qHWgQ9w+iUuQrm0D4ByjoJYJu32jtyoQ +REtGBzRj7TG5BO6jm5qu5jF49OokYTurWGT/u4cnYiWB39yhL/btp/96j1EuMPik +AdKFOV8BmZZvWltwGUb+hmA+rYAQCd05JS9Yf7vSdPD3Rh9GOUrYU9DzLjtxpdRv +/PNn5AeP3SYZ4Y1b+qOTEZvpyDrDVWiakuFSdjjo4bq9+0/V77PnSIMx8IIh47a+ +p6tv75/fTM8BuGJqIz3nCU2AG3swpMPdB380vqQmsvZB6Akd4yCYqjdP//fx4ilw +MUc/dNAUFvohigLVigmUdy7yWSiLfFCSCmZ4OIN1xLVaqBHG5cGdZlXPU8Sv13WF +qUITVuwhd4GTWgzqltlJyqEI8pc7bZsEGCREjnwB8twl2F6GmrE52/WRMmrRpnCK +ovfepEWFJqgejF0pW8hL2JpqA15w8oVPbEtoL8pU9ozaMv7Da4M/OMZ+ +-----END CERTIFICATE----- + +# Issuer: CN=Certainly Root R1 O=Certainly +# Subject: CN=Certainly Root R1 O=Certainly +# Label: "Certainly Root R1" +# Serial: 188833316161142517227353805653483829216 +# MD5 Fingerprint: 07:70:d4:3e:82:87:a0:fa:33:36:13:f4:fa:33:e7:12 +# SHA1 Fingerprint: a0:50:ee:0f:28:71:f4:27:b2:12:6d:6f:50:96:25:ba:cc:86:42:af +# SHA256 Fingerprint: 77:b8:2c:d8:64:4c:43:05:f7:ac:c5:cb:15:6b:45:67:50:04:03:3d:51:c6:0c:62:02:a8:e0:c3:34:67:d3:a0 +-----BEGIN CERTIFICATE----- +MIIFRzCCAy+gAwIBAgIRAI4P+UuQcWhlM1T01EQ5t+AwDQYJKoZIhvcNAQELBQAw +PTELMAkGA1UEBhMCVVMxEjAQBgNVBAoTCUNlcnRhaW5seTEaMBgGA1UEAxMRQ2Vy +dGFpbmx5IFJvb3QgUjEwHhcNMjEwNDAxMDAwMDAwWhcNNDYwNDAxMDAwMDAwWjA9 +MQswCQYDVQQGEwJVUzESMBAGA1UEChMJQ2VydGFpbmx5MRowGAYDVQQDExFDZXJ0 +YWlubHkgUm9vdCBSMTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBANA2 +1B/q3avk0bbm+yLA3RMNansiExyXPGhjZjKcA7WNpIGD2ngwEc/csiu+kr+O5MQT +vqRoTNoCaBZ0vrLdBORrKt03H2As2/X3oXyVtwxwhi7xOu9S98zTm/mLvg7fMbed +aFySpvXl8wo0tf97ouSHocavFwDvA5HtqRxOcT3Si2yJ9HiG5mpJoM610rCrm/b0 +1C7jcvk2xusVtyWMOvwlDbMicyF0yEqWYZL1LwsYpfSt4u5BvQF5+paMjRcCMLT5 +r3gajLQ2EBAHBXDQ9DGQilHFhiZ5shGIXsXwClTNSaa/ApzSRKft43jvRl5tcdF5 +cBxGX1HpyTfcX35pe0HfNEXgO4T0oYoKNp43zGJS4YkNKPl6I7ENPT2a/Z2B7yyQ +wHtETrtJ4A5KVpK8y7XdeReJkd5hiXSSqOMyhb5OhaRLWcsrxXiOcVTQAjeZjOVJ +6uBUcqQRBi8LjMFbvrWhsFNunLhgkR9Za/kt9JQKl7XsxXYDVBtlUrpMklZRNaBA +2CnbrlJ2Oy0wQJuK0EJWtLeIAaSHO1OWzaMWj/Nmqhexx2DgwUMFDO6bW2BvBlyH +Wyf5QBGenDPBt+U1VwV/J84XIIwc/PH72jEpSe31C4SnT8H2TsIonPru4K8H+zMR +eiFPCyEQtkA6qyI6BJyLm4SGcprSp6XEtHWRqSsjAgMBAAGjQjBAMA4GA1UdDwEB +/wQEAwIBBjAPBgNVHRMBAf8EBTADAQH/MB0GA1UdDgQWBBTgqj8ljZ9EXME66C6u +d0yEPmcM9DANBgkqhkiG9w0BAQsFAAOCAgEAuVevuBLaV4OPaAszHQNTVfSVcOQr +PbA56/qJYv331hgELyE03fFo8NWWWt7CgKPBjcZq91l3rhVkz1t5BXdm6ozTaw3d +8VkswTOlMIAVRQdFGjEitpIAq5lNOo93r6kiyi9jyhXWx8bwPWz8HA2YEGGeEaIi +1wrykXprOQ4vMMM2SZ/g6Q8CRFA3lFV96p/2O7qUpUzpvD5RtOjKkjZUbVwlKNrd +rRT90+7iIgXr0PK3aBLXWopBGsaSpVo7Y0VPv+E6dyIvXL9G+VoDhRNCX8reU9di +taY1BMJH/5n9hN9czulegChB8n3nHpDYT3Y+gjwN/KUD+nsa2UUeYNrEjvn8K8l7 +lcUq/6qJ34IxD3L/DCfXCh5WAFAeDJDBlrXYFIW7pw0WwfgHJBu6haEaBQmAupVj +yTrsJZ9/nbqkRxWbRHDxakvWOF5D8xh+UG7pWijmZeZ3Gzr9Hb4DJqPb1OG7fpYn +Kx3upPvaJVQTA945xsMfTZDsjxtK0hzthZU4UHlG1sGQUDGpXJpuHfUzVounmdLy +yCwzk5Iwx06MZTMQZBf9JBeW0Y3COmor6xOLRPIh80oat3df1+2IpHLlOR+Vnb5n +wXARPbv0+Em34yaXOp/SX3z7wJl8OSngex2/DaeP0ik0biQVy96QXr8axGbqwua6 +OV+KmalBWQewLK8= +-----END CERTIFICATE----- + +# Issuer: CN=Certainly Root E1 O=Certainly +# Subject: CN=Certainly Root E1 O=Certainly +# Label: "Certainly Root E1" +# Serial: 8168531406727139161245376702891150584 +# MD5 Fingerprint: 0a:9e:ca:cd:3e:52:50:c6:36:f3:4b:a3:ed:a7:53:e9 +# SHA1 Fingerprint: f9:e1:6d:dc:01:89:cf:d5:82:45:63:3e:c5:37:7d:c2:eb:93:6f:2b +# SHA256 Fingerprint: b4:58:5f:22:e4:ac:75:6a:4e:86:12:a1:36:1c:5d:9d:03:1a:93:fd:84:fe:bb:77:8f:a3:06:8b:0f:c4:2d:c2 +-----BEGIN CERTIFICATE----- +MIIB9zCCAX2gAwIBAgIQBiUzsUcDMydc+Y2aub/M+DAKBggqhkjOPQQDAzA9MQsw +CQYDVQQGEwJVUzESMBAGA1UEChMJQ2VydGFpbmx5MRowGAYDVQQDExFDZXJ0YWlu +bHkgUm9vdCBFMTAeFw0yMTA0MDEwMDAwMDBaFw00NjA0MDEwMDAwMDBaMD0xCzAJ +BgNVBAYTAlVTMRIwEAYDVQQKEwlDZXJ0YWlubHkxGjAYBgNVBAMTEUNlcnRhaW5s +eSBSb290IEUxMHYwEAYHKoZIzj0CAQYFK4EEACIDYgAE3m/4fxzf7flHh4axpMCK ++IKXgOqPyEpeKn2IaKcBYhSRJHpcnqMXfYqGITQYUBsQ3tA3SybHGWCA6TS9YBk2 +QNYphwk8kXr2vBMj3VlOBF7PyAIcGFPBMdjaIOlEjeR2o0IwQDAOBgNVHQ8BAf8E +BAMCAQYwDwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4EFgQU8ygYy2R17ikq6+2uI1g4 +hevIIgcwCgYIKoZIzj0EAwMDaAAwZQIxALGOWiDDshliTd6wT99u0nCK8Z9+aozm +ut6Dacpps6kFtZaSF4fC0urQe87YQVt8rgIwRt7qy12a7DLCZRawTDBcMPPaTnOG +BtjOiQRINzf43TNRnXCve1XYAS59BWQOhriR +-----END CERTIFICATE----- + +# Issuer: CN=E-Tugra Global Root CA RSA v3 O=E-Tugra EBG A.S. OU=E-Tugra Trust Center +# Subject: CN=E-Tugra Global Root CA RSA v3 O=E-Tugra EBG A.S. OU=E-Tugra Trust Center +# Label: "E-Tugra Global Root CA RSA v3" +# Serial: 75951268308633135324246244059508261641472512052 +# MD5 Fingerprint: 22:be:10:f6:c2:f8:03:88:73:5f:33:29:47:28:47:a4 +# SHA1 Fingerprint: e9:a8:5d:22:14:52:1c:5b:aa:0a:b4:be:24:6a:23:8a:c9:ba:e2:a9 +# SHA256 Fingerprint: ef:66:b0:b1:0a:3c:db:9f:2e:36:48:c7:6b:d2:af:18:ea:d2:bf:e6:f1:17:65:5e:28:c4:06:0d:a1:a3:f4:c2 +-----BEGIN CERTIFICATE----- +MIIF8zCCA9ugAwIBAgIUDU3FzRYilZYIfrgLfxUGNPt5EDQwDQYJKoZIhvcNAQEL +BQAwgYAxCzAJBgNVBAYTAlRSMQ8wDQYDVQQHEwZBbmthcmExGTAXBgNVBAoTEEUt +VHVncmEgRUJHIEEuUy4xHTAbBgNVBAsTFEUtVHVncmEgVHJ1c3QgQ2VudGVyMSYw +JAYDVQQDEx1FLVR1Z3JhIEdsb2JhbCBSb290IENBIFJTQSB2MzAeFw0yMDAzMTgw +OTA3MTdaFw00NTAzMTIwOTA3MTdaMIGAMQswCQYDVQQGEwJUUjEPMA0GA1UEBxMG +QW5rYXJhMRkwFwYDVQQKExBFLVR1Z3JhIEVCRyBBLlMuMR0wGwYDVQQLExRFLVR1 +Z3JhIFRydXN0IENlbnRlcjEmMCQGA1UEAxMdRS1UdWdyYSBHbG9iYWwgUm9vdCBD +QSBSU0EgdjMwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQCiZvCJt3J7 +7gnJY9LTQ91ew6aEOErxjYG7FL1H6EAX8z3DeEVypi6Q3po61CBxyryfHUuXCscx +uj7X/iWpKo429NEvx7epXTPcMHD4QGxLsqYxYdE0PD0xesevxKenhOGXpOhL9hd8 +7jwH7eKKV9y2+/hDJVDqJ4GohryPUkqWOmAalrv9c/SF/YP9f4RtNGx/ardLAQO/ +rWm31zLZ9Vdq6YaCPqVmMbMWPcLzJmAy01IesGykNz709a/r4d+ABs8qQedmCeFL +l+d3vSFtKbZnwy1+7dZ5ZdHPOrbRsV5WYVB6Ws5OUDGAA5hH5+QYfERaxqSzO8bG +wzrwbMOLyKSRBfP12baqBqG3q+Sx6iEUXIOk/P+2UNOMEiaZdnDpwA+mdPy70Bt4 +znKS4iicvObpCdg604nmvi533wEKb5b25Y08TVJ2Glbhc34XrD2tbKNSEhhw5oBO +M/J+JjKsBY04pOZ2PJ8QaQ5tndLBeSBrW88zjdGUdjXnXVXHt6woq0bM5zshtQoK +5EpZ3IE1S0SVEgpnpaH/WwAH0sDM+T/8nzPyAPiMbIedBi3x7+PmBvrFZhNb/FAH +nnGGstpvdDDPk1Po3CLW3iAfYY2jLqN4MpBs3KwytQXk9TwzDdbgh3cXTJ2w2Amo +DVf3RIXwyAS+XF1a4xeOVGNpf0l0ZAWMowIDAQABo2MwYTAPBgNVHRMBAf8EBTAD +AQH/MB8GA1UdIwQYMBaAFLK0ruYt9ybVqnUtdkvAG1Mh0EjvMB0GA1UdDgQWBBSy +tK7mLfcm1ap1LXZLwBtTIdBI7zAOBgNVHQ8BAf8EBAMCAQYwDQYJKoZIhvcNAQEL +BQADggIBAImocn+M684uGMQQgC0QDP/7FM0E4BQ8Tpr7nym/Ip5XuYJzEmMmtcyQ +6dIqKe6cLcwsmb5FJ+Sxce3kOJUxQfJ9emN438o2Fi+CiJ+8EUdPdk3ILY7r3y18 +Tjvarvbj2l0Upq7ohUSdBm6O++96SmotKygY/r+QLHUWnw/qln0F7psTpURs+APQ +3SPh/QMSEgj0GDSz4DcLdxEBSL9htLX4GdnLTeqjjO/98Aa1bZL0SmFQhO3sSdPk +vmjmLuMxC1QLGpLWgti2omU8ZgT5Vdps+9u1FGZNlIM7zR6mK7L+d0CGq+ffCsn9 +9t2HVhjYsCxVYJb6CH5SkPVLpi6HfMsg2wY+oF0Dd32iPBMbKaITVaA9FCKvb7jQ +mhty3QUBjYZgv6Rn7rWlDdF/5horYmbDB7rnoEgcOMPpRfunf/ztAmgayncSd6YA +VSgU7NbHEqIbZULpkejLPoeJVF3Zr52XnGnnCv8PWniLYypMfUeUP95L6VPQMPHF +9p5J3zugkaOj/s1YzOrfr28oO6Bpm4/srK4rVJ2bBLFHIK+WEj5jlB0E5y67hscM +moi/dkfv97ALl2bSRM9gUgfh1SxKOidhd8rXj+eHDjD/DLsE4mHDosiXYY60MGo8 +bcIHX0pzLz/5FooBZu+6kcpSV3uu1OYP3Qt6f4ueJiDPO++BcYNZ +-----END CERTIFICATE----- + +# Issuer: CN=E-Tugra Global Root CA ECC v3 O=E-Tugra EBG A.S. OU=E-Tugra Trust Center +# Subject: CN=E-Tugra Global Root CA ECC v3 O=E-Tugra EBG A.S. OU=E-Tugra Trust Center +# Label: "E-Tugra Global Root CA ECC v3" +# Serial: 218504919822255052842371958738296604628416471745 +# MD5 Fingerprint: 46:bc:81:bb:f1:b5:1e:f7:4b:96:bc:14:e2:e7:27:64 +# SHA1 Fingerprint: 8a:2f:af:57:53:b1:b0:e6:a1:04:ec:5b:6a:69:71:6d:f6:1c:e2:84 +# SHA256 Fingerprint: 87:3f:46:85:fa:7f:56:36:25:25:2e:6d:36:bc:d7:f1:6f:c2:49:51:f2:64:e4:7e:1b:95:4f:49:08:cd:ca:13 +-----BEGIN CERTIFICATE----- +MIICpTCCAiqgAwIBAgIUJkYZdzHhT28oNt45UYbm1JeIIsEwCgYIKoZIzj0EAwMw +gYAxCzAJBgNVBAYTAlRSMQ8wDQYDVQQHEwZBbmthcmExGTAXBgNVBAoTEEUtVHVn +cmEgRUJHIEEuUy4xHTAbBgNVBAsTFEUtVHVncmEgVHJ1c3QgQ2VudGVyMSYwJAYD +VQQDEx1FLVR1Z3JhIEdsb2JhbCBSb290IENBIEVDQyB2MzAeFw0yMDAzMTgwOTQ2 +NThaFw00NTAzMTIwOTQ2NThaMIGAMQswCQYDVQQGEwJUUjEPMA0GA1UEBxMGQW5r +YXJhMRkwFwYDVQQKExBFLVR1Z3JhIEVCRyBBLlMuMR0wGwYDVQQLExRFLVR1Z3Jh +IFRydXN0IENlbnRlcjEmMCQGA1UEAxMdRS1UdWdyYSBHbG9iYWwgUm9vdCBDQSBF +Q0MgdjMwdjAQBgcqhkjOPQIBBgUrgQQAIgNiAASOmCm/xxAeJ9urA8woLNheSBkQ +KczLWYHMjLiSF4mDKpL2w6QdTGLVn9agRtwcvHbB40fQWxPa56WzZkjnIZpKT4YK +fWzqTTKACrJ6CZtpS5iB4i7sAnCWH/31Rs7K3IKjYzBhMA8GA1UdEwEB/wQFMAMB +Af8wHwYDVR0jBBgwFoAU/4Ixcj75xGZsrTie0bBRiKWQzPUwHQYDVR0OBBYEFP+C +MXI++cRmbK04ntGwUYilkMz1MA4GA1UdDwEB/wQEAwIBBjAKBggqhkjOPQQDAwNp +ADBmAjEA5gVYaWHlLcoNy/EZCL3W/VGSGn5jVASQkZo1kTmZ+gepZpO6yGjUij/6 +7W4WAie3AjEA3VoXK3YdZUKWpqxdinlW2Iob35reX8dQj7FbcQwm32pAAOwzkSFx +vmjkI6TZraE3 +-----END CERTIFICATE----- + +# Issuer: CN=Security Communication RootCA3 O=SECOM Trust Systems CO.,LTD. +# Subject: CN=Security Communication RootCA3 O=SECOM Trust Systems CO.,LTD. +# Label: "Security Communication RootCA3" +# Serial: 16247922307909811815 +# MD5 Fingerprint: 1c:9a:16:ff:9e:5c:e0:4d:8a:14:01:f4:35:5d:29:26 +# SHA1 Fingerprint: c3:03:c8:22:74:92:e5:61:a2:9c:5f:79:91:2b:1e:44:13:91:30:3a +# SHA256 Fingerprint: 24:a5:5c:2a:b0:51:44:2d:06:17:76:65:41:23:9a:4a:d0:32:d7:c5:51:75:aa:34:ff:de:2f:bc:4f:5c:52:94 +-----BEGIN CERTIFICATE----- +MIIFfzCCA2egAwIBAgIJAOF8N0D9G/5nMA0GCSqGSIb3DQEBDAUAMF0xCzAJBgNV +BAYTAkpQMSUwIwYDVQQKExxTRUNPTSBUcnVzdCBTeXN0ZW1zIENPLixMVEQuMScw +JQYDVQQDEx5TZWN1cml0eSBDb21tdW5pY2F0aW9uIFJvb3RDQTMwHhcNMTYwNjE2 +MDYxNzE2WhcNMzgwMTE4MDYxNzE2WjBdMQswCQYDVQQGEwJKUDElMCMGA1UEChMc +U0VDT00gVHJ1c3QgU3lzdGVtcyBDTy4sTFRELjEnMCUGA1UEAxMeU2VjdXJpdHkg +Q29tbXVuaWNhdGlvbiBSb290Q0EzMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIIC +CgKCAgEA48lySfcw3gl8qUCBWNO0Ot26YQ+TUG5pPDXC7ltzkBtnTCHsXzW7OT4r +CmDvu20rhvtxosis5FaU+cmvsXLUIKx00rgVrVH+hXShuRD+BYD5UpOzQD11EKzA +lrenfna84xtSGc4RHwsENPXY9Wk8d/Nk9A2qhd7gCVAEF5aEt8iKvE1y/By7z/MG +TfmfZPd+pmaGNXHIEYBMwXFAWB6+oHP2/D5Q4eAvJj1+XCO1eXDe+uDRpdYMQXF7 +9+qMHIjH7Iv10S9VlkZ8WjtYO/u62C21Jdp6Ts9EriGmnpjKIG58u4iFW/vAEGK7 +8vknR+/RiTlDxN/e4UG/VHMgly1s2vPUB6PmudhvrvyMGS7TZ2crldtYXLVqAvO4 +g160a75BflcJdURQVc1aEWEhCmHCqYj9E7wtiS/NYeCVvsq1e+F7NGcLH7YMx3we +GVPKp7FKFSBWFHA9K4IsD50VHUeAR/94mQ4xr28+j+2GaR57GIgUssL8gjMunEst ++3A7caoreyYn8xrC3PsXuKHqy6C0rtOUfnrQq8PsOC0RLoi/1D+tEjtCrI8Cbn3M +0V9hvqG8OmpI6iZVIhZdXw3/JzOfGAN0iltSIEdrRU0id4xVJ/CvHozJgyJUt5rQ +T9nO/NkuHJYosQLTA70lUhw0Zk8jq/R3gpYd0VcwCBEF/VfR2ccCAwEAAaNCMEAw +HQYDVR0OBBYEFGQUfPxYchamCik0FW8qy7z8r6irMA4GA1UdDwEB/wQEAwIBBjAP +BgNVHRMBAf8EBTADAQH/MA0GCSqGSIb3DQEBDAUAA4ICAQDcAiMI4u8hOscNtybS +YpOnpSNyByCCYN8Y11StaSWSntkUz5m5UoHPrmyKO1o5yGwBQ8IibQLwYs1OY0PA +FNr0Y/Dq9HHuTofjcan0yVflLl8cebsjqodEV+m9NU1Bu0soo5iyG9kLFwfl9+qd +9XbXv8S2gVj/yP9kaWJ5rW4OH3/uHWnlt3Jxs/6lATWUVCvAUm2PVcTJ0rjLyjQI +UYWg9by0F1jqClx6vWPGOi//lkkZhOpn2ASxYfQAW0q3nHE3GYV5v4GwxxMOdnE+ +OoAGrgYWp421wsTL/0ClXI2lyTrtcoHKXJg80jQDdwj98ClZXSEIx2C/pHF7uNke +gr4Jr2VvKKu/S7XuPghHJ6APbw+LP6yVGPO5DtxnVW5inkYO0QR4ynKudtml+LLf +iAlhi+8kTtFZP1rUPcmTPCtk9YENFpb3ksP+MW/oKjJ0DvRMmEoYDjBU1cXrvMUV +nuiZIesnKwkK2/HmcBhWuwzkvvnoEKQTkrgc4NtnHVMDpCKn3F2SEDzq//wbEBrD +2NCcnWXL0CsnMQMeNuE9dnUM/0Umud1RvCPHX9jYhxBAEg09ODfnRDwYwFMJZI// +1ZqmfHAuc1Uh6N//g7kdPjIe1qZ9LPFm6Vwdp6POXiUyK+OVrCoHzrQoeIY8Laad +TdJ0MN1kURXbg4NR16/9M51NZg== +-----END CERTIFICATE----- + +# Issuer: CN=Security Communication ECC RootCA1 O=SECOM Trust Systems CO.,LTD. +# Subject: CN=Security Communication ECC RootCA1 O=SECOM Trust Systems CO.,LTD. +# Label: "Security Communication ECC RootCA1" +# Serial: 15446673492073852651 +# MD5 Fingerprint: 7e:43:b0:92:68:ec:05:43:4c:98:ab:5d:35:2e:7e:86 +# SHA1 Fingerprint: b8:0e:26:a9:bf:d2:b2:3b:c0:ef:46:c9:ba:c7:bb:f6:1d:0d:41:41 +# SHA256 Fingerprint: e7:4f:bd:a5:5b:d5:64:c4:73:a3:6b:44:1a:a7:99:c8:a6:8e:07:74:40:e8:28:8b:9f:a1:e5:0e:4b:ba:ca:11 +-----BEGIN CERTIFICATE----- +MIICODCCAb6gAwIBAgIJANZdm7N4gS7rMAoGCCqGSM49BAMDMGExCzAJBgNVBAYT +AkpQMSUwIwYDVQQKExxTRUNPTSBUcnVzdCBTeXN0ZW1zIENPLixMVEQuMSswKQYD +VQQDEyJTZWN1cml0eSBDb21tdW5pY2F0aW9uIEVDQyBSb290Q0ExMB4XDTE2MDYx +NjA1MTUyOFoXDTM4MDExODA1MTUyOFowYTELMAkGA1UEBhMCSlAxJTAjBgNVBAoT +HFNFQ09NIFRydXN0IFN5c3RlbXMgQ08uLExURC4xKzApBgNVBAMTIlNlY3VyaXR5 +IENvbW11bmljYXRpb24gRUNDIFJvb3RDQTEwdjAQBgcqhkjOPQIBBgUrgQQAIgNi +AASkpW9gAwPDvTH00xecK4R1rOX9PVdu12O/5gSJko6BnOPpR27KkBLIE+Cnnfdl +dB9sELLo5OnvbYUymUSxXv3MdhDYW72ixvnWQuRXdtyQwjWpS4g8EkdtXP9JTxpK +ULGjQjBAMB0GA1UdDgQWBBSGHOf+LaVKiwj+KBH6vqNm+GBZLzAOBgNVHQ8BAf8E +BAMCAQYwDwYDVR0TAQH/BAUwAwEB/zAKBggqhkjOPQQDAwNoADBlAjAVXUI9/Lbu +9zuxNuie9sRGKEkz0FhDKmMpzE2xtHqiuQ04pV1IKv3LsnNdo4gIxwwCMQDAqy0O +be0YottT6SXbVQjgUMzfRGEWgqtJsLKB7HOHeLRMsmIbEvoWTSVLY70eN9k= +-----END CERTIFICATE----- diff --git a/libs/common/certifi/core.py b/libs/common/certifi/core.py index 5d2b8cd3..de028981 100644 --- a/libs/common/certifi/core.py +++ b/libs/common/certifi/core.py @@ -1,20 +1,20 @@ -# -*- coding: utf-8 -*- - """ certifi.py ~~~~~~~~~~ This module returns the installation location of cacert.pem or its contents. """ -import os +import sys -try: - from importlib.resources import path as get_path, read_text + +if sys.version_info >= (3, 11): + + from importlib.resources import as_file, files _CACERT_CTX = None _CACERT_PATH = None - def where(): + def where() -> str: # This is slightly terrible, but we want to delay extracting the file # in cases where we're inside of a zipimport situation until someone # actually calls where(), but we don't want to re-extract the file @@ -33,28 +33,76 @@ try: # We also have to hold onto the actual context manager, because # it will do the cleanup whenever it gets garbage collected, so # we will also store that at the global level as well. + _CACERT_CTX = as_file(files("certifi").joinpath("cacert.pem")) + _CACERT_PATH = str(_CACERT_CTX.__enter__()) + + return _CACERT_PATH + + def contents() -> str: + return files("certifi").joinpath("cacert.pem").read_text(encoding="ascii") + +elif sys.version_info >= (3, 7): + + from importlib.resources import path as get_path, read_text + + _CACERT_CTX = None + _CACERT_PATH = None + + def where() -> str: + # This is slightly terrible, but we want to delay extracting the + # file in cases where we're inside of a zipimport situation until + # someone actually calls where(), but we don't want to re-extract + # the file on every call of where(), so we'll do it once then store + # it in a global variable. + global _CACERT_CTX + global _CACERT_PATH + if _CACERT_PATH is None: + # This is slightly janky, the importlib.resources API wants you + # to manage the cleanup of this file, so it doesn't actually + # return a path, it returns a context manager that will give + # you the path when you enter it and will do any cleanup when + # you leave it. In the common case of not needing a temporary + # file, it will just return the file system location and the + # __exit__() is a no-op. + # + # We also have to hold onto the actual context manager, because + # it will do the cleanup whenever it gets garbage collected, so + # we will also store that at the global level as well. _CACERT_CTX = get_path("certifi", "cacert.pem") _CACERT_PATH = str(_CACERT_CTX.__enter__()) return _CACERT_PATH + def contents() -> str: + return read_text("certifi", "cacert.pem", encoding="ascii") + +else: + import os + import types + from typing import Union + + Package = Union[types.ModuleType, str] + Resource = Union[str, "os.PathLike"] -except ImportError: # This fallback will work for Python versions prior to 3.7 that lack the # importlib.resources module but relies on the existing `where` function # so won't address issues with environments like PyOxidizer that don't set # __file__ on modules. - def read_text(_module, _path, encoding="ascii"): - with open(where(), "r", encoding=encoding) as data: + def read_text( + package: Package, + resource: Resource, + encoding: str = 'utf-8', + errors: str = 'strict' + ) -> str: + with open(where(), encoding=encoding) as data: return data.read() # If we don't have importlib.resources, then we will just do the old logic # of assuming we're on the filesystem and munge the path directly. - def where(): + def where() -> str: f = os.path.dirname(__file__) return os.path.join(f, "cacert.pem") - -def contents(): - return read_text("certifi", "cacert.pem", encoding="ascii") + def contents() -> str: + return read_text("certifi", "cacert.pem", encoding="ascii") diff --git a/libs/common/more_itertools/tests/__init__.py b/libs/common/certifi/py.typed similarity index 100% rename from libs/common/more_itertools/tests/__init__.py rename to libs/common/certifi/py.typed diff --git a/libs/common/chardet/__init__.py b/libs/common/chardet/__init__.py index 80ad2546..fe581623 100644 --- a/libs/common/chardet/__init__.py +++ b/libs/common/chardet/__init__.py @@ -15,69 +15,101 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### +from typing import List, Union -from .universaldetector import UniversalDetector +from .charsetgroupprober import CharSetGroupProber +from .charsetprober import CharSetProber from .enums import InputState -from .version import __version__, VERSION +from .resultdict import ResultDict +from .universaldetector import UniversalDetector +from .version import VERSION, __version__ + +__all__ = ["UniversalDetector", "detect", "detect_all", "__version__", "VERSION"] -__all__ = ['UniversalDetector', 'detect', 'detect_all', '__version__', 'VERSION'] - - -def detect(byte_str): +def detect( + byte_str: Union[bytes, bytearray], should_rename_legacy: bool = False +) -> ResultDict: """ Detect the encoding of the given byte string. :param byte_str: The byte sequence to examine. :type byte_str: ``bytes`` or ``bytearray`` + :param should_rename_legacy: Should we rename legacy encodings + to their more modern equivalents? + :type should_rename_legacy: ``bool`` """ if not isinstance(byte_str, bytearray): if not isinstance(byte_str, bytes): - raise TypeError('Expected object of type bytes or bytearray, got: ' - '{}'.format(type(byte_str))) - else: - byte_str = bytearray(byte_str) - detector = UniversalDetector() + raise TypeError( + f"Expected object of type bytes or bytearray, got: {type(byte_str)}" + ) + byte_str = bytearray(byte_str) + detector = UniversalDetector(should_rename_legacy=should_rename_legacy) detector.feed(byte_str) return detector.close() -def detect_all(byte_str): +def detect_all( + byte_str: Union[bytes, bytearray], + ignore_threshold: bool = False, + should_rename_legacy: bool = False, +) -> List[ResultDict]: """ Detect all the possible encodings of the given byte string. - :param byte_str: The byte sequence to examine. - :type byte_str: ``bytes`` or ``bytearray`` + :param byte_str: The byte sequence to examine. + :type byte_str: ``bytes`` or ``bytearray`` + :param ignore_threshold: Include encodings that are below + ``UniversalDetector.MINIMUM_THRESHOLD`` + in results. + :type ignore_threshold: ``bool`` + :param should_rename_legacy: Should we rename legacy encodings + to their more modern equivalents? + :type should_rename_legacy: ``bool`` """ if not isinstance(byte_str, bytearray): if not isinstance(byte_str, bytes): - raise TypeError('Expected object of type bytes or bytearray, got: ' - '{}'.format(type(byte_str))) - else: - byte_str = bytearray(byte_str) + raise TypeError( + f"Expected object of type bytes or bytearray, got: {type(byte_str)}" + ) + byte_str = bytearray(byte_str) - detector = UniversalDetector() + detector = UniversalDetector(should_rename_legacy=should_rename_legacy) detector.feed(byte_str) detector.close() - if detector._input_state == InputState.HIGH_BYTE: - results = [] - for prober in detector._charset_probers: - if prober.get_confidence() > detector.MINIMUM_THRESHOLD: - charset_name = prober.charset_name - lower_charset_name = prober.charset_name.lower() + if detector.input_state == InputState.HIGH_BYTE: + results: List[ResultDict] = [] + probers: List[CharSetProber] = [] + for prober in detector.charset_probers: + if isinstance(prober, CharSetGroupProber): + probers.extend(p for p in prober.probers) + else: + probers.append(prober) + for prober in probers: + if ignore_threshold or prober.get_confidence() > detector.MINIMUM_THRESHOLD: + charset_name = prober.charset_name or "" + lower_charset_name = charset_name.lower() # Use Windows encoding name instead of ISO-8859 if we saw any # extra Windows-specific bytes - if lower_charset_name.startswith('iso-8859'): - if detector._has_win_bytes: - charset_name = detector.ISO_WIN_MAP.get(lower_charset_name, - charset_name) - results.append({ - 'encoding': charset_name, - 'confidence': prober.get_confidence(), - 'language': prober.language, - }) + if lower_charset_name.startswith("iso-8859") and detector.has_win_bytes: + charset_name = detector.ISO_WIN_MAP.get( + lower_charset_name, charset_name + ) + # Rename legacy encodings with superset encodings if asked + if should_rename_legacy: + charset_name = detector.LEGACY_MAP.get( + charset_name.lower(), charset_name + ) + results.append( + { + "encoding": charset_name, + "confidence": prober.get_confidence(), + "language": prober.language, + } + ) if len(results) > 0: - return sorted(results, key=lambda result: -result['confidence']) + return sorted(results, key=lambda result: -result["confidence"]) return [detector.result] diff --git a/libs/common/chardet/big5freq.py b/libs/common/chardet/big5freq.py index 38f32517..87d9f972 100644 --- a/libs/common/chardet/big5freq.py +++ b/libs/common/chardet/big5freq.py @@ -42,9 +42,9 @@ BIG5_TYPICAL_DISTRIBUTION_RATIO = 0.75 -#Char to FreqOrder table +# Char to FreqOrder table BIG5_TABLE_SIZE = 5376 - +# fmt: off BIG5_CHAR_TO_FREQ_ORDER = ( 1,1801,1506, 255,1431, 198, 9, 82, 6,5008, 177, 202,3681,1256,2821, 110, # 16 3814, 33,3274, 261, 76, 44,2114, 16,2946,2187,1176, 659,3971, 26,3451,2653, # 32 @@ -383,4 +383,4 @@ BIG5_CHAR_TO_FREQ_ORDER = ( 890,3669,3943,5791,1878,3798,3439,5792,2186,2358,3440,1652,5793,5794,5795, 941, # 5360 2299, 208,3546,4161,2020, 330,4438,3944,2906,2499,3799,4439,4811,5796,5797,5798, # 5376 ) - +# fmt: on diff --git a/libs/common/chardet/big5prober.py b/libs/common/chardet/big5prober.py index 98f99701..ef09c60e 100644 --- a/libs/common/chardet/big5prober.py +++ b/libs/common/chardet/big5prober.py @@ -25,23 +25,23 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -from .mbcharsetprober import MultiByteCharSetProber -from .codingstatemachine import CodingStateMachine from .chardistribution import Big5DistributionAnalysis +from .codingstatemachine import CodingStateMachine +from .mbcharsetprober import MultiByteCharSetProber from .mbcssm import BIG5_SM_MODEL class Big5Prober(MultiByteCharSetProber): - def __init__(self): - super(Big5Prober, self).__init__() + def __init__(self) -> None: + super().__init__() self.coding_sm = CodingStateMachine(BIG5_SM_MODEL) self.distribution_analyzer = Big5DistributionAnalysis() self.reset() @property - def charset_name(self): + def charset_name(self) -> str: return "Big5" @property - def language(self): + def language(self) -> str: return "Chinese" diff --git a/libs/common/chardet/chardistribution.py b/libs/common/chardet/chardistribution.py index c0395f4a..176cb996 100644 --- a/libs/common/chardet/chardistribution.py +++ b/libs/common/chardet/chardistribution.py @@ -25,40 +25,58 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -from .euctwfreq import (EUCTW_CHAR_TO_FREQ_ORDER, EUCTW_TABLE_SIZE, - EUCTW_TYPICAL_DISTRIBUTION_RATIO) -from .euckrfreq import (EUCKR_CHAR_TO_FREQ_ORDER, EUCKR_TABLE_SIZE, - EUCKR_TYPICAL_DISTRIBUTION_RATIO) -from .gb2312freq import (GB2312_CHAR_TO_FREQ_ORDER, GB2312_TABLE_SIZE, - GB2312_TYPICAL_DISTRIBUTION_RATIO) -from .big5freq import (BIG5_CHAR_TO_FREQ_ORDER, BIG5_TABLE_SIZE, - BIG5_TYPICAL_DISTRIBUTION_RATIO) -from .jisfreq import (JIS_CHAR_TO_FREQ_ORDER, JIS_TABLE_SIZE, - JIS_TYPICAL_DISTRIBUTION_RATIO) +from typing import Tuple, Union + +from .big5freq import ( + BIG5_CHAR_TO_FREQ_ORDER, + BIG5_TABLE_SIZE, + BIG5_TYPICAL_DISTRIBUTION_RATIO, +) +from .euckrfreq import ( + EUCKR_CHAR_TO_FREQ_ORDER, + EUCKR_TABLE_SIZE, + EUCKR_TYPICAL_DISTRIBUTION_RATIO, +) +from .euctwfreq import ( + EUCTW_CHAR_TO_FREQ_ORDER, + EUCTW_TABLE_SIZE, + EUCTW_TYPICAL_DISTRIBUTION_RATIO, +) +from .gb2312freq import ( + GB2312_CHAR_TO_FREQ_ORDER, + GB2312_TABLE_SIZE, + GB2312_TYPICAL_DISTRIBUTION_RATIO, +) +from .jisfreq import ( + JIS_CHAR_TO_FREQ_ORDER, + JIS_TABLE_SIZE, + JIS_TYPICAL_DISTRIBUTION_RATIO, +) +from .johabfreq import JOHAB_TO_EUCKR_ORDER_TABLE -class CharDistributionAnalysis(object): +class CharDistributionAnalysis: ENOUGH_DATA_THRESHOLD = 1024 SURE_YES = 0.99 SURE_NO = 0.01 MINIMUM_DATA_THRESHOLD = 3 - def __init__(self): + def __init__(self) -> None: # Mapping table to get frequency order from char order (get from # GetOrder()) - self._char_to_freq_order = None - self._table_size = None # Size of above table + self._char_to_freq_order: Tuple[int, ...] = tuple() + self._table_size = 0 # Size of above table # This is a constant value which varies from language to language, # used in calculating confidence. See # http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html # for further detail. - self.typical_distribution_ratio = None - self._done = None - self._total_chars = None - self._freq_chars = None + self.typical_distribution_ratio = 0.0 + self._done = False + self._total_chars = 0 + self._freq_chars = 0 self.reset() - def reset(self): + def reset(self) -> None: """reset analyser, clear any state""" # If this flag is set to True, detection is done and conclusion has # been made @@ -67,7 +85,7 @@ class CharDistributionAnalysis(object): # The number of characters whose frequency order is less than 512 self._freq_chars = 0 - def feed(self, char, char_len): + def feed(self, char: Union[bytes, bytearray], char_len: int) -> None: """feed a character with known length""" if char_len == 2: # we only care about 2-bytes character in our distribution analysis @@ -81,7 +99,7 @@ class CharDistributionAnalysis(object): if 512 > self._char_to_freq_order[order]: self._freq_chars += 1 - def get_confidence(self): + def get_confidence(self) -> float: """return confidence based on existing data""" # if we didn't receive any character in our consideration range, # return negative answer @@ -89,20 +107,21 @@ class CharDistributionAnalysis(object): return self.SURE_NO if self._total_chars != self._freq_chars: - r = (self._freq_chars / ((self._total_chars - self._freq_chars) - * self.typical_distribution_ratio)) + r = self._freq_chars / ( + (self._total_chars - self._freq_chars) * self.typical_distribution_ratio + ) if r < self.SURE_YES: return r # normalize confidence (we don't want to be 100% sure) return self.SURE_YES - def got_enough_data(self): + def got_enough_data(self) -> bool: # It is not necessary to receive all data to draw conclusion. # For charset detection, certain amount of data is enough return self._total_chars > self.ENOUGH_DATA_THRESHOLD - def get_order(self, byte_str): + def get_order(self, _: Union[bytes, bytearray]) -> int: # We do not handle characters based on the original encoding string, # but convert this encoding string to a number, here called order. # This allows multiple encodings of a language to share one frequency @@ -111,13 +130,13 @@ class CharDistributionAnalysis(object): class EUCTWDistributionAnalysis(CharDistributionAnalysis): - def __init__(self): - super(EUCTWDistributionAnalysis, self).__init__() + def __init__(self) -> None: + super().__init__() self._char_to_freq_order = EUCTW_CHAR_TO_FREQ_ORDER self._table_size = EUCTW_TABLE_SIZE self.typical_distribution_ratio = EUCTW_TYPICAL_DISTRIBUTION_RATIO - def get_order(self, byte_str): + def get_order(self, byte_str: Union[bytes, bytearray]) -> int: # for euc-TW encoding, we are interested # first byte range: 0xc4 -- 0xfe # second byte range: 0xa1 -- 0xfe @@ -125,18 +144,17 @@ class EUCTWDistributionAnalysis(CharDistributionAnalysis): first_char = byte_str[0] if first_char >= 0xC4: return 94 * (first_char - 0xC4) + byte_str[1] - 0xA1 - else: - return -1 + return -1 class EUCKRDistributionAnalysis(CharDistributionAnalysis): - def __init__(self): - super(EUCKRDistributionAnalysis, self).__init__() + def __init__(self) -> None: + super().__init__() self._char_to_freq_order = EUCKR_CHAR_TO_FREQ_ORDER self._table_size = EUCKR_TABLE_SIZE self.typical_distribution_ratio = EUCKR_TYPICAL_DISTRIBUTION_RATIO - def get_order(self, byte_str): + def get_order(self, byte_str: Union[bytes, bytearray]) -> int: # for euc-KR encoding, we are interested # first byte range: 0xb0 -- 0xfe # second byte range: 0xa1 -- 0xfe @@ -144,18 +162,32 @@ class EUCKRDistributionAnalysis(CharDistributionAnalysis): first_char = byte_str[0] if first_char >= 0xB0: return 94 * (first_char - 0xB0) + byte_str[1] - 0xA1 - else: - return -1 + return -1 + + +class JOHABDistributionAnalysis(CharDistributionAnalysis): + def __init__(self) -> None: + super().__init__() + self._char_to_freq_order = EUCKR_CHAR_TO_FREQ_ORDER + self._table_size = EUCKR_TABLE_SIZE + self.typical_distribution_ratio = EUCKR_TYPICAL_DISTRIBUTION_RATIO + + def get_order(self, byte_str: Union[bytes, bytearray]) -> int: + first_char = byte_str[0] + if 0x88 <= first_char < 0xD4: + code = first_char * 256 + byte_str[1] + return JOHAB_TO_EUCKR_ORDER_TABLE.get(code, -1) + return -1 class GB2312DistributionAnalysis(CharDistributionAnalysis): - def __init__(self): - super(GB2312DistributionAnalysis, self).__init__() + def __init__(self) -> None: + super().__init__() self._char_to_freq_order = GB2312_CHAR_TO_FREQ_ORDER self._table_size = GB2312_TABLE_SIZE self.typical_distribution_ratio = GB2312_TYPICAL_DISTRIBUTION_RATIO - def get_order(self, byte_str): + def get_order(self, byte_str: Union[bytes, bytearray]) -> int: # for GB2312 encoding, we are interested # first byte range: 0xb0 -- 0xfe # second byte range: 0xa1 -- 0xfe @@ -163,18 +195,17 @@ class GB2312DistributionAnalysis(CharDistributionAnalysis): first_char, second_char = byte_str[0], byte_str[1] if (first_char >= 0xB0) and (second_char >= 0xA1): return 94 * (first_char - 0xB0) + second_char - 0xA1 - else: - return -1 + return -1 class Big5DistributionAnalysis(CharDistributionAnalysis): - def __init__(self): - super(Big5DistributionAnalysis, self).__init__() + def __init__(self) -> None: + super().__init__() self._char_to_freq_order = BIG5_CHAR_TO_FREQ_ORDER self._table_size = BIG5_TABLE_SIZE self.typical_distribution_ratio = BIG5_TYPICAL_DISTRIBUTION_RATIO - def get_order(self, byte_str): + def get_order(self, byte_str: Union[bytes, bytearray]) -> int: # for big5 encoding, we are interested # first byte range: 0xa4 -- 0xfe # second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe @@ -183,28 +214,26 @@ class Big5DistributionAnalysis(CharDistributionAnalysis): if first_char >= 0xA4: if second_char >= 0xA1: return 157 * (first_char - 0xA4) + second_char - 0xA1 + 63 - else: - return 157 * (first_char - 0xA4) + second_char - 0x40 - else: - return -1 + return 157 * (first_char - 0xA4) + second_char - 0x40 + return -1 class SJISDistributionAnalysis(CharDistributionAnalysis): - def __init__(self): - super(SJISDistributionAnalysis, self).__init__() + def __init__(self) -> None: + super().__init__() self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER self._table_size = JIS_TABLE_SIZE self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO - def get_order(self, byte_str): + def get_order(self, byte_str: Union[bytes, bytearray]) -> int: # for sjis encoding, we are interested # first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe # second byte range: 0x40 -- 0x7e, 0x81 -- oxfe # no validation needed here. State machine has done that first_char, second_char = byte_str[0], byte_str[1] - if (first_char >= 0x81) and (first_char <= 0x9F): + if 0x81 <= first_char <= 0x9F: order = 188 * (first_char - 0x81) - elif (first_char >= 0xE0) and (first_char <= 0xEF): + elif 0xE0 <= first_char <= 0xEF: order = 188 * (first_char - 0xE0 + 31) else: return -1 @@ -215,19 +244,18 @@ class SJISDistributionAnalysis(CharDistributionAnalysis): class EUCJPDistributionAnalysis(CharDistributionAnalysis): - def __init__(self): - super(EUCJPDistributionAnalysis, self).__init__() + def __init__(self) -> None: + super().__init__() self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER self._table_size = JIS_TABLE_SIZE self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO - def get_order(self, byte_str): + def get_order(self, byte_str: Union[bytes, bytearray]) -> int: # for euc-JP encoding, we are interested # first byte range: 0xa0 -- 0xfe # second byte range: 0xa1 -- 0xfe # no validation needed here. State machine has done that char = byte_str[0] if char >= 0xA0: - return 94 * (char - 0xA1) + byte_str[1] - 0xa1 - else: - return -1 + return 94 * (char - 0xA1) + byte_str[1] - 0xA1 + return -1 diff --git a/libs/common/chardet/charsetgroupprober.py b/libs/common/chardet/charsetgroupprober.py index 5812cef0..6def56b4 100644 --- a/libs/common/chardet/charsetgroupprober.py +++ b/libs/common/chardet/charsetgroupprober.py @@ -25,29 +25,30 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -from .enums import ProbingState +from typing import List, Optional, Union + from .charsetprober import CharSetProber +from .enums import LanguageFilter, ProbingState class CharSetGroupProber(CharSetProber): - def __init__(self, lang_filter=None): - super(CharSetGroupProber, self).__init__(lang_filter=lang_filter) + def __init__(self, lang_filter: LanguageFilter = LanguageFilter.NONE) -> None: + super().__init__(lang_filter=lang_filter) self._active_num = 0 - self.probers = [] - self._best_guess_prober = None + self.probers: List[CharSetProber] = [] + self._best_guess_prober: Optional[CharSetProber] = None - def reset(self): - super(CharSetGroupProber, self).reset() + def reset(self) -> None: + super().reset() self._active_num = 0 for prober in self.probers: - if prober: - prober.reset() - prober.active = True - self._active_num += 1 + prober.reset() + prober.active = True + self._active_num += 1 self._best_guess_prober = None @property - def charset_name(self): + def charset_name(self) -> Optional[str]: if not self._best_guess_prober: self.get_confidence() if not self._best_guess_prober: @@ -55,17 +56,15 @@ class CharSetGroupProber(CharSetProber): return self._best_guess_prober.charset_name @property - def language(self): + def language(self) -> Optional[str]: if not self._best_guess_prober: self.get_confidence() if not self._best_guess_prober: return None return self._best_guess_prober.language - def feed(self, byte_str): + def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState: for prober in self.probers: - if not prober: - continue if not prober.active: continue state = prober.feed(byte_str) @@ -75,7 +74,7 @@ class CharSetGroupProber(CharSetProber): self._best_guess_prober = prober self._state = ProbingState.FOUND_IT return self.state - elif state == ProbingState.NOT_ME: + if state == ProbingState.NOT_ME: prober.active = False self._active_num -= 1 if self._active_num <= 0: @@ -83,22 +82,22 @@ class CharSetGroupProber(CharSetProber): return self.state return self.state - def get_confidence(self): + def get_confidence(self) -> float: state = self.state if state == ProbingState.FOUND_IT: return 0.99 - elif state == ProbingState.NOT_ME: + if state == ProbingState.NOT_ME: return 0.01 best_conf = 0.0 self._best_guess_prober = None for prober in self.probers: - if not prober: - continue if not prober.active: - self.logger.debug('%s not active', prober.charset_name) + self.logger.debug("%s not active", prober.charset_name) continue conf = prober.get_confidence() - self.logger.debug('%s %s confidence = %s', prober.charset_name, prober.language, conf) + self.logger.debug( + "%s %s confidence = %s", prober.charset_name, prober.language, conf + ) if best_conf < conf: best_conf = conf self._best_guess_prober = prober diff --git a/libs/common/chardet/charsetprober.py b/libs/common/chardet/charsetprober.py index eac4e598..a103ca11 100644 --- a/libs/common/chardet/charsetprober.py +++ b/libs/common/chardet/charsetprober.py @@ -28,54 +28,62 @@ import logging import re +from typing import Optional, Union -from .enums import ProbingState +from .enums import LanguageFilter, ProbingState + +INTERNATIONAL_WORDS_PATTERN = re.compile( + b"[a-zA-Z]*[\x80-\xFF]+[a-zA-Z]*[^a-zA-Z\x80-\xFF]?" +) -class CharSetProber(object): +class CharSetProber: SHORTCUT_THRESHOLD = 0.95 - def __init__(self, lang_filter=None): - self._state = None + def __init__(self, lang_filter: LanguageFilter = LanguageFilter.NONE) -> None: + self._state = ProbingState.DETECTING + self.active = True self.lang_filter = lang_filter self.logger = logging.getLogger(__name__) - def reset(self): + def reset(self) -> None: self._state = ProbingState.DETECTING @property - def charset_name(self): + def charset_name(self) -> Optional[str]: return None - def feed(self, buf): - pass + @property + def language(self) -> Optional[str]: + raise NotImplementedError + + def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState: + raise NotImplementedError @property - def state(self): + def state(self) -> ProbingState: return self._state - def get_confidence(self): + def get_confidence(self) -> float: return 0.0 @staticmethod - def filter_high_byte_only(buf): - buf = re.sub(b'([\x00-\x7F])+', b' ', buf) + def filter_high_byte_only(buf: Union[bytes, bytearray]) -> bytes: + buf = re.sub(b"([\x00-\x7F])+", b" ", buf) return buf @staticmethod - def filter_international_words(buf): + def filter_international_words(buf: Union[bytes, bytearray]) -> bytearray: """ We define three types of bytes: alphabet: english alphabets [a-zA-Z] international: international characters [\x80-\xFF] marker: everything else [^a-zA-Z\x80-\xFF] - The input buffer can be thought to contain a series of words delimited by markers. This function works to filter all words that contain at least one international character. All contiguous sequences of markers are replaced by a single space ascii character. - This filter applies to all scripts which do not use English characters. """ filtered = bytearray() @@ -83,8 +91,7 @@ class CharSetProber(object): # This regex expression filters out only words that have at-least one # international character. The word may include one marker character at # the end. - words = re.findall(b'[a-zA-Z]*[\x80-\xFF]+[a-zA-Z]*[^a-zA-Z\x80-\xFF]?', - buf) + words = INTERNATIONAL_WORDS_PATTERN.findall(buf) for word in words: filtered.extend(word[:-1]) @@ -94,20 +101,17 @@ class CharSetProber(object): # similarly across all languages and may thus have similar # frequencies). last_char = word[-1:] - if not last_char.isalpha() and last_char < b'\x80': - last_char = b' ' + if not last_char.isalpha() and last_char < b"\x80": + last_char = b" " filtered.extend(last_char) return filtered @staticmethod - def filter_with_english_letters(buf): + def remove_xml_tags(buf: Union[bytes, bytearray]) -> bytes: """ Returns a copy of ``buf`` that retains only the sequences of English alphabet and high byte characters that are not between <> characters. - Also retains English alphabet and high byte characters immediately - before occurrences of >. - This filter can be applied to all scripts which contain both English characters and extended ASCII characters, but is currently only used by ``Latin1Prober``. @@ -115,26 +119,24 @@ class CharSetProber(object): filtered = bytearray() in_tag = False prev = 0 + buf = memoryview(buf).cast("c") - for curr in range(len(buf)): - # Slice here to get bytes instead of an int with Python 3 - buf_char = buf[curr:curr + 1] - # Check if we're coming out of or entering an HTML tag - if buf_char == b'>': + for curr, buf_char in enumerate(buf): + # Check if we're coming out of or entering an XML tag + + # https://github.com/python/typeshed/issues/8182 + if buf_char == b">": # type: ignore[comparison-overlap] + prev = curr + 1 in_tag = False - elif buf_char == b'<': - in_tag = True - - # If current character is not extended-ASCII and not alphabetic... - if buf_char < b'\x80' and not buf_char.isalpha(): - # ...and we're not in a tag + # https://github.com/python/typeshed/issues/8182 + elif buf_char == b"<": # type: ignore[comparison-overlap] if curr > prev and not in_tag: # Keep everything after last non-extended-ASCII, # non-alphabetic character filtered.extend(buf[prev:curr]) # Output a space to delimit stretch we kept - filtered.extend(b' ') - prev = curr + 1 + filtered.extend(b" ") + in_tag = True # If we're not in a tag... if not in_tag: diff --git a/libs/common/chardet/cli/__init__.py b/libs/common/chardet/cli/__init__.py index 8b137891..e69de29b 100644 --- a/libs/common/chardet/cli/__init__.py +++ b/libs/common/chardet/cli/__init__.py @@ -1 +0,0 @@ - diff --git a/libs/common/chardet/cli/chardetect.py b/libs/common/chardet/cli/chardetect.py index e1d8cd69..43f6e144 100644 --- a/libs/common/chardet/cli/chardetect.py +++ b/libs/common/chardet/cli/chardetect.py @@ -12,17 +12,21 @@ If no paths are provided, it takes its input from stdin. """ -from __future__ import absolute_import, print_function, unicode_literals import argparse import sys +from typing import Iterable, List, Optional -from chardet import __version__ -from chardet.compat import PY2 -from chardet.universaldetector import UniversalDetector +from .. import __version__ +from ..universaldetector import UniversalDetector -def description_of(lines, name='stdin'): +def description_of( + lines: Iterable[bytes], + name: str = "stdin", + minimal: bool = False, + should_rename_legacy: bool = False, +) -> Optional[str]: """ Return a string describing the probable encoding of a file or list of strings. @@ -31,8 +35,11 @@ def description_of(lines, name='stdin'): :type lines: Iterable of bytes :param name: Name of file or collection of lines :type name: str + :param should_rename_legacy: Should we rename legacy encodings to + their more modern equivalents? + :type should_rename_legacy: ``bool`` """ - u = UniversalDetector() + u = UniversalDetector(should_rename_legacy=should_rename_legacy) for line in lines: line = bytearray(line) u.feed(line) @@ -41,16 +48,14 @@ def description_of(lines, name='stdin'): break u.close() result = u.result - if PY2: - name = name.decode(sys.getfilesystemencoding(), 'ignore') - if result['encoding']: - return '{}: {} with confidence {}'.format(name, result['encoding'], - result['confidence']) - else: - return '{}: no result'.format(name) + if minimal: + return result["encoding"] + if result["encoding"]: + return f'{name}: {result["encoding"]} with confidence {result["confidence"]}' + return f"{name}: no result" -def main(argv=None): +def main(argv: Optional[List[str]] = None) -> None: """ Handles command line arguments and gets things started. @@ -60,25 +65,48 @@ def main(argv=None): """ # Get command line arguments parser = argparse.ArgumentParser( - description="Takes one or more file paths and reports their detected \ - encodings") - parser.add_argument('input', - help='File whose encoding we would like to determine. \ - (default: stdin)', - type=argparse.FileType('rb'), nargs='*', - default=[sys.stdin if PY2 else sys.stdin.buffer]) - parser.add_argument('--version', action='version', - version='%(prog)s {}'.format(__version__)) + description=( + "Takes one or more file paths and reports their detected encodings" + ) + ) + parser.add_argument( + "input", + help="File whose encoding we would like to determine. (default: stdin)", + type=argparse.FileType("rb"), + nargs="*", + default=[sys.stdin.buffer], + ) + parser.add_argument( + "--minimal", + help="Print only the encoding to standard output", + action="store_true", + ) + parser.add_argument( + "-l", + "--legacy", + help="Rename legacy encodings to more modern ones.", + action="store_true", + ) + parser.add_argument( + "--version", action="version", version=f"%(prog)s {__version__}" + ) args = parser.parse_args(argv) for f in args.input: if f.isatty(): - print("You are running chardetect interactively. Press " + - "CTRL-D twice at the start of a blank line to signal the " + - "end of your input. If you want help, run chardetect " + - "--help\n", file=sys.stderr) - print(description_of(f, f.name)) + print( + "You are running chardetect interactively. Press " + "CTRL-D twice at the start of a blank line to signal the " + "end of your input. If you want help, run chardetect " + "--help\n", + file=sys.stderr, + ) + print( + description_of( + f, f.name, minimal=args.minimal, should_rename_legacy=args.legacy + ) + ) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/libs/common/chardet/codingstatemachine.py b/libs/common/chardet/codingstatemachine.py index 68fba44f..8ed4a877 100644 --- a/libs/common/chardet/codingstatemachine.py +++ b/libs/common/chardet/codingstatemachine.py @@ -27,10 +27,11 @@ import logging +from .codingstatemachinedict import CodingStateMachineDict from .enums import MachineState -class CodingStateMachine(object): +class CodingStateMachine: """ A state machine to verify a byte sequence for a particular encoding. For each byte the detector receives, it will feed that byte to every active @@ -52,37 +53,38 @@ class CodingStateMachine(object): negative answer for this encoding. Detector will exclude this encoding from consideration from here on. """ - def __init__(self, sm): + + def __init__(self, sm: CodingStateMachineDict) -> None: self._model = sm self._curr_byte_pos = 0 self._curr_char_len = 0 - self._curr_state = None + self._curr_state = MachineState.START + self.active = True self.logger = logging.getLogger(__name__) self.reset() - def reset(self): + def reset(self) -> None: self._curr_state = MachineState.START - def next_state(self, c): + def next_state(self, c: int) -> int: # for each byte we get its class # if it is first byte, we also get byte length - byte_class = self._model['class_table'][c] + byte_class = self._model["class_table"][c] if self._curr_state == MachineState.START: self._curr_byte_pos = 0 - self._curr_char_len = self._model['char_len_table'][byte_class] + self._curr_char_len = self._model["char_len_table"][byte_class] # from byte's class and state_table, we get its next state - curr_state = (self._curr_state * self._model['class_factor'] - + byte_class) - self._curr_state = self._model['state_table'][curr_state] + curr_state = self._curr_state * self._model["class_factor"] + byte_class + self._curr_state = self._model["state_table"][curr_state] self._curr_byte_pos += 1 return self._curr_state - def get_current_charlen(self): + def get_current_charlen(self) -> int: return self._curr_char_len - def get_coding_state_machine(self): - return self._model['name'] + def get_coding_state_machine(self) -> str: + return self._model["name"] @property - def language(self): - return self._model['language'] + def language(self) -> str: + return self._model["language"] diff --git a/libs/common/chardet/codingstatemachinedict.py b/libs/common/chardet/codingstatemachinedict.py new file mode 100644 index 00000000..7a3c4c7e --- /dev/null +++ b/libs/common/chardet/codingstatemachinedict.py @@ -0,0 +1,19 @@ +from typing import TYPE_CHECKING, Tuple + +if TYPE_CHECKING: + # TypedDict was introduced in Python 3.8. + # + # TODO: Remove the else block and TYPE_CHECKING check when dropping support + # for Python 3.7. + from typing import TypedDict + + class CodingStateMachineDict(TypedDict, total=False): + class_table: Tuple[int, ...] + class_factor: int + state_table: Tuple[int, ...] + char_len_table: Tuple[int, ...] + name: str + language: str # Optional key + +else: + CodingStateMachineDict = dict diff --git a/libs/common/chardet/cp949prober.py b/libs/common/chardet/cp949prober.py index efd793ab..fa7307ed 100644 --- a/libs/common/chardet/cp949prober.py +++ b/libs/common/chardet/cp949prober.py @@ -32,8 +32,8 @@ from .mbcssm import CP949_SM_MODEL class CP949Prober(MultiByteCharSetProber): - def __init__(self): - super(CP949Prober, self).__init__() + def __init__(self) -> None: + super().__init__() self.coding_sm = CodingStateMachine(CP949_SM_MODEL) # NOTE: CP949 is a superset of EUC-KR, so the distribution should be # not different. @@ -41,9 +41,9 @@ class CP949Prober(MultiByteCharSetProber): self.reset() @property - def charset_name(self): + def charset_name(self) -> str: return "CP949" @property - def language(self): + def language(self) -> str: return "Korean" diff --git a/libs/common/chardet/enums.py b/libs/common/chardet/enums.py index 04512072..5e3e1982 100644 --- a/libs/common/chardet/enums.py +++ b/libs/common/chardet/enums.py @@ -4,21 +4,26 @@ All of the Enums that are used throughout the chardet package. :author: Dan Blanchard (dan.blanchard@gmail.com) """ +from enum import Enum, Flag -class InputState(object): + +class InputState: """ This enum represents the different states a universal detector can be in. """ + PURE_ASCII = 0 ESC_ASCII = 1 HIGH_BYTE = 2 -class LanguageFilter(object): +class LanguageFilter(Flag): """ This enum represents the different language filters we can apply to a ``UniversalDetector``. """ + + NONE = 0x00 CHINESE_SIMPLIFIED = 0x01 CHINESE_TRADITIONAL = 0x02 JAPANESE = 0x04 @@ -29,46 +34,50 @@ class LanguageFilter(object): CJK = CHINESE | JAPANESE | KOREAN -class ProbingState(object): +class ProbingState(Enum): """ This enum represents the different states a prober can be in. """ + DETECTING = 0 FOUND_IT = 1 NOT_ME = 2 -class MachineState(object): +class MachineState: """ This enum represents the different states a state machine can be in. """ + START = 0 ERROR = 1 ITS_ME = 2 -class SequenceLikelihood(object): +class SequenceLikelihood: """ This enum represents the likelihood of a character following the previous one. """ + NEGATIVE = 0 UNLIKELY = 1 LIKELY = 2 POSITIVE = 3 @classmethod - def get_num_categories(cls): + def get_num_categories(cls) -> int: """:returns: The number of likelihood categories in the enum.""" return 4 -class CharacterCategory(object): +class CharacterCategory: """ This enum represents the different categories language models for ``SingleByteCharsetProber`` put characters into. Anything less than CONTROL is considered a letter. """ + UNDEFINED = 255 LINE_BREAK = 254 SYMBOL = 253 diff --git a/libs/common/chardet/escprober.py b/libs/common/chardet/escprober.py index c70493f2..fd713830 100644 --- a/libs/common/chardet/escprober.py +++ b/libs/common/chardet/escprober.py @@ -25,11 +25,17 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### +from typing import Optional, Union + from .charsetprober import CharSetProber from .codingstatemachine import CodingStateMachine -from .enums import LanguageFilter, ProbingState, MachineState -from .escsm import (HZ_SM_MODEL, ISO2022CN_SM_MODEL, ISO2022JP_SM_MODEL, - ISO2022KR_SM_MODEL) +from .enums import LanguageFilter, MachineState, ProbingState +from .escsm import ( + HZ_SM_MODEL, + ISO2022CN_SM_MODEL, + ISO2022JP_SM_MODEL, + ISO2022KR_SM_MODEL, +) class EscCharSetProber(CharSetProber): @@ -39,8 +45,8 @@ class EscCharSetProber(CharSetProber): identify these encodings. """ - def __init__(self, lang_filter=None): - super(EscCharSetProber, self).__init__(lang_filter=lang_filter) + def __init__(self, lang_filter: LanguageFilter = LanguageFilter.NONE) -> None: + super().__init__(lang_filter=lang_filter) self.coding_sm = [] if self.lang_filter & LanguageFilter.CHINESE_SIMPLIFIED: self.coding_sm.append(CodingStateMachine(HZ_SM_MODEL)) @@ -49,17 +55,15 @@ class EscCharSetProber(CharSetProber): self.coding_sm.append(CodingStateMachine(ISO2022JP_SM_MODEL)) if self.lang_filter & LanguageFilter.KOREAN: self.coding_sm.append(CodingStateMachine(ISO2022KR_SM_MODEL)) - self.active_sm_count = None - self._detected_charset = None - self._detected_language = None - self._state = None + self.active_sm_count = 0 + self._detected_charset: Optional[str] = None + self._detected_language: Optional[str] = None + self._state = ProbingState.DETECTING self.reset() - def reset(self): - super(EscCharSetProber, self).reset() + def reset(self) -> None: + super().reset() for coding_sm in self.coding_sm: - if not coding_sm: - continue coding_sm.active = True coding_sm.reset() self.active_sm_count = len(self.coding_sm) @@ -67,23 +71,20 @@ class EscCharSetProber(CharSetProber): self._detected_language = None @property - def charset_name(self): + def charset_name(self) -> Optional[str]: return self._detected_charset @property - def language(self): + def language(self) -> Optional[str]: return self._detected_language - def get_confidence(self): - if self._detected_charset: - return 0.99 - else: - return 0.00 + def get_confidence(self) -> float: + return 0.99 if self._detected_charset else 0.00 - def feed(self, byte_str): + def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState: for c in byte_str: for coding_sm in self.coding_sm: - if not coding_sm or not coding_sm.active: + if not coding_sm.active: continue coding_state = coding_sm.next_state(c) if coding_state == MachineState.ERROR: diff --git a/libs/common/chardet/escsm.py b/libs/common/chardet/escsm.py index 0069523a..11d4adf7 100644 --- a/libs/common/chardet/escsm.py +++ b/libs/common/chardet/escsm.py @@ -12,7 +12,7 @@ # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. +# version 2.1 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -20,227 +20,242 @@ # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA # 02110-1301 USA ######################### END LICENSE BLOCK ######################### +from .codingstatemachinedict import CodingStateMachineDict from .enums import MachineState +# fmt: off HZ_CLS = ( -1,0,0,0,0,0,0,0, # 00 - 07 -0,0,0,0,0,0,0,0, # 08 - 0f -0,0,0,0,0,0,0,0, # 10 - 17 -0,0,0,1,0,0,0,0, # 18 - 1f -0,0,0,0,0,0,0,0, # 20 - 27 -0,0,0,0,0,0,0,0, # 28 - 2f -0,0,0,0,0,0,0,0, # 30 - 37 -0,0,0,0,0,0,0,0, # 38 - 3f -0,0,0,0,0,0,0,0, # 40 - 47 -0,0,0,0,0,0,0,0, # 48 - 4f -0,0,0,0,0,0,0,0, # 50 - 57 -0,0,0,0,0,0,0,0, # 58 - 5f -0,0,0,0,0,0,0,0, # 60 - 67 -0,0,0,0,0,0,0,0, # 68 - 6f -0,0,0,0,0,0,0,0, # 70 - 77 -0,0,0,4,0,5,2,0, # 78 - 7f -1,1,1,1,1,1,1,1, # 80 - 87 -1,1,1,1,1,1,1,1, # 88 - 8f -1,1,1,1,1,1,1,1, # 90 - 97 -1,1,1,1,1,1,1,1, # 98 - 9f -1,1,1,1,1,1,1,1, # a0 - a7 -1,1,1,1,1,1,1,1, # a8 - af -1,1,1,1,1,1,1,1, # b0 - b7 -1,1,1,1,1,1,1,1, # b8 - bf -1,1,1,1,1,1,1,1, # c0 - c7 -1,1,1,1,1,1,1,1, # c8 - cf -1,1,1,1,1,1,1,1, # d0 - d7 -1,1,1,1,1,1,1,1, # d8 - df -1,1,1,1,1,1,1,1, # e0 - e7 -1,1,1,1,1,1,1,1, # e8 - ef -1,1,1,1,1,1,1,1, # f0 - f7 -1,1,1,1,1,1,1,1, # f8 - ff + 1, 0, 0, 0, 0, 0, 0, 0, # 00 - 07 + 0, 0, 0, 0, 0, 0, 0, 0, # 08 - 0f + 0, 0, 0, 0, 0, 0, 0, 0, # 10 - 17 + 0, 0, 0, 1, 0, 0, 0, 0, # 18 - 1f + 0, 0, 0, 0, 0, 0, 0, 0, # 20 - 27 + 0, 0, 0, 0, 0, 0, 0, 0, # 28 - 2f + 0, 0, 0, 0, 0, 0, 0, 0, # 30 - 37 + 0, 0, 0, 0, 0, 0, 0, 0, # 38 - 3f + 0, 0, 0, 0, 0, 0, 0, 0, # 40 - 47 + 0, 0, 0, 0, 0, 0, 0, 0, # 48 - 4f + 0, 0, 0, 0, 0, 0, 0, 0, # 50 - 57 + 0, 0, 0, 0, 0, 0, 0, 0, # 58 - 5f + 0, 0, 0, 0, 0, 0, 0, 0, # 60 - 67 + 0, 0, 0, 0, 0, 0, 0, 0, # 68 - 6f + 0, 0, 0, 0, 0, 0, 0, 0, # 70 - 77 + 0, 0, 0, 4, 0, 5, 2, 0, # 78 - 7f + 1, 1, 1, 1, 1, 1, 1, 1, # 80 - 87 + 1, 1, 1, 1, 1, 1, 1, 1, # 88 - 8f + 1, 1, 1, 1, 1, 1, 1, 1, # 90 - 97 + 1, 1, 1, 1, 1, 1, 1, 1, # 98 - 9f + 1, 1, 1, 1, 1, 1, 1, 1, # a0 - a7 + 1, 1, 1, 1, 1, 1, 1, 1, # a8 - af + 1, 1, 1, 1, 1, 1, 1, 1, # b0 - b7 + 1, 1, 1, 1, 1, 1, 1, 1, # b8 - bf + 1, 1, 1, 1, 1, 1, 1, 1, # c0 - c7 + 1, 1, 1, 1, 1, 1, 1, 1, # c8 - cf + 1, 1, 1, 1, 1, 1, 1, 1, # d0 - d7 + 1, 1, 1, 1, 1, 1, 1, 1, # d8 - df + 1, 1, 1, 1, 1, 1, 1, 1, # e0 - e7 + 1, 1, 1, 1, 1, 1, 1, 1, # e8 - ef + 1, 1, 1, 1, 1, 1, 1, 1, # f0 - f7 + 1, 1, 1, 1, 1, 1, 1, 1, # f8 - ff ) HZ_ST = ( -MachineState.START,MachineState.ERROR, 3,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,# 00-07 -MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 08-0f -MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START, 4,MachineState.ERROR,# 10-17 - 5,MachineState.ERROR, 6,MachineState.ERROR, 5, 5, 4,MachineState.ERROR,# 18-1f - 4,MachineState.ERROR, 4, 4, 4,MachineState.ERROR, 4,MachineState.ERROR,# 20-27 - 4,MachineState.ITS_ME,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 28-2f +MachineState.START, MachineState.ERROR, 3, MachineState.START, MachineState.START, MachineState.START, MachineState.ERROR, MachineState.ERROR, # 00-07 +MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, # 08-0f +MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, MachineState.START, MachineState.START, 4, MachineState.ERROR, # 10-17 + 5, MachineState.ERROR, 6, MachineState.ERROR, 5, 5, 4, MachineState.ERROR, # 18-1f + 4, MachineState.ERROR, 4, 4, 4, MachineState.ERROR, 4, MachineState.ERROR, # 20-27 + 4, MachineState.ITS_ME, MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.START, # 28-2f ) +# fmt: on HZ_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0) -HZ_SM_MODEL = {'class_table': HZ_CLS, - 'class_factor': 6, - 'state_table': HZ_ST, - 'char_len_table': HZ_CHAR_LEN_TABLE, - 'name': "HZ-GB-2312", - 'language': 'Chinese'} +HZ_SM_MODEL: CodingStateMachineDict = { + "class_table": HZ_CLS, + "class_factor": 6, + "state_table": HZ_ST, + "char_len_table": HZ_CHAR_LEN_TABLE, + "name": "HZ-GB-2312", + "language": "Chinese", +} +# fmt: off ISO2022CN_CLS = ( -2,0,0,0,0,0,0,0, # 00 - 07 -0,0,0,0,0,0,0,0, # 08 - 0f -0,0,0,0,0,0,0,0, # 10 - 17 -0,0,0,1,0,0,0,0, # 18 - 1f -0,0,0,0,0,0,0,0, # 20 - 27 -0,3,0,0,0,0,0,0, # 28 - 2f -0,0,0,0,0,0,0,0, # 30 - 37 -0,0,0,0,0,0,0,0, # 38 - 3f -0,0,0,4,0,0,0,0, # 40 - 47 -0,0,0,0,0,0,0,0, # 48 - 4f -0,0,0,0,0,0,0,0, # 50 - 57 -0,0,0,0,0,0,0,0, # 58 - 5f -0,0,0,0,0,0,0,0, # 60 - 67 -0,0,0,0,0,0,0,0, # 68 - 6f -0,0,0,0,0,0,0,0, # 70 - 77 -0,0,0,0,0,0,0,0, # 78 - 7f -2,2,2,2,2,2,2,2, # 80 - 87 -2,2,2,2,2,2,2,2, # 88 - 8f -2,2,2,2,2,2,2,2, # 90 - 97 -2,2,2,2,2,2,2,2, # 98 - 9f -2,2,2,2,2,2,2,2, # a0 - a7 -2,2,2,2,2,2,2,2, # a8 - af -2,2,2,2,2,2,2,2, # b0 - b7 -2,2,2,2,2,2,2,2, # b8 - bf -2,2,2,2,2,2,2,2, # c0 - c7 -2,2,2,2,2,2,2,2, # c8 - cf -2,2,2,2,2,2,2,2, # d0 - d7 -2,2,2,2,2,2,2,2, # d8 - df -2,2,2,2,2,2,2,2, # e0 - e7 -2,2,2,2,2,2,2,2, # e8 - ef -2,2,2,2,2,2,2,2, # f0 - f7 -2,2,2,2,2,2,2,2, # f8 - ff + 2, 0, 0, 0, 0, 0, 0, 0, # 00 - 07 + 0, 0, 0, 0, 0, 0, 0, 0, # 08 - 0f + 0, 0, 0, 0, 0, 0, 0, 0, # 10 - 17 + 0, 0, 0, 1, 0, 0, 0, 0, # 18 - 1f + 0, 0, 0, 0, 0, 0, 0, 0, # 20 - 27 + 0, 3, 0, 0, 0, 0, 0, 0, # 28 - 2f + 0, 0, 0, 0, 0, 0, 0, 0, # 30 - 37 + 0, 0, 0, 0, 0, 0, 0, 0, # 38 - 3f + 0, 0, 0, 4, 0, 0, 0, 0, # 40 - 47 + 0, 0, 0, 0, 0, 0, 0, 0, # 48 - 4f + 0, 0, 0, 0, 0, 0, 0, 0, # 50 - 57 + 0, 0, 0, 0, 0, 0, 0, 0, # 58 - 5f + 0, 0, 0, 0, 0, 0, 0, 0, # 60 - 67 + 0, 0, 0, 0, 0, 0, 0, 0, # 68 - 6f + 0, 0, 0, 0, 0, 0, 0, 0, # 70 - 77 + 0, 0, 0, 0, 0, 0, 0, 0, # 78 - 7f + 2, 2, 2, 2, 2, 2, 2, 2, # 80 - 87 + 2, 2, 2, 2, 2, 2, 2, 2, # 88 - 8f + 2, 2, 2, 2, 2, 2, 2, 2, # 90 - 97 + 2, 2, 2, 2, 2, 2, 2, 2, # 98 - 9f + 2, 2, 2, 2, 2, 2, 2, 2, # a0 - a7 + 2, 2, 2, 2, 2, 2, 2, 2, # a8 - af + 2, 2, 2, 2, 2, 2, 2, 2, # b0 - b7 + 2, 2, 2, 2, 2, 2, 2, 2, # b8 - bf + 2, 2, 2, 2, 2, 2, 2, 2, # c0 - c7 + 2, 2, 2, 2, 2, 2, 2, 2, # c8 - cf + 2, 2, 2, 2, 2, 2, 2, 2, # d0 - d7 + 2, 2, 2, 2, 2, 2, 2, 2, # d8 - df + 2, 2, 2, 2, 2, 2, 2, 2, # e0 - e7 + 2, 2, 2, 2, 2, 2, 2, 2, # e8 - ef + 2, 2, 2, 2, 2, 2, 2, 2, # f0 - f7 + 2, 2, 2, 2, 2, 2, 2, 2, # f8 - ff ) ISO2022CN_ST = ( -MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 00-07 -MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 08-0f -MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 10-17 -MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,# 18-1f -MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 20-27 - 5, 6,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 28-2f -MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 30-37 -MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,# 38-3f + MachineState.START, 3, MachineState.ERROR, MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.START, # 00-07 + MachineState.START, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, # 08-0f + MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, # 10-17 + MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, 4, MachineState.ERROR, # 18-1f + MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, # 20-27 + 5, 6, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, # 28-2f + MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, # 30-37 + MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ERROR, MachineState.START, # 38-3f ) +# fmt: on ISO2022CN_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0) -ISO2022CN_SM_MODEL = {'class_table': ISO2022CN_CLS, - 'class_factor': 9, - 'state_table': ISO2022CN_ST, - 'char_len_table': ISO2022CN_CHAR_LEN_TABLE, - 'name': "ISO-2022-CN", - 'language': 'Chinese'} +ISO2022CN_SM_MODEL: CodingStateMachineDict = { + "class_table": ISO2022CN_CLS, + "class_factor": 9, + "state_table": ISO2022CN_ST, + "char_len_table": ISO2022CN_CHAR_LEN_TABLE, + "name": "ISO-2022-CN", + "language": "Chinese", +} +# fmt: off ISO2022JP_CLS = ( -2,0,0,0,0,0,0,0, # 00 - 07 -0,0,0,0,0,0,2,2, # 08 - 0f -0,0,0,0,0,0,0,0, # 10 - 17 -0,0,0,1,0,0,0,0, # 18 - 1f -0,0,0,0,7,0,0,0, # 20 - 27 -3,0,0,0,0,0,0,0, # 28 - 2f -0,0,0,0,0,0,0,0, # 30 - 37 -0,0,0,0,0,0,0,0, # 38 - 3f -6,0,4,0,8,0,0,0, # 40 - 47 -0,9,5,0,0,0,0,0, # 48 - 4f -0,0,0,0,0,0,0,0, # 50 - 57 -0,0,0,0,0,0,0,0, # 58 - 5f -0,0,0,0,0,0,0,0, # 60 - 67 -0,0,0,0,0,0,0,0, # 68 - 6f -0,0,0,0,0,0,0,0, # 70 - 77 -0,0,0,0,0,0,0,0, # 78 - 7f -2,2,2,2,2,2,2,2, # 80 - 87 -2,2,2,2,2,2,2,2, # 88 - 8f -2,2,2,2,2,2,2,2, # 90 - 97 -2,2,2,2,2,2,2,2, # 98 - 9f -2,2,2,2,2,2,2,2, # a0 - a7 -2,2,2,2,2,2,2,2, # a8 - af -2,2,2,2,2,2,2,2, # b0 - b7 -2,2,2,2,2,2,2,2, # b8 - bf -2,2,2,2,2,2,2,2, # c0 - c7 -2,2,2,2,2,2,2,2, # c8 - cf -2,2,2,2,2,2,2,2, # d0 - d7 -2,2,2,2,2,2,2,2, # d8 - df -2,2,2,2,2,2,2,2, # e0 - e7 -2,2,2,2,2,2,2,2, # e8 - ef -2,2,2,2,2,2,2,2, # f0 - f7 -2,2,2,2,2,2,2,2, # f8 - ff + 2, 0, 0, 0, 0, 0, 0, 0, # 00 - 07 + 0, 0, 0, 0, 0, 0, 2, 2, # 08 - 0f + 0, 0, 0, 0, 0, 0, 0, 0, # 10 - 17 + 0, 0, 0, 1, 0, 0, 0, 0, # 18 - 1f + 0, 0, 0, 0, 7, 0, 0, 0, # 20 - 27 + 3, 0, 0, 0, 0, 0, 0, 0, # 28 - 2f + 0, 0, 0, 0, 0, 0, 0, 0, # 30 - 37 + 0, 0, 0, 0, 0, 0, 0, 0, # 38 - 3f + 6, 0, 4, 0, 8, 0, 0, 0, # 40 - 47 + 0, 9, 5, 0, 0, 0, 0, 0, # 48 - 4f + 0, 0, 0, 0, 0, 0, 0, 0, # 50 - 57 + 0, 0, 0, 0, 0, 0, 0, 0, # 58 - 5f + 0, 0, 0, 0, 0, 0, 0, 0, # 60 - 67 + 0, 0, 0, 0, 0, 0, 0, 0, # 68 - 6f + 0, 0, 0, 0, 0, 0, 0, 0, # 70 - 77 + 0, 0, 0, 0, 0, 0, 0, 0, # 78 - 7f + 2, 2, 2, 2, 2, 2, 2, 2, # 80 - 87 + 2, 2, 2, 2, 2, 2, 2, 2, # 88 - 8f + 2, 2, 2, 2, 2, 2, 2, 2, # 90 - 97 + 2, 2, 2, 2, 2, 2, 2, 2, # 98 - 9f + 2, 2, 2, 2, 2, 2, 2, 2, # a0 - a7 + 2, 2, 2, 2, 2, 2, 2, 2, # a8 - af + 2, 2, 2, 2, 2, 2, 2, 2, # b0 - b7 + 2, 2, 2, 2, 2, 2, 2, 2, # b8 - bf + 2, 2, 2, 2, 2, 2, 2, 2, # c0 - c7 + 2, 2, 2, 2, 2, 2, 2, 2, # c8 - cf + 2, 2, 2, 2, 2, 2, 2, 2, # d0 - d7 + 2, 2, 2, 2, 2, 2, 2, 2, # d8 - df + 2, 2, 2, 2, 2, 2, 2, 2, # e0 - e7 + 2, 2, 2, 2, 2, 2, 2, 2, # e8 - ef + 2, 2, 2, 2, 2, 2, 2, 2, # f0 - f7 + 2, 2, 2, 2, 2, 2, 2, 2, # f8 - ff ) ISO2022JP_ST = ( -MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 00-07 -MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 08-0f -MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 10-17 -MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,# 18-1f -MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,MachineState.ERROR,# 20-27 -MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 6,MachineState.ITS_ME,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,# 28-2f -MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,# 30-37 -MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 38-3f -MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,MachineState.START,# 40-47 + MachineState.START, 3, MachineState.ERROR, MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.START, # 00-07 + MachineState.START, MachineState.START, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, # 08-0f + MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, # 10-17 + MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, # 18-1f + MachineState.ERROR, 5, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, 4, MachineState.ERROR, MachineState.ERROR, # 20-27 + MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, 6, MachineState.ITS_ME, MachineState.ERROR, MachineState.ITS_ME, MachineState.ERROR, # 28-2f + MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ITS_ME, # 30-37 + MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, # 38-3f + MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ERROR, MachineState.START, MachineState.START, # 40-47 ) +# fmt: on ISO2022JP_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0) -ISO2022JP_SM_MODEL = {'class_table': ISO2022JP_CLS, - 'class_factor': 10, - 'state_table': ISO2022JP_ST, - 'char_len_table': ISO2022JP_CHAR_LEN_TABLE, - 'name': "ISO-2022-JP", - 'language': 'Japanese'} +ISO2022JP_SM_MODEL: CodingStateMachineDict = { + "class_table": ISO2022JP_CLS, + "class_factor": 10, + "state_table": ISO2022JP_ST, + "char_len_table": ISO2022JP_CHAR_LEN_TABLE, + "name": "ISO-2022-JP", + "language": "Japanese", +} +# fmt: off ISO2022KR_CLS = ( -2,0,0,0,0,0,0,0, # 00 - 07 -0,0,0,0,0,0,0,0, # 08 - 0f -0,0,0,0,0,0,0,0, # 10 - 17 -0,0,0,1,0,0,0,0, # 18 - 1f -0,0,0,0,3,0,0,0, # 20 - 27 -0,4,0,0,0,0,0,0, # 28 - 2f -0,0,0,0,0,0,0,0, # 30 - 37 -0,0,0,0,0,0,0,0, # 38 - 3f -0,0,0,5,0,0,0,0, # 40 - 47 -0,0,0,0,0,0,0,0, # 48 - 4f -0,0,0,0,0,0,0,0, # 50 - 57 -0,0,0,0,0,0,0,0, # 58 - 5f -0,0,0,0,0,0,0,0, # 60 - 67 -0,0,0,0,0,0,0,0, # 68 - 6f -0,0,0,0,0,0,0,0, # 70 - 77 -0,0,0,0,0,0,0,0, # 78 - 7f -2,2,2,2,2,2,2,2, # 80 - 87 -2,2,2,2,2,2,2,2, # 88 - 8f -2,2,2,2,2,2,2,2, # 90 - 97 -2,2,2,2,2,2,2,2, # 98 - 9f -2,2,2,2,2,2,2,2, # a0 - a7 -2,2,2,2,2,2,2,2, # a8 - af -2,2,2,2,2,2,2,2, # b0 - b7 -2,2,2,2,2,2,2,2, # b8 - bf -2,2,2,2,2,2,2,2, # c0 - c7 -2,2,2,2,2,2,2,2, # c8 - cf -2,2,2,2,2,2,2,2, # d0 - d7 -2,2,2,2,2,2,2,2, # d8 - df -2,2,2,2,2,2,2,2, # e0 - e7 -2,2,2,2,2,2,2,2, # e8 - ef -2,2,2,2,2,2,2,2, # f0 - f7 -2,2,2,2,2,2,2,2, # f8 - ff + 2, 0, 0, 0, 0, 0, 0, 0, # 00 - 07 + 0, 0, 0, 0, 0, 0, 0, 0, # 08 - 0f + 0, 0, 0, 0, 0, 0, 0, 0, # 10 - 17 + 0, 0, 0, 1, 0, 0, 0, 0, # 18 - 1f + 0, 0, 0, 0, 3, 0, 0, 0, # 20 - 27 + 0, 4, 0, 0, 0, 0, 0, 0, # 28 - 2f + 0, 0, 0, 0, 0, 0, 0, 0, # 30 - 37 + 0, 0, 0, 0, 0, 0, 0, 0, # 38 - 3f + 0, 0, 0, 5, 0, 0, 0, 0, # 40 - 47 + 0, 0, 0, 0, 0, 0, 0, 0, # 48 - 4f + 0, 0, 0, 0, 0, 0, 0, 0, # 50 - 57 + 0, 0, 0, 0, 0, 0, 0, 0, # 58 - 5f + 0, 0, 0, 0, 0, 0, 0, 0, # 60 - 67 + 0, 0, 0, 0, 0, 0, 0, 0, # 68 - 6f + 0, 0, 0, 0, 0, 0, 0, 0, # 70 - 77 + 0, 0, 0, 0, 0, 0, 0, 0, # 78 - 7f + 2, 2, 2, 2, 2, 2, 2, 2, # 80 - 87 + 2, 2, 2, 2, 2, 2, 2, 2, # 88 - 8f + 2, 2, 2, 2, 2, 2, 2, 2, # 90 - 97 + 2, 2, 2, 2, 2, 2, 2, 2, # 98 - 9f + 2, 2, 2, 2, 2, 2, 2, 2, # a0 - a7 + 2, 2, 2, 2, 2, 2, 2, 2, # a8 - af + 2, 2, 2, 2, 2, 2, 2, 2, # b0 - b7 + 2, 2, 2, 2, 2, 2, 2, 2, # b8 - bf + 2, 2, 2, 2, 2, 2, 2, 2, # c0 - c7 + 2, 2, 2, 2, 2, 2, 2, 2, # c8 - cf + 2, 2, 2, 2, 2, 2, 2, 2, # d0 - d7 + 2, 2, 2, 2, 2, 2, 2, 2, # d8 - df + 2, 2, 2, 2, 2, 2, 2, 2, # e0 - e7 + 2, 2, 2, 2, 2, 2, 2, 2, # e8 - ef + 2, 2, 2, 2, 2, 2, 2, 2, # f0 - f7 + 2, 2, 2, 2, 2, 2, 2, 2, # f8 - ff ) ISO2022KR_ST = ( -MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,# 00-07 -MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 08-0f -MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,MachineState.ERROR,# 10-17 -MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 18-1f -MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 20-27 + MachineState.START, 3, MachineState.ERROR, MachineState.START, MachineState.START, MachineState.START, MachineState.ERROR, MachineState.ERROR, # 00-07 + MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, # 08-0f + MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, 4, MachineState.ERROR, MachineState.ERROR, # 10-17 + MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, 5, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, # 18-1f + MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.START, MachineState.START, MachineState.START, MachineState.START, # 20-27 ) +# fmt: on ISO2022KR_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0) -ISO2022KR_SM_MODEL = {'class_table': ISO2022KR_CLS, - 'class_factor': 6, - 'state_table': ISO2022KR_ST, - 'char_len_table': ISO2022KR_CHAR_LEN_TABLE, - 'name': "ISO-2022-KR", - 'language': 'Korean'} - - +ISO2022KR_SM_MODEL: CodingStateMachineDict = { + "class_table": ISO2022KR_CLS, + "class_factor": 6, + "state_table": ISO2022KR_ST, + "char_len_table": ISO2022KR_CHAR_LEN_TABLE, + "name": "ISO-2022-KR", + "language": "Korean", +} diff --git a/libs/common/chardet/eucjpprober.py b/libs/common/chardet/eucjpprober.py index 20ce8f7d..39487f40 100644 --- a/libs/common/chardet/eucjpprober.py +++ b/libs/common/chardet/eucjpprober.py @@ -25,68 +25,78 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -from .enums import ProbingState, MachineState -from .mbcharsetprober import MultiByteCharSetProber -from .codingstatemachine import CodingStateMachine +from typing import Union + from .chardistribution import EUCJPDistributionAnalysis +from .codingstatemachine import CodingStateMachine +from .enums import MachineState, ProbingState from .jpcntx import EUCJPContextAnalysis +from .mbcharsetprober import MultiByteCharSetProber from .mbcssm import EUCJP_SM_MODEL class EUCJPProber(MultiByteCharSetProber): - def __init__(self): - super(EUCJPProber, self).__init__() + def __init__(self) -> None: + super().__init__() self.coding_sm = CodingStateMachine(EUCJP_SM_MODEL) self.distribution_analyzer = EUCJPDistributionAnalysis() self.context_analyzer = EUCJPContextAnalysis() self.reset() - def reset(self): - super(EUCJPProber, self).reset() + def reset(self) -> None: + super().reset() self.context_analyzer.reset() @property - def charset_name(self): + def charset_name(self) -> str: return "EUC-JP" @property - def language(self): + def language(self) -> str: return "Japanese" - def feed(self, byte_str): - for i in range(len(byte_str)): - # PY3K: byte_str is a byte array, so byte_str[i] is an int, not a byte - coding_state = self.coding_sm.next_state(byte_str[i]) + def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState: + assert self.coding_sm is not None + assert self.distribution_analyzer is not None + + for i, byte in enumerate(byte_str): + # PY3K: byte_str is a byte array, so byte is an int, not a byte + coding_state = self.coding_sm.next_state(byte) if coding_state == MachineState.ERROR: - self.logger.debug('%s %s prober hit error at byte %s', - self.charset_name, self.language, i) + self.logger.debug( + "%s %s prober hit error at byte %s", + self.charset_name, + self.language, + i, + ) self._state = ProbingState.NOT_ME break - elif coding_state == MachineState.ITS_ME: + if coding_state == MachineState.ITS_ME: self._state = ProbingState.FOUND_IT break - elif coding_state == MachineState.START: + if coding_state == MachineState.START: char_len = self.coding_sm.get_current_charlen() if i == 0: - self._last_char[1] = byte_str[0] + self._last_char[1] = byte self.context_analyzer.feed(self._last_char, char_len) self.distribution_analyzer.feed(self._last_char, char_len) else: - self.context_analyzer.feed(byte_str[i - 1:i + 1], - char_len) - self.distribution_analyzer.feed(byte_str[i - 1:i + 1], - char_len) + self.context_analyzer.feed(byte_str[i - 1 : i + 1], char_len) + self.distribution_analyzer.feed(byte_str[i - 1 : i + 1], char_len) self._last_char[0] = byte_str[-1] if self.state == ProbingState.DETECTING: - if (self.context_analyzer.got_enough_data() and - (self.get_confidence() > self.SHORTCUT_THRESHOLD)): + if self.context_analyzer.got_enough_data() and ( + self.get_confidence() > self.SHORTCUT_THRESHOLD + ): self._state = ProbingState.FOUND_IT return self.state - def get_confidence(self): + def get_confidence(self) -> float: + assert self.distribution_analyzer is not None + context_conf = self.context_analyzer.get_confidence() distrib_conf = self.distribution_analyzer.get_confidence() return max(context_conf, distrib_conf) diff --git a/libs/common/chardet/euckrfreq.py b/libs/common/chardet/euckrfreq.py index b68078cb..7dc3b103 100644 --- a/libs/common/chardet/euckrfreq.py +++ b/libs/common/chardet/euckrfreq.py @@ -43,6 +43,7 @@ EUCKR_TYPICAL_DISTRIBUTION_RATIO = 6.0 EUCKR_TABLE_SIZE = 2352 # Char to FreqOrder table , +# fmt: off EUCKR_CHAR_TO_FREQ_ORDER = ( 13, 130, 120,1396, 481,1719,1720, 328, 609, 212,1721, 707, 400, 299,1722, 87, 1397,1723, 104, 536,1117,1203,1724,1267, 685,1268, 508,1725,1726,1727,1728,1398, @@ -192,4 +193,4 @@ EUCKR_CHAR_TO_FREQ_ORDER = ( 2629,2630,2631, 924, 648, 863, 603,2632,2633, 934,1540, 864, 865,2634, 642,1042, 670,1190,2635,2636,2637,2638, 168,2639, 652, 873, 542,1054,1541,2640,2641,2642, # 512, 256 ) - +# fmt: on diff --git a/libs/common/chardet/euckrprober.py b/libs/common/chardet/euckrprober.py index 345a060d..1fc5de04 100644 --- a/libs/common/chardet/euckrprober.py +++ b/libs/common/chardet/euckrprober.py @@ -25,23 +25,23 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -from .mbcharsetprober import MultiByteCharSetProber -from .codingstatemachine import CodingStateMachine from .chardistribution import EUCKRDistributionAnalysis +from .codingstatemachine import CodingStateMachine +from .mbcharsetprober import MultiByteCharSetProber from .mbcssm import EUCKR_SM_MODEL class EUCKRProber(MultiByteCharSetProber): - def __init__(self): - super(EUCKRProber, self).__init__() + def __init__(self) -> None: + super().__init__() self.coding_sm = CodingStateMachine(EUCKR_SM_MODEL) self.distribution_analyzer = EUCKRDistributionAnalysis() self.reset() @property - def charset_name(self): + def charset_name(self) -> str: return "EUC-KR" @property - def language(self): + def language(self) -> str: return "Korean" diff --git a/libs/common/chardet/euctwfreq.py b/libs/common/chardet/euctwfreq.py index ed7a995a..4900ccc1 100644 --- a/libs/common/chardet/euctwfreq.py +++ b/libs/common/chardet/euctwfreq.py @@ -43,345 +43,346 @@ EUCTW_TYPICAL_DISTRIBUTION_RATIO = 0.75 -# Char to FreqOrder table , +# Char to FreqOrder table EUCTW_TABLE_SIZE = 5376 +# fmt: off EUCTW_CHAR_TO_FREQ_ORDER = ( - 1,1800,1506, 255,1431, 198, 9, 82, 6,7310, 177, 202,3615,1256,2808, 110, # 2742 -3735, 33,3241, 261, 76, 44,2113, 16,2931,2184,1176, 659,3868, 26,3404,2643, # 2758 -1198,3869,3313,4060, 410,2211, 302, 590, 361,1963, 8, 204, 58,4296,7311,1931, # 2774 - 63,7312,7313, 317,1614, 75, 222, 159,4061,2412,1480,7314,3500,3068, 224,2809, # 2790 -3616, 3, 10,3870,1471, 29,2774,1135,2852,1939, 873, 130,3242,1123, 312,7315, # 2806 -4297,2051, 507, 252, 682,7316, 142,1914, 124, 206,2932, 34,3501,3173, 64, 604, # 2822 -7317,2494,1976,1977, 155,1990, 645, 641,1606,7318,3405, 337, 72, 406,7319, 80, # 2838 - 630, 238,3174,1509, 263, 939,1092,2644, 756,1440,1094,3406, 449, 69,2969, 591, # 2854 - 179,2095, 471, 115,2034,1843, 60, 50,2970, 134, 806,1868, 734,2035,3407, 180, # 2870 - 995,1607, 156, 537,2893, 688,7320, 319,1305, 779,2144, 514,2374, 298,4298, 359, # 2886 -2495, 90,2707,1338, 663, 11, 906,1099,2545, 20,2436, 182, 532,1716,7321, 732, # 2902 -1376,4062,1311,1420,3175, 25,2312,1056, 113, 399, 382,1949, 242,3408,2467, 529, # 2918 -3243, 475,1447,3617,7322, 117, 21, 656, 810,1297,2295,2329,3502,7323, 126,4063, # 2934 - 706, 456, 150, 613,4299, 71,1118,2036,4064, 145,3069, 85, 835, 486,2114,1246, # 2950 -1426, 428, 727,1285,1015, 800, 106, 623, 303,1281,7324,2127,2354, 347,3736, 221, # 2966 -3503,3110,7325,1955,1153,4065, 83, 296,1199,3070, 192, 624, 93,7326, 822,1897, # 2982 -2810,3111, 795,2064, 991,1554,1542,1592, 27, 43,2853, 859, 139,1456, 860,4300, # 2998 - 437, 712,3871, 164,2392,3112, 695, 211,3017,2096, 195,3872,1608,3504,3505,3618, # 3014 -3873, 234, 811,2971,2097,3874,2229,1441,3506,1615,2375, 668,2076,1638, 305, 228, # 3030 -1664,4301, 467, 415,7327, 262,2098,1593, 239, 108, 300, 200,1033, 512,1247,2077, # 3046 -7328,7329,2173,3176,3619,2673, 593, 845,1062,3244, 88,1723,2037,3875,1950, 212, # 3062 - 266, 152, 149, 468,1898,4066,4302, 77, 187,7330,3018, 37, 5,2972,7331,3876, # 3078 -7332,7333, 39,2517,4303,2894,3177,2078, 55, 148, 74,4304, 545, 483,1474,1029, # 3094 -1665, 217,1869,1531,3113,1104,2645,4067, 24, 172,3507, 900,3877,3508,3509,4305, # 3110 - 32,1408,2811,1312, 329, 487,2355,2247,2708, 784,2674, 4,3019,3314,1427,1788, # 3126 - 188, 109, 499,7334,3620,1717,1789, 888,1217,3020,4306,7335,3510,7336,3315,1520, # 3142 -3621,3878, 196,1034, 775,7337,7338, 929,1815, 249, 439, 38,7339,1063,7340, 794, # 3158 -3879,1435,2296, 46, 178,3245,2065,7341,2376,7342, 214,1709,4307, 804, 35, 707, # 3174 - 324,3622,1601,2546, 140, 459,4068,7343,7344,1365, 839, 272, 978,2257,2572,3409, # 3190 -2128,1363,3623,1423, 697, 100,3071, 48, 70,1231, 495,3114,2193,7345,1294,7346, # 3206 -2079, 462, 586,1042,3246, 853, 256, 988, 185,2377,3410,1698, 434,1084,7347,3411, # 3222 - 314,2615,2775,4308,2330,2331, 569,2280, 637,1816,2518, 757,1162,1878,1616,3412, # 3238 - 287,1577,2115, 768,4309,1671,2854,3511,2519,1321,3737, 909,2413,7348,4069, 933, # 3254 -3738,7349,2052,2356,1222,4310, 765,2414,1322, 786,4311,7350,1919,1462,1677,2895, # 3270 -1699,7351,4312,1424,2437,3115,3624,2590,3316,1774,1940,3413,3880,4070, 309,1369, # 3286 -1130,2812, 364,2230,1653,1299,3881,3512,3882,3883,2646, 525,1085,3021, 902,2000, # 3302 -1475, 964,4313, 421,1844,1415,1057,2281, 940,1364,3116, 376,4314,4315,1381, 7, # 3318 -2520, 983,2378, 336,1710,2675,1845, 321,3414, 559,1131,3022,2742,1808,1132,1313, # 3334 - 265,1481,1857,7352, 352,1203,2813,3247, 167,1089, 420,2814, 776, 792,1724,3513, # 3350 -4071,2438,3248,7353,4072,7354, 446, 229, 333,2743, 901,3739,1200,1557,4316,2647, # 3366 -1920, 395,2744,2676,3740,4073,1835, 125, 916,3178,2616,4317,7355,7356,3741,7357, # 3382 -7358,7359,4318,3117,3625,1133,2547,1757,3415,1510,2313,1409,3514,7360,2145, 438, # 3398 -2591,2896,2379,3317,1068, 958,3023, 461, 311,2855,2677,4074,1915,3179,4075,1978, # 3414 - 383, 750,2745,2617,4076, 274, 539, 385,1278,1442,7361,1154,1964, 384, 561, 210, # 3430 - 98,1295,2548,3515,7362,1711,2415,1482,3416,3884,2897,1257, 129,7363,3742, 642, # 3446 - 523,2776,2777,2648,7364, 141,2231,1333, 68, 176, 441, 876, 907,4077, 603,2592, # 3462 - 710, 171,3417, 404, 549, 18,3118,2393,1410,3626,1666,7365,3516,4319,2898,4320, # 3478 -7366,2973, 368,7367, 146, 366, 99, 871,3627,1543, 748, 807,1586,1185, 22,2258, # 3494 - 379,3743,3180,7368,3181, 505,1941,2618,1991,1382,2314,7369, 380,2357, 218, 702, # 3510 -1817,1248,3418,3024,3517,3318,3249,7370,2974,3628, 930,3250,3744,7371, 59,7372, # 3526 - 585, 601,4078, 497,3419,1112,1314,4321,1801,7373,1223,1472,2174,7374, 749,1836, # 3542 - 690,1899,3745,1772,3885,1476, 429,1043,1790,2232,2116, 917,4079, 447,1086,1629, # 3558 -7375, 556,7376,7377,2020,1654, 844,1090, 105, 550, 966,1758,2815,1008,1782, 686, # 3574 -1095,7378,2282, 793,1602,7379,3518,2593,4322,4080,2933,2297,4323,3746, 980,2496, # 3590 - 544, 353, 527,4324, 908,2678,2899,7380, 381,2619,1942,1348,7381,1341,1252, 560, # 3606 -3072,7382,3420,2856,7383,2053, 973, 886,2080, 143,4325,7384,7385, 157,3886, 496, # 3622 -4081, 57, 840, 540,2038,4326,4327,3421,2117,1445, 970,2259,1748,1965,2081,4082, # 3638 -3119,1234,1775,3251,2816,3629, 773,1206,2129,1066,2039,1326,3887,1738,1725,4083, # 3654 - 279,3120, 51,1544,2594, 423,1578,2130,2066, 173,4328,1879,7386,7387,1583, 264, # 3670 - 610,3630,4329,2439, 280, 154,7388,7389,7390,1739, 338,1282,3073, 693,2857,1411, # 3686 -1074,3747,2440,7391,4330,7392,7393,1240, 952,2394,7394,2900,1538,2679, 685,1483, # 3702 -4084,2468,1436, 953,4085,2054,4331, 671,2395, 79,4086,2441,3252, 608, 567,2680, # 3718 -3422,4087,4088,1691, 393,1261,1791,2396,7395,4332,7396,7397,7398,7399,1383,1672, # 3734 -3748,3182,1464, 522,1119, 661,1150, 216, 675,4333,3888,1432,3519, 609,4334,2681, # 3750 -2397,7400,7401,7402,4089,3025, 0,7403,2469, 315, 231,2442, 301,3319,4335,2380, # 3766 -7404, 233,4090,3631,1818,4336,4337,7405, 96,1776,1315,2082,7406, 257,7407,1809, # 3782 -3632,2709,1139,1819,4091,2021,1124,2163,2778,1777,2649,7408,3074, 363,1655,3183, # 3798 -7409,2975,7410,7411,7412,3889,1567,3890, 718, 103,3184, 849,1443, 341,3320,2934, # 3814 -1484,7413,1712, 127, 67, 339,4092,2398, 679,1412, 821,7414,7415, 834, 738, 351, # 3830 -2976,2146, 846, 235,1497,1880, 418,1992,3749,2710, 186,1100,2147,2746,3520,1545, # 3846 -1355,2935,2858,1377, 583,3891,4093,2573,2977,7416,1298,3633,1078,2549,3634,2358, # 3862 - 78,3750,3751, 267,1289,2099,2001,1594,4094, 348, 369,1274,2194,2175,1837,4338, # 3878 -1820,2817,3635,2747,2283,2002,4339,2936,2748, 144,3321, 882,4340,3892,2749,3423, # 3894 -4341,2901,7417,4095,1726, 320,7418,3893,3026, 788,2978,7419,2818,1773,1327,2859, # 3910 -3894,2819,7420,1306,4342,2003,1700,3752,3521,2359,2650, 787,2022, 506, 824,3636, # 3926 - 534, 323,4343,1044,3322,2023,1900, 946,3424,7421,1778,1500,1678,7422,1881,4344, # 3942 - 165, 243,4345,3637,2521, 123, 683,4096, 764,4346, 36,3895,1792, 589,2902, 816, # 3958 - 626,1667,3027,2233,1639,1555,1622,3753,3896,7423,3897,2860,1370,1228,1932, 891, # 3974 -2083,2903, 304,4097,7424, 292,2979,2711,3522, 691,2100,4098,1115,4347, 118, 662, # 3990 -7425, 611,1156, 854,2381,1316,2861, 2, 386, 515,2904,7426,7427,3253, 868,2234, # 4006 -1486, 855,2651, 785,2212,3028,7428,1040,3185,3523,7429,3121, 448,7430,1525,7431, # 4022 -2164,4348,7432,3754,7433,4099,2820,3524,3122, 503, 818,3898,3123,1568, 814, 676, # 4038 -1444, 306,1749,7434,3755,1416,1030, 197,1428, 805,2821,1501,4349,7435,7436,7437, # 4054 -1993,7438,4350,7439,7440,2195, 13,2779,3638,2980,3124,1229,1916,7441,3756,2131, # 4070 -7442,4100,4351,2399,3525,7443,2213,1511,1727,1120,7444,7445, 646,3757,2443, 307, # 4086 -7446,7447,1595,3186,7448,7449,7450,3639,1113,1356,3899,1465,2522,2523,7451, 519, # 4102 -7452, 128,2132, 92,2284,1979,7453,3900,1512, 342,3125,2196,7454,2780,2214,1980, # 4118 -3323,7455, 290,1656,1317, 789, 827,2360,7456,3758,4352, 562, 581,3901,7457, 401, # 4134 -4353,2248, 94,4354,1399,2781,7458,1463,2024,4355,3187,1943,7459, 828,1105,4101, # 4150 -1262,1394,7460,4102, 605,4356,7461,1783,2862,7462,2822, 819,2101, 578,2197,2937, # 4166 -7463,1502, 436,3254,4103,3255,2823,3902,2905,3425,3426,7464,2712,2315,7465,7466, # 4182 -2332,2067, 23,4357, 193, 826,3759,2102, 699,1630,4104,3075, 390,1793,1064,3526, # 4198 -7467,1579,3076,3077,1400,7468,4105,1838,1640,2863,7469,4358,4359, 137,4106, 598, # 4214 -3078,1966, 780, 104, 974,2938,7470, 278, 899, 253, 402, 572, 504, 493,1339,7471, # 4230 -3903,1275,4360,2574,2550,7472,3640,3029,3079,2249, 565,1334,2713, 863, 41,7473, # 4246 -7474,4361,7475,1657,2333, 19, 463,2750,4107, 606,7476,2981,3256,1087,2084,1323, # 4262 -2652,2982,7477,1631,1623,1750,4108,2682,7478,2864, 791,2714,2653,2334, 232,2416, # 4278 -7479,2983,1498,7480,2654,2620, 755,1366,3641,3257,3126,2025,1609, 119,1917,3427, # 4294 - 862,1026,4109,7481,3904,3760,4362,3905,4363,2260,1951,2470,7482,1125, 817,4110, # 4310 -4111,3906,1513,1766,2040,1487,4112,3030,3258,2824,3761,3127,7483,7484,1507,7485, # 4326 -2683, 733, 40,1632,1106,2865, 345,4113, 841,2524, 230,4364,2984,1846,3259,3428, # 4342 -7486,1263, 986,3429,7487, 735, 879, 254,1137, 857, 622,1300,1180,1388,1562,3907, # 4358 -3908,2939, 967,2751,2655,1349, 592,2133,1692,3324,2985,1994,4114,1679,3909,1901, # 4374 -2185,7488, 739,3642,2715,1296,1290,7489,4115,2198,2199,1921,1563,2595,2551,1870, # 4390 -2752,2986,7490, 435,7491, 343,1108, 596, 17,1751,4365,2235,3430,3643,7492,4366, # 4406 - 294,3527,2940,1693, 477, 979, 281,2041,3528, 643,2042,3644,2621,2782,2261,1031, # 4422 -2335,2134,2298,3529,4367, 367,1249,2552,7493,3530,7494,4368,1283,3325,2004, 240, # 4438 -1762,3326,4369,4370, 836,1069,3128, 474,7495,2148,2525, 268,3531,7496,3188,1521, # 4454 -1284,7497,1658,1546,4116,7498,3532,3533,7499,4117,3327,2684,1685,4118, 961,1673, # 4470 -2622, 190,2005,2200,3762,4371,4372,7500, 570,2497,3645,1490,7501,4373,2623,3260, # 4486 -1956,4374, 584,1514, 396,1045,1944,7502,4375,1967,2444,7503,7504,4376,3910, 619, # 4502 -7505,3129,3261, 215,2006,2783,2553,3189,4377,3190,4378, 763,4119,3763,4379,7506, # 4518 -7507,1957,1767,2941,3328,3646,1174, 452,1477,4380,3329,3130,7508,2825,1253,2382, # 4534 -2186,1091,2285,4120, 492,7509, 638,1169,1824,2135,1752,3911, 648, 926,1021,1324, # 4550 -4381, 520,4382, 997, 847,1007, 892,4383,3764,2262,1871,3647,7510,2400,1784,4384, # 4566 -1952,2942,3080,3191,1728,4121,2043,3648,4385,2007,1701,3131,1551, 30,2263,4122, # 4582 -7511,2026,4386,3534,7512, 501,7513,4123, 594,3431,2165,1821,3535,3432,3536,3192, # 4598 - 829,2826,4124,7514,1680,3132,1225,4125,7515,3262,4387,4126,3133,2336,7516,4388, # 4614 -4127,7517,3912,3913,7518,1847,2383,2596,3330,7519,4389, 374,3914, 652,4128,4129, # 4630 - 375,1140, 798,7520,7521,7522,2361,4390,2264, 546,1659, 138,3031,2445,4391,7523, # 4646 -2250, 612,1848, 910, 796,3765,1740,1371, 825,3766,3767,7524,2906,2554,7525, 692, # 4662 - 444,3032,2624, 801,4392,4130,7526,1491, 244,1053,3033,4131,4132, 340,7527,3915, # 4678 -1041,2987, 293,1168, 87,1357,7528,1539, 959,7529,2236, 721, 694,4133,3768, 219, # 4694 -1478, 644,1417,3331,2656,1413,1401,1335,1389,3916,7530,7531,2988,2362,3134,1825, # 4710 - 730,1515, 184,2827, 66,4393,7532,1660,2943, 246,3332, 378,1457, 226,3433, 975, # 4726 -3917,2944,1264,3537, 674, 696,7533, 163,7534,1141,2417,2166, 713,3538,3333,4394, # 4742 -3918,7535,7536,1186, 15,7537,1079,1070,7538,1522,3193,3539, 276,1050,2716, 758, # 4758 -1126, 653,2945,3263,7539,2337, 889,3540,3919,3081,2989, 903,1250,4395,3920,3434, # 4774 -3541,1342,1681,1718, 766,3264, 286, 89,2946,3649,7540,1713,7541,2597,3334,2990, # 4790 -7542,2947,2215,3194,2866,7543,4396,2498,2526, 181, 387,1075,3921, 731,2187,3335, # 4806 -7544,3265, 310, 313,3435,2299, 770,4134, 54,3034, 189,4397,3082,3769,3922,7545, # 4822 -1230,1617,1849, 355,3542,4135,4398,3336, 111,4136,3650,1350,3135,3436,3035,4137, # 4838 -2149,3266,3543,7546,2784,3923,3924,2991, 722,2008,7547,1071, 247,1207,2338,2471, # 4854 -1378,4399,2009, 864,1437,1214,4400, 373,3770,1142,2216, 667,4401, 442,2753,2555, # 4870 -3771,3925,1968,4138,3267,1839, 837, 170,1107, 934,1336,1882,7548,7549,2118,4139, # 4886 -2828, 743,1569,7550,4402,4140, 582,2384,1418,3437,7551,1802,7552, 357,1395,1729, # 4902 -3651,3268,2418,1564,2237,7553,3083,3772,1633,4403,1114,2085,4141,1532,7554, 482, # 4918 -2446,4404,7555,7556,1492, 833,1466,7557,2717,3544,1641,2829,7558,1526,1272,3652, # 4934 -4142,1686,1794, 416,2556,1902,1953,1803,7559,3773,2785,3774,1159,2316,7560,2867, # 4950 -4405,1610,1584,3036,2419,2754, 443,3269,1163,3136,7561,7562,3926,7563,4143,2499, # 4966 -3037,4406,3927,3137,2103,1647,3545,2010,1872,4144,7564,4145, 431,3438,7565, 250, # 4982 - 97, 81,4146,7566,1648,1850,1558, 160, 848,7567, 866, 740,1694,7568,2201,2830, # 4998 -3195,4147,4407,3653,1687, 950,2472, 426, 469,3196,3654,3655,3928,7569,7570,1188, # 5014 - 424,1995, 861,3546,4148,3775,2202,2685, 168,1235,3547,4149,7571,2086,1674,4408, # 5030 -3337,3270, 220,2557,1009,7572,3776, 670,2992, 332,1208, 717,7573,7574,3548,2447, # 5046 -3929,3338,7575, 513,7576,1209,2868,3339,3138,4409,1080,7577,7578,7579,7580,2527, # 5062 -3656,3549, 815,1587,3930,3931,7581,3550,3439,3777,1254,4410,1328,3038,1390,3932, # 5078 -1741,3933,3778,3934,7582, 236,3779,2448,3271,7583,7584,3657,3780,1273,3781,4411, # 5094 -7585, 308,7586,4412, 245,4413,1851,2473,1307,2575, 430, 715,2136,2449,7587, 270, # 5110 - 199,2869,3935,7588,3551,2718,1753, 761,1754, 725,1661,1840,4414,3440,3658,7589, # 5126 -7590, 587, 14,3272, 227,2598, 326, 480,2265, 943,2755,3552, 291, 650,1883,7591, # 5142 -1702,1226, 102,1547, 62,3441, 904,4415,3442,1164,4150,7592,7593,1224,1548,2756, # 5158 - 391, 498,1493,7594,1386,1419,7595,2055,1177,4416, 813, 880,1081,2363, 566,1145, # 5174 -4417,2286,1001,1035,2558,2599,2238, 394,1286,7596,7597,2068,7598, 86,1494,1730, # 5190 -3936, 491,1588, 745, 897,2948, 843,3340,3937,2757,2870,3273,1768, 998,2217,2069, # 5206 - 397,1826,1195,1969,3659,2993,3341, 284,7599,3782,2500,2137,2119,1903,7600,3938, # 5222 -2150,3939,4151,1036,3443,1904, 114,2559,4152, 209,1527,7601,7602,2949,2831,2625, # 5238 -2385,2719,3139, 812,2560,7603,3274,7604,1559, 737,1884,3660,1210, 885, 28,2686, # 5254 -3553,3783,7605,4153,1004,1779,4418,7606, 346,1981,2218,2687,4419,3784,1742, 797, # 5270 -1642,3940,1933,1072,1384,2151, 896,3941,3275,3661,3197,2871,3554,7607,2561,1958, # 5286 -4420,2450,1785,7608,7609,7610,3942,4154,1005,1308,3662,4155,2720,4421,4422,1528, # 5302 -2600, 161,1178,4156,1982, 987,4423,1101,4157, 631,3943,1157,3198,2420,1343,1241, # 5318 -1016,2239,2562, 372, 877,2339,2501,1160, 555,1934, 911,3944,7611, 466,1170, 169, # 5334 -1051,2907,2688,3663,2474,2994,1182,2011,2563,1251,2626,7612, 992,2340,3444,1540, # 5350 -2721,1201,2070,2401,1996,2475,7613,4424, 528,1922,2188,1503,1873,1570,2364,3342, # 5366 -3276,7614, 557,1073,7615,1827,3445,2087,2266,3140,3039,3084, 767,3085,2786,4425, # 5382 -1006,4158,4426,2341,1267,2176,3664,3199, 778,3945,3200,2722,1597,2657,7616,4427, # 5398 -7617,3446,7618,7619,7620,3277,2689,1433,3278, 131, 95,1504,3946, 723,4159,3141, # 5414 -1841,3555,2758,2189,3947,2027,2104,3665,7621,2995,3948,1218,7622,3343,3201,3949, # 5430 -4160,2576, 248,1634,3785, 912,7623,2832,3666,3040,3786, 654, 53,7624,2996,7625, # 5446 -1688,4428, 777,3447,1032,3950,1425,7626, 191, 820,2120,2833, 971,4429, 931,3202, # 5462 - 135, 664, 783,3787,1997, 772,2908,1935,3951,3788,4430,2909,3203, 282,2723, 640, # 5478 -1372,3448,1127, 922, 325,3344,7627,7628, 711,2044,7629,7630,3952,2219,2787,1936, # 5494 -3953,3345,2220,2251,3789,2300,7631,4431,3790,1258,3279,3954,3204,2138,2950,3955, # 5510 -3956,7632,2221, 258,3205,4432, 101,1227,7633,3280,1755,7634,1391,3281,7635,2910, # 5526 -2056, 893,7636,7637,7638,1402,4161,2342,7639,7640,3206,3556,7641,7642, 878,1325, # 5542 -1780,2788,4433, 259,1385,2577, 744,1183,2267,4434,7643,3957,2502,7644, 684,1024, # 5558 -4162,7645, 472,3557,3449,1165,3282,3958,3959, 322,2152, 881, 455,1695,1152,1340, # 5574 - 660, 554,2153,4435,1058,4436,4163, 830,1065,3346,3960,4437,1923,7646,1703,1918, # 5590 -7647, 932,2268, 122,7648,4438, 947, 677,7649,3791,2627, 297,1905,1924,2269,4439, # 5606 -2317,3283,7650,7651,4164,7652,4165, 84,4166, 112, 989,7653, 547,1059,3961, 701, # 5622 -3558,1019,7654,4167,7655,3450, 942, 639, 457,2301,2451, 993,2951, 407, 851, 494, # 5638 -4440,3347, 927,7656,1237,7657,2421,3348, 573,4168, 680, 921,2911,1279,1874, 285, # 5654 - 790,1448,1983, 719,2167,7658,7659,4441,3962,3963,1649,7660,1541, 563,7661,1077, # 5670 -7662,3349,3041,3451, 511,2997,3964,3965,3667,3966,1268,2564,3350,3207,4442,4443, # 5686 -7663, 535,1048,1276,1189,2912,2028,3142,1438,1373,2834,2952,1134,2012,7664,4169, # 5702 -1238,2578,3086,1259,7665, 700,7666,2953,3143,3668,4170,7667,4171,1146,1875,1906, # 5718 -4444,2601,3967, 781,2422, 132,1589, 203, 147, 273,2789,2402, 898,1786,2154,3968, # 5734 -3969,7668,3792,2790,7669,7670,4445,4446,7671,3208,7672,1635,3793, 965,7673,1804, # 5750 -2690,1516,3559,1121,1082,1329,3284,3970,1449,3794, 65,1128,2835,2913,2759,1590, # 5766 -3795,7674,7675, 12,2658, 45, 976,2579,3144,4447, 517,2528,1013,1037,3209,7676, # 5782 -3796,2836,7677,3797,7678,3452,7679,2602, 614,1998,2318,3798,3087,2724,2628,7680, # 5798 -2580,4172, 599,1269,7681,1810,3669,7682,2691,3088, 759,1060, 489,1805,3351,3285, # 5814 -1358,7683,7684,2386,1387,1215,2629,2252, 490,7685,7686,4173,1759,2387,2343,7687, # 5830 -4448,3799,1907,3971,2630,1806,3210,4449,3453,3286,2760,2344, 874,7688,7689,3454, # 5846 -3670,1858, 91,2914,3671,3042,3800,4450,7690,3145,3972,2659,7691,3455,1202,1403, # 5862 -3801,2954,2529,1517,2503,4451,3456,2504,7692,4452,7693,2692,1885,1495,1731,3973, # 5878 -2365,4453,7694,2029,7695,7696,3974,2693,1216, 237,2581,4174,2319,3975,3802,4454, # 5894 -4455,2694,3560,3457, 445,4456,7697,7698,7699,7700,2761, 61,3976,3672,1822,3977, # 5910 -7701, 687,2045, 935, 925, 405,2660, 703,1096,1859,2725,4457,3978,1876,1367,2695, # 5926 -3352, 918,2105,1781,2476, 334,3287,1611,1093,4458, 564,3146,3458,3673,3353, 945, # 5942 -2631,2057,4459,7702,1925, 872,4175,7703,3459,2696,3089, 349,4176,3674,3979,4460, # 5958 -3803,4177,3675,2155,3980,4461,4462,4178,4463,2403,2046, 782,3981, 400, 251,4179, # 5974 -1624,7704,7705, 277,3676, 299,1265, 476,1191,3804,2121,4180,4181,1109, 205,7706, # 5990 -2582,1000,2156,3561,1860,7707,7708,7709,4464,7710,4465,2565, 107,2477,2157,3982, # 6006 -3460,3147,7711,1533, 541,1301, 158, 753,4182,2872,3562,7712,1696, 370,1088,4183, # 6022 -4466,3563, 579, 327, 440, 162,2240, 269,1937,1374,3461, 968,3043, 56,1396,3090, # 6038 -2106,3288,3354,7713,1926,2158,4467,2998,7714,3564,7715,7716,3677,4468,2478,7717, # 6054 -2791,7718,1650,4469,7719,2603,7720,7721,3983,2661,3355,1149,3356,3984,3805,3985, # 6070 -7722,1076, 49,7723, 951,3211,3289,3290, 450,2837, 920,7724,1811,2792,2366,4184, # 6086 -1908,1138,2367,3806,3462,7725,3212,4470,1909,1147,1518,2423,4471,3807,7726,4472, # 6102 -2388,2604, 260,1795,3213,7727,7728,3808,3291, 708,7729,3565,1704,7730,3566,1351, # 6118 -1618,3357,2999,1886, 944,4185,3358,4186,3044,3359,4187,7731,3678, 422, 413,1714, # 6134 -3292, 500,2058,2345,4188,2479,7732,1344,1910, 954,7733,1668,7734,7735,3986,2404, # 6150 -4189,3567,3809,4190,7736,2302,1318,2505,3091, 133,3092,2873,4473, 629, 31,2838, # 6166 -2697,3810,4474, 850, 949,4475,3987,2955,1732,2088,4191,1496,1852,7737,3988, 620, # 6182 -3214, 981,1242,3679,3360,1619,3680,1643,3293,2139,2452,1970,1719,3463,2168,7738, # 6198 -3215,7739,7740,3361,1828,7741,1277,4476,1565,2047,7742,1636,3568,3093,7743, 869, # 6214 -2839, 655,3811,3812,3094,3989,3000,3813,1310,3569,4477,7744,7745,7746,1733, 558, # 6230 -4478,3681, 335,1549,3045,1756,4192,3682,1945,3464,1829,1291,1192, 470,2726,2107, # 6246 -2793, 913,1054,3990,7747,1027,7748,3046,3991,4479, 982,2662,3362,3148,3465,3216, # 6262 -3217,1946,2794,7749, 571,4480,7750,1830,7751,3570,2583,1523,2424,7752,2089, 984, # 6278 -4481,3683,1959,7753,3684, 852, 923,2795,3466,3685, 969,1519, 999,2048,2320,1705, # 6294 -7754,3095, 615,1662, 151, 597,3992,2405,2321,1049, 275,4482,3686,4193, 568,3687, # 6310 -3571,2480,4194,3688,7755,2425,2270, 409,3218,7756,1566,2874,3467,1002, 769,2840, # 6326 - 194,2090,3149,3689,2222,3294,4195, 628,1505,7757,7758,1763,2177,3001,3993, 521, # 6342 -1161,2584,1787,2203,2406,4483,3994,1625,4196,4197, 412, 42,3096, 464,7759,2632, # 6358 -4484,3363,1760,1571,2875,3468,2530,1219,2204,3814,2633,2140,2368,4485,4486,3295, # 6374 -1651,3364,3572,7760,7761,3573,2481,3469,7762,3690,7763,7764,2271,2091, 460,7765, # 6390 -4487,7766,3002, 962, 588,3574, 289,3219,2634,1116, 52,7767,3047,1796,7768,7769, # 6406 -7770,1467,7771,1598,1143,3691,4198,1984,1734,1067,4488,1280,3365, 465,4489,1572, # 6422 - 510,7772,1927,2241,1812,1644,3575,7773,4490,3692,7774,7775,2663,1573,1534,7776, # 6438 -7777,4199, 536,1807,1761,3470,3815,3150,2635,7778,7779,7780,4491,3471,2915,1911, # 6454 -2796,7781,3296,1122, 377,3220,7782, 360,7783,7784,4200,1529, 551,7785,2059,3693, # 6470 -1769,2426,7786,2916,4201,3297,3097,2322,2108,2030,4492,1404, 136,1468,1479, 672, # 6486 -1171,3221,2303, 271,3151,7787,2762,7788,2049, 678,2727, 865,1947,4493,7789,2013, # 6502 -3995,2956,7790,2728,2223,1397,3048,3694,4494,4495,1735,2917,3366,3576,7791,3816, # 6518 - 509,2841,2453,2876,3817,7792,7793,3152,3153,4496,4202,2531,4497,2304,1166,1010, # 6534 - 552, 681,1887,7794,7795,2957,2958,3996,1287,1596,1861,3154, 358, 453, 736, 175, # 6550 - 478,1117, 905,1167,1097,7796,1853,1530,7797,1706,7798,2178,3472,2287,3695,3473, # 6566 -3577,4203,2092,4204,7799,3367,1193,2482,4205,1458,2190,2205,1862,1888,1421,3298, # 6582 -2918,3049,2179,3474, 595,2122,7800,3997,7801,7802,4206,1707,2636, 223,3696,1359, # 6598 - 751,3098, 183,3475,7803,2797,3003, 419,2369, 633, 704,3818,2389, 241,7804,7805, # 6614 -7806, 838,3004,3697,2272,2763,2454,3819,1938,2050,3998,1309,3099,2242,1181,7807, # 6630 -1136,2206,3820,2370,1446,4207,2305,4498,7808,7809,4208,1055,2605, 484,3698,7810, # 6646 -3999, 625,4209,2273,3368,1499,4210,4000,7811,4001,4211,3222,2274,2275,3476,7812, # 6662 -7813,2764, 808,2606,3699,3369,4002,4212,3100,2532, 526,3370,3821,4213, 955,7814, # 6678 -1620,4214,2637,2427,7815,1429,3700,1669,1831, 994, 928,7816,3578,1260,7817,7818, # 6694 -7819,1948,2288, 741,2919,1626,4215,2729,2455, 867,1184, 362,3371,1392,7820,7821, # 6710 -4003,4216,1770,1736,3223,2920,4499,4500,1928,2698,1459,1158,7822,3050,3372,2877, # 6726 -1292,1929,2506,2842,3701,1985,1187,2071,2014,2607,4217,7823,2566,2507,2169,3702, # 6742 -2483,3299,7824,3703,4501,7825,7826, 666,1003,3005,1022,3579,4218,7827,4502,1813, # 6758 -2253, 574,3822,1603, 295,1535, 705,3823,4219, 283, 858, 417,7828,7829,3224,4503, # 6774 -4504,3051,1220,1889,1046,2276,2456,4004,1393,1599, 689,2567, 388,4220,7830,2484, # 6790 - 802,7831,2798,3824,2060,1405,2254,7832,4505,3825,2109,1052,1345,3225,1585,7833, # 6806 - 809,7834,7835,7836, 575,2730,3477, 956,1552,1469,1144,2323,7837,2324,1560,2457, # 6822 -3580,3226,4005, 616,2207,3155,2180,2289,7838,1832,7839,3478,4506,7840,1319,3704, # 6838 -3705,1211,3581,1023,3227,1293,2799,7841,7842,7843,3826, 607,2306,3827, 762,2878, # 6854 -1439,4221,1360,7844,1485,3052,7845,4507,1038,4222,1450,2061,2638,4223,1379,4508, # 6870 -2585,7846,7847,4224,1352,1414,2325,2921,1172,7848,7849,3828,3829,7850,1797,1451, # 6886 -7851,7852,7853,7854,2922,4006,4007,2485,2346, 411,4008,4009,3582,3300,3101,4509, # 6902 -1561,2664,1452,4010,1375,7855,7856, 47,2959, 316,7857,1406,1591,2923,3156,7858, # 6918 -1025,2141,3102,3157, 354,2731, 884,2224,4225,2407, 508,3706, 726,3583, 996,2428, # 6934 -3584, 729,7859, 392,2191,1453,4011,4510,3707,7860,7861,2458,3585,2608,1675,2800, # 6950 - 919,2347,2960,2348,1270,4511,4012, 73,7862,7863, 647,7864,3228,2843,2255,1550, # 6966 -1346,3006,7865,1332, 883,3479,7866,7867,7868,7869,3301,2765,7870,1212, 831,1347, # 6982 -4226,4512,2326,3830,1863,3053, 720,3831,4513,4514,3832,7871,4227,7872,7873,4515, # 6998 -7874,7875,1798,4516,3708,2609,4517,3586,1645,2371,7876,7877,2924, 669,2208,2665, # 7014 -2429,7878,2879,7879,7880,1028,3229,7881,4228,2408,7882,2256,1353,7883,7884,4518, # 7030 -3158, 518,7885,4013,7886,4229,1960,7887,2142,4230,7888,7889,3007,2349,2350,3833, # 7046 - 516,1833,1454,4014,2699,4231,4519,2225,2610,1971,1129,3587,7890,2766,7891,2961, # 7062 -1422, 577,1470,3008,1524,3373,7892,7893, 432,4232,3054,3480,7894,2586,1455,2508, # 7078 -2226,1972,1175,7895,1020,2732,4015,3481,4520,7896,2733,7897,1743,1361,3055,3482, # 7094 -2639,4016,4233,4521,2290, 895, 924,4234,2170, 331,2243,3056, 166,1627,3057,1098, # 7110 -7898,1232,2880,2227,3374,4522, 657, 403,1196,2372, 542,3709,3375,1600,4235,3483, # 7126 -7899,4523,2767,3230, 576, 530,1362,7900,4524,2533,2666,3710,4017,7901, 842,3834, # 7142 -7902,2801,2031,1014,4018, 213,2700,3376, 665, 621,4236,7903,3711,2925,2430,7904, # 7158 -2431,3302,3588,3377,7905,4237,2534,4238,4525,3589,1682,4239,3484,1380,7906, 724, # 7174 -2277, 600,1670,7907,1337,1233,4526,3103,2244,7908,1621,4527,7909, 651,4240,7910, # 7190 -1612,4241,2611,7911,2844,7912,2734,2307,3058,7913, 716,2459,3059, 174,1255,2701, # 7206 -4019,3590, 548,1320,1398, 728,4020,1574,7914,1890,1197,3060,4021,7915,3061,3062, # 7222 -3712,3591,3713, 747,7916, 635,4242,4528,7917,7918,7919,4243,7920,7921,4529,7922, # 7238 -3378,4530,2432, 451,7923,3714,2535,2072,4244,2735,4245,4022,7924,1764,4531,7925, # 7254 -4246, 350,7926,2278,2390,2486,7927,4247,4023,2245,1434,4024, 488,4532, 458,4248, # 7270 -4025,3715, 771,1330,2391,3835,2568,3159,2159,2409,1553,2667,3160,4249,7928,2487, # 7286 -2881,2612,1720,2702,4250,3379,4533,7929,2536,4251,7930,3231,4252,2768,7931,2015, # 7302 -2736,7932,1155,1017,3716,3836,7933,3303,2308, 201,1864,4253,1430,7934,4026,7935, # 7318 -7936,7937,7938,7939,4254,1604,7940, 414,1865, 371,2587,4534,4535,3485,2016,3104, # 7334 -4536,1708, 960,4255, 887, 389,2171,1536,1663,1721,7941,2228,4027,2351,2926,1580, # 7350 -7942,7943,7944,1744,7945,2537,4537,4538,7946,4539,7947,2073,7948,7949,3592,3380, # 7366 -2882,4256,7950,4257,2640,3381,2802, 673,2703,2460, 709,3486,4028,3593,4258,7951, # 7382 -1148, 502, 634,7952,7953,1204,4540,3594,1575,4541,2613,3717,7954,3718,3105, 948, # 7398 -3232, 121,1745,3837,1110,7955,4259,3063,2509,3009,4029,3719,1151,1771,3838,1488, # 7414 -4030,1986,7956,2433,3487,7957,7958,2093,7959,4260,3839,1213,1407,2803, 531,2737, # 7430 -2538,3233,1011,1537,7960,2769,4261,3106,1061,7961,3720,3721,1866,2883,7962,2017, # 7446 - 120,4262,4263,2062,3595,3234,2309,3840,2668,3382,1954,4542,7963,7964,3488,1047, # 7462 -2704,1266,7965,1368,4543,2845, 649,3383,3841,2539,2738,1102,2846,2669,7966,7967, # 7478 -1999,7968,1111,3596,2962,7969,2488,3842,3597,2804,1854,3384,3722,7970,7971,3385, # 7494 -2410,2884,3304,3235,3598,7972,2569,7973,3599,2805,4031,1460, 856,7974,3600,7975, # 7510 -2885,2963,7976,2886,3843,7977,4264, 632,2510, 875,3844,1697,3845,2291,7978,7979, # 7526 -4544,3010,1239, 580,4545,4265,7980, 914, 936,2074,1190,4032,1039,2123,7981,7982, # 7542 -7983,3386,1473,7984,1354,4266,3846,7985,2172,3064,4033, 915,3305,4267,4268,3306, # 7558 -1605,1834,7986,2739, 398,3601,4269,3847,4034, 328,1912,2847,4035,3848,1331,4270, # 7574 -3011, 937,4271,7987,3602,4036,4037,3387,2160,4546,3388, 524, 742, 538,3065,1012, # 7590 -7988,7989,3849,2461,7990, 658,1103, 225,3850,7991,7992,4547,7993,4548,7994,3236, # 7606 -1243,7995,4038, 963,2246,4549,7996,2705,3603,3161,7997,7998,2588,2327,7999,4550, # 7622 -8000,8001,8002,3489,3307, 957,3389,2540,2032,1930,2927,2462, 870,2018,3604,1746, # 7638 -2770,2771,2434,2463,8003,3851,8004,3723,3107,3724,3490,3390,3725,8005,1179,3066, # 7654 -8006,3162,2373,4272,3726,2541,3163,3108,2740,4039,8007,3391,1556,2542,2292, 977, # 7670 -2887,2033,4040,1205,3392,8008,1765,3393,3164,2124,1271,1689, 714,4551,3491,8009, # 7686 -2328,3852, 533,4273,3605,2181, 617,8010,2464,3308,3492,2310,8011,8012,3165,8013, # 7702 -8014,3853,1987, 618, 427,2641,3493,3394,8015,8016,1244,1690,8017,2806,4274,4552, # 7718 -8018,3494,8019,8020,2279,1576, 473,3606,4275,3395, 972,8021,3607,8022,3067,8023, # 7734 -8024,4553,4554,8025,3727,4041,4042,8026, 153,4555, 356,8027,1891,2888,4276,2143, # 7750 - 408, 803,2352,8028,3854,8029,4277,1646,2570,2511,4556,4557,3855,8030,3856,4278, # 7766 -8031,2411,3396, 752,8032,8033,1961,2964,8034, 746,3012,2465,8035,4279,3728, 698, # 7782 -4558,1892,4280,3608,2543,4559,3609,3857,8036,3166,3397,8037,1823,1302,4043,2706, # 7798 -3858,1973,4281,8038,4282,3167, 823,1303,1288,1236,2848,3495,4044,3398, 774,3859, # 7814 -8039,1581,4560,1304,2849,3860,4561,8040,2435,2161,1083,3237,4283,4045,4284, 344, # 7830 -1173, 288,2311, 454,1683,8041,8042,1461,4562,4046,2589,8043,8044,4563, 985, 894, # 7846 -8045,3399,3168,8046,1913,2928,3729,1988,8047,2110,1974,8048,4047,8049,2571,1194, # 7862 - 425,8050,4564,3169,1245,3730,4285,8051,8052,2850,8053, 636,4565,1855,3861, 760, # 7878 -1799,8054,4286,2209,1508,4566,4048,1893,1684,2293,8055,8056,8057,4287,4288,2210, # 7894 - 479,8058,8059, 832,8060,4049,2489,8061,2965,2490,3731, 990,3109, 627,1814,2642, # 7910 -4289,1582,4290,2125,2111,3496,4567,8062, 799,4291,3170,8063,4568,2112,1737,3013, # 7926 -1018, 543, 754,4292,3309,1676,4569,4570,4050,8064,1489,8065,3497,8066,2614,2889, # 7942 -4051,8067,8068,2966,8069,8070,8071,8072,3171,4571,4572,2182,1722,8073,3238,3239, # 7958 -1842,3610,1715, 481, 365,1975,1856,8074,8075,1962,2491,4573,8076,2126,3611,3240, # 7974 - 433,1894,2063,2075,8077, 602,2741,8078,8079,8080,8081,8082,3014,1628,3400,8083, # 7990 -3172,4574,4052,2890,4575,2512,8084,2544,2772,8085,8086,8087,3310,4576,2891,8088, # 8006 -4577,8089,2851,4578,4579,1221,2967,4053,2513,8090,8091,8092,1867,1989,8093,8094, # 8022 -8095,1895,8096,8097,4580,1896,4054, 318,8098,2094,4055,4293,8099,8100, 485,8101, # 8038 - 938,3862, 553,2670, 116,8102,3863,3612,8103,3498,2671,2773,3401,3311,2807,8104, # 8054 -3613,2929,4056,1747,2930,2968,8105,8106, 207,8107,8108,2672,4581,2514,8109,3015, # 8070 - 890,3614,3864,8110,1877,3732,3402,8111,2183,2353,3403,1652,8112,8113,8114, 941, # 8086 -2294, 208,3499,4057,2019, 330,4294,3865,2892,2492,3733,4295,8115,8116,8117,8118, # 8102 + 1, 1800, 1506, 255, 1431, 198, 9, 82, 6, 7310, 177, 202, 3615, 1256, 2808, 110, # 2742 + 3735, 33, 3241, 261, 76, 44, 2113, 16, 2931, 2184, 1176, 659, 3868, 26, 3404, 2643, # 2758 + 1198, 3869, 3313, 4060, 410, 2211, 302, 590, 361, 1963, 8, 204, 58, 4296, 7311, 1931, # 2774 + 63, 7312, 7313, 317, 1614, 75, 222, 159, 4061, 2412, 1480, 7314, 3500, 3068, 224, 2809, # 2790 + 3616, 3, 10, 3870, 1471, 29, 2774, 1135, 2852, 1939, 873, 130, 3242, 1123, 312, 7315, # 2806 + 4297, 2051, 507, 252, 682, 7316, 142, 1914, 124, 206, 2932, 34, 3501, 3173, 64, 604, # 2822 + 7317, 2494, 1976, 1977, 155, 1990, 645, 641, 1606, 7318, 3405, 337, 72, 406, 7319, 80, # 2838 + 630, 238, 3174, 1509, 263, 939, 1092, 2644, 756, 1440, 1094, 3406, 449, 69, 2969, 591, # 2854 + 179, 2095, 471, 115, 2034, 1843, 60, 50, 2970, 134, 806, 1868, 734, 2035, 3407, 180, # 2870 + 995, 1607, 156, 537, 2893, 688, 7320, 319, 1305, 779, 2144, 514, 2374, 298, 4298, 359, # 2886 + 2495, 90, 2707, 1338, 663, 11, 906, 1099, 2545, 20, 2436, 182, 532, 1716, 7321, 732, # 2902 + 1376, 4062, 1311, 1420, 3175, 25, 2312, 1056, 113, 399, 382, 1949, 242, 3408, 2467, 529, # 2918 + 3243, 475, 1447, 3617, 7322, 117, 21, 656, 810, 1297, 2295, 2329, 3502, 7323, 126, 4063, # 2934 + 706, 456, 150, 613, 4299, 71, 1118, 2036, 4064, 145, 3069, 85, 835, 486, 2114, 1246, # 2950 + 1426, 428, 727, 1285, 1015, 800, 106, 623, 303, 1281, 7324, 2127, 2354, 347, 3736, 221, # 2966 + 3503, 3110, 7325, 1955, 1153, 4065, 83, 296, 1199, 3070, 192, 624, 93, 7326, 822, 1897, # 2982 + 2810, 3111, 795, 2064, 991, 1554, 1542, 1592, 27, 43, 2853, 859, 139, 1456, 860, 4300, # 2998 + 437, 712, 3871, 164, 2392, 3112, 695, 211, 3017, 2096, 195, 3872, 1608, 3504, 3505, 3618, # 3014 + 3873, 234, 811, 2971, 2097, 3874, 2229, 1441, 3506, 1615, 2375, 668, 2076, 1638, 305, 228, # 3030 + 1664, 4301, 467, 415, 7327, 262, 2098, 1593, 239, 108, 300, 200, 1033, 512, 1247, 2077, # 3046 + 7328, 7329, 2173, 3176, 3619, 2673, 593, 845, 1062, 3244, 88, 1723, 2037, 3875, 1950, 212, # 3062 + 266, 152, 149, 468, 1898, 4066, 4302, 77, 187, 7330, 3018, 37, 5, 2972, 7331, 3876, # 3078 + 7332, 7333, 39, 2517, 4303, 2894, 3177, 2078, 55, 148, 74, 4304, 545, 483, 1474, 1029, # 3094 + 1665, 217, 1869, 1531, 3113, 1104, 2645, 4067, 24, 172, 3507, 900, 3877, 3508, 3509, 4305, # 3110 + 32, 1408, 2811, 1312, 329, 487, 2355, 2247, 2708, 784, 2674, 4, 3019, 3314, 1427, 1788, # 3126 + 188, 109, 499, 7334, 3620, 1717, 1789, 888, 1217, 3020, 4306, 7335, 3510, 7336, 3315, 1520, # 3142 + 3621, 3878, 196, 1034, 775, 7337, 7338, 929, 1815, 249, 439, 38, 7339, 1063, 7340, 794, # 3158 + 3879, 1435, 2296, 46, 178, 3245, 2065, 7341, 2376, 7342, 214, 1709, 4307, 804, 35, 707, # 3174 + 324, 3622, 1601, 2546, 140, 459, 4068, 7343, 7344, 1365, 839, 272, 978, 2257, 2572, 3409, # 3190 + 2128, 1363, 3623, 1423, 697, 100, 3071, 48, 70, 1231, 495, 3114, 2193, 7345, 1294, 7346, # 3206 + 2079, 462, 586, 1042, 3246, 853, 256, 988, 185, 2377, 3410, 1698, 434, 1084, 7347, 3411, # 3222 + 314, 2615, 2775, 4308, 2330, 2331, 569, 2280, 637, 1816, 2518, 757, 1162, 1878, 1616, 3412, # 3238 + 287, 1577, 2115, 768, 4309, 1671, 2854, 3511, 2519, 1321, 3737, 909, 2413, 7348, 4069, 933, # 3254 + 3738, 7349, 2052, 2356, 1222, 4310, 765, 2414, 1322, 786, 4311, 7350, 1919, 1462, 1677, 2895, # 3270 + 1699, 7351, 4312, 1424, 2437, 3115, 3624, 2590, 3316, 1774, 1940, 3413, 3880, 4070, 309, 1369, # 3286 + 1130, 2812, 364, 2230, 1653, 1299, 3881, 3512, 3882, 3883, 2646, 525, 1085, 3021, 902, 2000, # 3302 + 1475, 964, 4313, 421, 1844, 1415, 1057, 2281, 940, 1364, 3116, 376, 4314, 4315, 1381, 7, # 3318 + 2520, 983, 2378, 336, 1710, 2675, 1845, 321, 3414, 559, 1131, 3022, 2742, 1808, 1132, 1313, # 3334 + 265, 1481, 1857, 7352, 352, 1203, 2813, 3247, 167, 1089, 420, 2814, 776, 792, 1724, 3513, # 3350 + 4071, 2438, 3248, 7353, 4072, 7354, 446, 229, 333, 2743, 901, 3739, 1200, 1557, 4316, 2647, # 3366 + 1920, 395, 2744, 2676, 3740, 4073, 1835, 125, 916, 3178, 2616, 4317, 7355, 7356, 3741, 7357, # 3382 + 7358, 7359, 4318, 3117, 3625, 1133, 2547, 1757, 3415, 1510, 2313, 1409, 3514, 7360, 2145, 438, # 3398 + 2591, 2896, 2379, 3317, 1068, 958, 3023, 461, 311, 2855, 2677, 4074, 1915, 3179, 4075, 1978, # 3414 + 383, 750, 2745, 2617, 4076, 274, 539, 385, 1278, 1442, 7361, 1154, 1964, 384, 561, 210, # 3430 + 98, 1295, 2548, 3515, 7362, 1711, 2415, 1482, 3416, 3884, 2897, 1257, 129, 7363, 3742, 642, # 3446 + 523, 2776, 2777, 2648, 7364, 141, 2231, 1333, 68, 176, 441, 876, 907, 4077, 603, 2592, # 3462 + 710, 171, 3417, 404, 549, 18, 3118, 2393, 1410, 3626, 1666, 7365, 3516, 4319, 2898, 4320, # 3478 + 7366, 2973, 368, 7367, 146, 366, 99, 871, 3627, 1543, 748, 807, 1586, 1185, 22, 2258, # 3494 + 379, 3743, 3180, 7368, 3181, 505, 1941, 2618, 1991, 1382, 2314, 7369, 380, 2357, 218, 702, # 3510 + 1817, 1248, 3418, 3024, 3517, 3318, 3249, 7370, 2974, 3628, 930, 3250, 3744, 7371, 59, 7372, # 3526 + 585, 601, 4078, 497, 3419, 1112, 1314, 4321, 1801, 7373, 1223, 1472, 2174, 7374, 749, 1836, # 3542 + 690, 1899, 3745, 1772, 3885, 1476, 429, 1043, 1790, 2232, 2116, 917, 4079, 447, 1086, 1629, # 3558 + 7375, 556, 7376, 7377, 2020, 1654, 844, 1090, 105, 550, 966, 1758, 2815, 1008, 1782, 686, # 3574 + 1095, 7378, 2282, 793, 1602, 7379, 3518, 2593, 4322, 4080, 2933, 2297, 4323, 3746, 980, 2496, # 3590 + 544, 353, 527, 4324, 908, 2678, 2899, 7380, 381, 2619, 1942, 1348, 7381, 1341, 1252, 560, # 3606 + 3072, 7382, 3420, 2856, 7383, 2053, 973, 886, 2080, 143, 4325, 7384, 7385, 157, 3886, 496, # 3622 + 4081, 57, 840, 540, 2038, 4326, 4327, 3421, 2117, 1445, 970, 2259, 1748, 1965, 2081, 4082, # 3638 + 3119, 1234, 1775, 3251, 2816, 3629, 773, 1206, 2129, 1066, 2039, 1326, 3887, 1738, 1725, 4083, # 3654 + 279, 3120, 51, 1544, 2594, 423, 1578, 2130, 2066, 173, 4328, 1879, 7386, 7387, 1583, 264, # 3670 + 610, 3630, 4329, 2439, 280, 154, 7388, 7389, 7390, 1739, 338, 1282, 3073, 693, 2857, 1411, # 3686 + 1074, 3747, 2440, 7391, 4330, 7392, 7393, 1240, 952, 2394, 7394, 2900, 1538, 2679, 685, 1483, # 3702 + 4084, 2468, 1436, 953, 4085, 2054, 4331, 671, 2395, 79, 4086, 2441, 3252, 608, 567, 2680, # 3718 + 3422, 4087, 4088, 1691, 393, 1261, 1791, 2396, 7395, 4332, 7396, 7397, 7398, 7399, 1383, 1672, # 3734 + 3748, 3182, 1464, 522, 1119, 661, 1150, 216, 675, 4333, 3888, 1432, 3519, 609, 4334, 2681, # 3750 + 2397, 7400, 7401, 7402, 4089, 3025, 0, 7403, 2469, 315, 231, 2442, 301, 3319, 4335, 2380, # 3766 + 7404, 233, 4090, 3631, 1818, 4336, 4337, 7405, 96, 1776, 1315, 2082, 7406, 257, 7407, 1809, # 3782 + 3632, 2709, 1139, 1819, 4091, 2021, 1124, 2163, 2778, 1777, 2649, 7408, 3074, 363, 1655, 3183, # 3798 + 7409, 2975, 7410, 7411, 7412, 3889, 1567, 3890, 718, 103, 3184, 849, 1443, 341, 3320, 2934, # 3814 + 1484, 7413, 1712, 127, 67, 339, 4092, 2398, 679, 1412, 821, 7414, 7415, 834, 738, 351, # 3830 + 2976, 2146, 846, 235, 1497, 1880, 418, 1992, 3749, 2710, 186, 1100, 2147, 2746, 3520, 1545, # 3846 + 1355, 2935, 2858, 1377, 583, 3891, 4093, 2573, 2977, 7416, 1298, 3633, 1078, 2549, 3634, 2358, # 3862 + 78, 3750, 3751, 267, 1289, 2099, 2001, 1594, 4094, 348, 369, 1274, 2194, 2175, 1837, 4338, # 3878 + 1820, 2817, 3635, 2747, 2283, 2002, 4339, 2936, 2748, 144, 3321, 882, 4340, 3892, 2749, 3423, # 3894 + 4341, 2901, 7417, 4095, 1726, 320, 7418, 3893, 3026, 788, 2978, 7419, 2818, 1773, 1327, 2859, # 3910 + 3894, 2819, 7420, 1306, 4342, 2003, 1700, 3752, 3521, 2359, 2650, 787, 2022, 506, 824, 3636, # 3926 + 534, 323, 4343, 1044, 3322, 2023, 1900, 946, 3424, 7421, 1778, 1500, 1678, 7422, 1881, 4344, # 3942 + 165, 243, 4345, 3637, 2521, 123, 683, 4096, 764, 4346, 36, 3895, 1792, 589, 2902, 816, # 3958 + 626, 1667, 3027, 2233, 1639, 1555, 1622, 3753, 3896, 7423, 3897, 2860, 1370, 1228, 1932, 891, # 3974 + 2083, 2903, 304, 4097, 7424, 292, 2979, 2711, 3522, 691, 2100, 4098, 1115, 4347, 118, 662, # 3990 + 7425, 611, 1156, 854, 2381, 1316, 2861, 2, 386, 515, 2904, 7426, 7427, 3253, 868, 2234, # 4006 + 1486, 855, 2651, 785, 2212, 3028, 7428, 1040, 3185, 3523, 7429, 3121, 448, 7430, 1525, 7431, # 4022 + 2164, 4348, 7432, 3754, 7433, 4099, 2820, 3524, 3122, 503, 818, 3898, 3123, 1568, 814, 676, # 4038 + 1444, 306, 1749, 7434, 3755, 1416, 1030, 197, 1428, 805, 2821, 1501, 4349, 7435, 7436, 7437, # 4054 + 1993, 7438, 4350, 7439, 7440, 2195, 13, 2779, 3638, 2980, 3124, 1229, 1916, 7441, 3756, 2131, # 4070 + 7442, 4100, 4351, 2399, 3525, 7443, 2213, 1511, 1727, 1120, 7444, 7445, 646, 3757, 2443, 307, # 4086 + 7446, 7447, 1595, 3186, 7448, 7449, 7450, 3639, 1113, 1356, 3899, 1465, 2522, 2523, 7451, 519, # 4102 + 7452, 128, 2132, 92, 2284, 1979, 7453, 3900, 1512, 342, 3125, 2196, 7454, 2780, 2214, 1980, # 4118 + 3323, 7455, 290, 1656, 1317, 789, 827, 2360, 7456, 3758, 4352, 562, 581, 3901, 7457, 401, # 4134 + 4353, 2248, 94, 4354, 1399, 2781, 7458, 1463, 2024, 4355, 3187, 1943, 7459, 828, 1105, 4101, # 4150 + 1262, 1394, 7460, 4102, 605, 4356, 7461, 1783, 2862, 7462, 2822, 819, 2101, 578, 2197, 2937, # 4166 + 7463, 1502, 436, 3254, 4103, 3255, 2823, 3902, 2905, 3425, 3426, 7464, 2712, 2315, 7465, 7466, # 4182 + 2332, 2067, 23, 4357, 193, 826, 3759, 2102, 699, 1630, 4104, 3075, 390, 1793, 1064, 3526, # 4198 + 7467, 1579, 3076, 3077, 1400, 7468, 4105, 1838, 1640, 2863, 7469, 4358, 4359, 137, 4106, 598, # 4214 + 3078, 1966, 780, 104, 974, 2938, 7470, 278, 899, 253, 402, 572, 504, 493, 1339, 7471, # 4230 + 3903, 1275, 4360, 2574, 2550, 7472, 3640, 3029, 3079, 2249, 565, 1334, 2713, 863, 41, 7473, # 4246 + 7474, 4361, 7475, 1657, 2333, 19, 463, 2750, 4107, 606, 7476, 2981, 3256, 1087, 2084, 1323, # 4262 + 2652, 2982, 7477, 1631, 1623, 1750, 4108, 2682, 7478, 2864, 791, 2714, 2653, 2334, 232, 2416, # 4278 + 7479, 2983, 1498, 7480, 2654, 2620, 755, 1366, 3641, 3257, 3126, 2025, 1609, 119, 1917, 3427, # 4294 + 862, 1026, 4109, 7481, 3904, 3760, 4362, 3905, 4363, 2260, 1951, 2470, 7482, 1125, 817, 4110, # 4310 + 4111, 3906, 1513, 1766, 2040, 1487, 4112, 3030, 3258, 2824, 3761, 3127, 7483, 7484, 1507, 7485, # 4326 + 2683, 733, 40, 1632, 1106, 2865, 345, 4113, 841, 2524, 230, 4364, 2984, 1846, 3259, 3428, # 4342 + 7486, 1263, 986, 3429, 7487, 735, 879, 254, 1137, 857, 622, 1300, 1180, 1388, 1562, 3907, # 4358 + 3908, 2939, 967, 2751, 2655, 1349, 592, 2133, 1692, 3324, 2985, 1994, 4114, 1679, 3909, 1901, # 4374 + 2185, 7488, 739, 3642, 2715, 1296, 1290, 7489, 4115, 2198, 2199, 1921, 1563, 2595, 2551, 1870, # 4390 + 2752, 2986, 7490, 435, 7491, 343, 1108, 596, 17, 1751, 4365, 2235, 3430, 3643, 7492, 4366, # 4406 + 294, 3527, 2940, 1693, 477, 979, 281, 2041, 3528, 643, 2042, 3644, 2621, 2782, 2261, 1031, # 4422 + 2335, 2134, 2298, 3529, 4367, 367, 1249, 2552, 7493, 3530, 7494, 4368, 1283, 3325, 2004, 240, # 4438 + 1762, 3326, 4369, 4370, 836, 1069, 3128, 474, 7495, 2148, 2525, 268, 3531, 7496, 3188, 1521, # 4454 + 1284, 7497, 1658, 1546, 4116, 7498, 3532, 3533, 7499, 4117, 3327, 2684, 1685, 4118, 961, 1673, # 4470 + 2622, 190, 2005, 2200, 3762, 4371, 4372, 7500, 570, 2497, 3645, 1490, 7501, 4373, 2623, 3260, # 4486 + 1956, 4374, 584, 1514, 396, 1045, 1944, 7502, 4375, 1967, 2444, 7503, 7504, 4376, 3910, 619, # 4502 + 7505, 3129, 3261, 215, 2006, 2783, 2553, 3189, 4377, 3190, 4378, 763, 4119, 3763, 4379, 7506, # 4518 + 7507, 1957, 1767, 2941, 3328, 3646, 1174, 452, 1477, 4380, 3329, 3130, 7508, 2825, 1253, 2382, # 4534 + 2186, 1091, 2285, 4120, 492, 7509, 638, 1169, 1824, 2135, 1752, 3911, 648, 926, 1021, 1324, # 4550 + 4381, 520, 4382, 997, 847, 1007, 892, 4383, 3764, 2262, 1871, 3647, 7510, 2400, 1784, 4384, # 4566 + 1952, 2942, 3080, 3191, 1728, 4121, 2043, 3648, 4385, 2007, 1701, 3131, 1551, 30, 2263, 4122, # 4582 + 7511, 2026, 4386, 3534, 7512, 501, 7513, 4123, 594, 3431, 2165, 1821, 3535, 3432, 3536, 3192, # 4598 + 829, 2826, 4124, 7514, 1680, 3132, 1225, 4125, 7515, 3262, 4387, 4126, 3133, 2336, 7516, 4388, # 4614 + 4127, 7517, 3912, 3913, 7518, 1847, 2383, 2596, 3330, 7519, 4389, 374, 3914, 652, 4128, 4129, # 4630 + 375, 1140, 798, 7520, 7521, 7522, 2361, 4390, 2264, 546, 1659, 138, 3031, 2445, 4391, 7523, # 4646 + 2250, 612, 1848, 910, 796, 3765, 1740, 1371, 825, 3766, 3767, 7524, 2906, 2554, 7525, 692, # 4662 + 444, 3032, 2624, 801, 4392, 4130, 7526, 1491, 244, 1053, 3033, 4131, 4132, 340, 7527, 3915, # 4678 + 1041, 2987, 293, 1168, 87, 1357, 7528, 1539, 959, 7529, 2236, 721, 694, 4133, 3768, 219, # 4694 + 1478, 644, 1417, 3331, 2656, 1413, 1401, 1335, 1389, 3916, 7530, 7531, 2988, 2362, 3134, 1825, # 4710 + 730, 1515, 184, 2827, 66, 4393, 7532, 1660, 2943, 246, 3332, 378, 1457, 226, 3433, 975, # 4726 + 3917, 2944, 1264, 3537, 674, 696, 7533, 163, 7534, 1141, 2417, 2166, 713, 3538, 3333, 4394, # 4742 + 3918, 7535, 7536, 1186, 15, 7537, 1079, 1070, 7538, 1522, 3193, 3539, 276, 1050, 2716, 758, # 4758 + 1126, 653, 2945, 3263, 7539, 2337, 889, 3540, 3919, 3081, 2989, 903, 1250, 4395, 3920, 3434, # 4774 + 3541, 1342, 1681, 1718, 766, 3264, 286, 89, 2946, 3649, 7540, 1713, 7541, 2597, 3334, 2990, # 4790 + 7542, 2947, 2215, 3194, 2866, 7543, 4396, 2498, 2526, 181, 387, 1075, 3921, 731, 2187, 3335, # 4806 + 7544, 3265, 310, 313, 3435, 2299, 770, 4134, 54, 3034, 189, 4397, 3082, 3769, 3922, 7545, # 4822 + 1230, 1617, 1849, 355, 3542, 4135, 4398, 3336, 111, 4136, 3650, 1350, 3135, 3436, 3035, 4137, # 4838 + 2149, 3266, 3543, 7546, 2784, 3923, 3924, 2991, 722, 2008, 7547, 1071, 247, 1207, 2338, 2471, # 4854 + 1378, 4399, 2009, 864, 1437, 1214, 4400, 373, 3770, 1142, 2216, 667, 4401, 442, 2753, 2555, # 4870 + 3771, 3925, 1968, 4138, 3267, 1839, 837, 170, 1107, 934, 1336, 1882, 7548, 7549, 2118, 4139, # 4886 + 2828, 743, 1569, 7550, 4402, 4140, 582, 2384, 1418, 3437, 7551, 1802, 7552, 357, 1395, 1729, # 4902 + 3651, 3268, 2418, 1564, 2237, 7553, 3083, 3772, 1633, 4403, 1114, 2085, 4141, 1532, 7554, 482, # 4918 + 2446, 4404, 7555, 7556, 1492, 833, 1466, 7557, 2717, 3544, 1641, 2829, 7558, 1526, 1272, 3652, # 4934 + 4142, 1686, 1794, 416, 2556, 1902, 1953, 1803, 7559, 3773, 2785, 3774, 1159, 2316, 7560, 2867, # 4950 + 4405, 1610, 1584, 3036, 2419, 2754, 443, 3269, 1163, 3136, 7561, 7562, 3926, 7563, 4143, 2499, # 4966 + 3037, 4406, 3927, 3137, 2103, 1647, 3545, 2010, 1872, 4144, 7564, 4145, 431, 3438, 7565, 250, # 4982 + 97, 81, 4146, 7566, 1648, 1850, 1558, 160, 848, 7567, 866, 740, 1694, 7568, 2201, 2830, # 4998 + 3195, 4147, 4407, 3653, 1687, 950, 2472, 426, 469, 3196, 3654, 3655, 3928, 7569, 7570, 1188, # 5014 + 424, 1995, 861, 3546, 4148, 3775, 2202, 2685, 168, 1235, 3547, 4149, 7571, 2086, 1674, 4408, # 5030 + 3337, 3270, 220, 2557, 1009, 7572, 3776, 670, 2992, 332, 1208, 717, 7573, 7574, 3548, 2447, # 5046 + 3929, 3338, 7575, 513, 7576, 1209, 2868, 3339, 3138, 4409, 1080, 7577, 7578, 7579, 7580, 2527, # 5062 + 3656, 3549, 815, 1587, 3930, 3931, 7581, 3550, 3439, 3777, 1254, 4410, 1328, 3038, 1390, 3932, # 5078 + 1741, 3933, 3778, 3934, 7582, 236, 3779, 2448, 3271, 7583, 7584, 3657, 3780, 1273, 3781, 4411, # 5094 + 7585, 308, 7586, 4412, 245, 4413, 1851, 2473, 1307, 2575, 430, 715, 2136, 2449, 7587, 270, # 5110 + 199, 2869, 3935, 7588, 3551, 2718, 1753, 761, 1754, 725, 1661, 1840, 4414, 3440, 3658, 7589, # 5126 + 7590, 587, 14, 3272, 227, 2598, 326, 480, 2265, 943, 2755, 3552, 291, 650, 1883, 7591, # 5142 + 1702, 1226, 102, 1547, 62, 3441, 904, 4415, 3442, 1164, 4150, 7592, 7593, 1224, 1548, 2756, # 5158 + 391, 498, 1493, 7594, 1386, 1419, 7595, 2055, 1177, 4416, 813, 880, 1081, 2363, 566, 1145, # 5174 + 4417, 2286, 1001, 1035, 2558, 2599, 2238, 394, 1286, 7596, 7597, 2068, 7598, 86, 1494, 1730, # 5190 + 3936, 491, 1588, 745, 897, 2948, 843, 3340, 3937, 2757, 2870, 3273, 1768, 998, 2217, 2069, # 5206 + 397, 1826, 1195, 1969, 3659, 2993, 3341, 284, 7599, 3782, 2500, 2137, 2119, 1903, 7600, 3938, # 5222 + 2150, 3939, 4151, 1036, 3443, 1904, 114, 2559, 4152, 209, 1527, 7601, 7602, 2949, 2831, 2625, # 5238 + 2385, 2719, 3139, 812, 2560, 7603, 3274, 7604, 1559, 737, 1884, 3660, 1210, 885, 28, 2686, # 5254 + 3553, 3783, 7605, 4153, 1004, 1779, 4418, 7606, 346, 1981, 2218, 2687, 4419, 3784, 1742, 797, # 5270 + 1642, 3940, 1933, 1072, 1384, 2151, 896, 3941, 3275, 3661, 3197, 2871, 3554, 7607, 2561, 1958, # 5286 + 4420, 2450, 1785, 7608, 7609, 7610, 3942, 4154, 1005, 1308, 3662, 4155, 2720, 4421, 4422, 1528, # 5302 + 2600, 161, 1178, 4156, 1982, 987, 4423, 1101, 4157, 631, 3943, 1157, 3198, 2420, 1343, 1241, # 5318 + 1016, 2239, 2562, 372, 877, 2339, 2501, 1160, 555, 1934, 911, 3944, 7611, 466, 1170, 169, # 5334 + 1051, 2907, 2688, 3663, 2474, 2994, 1182, 2011, 2563, 1251, 2626, 7612, 992, 2340, 3444, 1540, # 5350 + 2721, 1201, 2070, 2401, 1996, 2475, 7613, 4424, 528, 1922, 2188, 1503, 1873, 1570, 2364, 3342, # 5366 + 3276, 7614, 557, 1073, 7615, 1827, 3445, 2087, 2266, 3140, 3039, 3084, 767, 3085, 2786, 4425, # 5382 + 1006, 4158, 4426, 2341, 1267, 2176, 3664, 3199, 778, 3945, 3200, 2722, 1597, 2657, 7616, 4427, # 5398 + 7617, 3446, 7618, 7619, 7620, 3277, 2689, 1433, 3278, 131, 95, 1504, 3946, 723, 4159, 3141, # 5414 + 1841, 3555, 2758, 2189, 3947, 2027, 2104, 3665, 7621, 2995, 3948, 1218, 7622, 3343, 3201, 3949, # 5430 + 4160, 2576, 248, 1634, 3785, 912, 7623, 2832, 3666, 3040, 3786, 654, 53, 7624, 2996, 7625, # 5446 + 1688, 4428, 777, 3447, 1032, 3950, 1425, 7626, 191, 820, 2120, 2833, 971, 4429, 931, 3202, # 5462 + 135, 664, 783, 3787, 1997, 772, 2908, 1935, 3951, 3788, 4430, 2909, 3203, 282, 2723, 640, # 5478 + 1372, 3448, 1127, 922, 325, 3344, 7627, 7628, 711, 2044, 7629, 7630, 3952, 2219, 2787, 1936, # 5494 + 3953, 3345, 2220, 2251, 3789, 2300, 7631, 4431, 3790, 1258, 3279, 3954, 3204, 2138, 2950, 3955, # 5510 + 3956, 7632, 2221, 258, 3205, 4432, 101, 1227, 7633, 3280, 1755, 7634, 1391, 3281, 7635, 2910, # 5526 + 2056, 893, 7636, 7637, 7638, 1402, 4161, 2342, 7639, 7640, 3206, 3556, 7641, 7642, 878, 1325, # 5542 + 1780, 2788, 4433, 259, 1385, 2577, 744, 1183, 2267, 4434, 7643, 3957, 2502, 7644, 684, 1024, # 5558 + 4162, 7645, 472, 3557, 3449, 1165, 3282, 3958, 3959, 322, 2152, 881, 455, 1695, 1152, 1340, # 5574 + 660, 554, 2153, 4435, 1058, 4436, 4163, 830, 1065, 3346, 3960, 4437, 1923, 7646, 1703, 1918, # 5590 + 7647, 932, 2268, 122, 7648, 4438, 947, 677, 7649, 3791, 2627, 297, 1905, 1924, 2269, 4439, # 5606 + 2317, 3283, 7650, 7651, 4164, 7652, 4165, 84, 4166, 112, 989, 7653, 547, 1059, 3961, 701, # 5622 + 3558, 1019, 7654, 4167, 7655, 3450, 942, 639, 457, 2301, 2451, 993, 2951, 407, 851, 494, # 5638 + 4440, 3347, 927, 7656, 1237, 7657, 2421, 3348, 573, 4168, 680, 921, 2911, 1279, 1874, 285, # 5654 + 790, 1448, 1983, 719, 2167, 7658, 7659, 4441, 3962, 3963, 1649, 7660, 1541, 563, 7661, 1077, # 5670 + 7662, 3349, 3041, 3451, 511, 2997, 3964, 3965, 3667, 3966, 1268, 2564, 3350, 3207, 4442, 4443, # 5686 + 7663, 535, 1048, 1276, 1189, 2912, 2028, 3142, 1438, 1373, 2834, 2952, 1134, 2012, 7664, 4169, # 5702 + 1238, 2578, 3086, 1259, 7665, 700, 7666, 2953, 3143, 3668, 4170, 7667, 4171, 1146, 1875, 1906, # 5718 + 4444, 2601, 3967, 781, 2422, 132, 1589, 203, 147, 273, 2789, 2402, 898, 1786, 2154, 3968, # 5734 + 3969, 7668, 3792, 2790, 7669, 7670, 4445, 4446, 7671, 3208, 7672, 1635, 3793, 965, 7673, 1804, # 5750 + 2690, 1516, 3559, 1121, 1082, 1329, 3284, 3970, 1449, 3794, 65, 1128, 2835, 2913, 2759, 1590, # 5766 + 3795, 7674, 7675, 12, 2658, 45, 976, 2579, 3144, 4447, 517, 2528, 1013, 1037, 3209, 7676, # 5782 + 3796, 2836, 7677, 3797, 7678, 3452, 7679, 2602, 614, 1998, 2318, 3798, 3087, 2724, 2628, 7680, # 5798 + 2580, 4172, 599, 1269, 7681, 1810, 3669, 7682, 2691, 3088, 759, 1060, 489, 1805, 3351, 3285, # 5814 + 1358, 7683, 7684, 2386, 1387, 1215, 2629, 2252, 490, 7685, 7686, 4173, 1759, 2387, 2343, 7687, # 5830 + 4448, 3799, 1907, 3971, 2630, 1806, 3210, 4449, 3453, 3286, 2760, 2344, 874, 7688, 7689, 3454, # 5846 + 3670, 1858, 91, 2914, 3671, 3042, 3800, 4450, 7690, 3145, 3972, 2659, 7691, 3455, 1202, 1403, # 5862 + 3801, 2954, 2529, 1517, 2503, 4451, 3456, 2504, 7692, 4452, 7693, 2692, 1885, 1495, 1731, 3973, # 5878 + 2365, 4453, 7694, 2029, 7695, 7696, 3974, 2693, 1216, 237, 2581, 4174, 2319, 3975, 3802, 4454, # 5894 + 4455, 2694, 3560, 3457, 445, 4456, 7697, 7698, 7699, 7700, 2761, 61, 3976, 3672, 1822, 3977, # 5910 + 7701, 687, 2045, 935, 925, 405, 2660, 703, 1096, 1859, 2725, 4457, 3978, 1876, 1367, 2695, # 5926 + 3352, 918, 2105, 1781, 2476, 334, 3287, 1611, 1093, 4458, 564, 3146, 3458, 3673, 3353, 945, # 5942 + 2631, 2057, 4459, 7702, 1925, 872, 4175, 7703, 3459, 2696, 3089, 349, 4176, 3674, 3979, 4460, # 5958 + 3803, 4177, 3675, 2155, 3980, 4461, 4462, 4178, 4463, 2403, 2046, 782, 3981, 400, 251, 4179, # 5974 + 1624, 7704, 7705, 277, 3676, 299, 1265, 476, 1191, 3804, 2121, 4180, 4181, 1109, 205, 7706, # 5990 + 2582, 1000, 2156, 3561, 1860, 7707, 7708, 7709, 4464, 7710, 4465, 2565, 107, 2477, 2157, 3982, # 6006 + 3460, 3147, 7711, 1533, 541, 1301, 158, 753, 4182, 2872, 3562, 7712, 1696, 370, 1088, 4183, # 6022 + 4466, 3563, 579, 327, 440, 162, 2240, 269, 1937, 1374, 3461, 968, 3043, 56, 1396, 3090, # 6038 + 2106, 3288, 3354, 7713, 1926, 2158, 4467, 2998, 7714, 3564, 7715, 7716, 3677, 4468, 2478, 7717, # 6054 + 2791, 7718, 1650, 4469, 7719, 2603, 7720, 7721, 3983, 2661, 3355, 1149, 3356, 3984, 3805, 3985, # 6070 + 7722, 1076, 49, 7723, 951, 3211, 3289, 3290, 450, 2837, 920, 7724, 1811, 2792, 2366, 4184, # 6086 + 1908, 1138, 2367, 3806, 3462, 7725, 3212, 4470, 1909, 1147, 1518, 2423, 4471, 3807, 7726, 4472, # 6102 + 2388, 2604, 260, 1795, 3213, 7727, 7728, 3808, 3291, 708, 7729, 3565, 1704, 7730, 3566, 1351, # 6118 + 1618, 3357, 2999, 1886, 944, 4185, 3358, 4186, 3044, 3359, 4187, 7731, 3678, 422, 413, 1714, # 6134 + 3292, 500, 2058, 2345, 4188, 2479, 7732, 1344, 1910, 954, 7733, 1668, 7734, 7735, 3986, 2404, # 6150 + 4189, 3567, 3809, 4190, 7736, 2302, 1318, 2505, 3091, 133, 3092, 2873, 4473, 629, 31, 2838, # 6166 + 2697, 3810, 4474, 850, 949, 4475, 3987, 2955, 1732, 2088, 4191, 1496, 1852, 7737, 3988, 620, # 6182 + 3214, 981, 1242, 3679, 3360, 1619, 3680, 1643, 3293, 2139, 2452, 1970, 1719, 3463, 2168, 7738, # 6198 + 3215, 7739, 7740, 3361, 1828, 7741, 1277, 4476, 1565, 2047, 7742, 1636, 3568, 3093, 7743, 869, # 6214 + 2839, 655, 3811, 3812, 3094, 3989, 3000, 3813, 1310, 3569, 4477, 7744, 7745, 7746, 1733, 558, # 6230 + 4478, 3681, 335, 1549, 3045, 1756, 4192, 3682, 1945, 3464, 1829, 1291, 1192, 470, 2726, 2107, # 6246 + 2793, 913, 1054, 3990, 7747, 1027, 7748, 3046, 3991, 4479, 982, 2662, 3362, 3148, 3465, 3216, # 6262 + 3217, 1946, 2794, 7749, 571, 4480, 7750, 1830, 7751, 3570, 2583, 1523, 2424, 7752, 2089, 984, # 6278 + 4481, 3683, 1959, 7753, 3684, 852, 923, 2795, 3466, 3685, 969, 1519, 999, 2048, 2320, 1705, # 6294 + 7754, 3095, 615, 1662, 151, 597, 3992, 2405, 2321, 1049, 275, 4482, 3686, 4193, 568, 3687, # 6310 + 3571, 2480, 4194, 3688, 7755, 2425, 2270, 409, 3218, 7756, 1566, 2874, 3467, 1002, 769, 2840, # 6326 + 194, 2090, 3149, 3689, 2222, 3294, 4195, 628, 1505, 7757, 7758, 1763, 2177, 3001, 3993, 521, # 6342 + 1161, 2584, 1787, 2203, 2406, 4483, 3994, 1625, 4196, 4197, 412, 42, 3096, 464, 7759, 2632, # 6358 + 4484, 3363, 1760, 1571, 2875, 3468, 2530, 1219, 2204, 3814, 2633, 2140, 2368, 4485, 4486, 3295, # 6374 + 1651, 3364, 3572, 7760, 7761, 3573, 2481, 3469, 7762, 3690, 7763, 7764, 2271, 2091, 460, 7765, # 6390 + 4487, 7766, 3002, 962, 588, 3574, 289, 3219, 2634, 1116, 52, 7767, 3047, 1796, 7768, 7769, # 6406 + 7770, 1467, 7771, 1598, 1143, 3691, 4198, 1984, 1734, 1067, 4488, 1280, 3365, 465, 4489, 1572, # 6422 + 510, 7772, 1927, 2241, 1812, 1644, 3575, 7773, 4490, 3692, 7774, 7775, 2663, 1573, 1534, 7776, # 6438 + 7777, 4199, 536, 1807, 1761, 3470, 3815, 3150, 2635, 7778, 7779, 7780, 4491, 3471, 2915, 1911, # 6454 + 2796, 7781, 3296, 1122, 377, 3220, 7782, 360, 7783, 7784, 4200, 1529, 551, 7785, 2059, 3693, # 6470 + 1769, 2426, 7786, 2916, 4201, 3297, 3097, 2322, 2108, 2030, 4492, 1404, 136, 1468, 1479, 672, # 6486 + 1171, 3221, 2303, 271, 3151, 7787, 2762, 7788, 2049, 678, 2727, 865, 1947, 4493, 7789, 2013, # 6502 + 3995, 2956, 7790, 2728, 2223, 1397, 3048, 3694, 4494, 4495, 1735, 2917, 3366, 3576, 7791, 3816, # 6518 + 509, 2841, 2453, 2876, 3817, 7792, 7793, 3152, 3153, 4496, 4202, 2531, 4497, 2304, 1166, 1010, # 6534 + 552, 681, 1887, 7794, 7795, 2957, 2958, 3996, 1287, 1596, 1861, 3154, 358, 453, 736, 175, # 6550 + 478, 1117, 905, 1167, 1097, 7796, 1853, 1530, 7797, 1706, 7798, 2178, 3472, 2287, 3695, 3473, # 6566 + 3577, 4203, 2092, 4204, 7799, 3367, 1193, 2482, 4205, 1458, 2190, 2205, 1862, 1888, 1421, 3298, # 6582 + 2918, 3049, 2179, 3474, 595, 2122, 7800, 3997, 7801, 7802, 4206, 1707, 2636, 223, 3696, 1359, # 6598 + 751, 3098, 183, 3475, 7803, 2797, 3003, 419, 2369, 633, 704, 3818, 2389, 241, 7804, 7805, # 6614 + 7806, 838, 3004, 3697, 2272, 2763, 2454, 3819, 1938, 2050, 3998, 1309, 3099, 2242, 1181, 7807, # 6630 + 1136, 2206, 3820, 2370, 1446, 4207, 2305, 4498, 7808, 7809, 4208, 1055, 2605, 484, 3698, 7810, # 6646 + 3999, 625, 4209, 2273, 3368, 1499, 4210, 4000, 7811, 4001, 4211, 3222, 2274, 2275, 3476, 7812, # 6662 + 7813, 2764, 808, 2606, 3699, 3369, 4002, 4212, 3100, 2532, 526, 3370, 3821, 4213, 955, 7814, # 6678 + 1620, 4214, 2637, 2427, 7815, 1429, 3700, 1669, 1831, 994, 928, 7816, 3578, 1260, 7817, 7818, # 6694 + 7819, 1948, 2288, 741, 2919, 1626, 4215, 2729, 2455, 867, 1184, 362, 3371, 1392, 7820, 7821, # 6710 + 4003, 4216, 1770, 1736, 3223, 2920, 4499, 4500, 1928, 2698, 1459, 1158, 7822, 3050, 3372, 2877, # 6726 + 1292, 1929, 2506, 2842, 3701, 1985, 1187, 2071, 2014, 2607, 4217, 7823, 2566, 2507, 2169, 3702, # 6742 + 2483, 3299, 7824, 3703, 4501, 7825, 7826, 666, 1003, 3005, 1022, 3579, 4218, 7827, 4502, 1813, # 6758 + 2253, 574, 3822, 1603, 295, 1535, 705, 3823, 4219, 283, 858, 417, 7828, 7829, 3224, 4503, # 6774 + 4504, 3051, 1220, 1889, 1046, 2276, 2456, 4004, 1393, 1599, 689, 2567, 388, 4220, 7830, 2484, # 6790 + 802, 7831, 2798, 3824, 2060, 1405, 2254, 7832, 4505, 3825, 2109, 1052, 1345, 3225, 1585, 7833, # 6806 + 809, 7834, 7835, 7836, 575, 2730, 3477, 956, 1552, 1469, 1144, 2323, 7837, 2324, 1560, 2457, # 6822 + 3580, 3226, 4005, 616, 2207, 3155, 2180, 2289, 7838, 1832, 7839, 3478, 4506, 7840, 1319, 3704, # 6838 + 3705, 1211, 3581, 1023, 3227, 1293, 2799, 7841, 7842, 7843, 3826, 607, 2306, 3827, 762, 2878, # 6854 + 1439, 4221, 1360, 7844, 1485, 3052, 7845, 4507, 1038, 4222, 1450, 2061, 2638, 4223, 1379, 4508, # 6870 + 2585, 7846, 7847, 4224, 1352, 1414, 2325, 2921, 1172, 7848, 7849, 3828, 3829, 7850, 1797, 1451, # 6886 + 7851, 7852, 7853, 7854, 2922, 4006, 4007, 2485, 2346, 411, 4008, 4009, 3582, 3300, 3101, 4509, # 6902 + 1561, 2664, 1452, 4010, 1375, 7855, 7856, 47, 2959, 316, 7857, 1406, 1591, 2923, 3156, 7858, # 6918 + 1025, 2141, 3102, 3157, 354, 2731, 884, 2224, 4225, 2407, 508, 3706, 726, 3583, 996, 2428, # 6934 + 3584, 729, 7859, 392, 2191, 1453, 4011, 4510, 3707, 7860, 7861, 2458, 3585, 2608, 1675, 2800, # 6950 + 919, 2347, 2960, 2348, 1270, 4511, 4012, 73, 7862, 7863, 647, 7864, 3228, 2843, 2255, 1550, # 6966 + 1346, 3006, 7865, 1332, 883, 3479, 7866, 7867, 7868, 7869, 3301, 2765, 7870, 1212, 831, 1347, # 6982 + 4226, 4512, 2326, 3830, 1863, 3053, 720, 3831, 4513, 4514, 3832, 7871, 4227, 7872, 7873, 4515, # 6998 + 7874, 7875, 1798, 4516, 3708, 2609, 4517, 3586, 1645, 2371, 7876, 7877, 2924, 669, 2208, 2665, # 7014 + 2429, 7878, 2879, 7879, 7880, 1028, 3229, 7881, 4228, 2408, 7882, 2256, 1353, 7883, 7884, 4518, # 7030 + 3158, 518, 7885, 4013, 7886, 4229, 1960, 7887, 2142, 4230, 7888, 7889, 3007, 2349, 2350, 3833, # 7046 + 516, 1833, 1454, 4014, 2699, 4231, 4519, 2225, 2610, 1971, 1129, 3587, 7890, 2766, 7891, 2961, # 7062 + 1422, 577, 1470, 3008, 1524, 3373, 7892, 7893, 432, 4232, 3054, 3480, 7894, 2586, 1455, 2508, # 7078 + 2226, 1972, 1175, 7895, 1020, 2732, 4015, 3481, 4520, 7896, 2733, 7897, 1743, 1361, 3055, 3482, # 7094 + 2639, 4016, 4233, 4521, 2290, 895, 924, 4234, 2170, 331, 2243, 3056, 166, 1627, 3057, 1098, # 7110 + 7898, 1232, 2880, 2227, 3374, 4522, 657, 403, 1196, 2372, 542, 3709, 3375, 1600, 4235, 3483, # 7126 + 7899, 4523, 2767, 3230, 576, 530, 1362, 7900, 4524, 2533, 2666, 3710, 4017, 7901, 842, 3834, # 7142 + 7902, 2801, 2031, 1014, 4018, 213, 2700, 3376, 665, 621, 4236, 7903, 3711, 2925, 2430, 7904, # 7158 + 2431, 3302, 3588, 3377, 7905, 4237, 2534, 4238, 4525, 3589, 1682, 4239, 3484, 1380, 7906, 724, # 7174 + 2277, 600, 1670, 7907, 1337, 1233, 4526, 3103, 2244, 7908, 1621, 4527, 7909, 651, 4240, 7910, # 7190 + 1612, 4241, 2611, 7911, 2844, 7912, 2734, 2307, 3058, 7913, 716, 2459, 3059, 174, 1255, 2701, # 7206 + 4019, 3590, 548, 1320, 1398, 728, 4020, 1574, 7914, 1890, 1197, 3060, 4021, 7915, 3061, 3062, # 7222 + 3712, 3591, 3713, 747, 7916, 635, 4242, 4528, 7917, 7918, 7919, 4243, 7920, 7921, 4529, 7922, # 7238 + 3378, 4530, 2432, 451, 7923, 3714, 2535, 2072, 4244, 2735, 4245, 4022, 7924, 1764, 4531, 7925, # 7254 + 4246, 350, 7926, 2278, 2390, 2486, 7927, 4247, 4023, 2245, 1434, 4024, 488, 4532, 458, 4248, # 7270 + 4025, 3715, 771, 1330, 2391, 3835, 2568, 3159, 2159, 2409, 1553, 2667, 3160, 4249, 7928, 2487, # 7286 + 2881, 2612, 1720, 2702, 4250, 3379, 4533, 7929, 2536, 4251, 7930, 3231, 4252, 2768, 7931, 2015, # 7302 + 2736, 7932, 1155, 1017, 3716, 3836, 7933, 3303, 2308, 201, 1864, 4253, 1430, 7934, 4026, 7935, # 7318 + 7936, 7937, 7938, 7939, 4254, 1604, 7940, 414, 1865, 371, 2587, 4534, 4535, 3485, 2016, 3104, # 7334 + 4536, 1708, 960, 4255, 887, 389, 2171, 1536, 1663, 1721, 7941, 2228, 4027, 2351, 2926, 1580, # 7350 + 7942, 7943, 7944, 1744, 7945, 2537, 4537, 4538, 7946, 4539, 7947, 2073, 7948, 7949, 3592, 3380, # 7366 + 2882, 4256, 7950, 4257, 2640, 3381, 2802, 673, 2703, 2460, 709, 3486, 4028, 3593, 4258, 7951, # 7382 + 1148, 502, 634, 7952, 7953, 1204, 4540, 3594, 1575, 4541, 2613, 3717, 7954, 3718, 3105, 948, # 7398 + 3232, 121, 1745, 3837, 1110, 7955, 4259, 3063, 2509, 3009, 4029, 3719, 1151, 1771, 3838, 1488, # 7414 + 4030, 1986, 7956, 2433, 3487, 7957, 7958, 2093, 7959, 4260, 3839, 1213, 1407, 2803, 531, 2737, # 7430 + 2538, 3233, 1011, 1537, 7960, 2769, 4261, 3106, 1061, 7961, 3720, 3721, 1866, 2883, 7962, 2017, # 7446 + 120, 4262, 4263, 2062, 3595, 3234, 2309, 3840, 2668, 3382, 1954, 4542, 7963, 7964, 3488, 1047, # 7462 + 2704, 1266, 7965, 1368, 4543, 2845, 649, 3383, 3841, 2539, 2738, 1102, 2846, 2669, 7966, 7967, # 7478 + 1999, 7968, 1111, 3596, 2962, 7969, 2488, 3842, 3597, 2804, 1854, 3384, 3722, 7970, 7971, 3385, # 7494 + 2410, 2884, 3304, 3235, 3598, 7972, 2569, 7973, 3599, 2805, 4031, 1460, 856, 7974, 3600, 7975, # 7510 + 2885, 2963, 7976, 2886, 3843, 7977, 4264, 632, 2510, 875, 3844, 1697, 3845, 2291, 7978, 7979, # 7526 + 4544, 3010, 1239, 580, 4545, 4265, 7980, 914, 936, 2074, 1190, 4032, 1039, 2123, 7981, 7982, # 7542 + 7983, 3386, 1473, 7984, 1354, 4266, 3846, 7985, 2172, 3064, 4033, 915, 3305, 4267, 4268, 3306, # 7558 + 1605, 1834, 7986, 2739, 398, 3601, 4269, 3847, 4034, 328, 1912, 2847, 4035, 3848, 1331, 4270, # 7574 + 3011, 937, 4271, 7987, 3602, 4036, 4037, 3387, 2160, 4546, 3388, 524, 742, 538, 3065, 1012, # 7590 + 7988, 7989, 3849, 2461, 7990, 658, 1103, 225, 3850, 7991, 7992, 4547, 7993, 4548, 7994, 3236, # 7606 + 1243, 7995, 4038, 963, 2246, 4549, 7996, 2705, 3603, 3161, 7997, 7998, 2588, 2327, 7999, 4550, # 7622 + 8000, 8001, 8002, 3489, 3307, 957, 3389, 2540, 2032, 1930, 2927, 2462, 870, 2018, 3604, 1746, # 7638 + 2770, 2771, 2434, 2463, 8003, 3851, 8004, 3723, 3107, 3724, 3490, 3390, 3725, 8005, 1179, 3066, # 7654 + 8006, 3162, 2373, 4272, 3726, 2541, 3163, 3108, 2740, 4039, 8007, 3391, 1556, 2542, 2292, 977, # 7670 + 2887, 2033, 4040, 1205, 3392, 8008, 1765, 3393, 3164, 2124, 1271, 1689, 714, 4551, 3491, 8009, # 7686 + 2328, 3852, 533, 4273, 3605, 2181, 617, 8010, 2464, 3308, 3492, 2310, 8011, 8012, 3165, 8013, # 7702 + 8014, 3853, 1987, 618, 427, 2641, 3493, 3394, 8015, 8016, 1244, 1690, 8017, 2806, 4274, 4552, # 7718 + 8018, 3494, 8019, 8020, 2279, 1576, 473, 3606, 4275, 3395, 972, 8021, 3607, 8022, 3067, 8023, # 7734 + 8024, 4553, 4554, 8025, 3727, 4041, 4042, 8026, 153, 4555, 356, 8027, 1891, 2888, 4276, 2143, # 7750 + 408, 803, 2352, 8028, 3854, 8029, 4277, 1646, 2570, 2511, 4556, 4557, 3855, 8030, 3856, 4278, # 7766 + 8031, 2411, 3396, 752, 8032, 8033, 1961, 2964, 8034, 746, 3012, 2465, 8035, 4279, 3728, 698, # 7782 + 4558, 1892, 4280, 3608, 2543, 4559, 3609, 3857, 8036, 3166, 3397, 8037, 1823, 1302, 4043, 2706, # 7798 + 3858, 1973, 4281, 8038, 4282, 3167, 823, 1303, 1288, 1236, 2848, 3495, 4044, 3398, 774, 3859, # 7814 + 8039, 1581, 4560, 1304, 2849, 3860, 4561, 8040, 2435, 2161, 1083, 3237, 4283, 4045, 4284, 344, # 7830 + 1173, 288, 2311, 454, 1683, 8041, 8042, 1461, 4562, 4046, 2589, 8043, 8044, 4563, 985, 894, # 7846 + 8045, 3399, 3168, 8046, 1913, 2928, 3729, 1988, 8047, 2110, 1974, 8048, 4047, 8049, 2571, 1194, # 7862 + 425, 8050, 4564, 3169, 1245, 3730, 4285, 8051, 8052, 2850, 8053, 636, 4565, 1855, 3861, 760, # 7878 + 1799, 8054, 4286, 2209, 1508, 4566, 4048, 1893, 1684, 2293, 8055, 8056, 8057, 4287, 4288, 2210, # 7894 + 479, 8058, 8059, 832, 8060, 4049, 2489, 8061, 2965, 2490, 3731, 990, 3109, 627, 1814, 2642, # 7910 + 4289, 1582, 4290, 2125, 2111, 3496, 4567, 8062, 799, 4291, 3170, 8063, 4568, 2112, 1737, 3013, # 7926 + 1018, 543, 754, 4292, 3309, 1676, 4569, 4570, 4050, 8064, 1489, 8065, 3497, 8066, 2614, 2889, # 7942 + 4051, 8067, 8068, 2966, 8069, 8070, 8071, 8072, 3171, 4571, 4572, 2182, 1722, 8073, 3238, 3239, # 7958 + 1842, 3610, 1715, 481, 365, 1975, 1856, 8074, 8075, 1962, 2491, 4573, 8076, 2126, 3611, 3240, # 7974 + 433, 1894, 2063, 2075, 8077, 602, 2741, 8078, 8079, 8080, 8081, 8082, 3014, 1628, 3400, 8083, # 7990 + 3172, 4574, 4052, 2890, 4575, 2512, 8084, 2544, 2772, 8085, 8086, 8087, 3310, 4576, 2891, 8088, # 8006 + 4577, 8089, 2851, 4578, 4579, 1221, 2967, 4053, 2513, 8090, 8091, 8092, 1867, 1989, 8093, 8094, # 8022 + 8095, 1895, 8096, 8097, 4580, 1896, 4054, 318, 8098, 2094, 4055, 4293, 8099, 8100, 485, 8101, # 8038 + 938, 3862, 553, 2670, 116, 8102, 3863, 3612, 8103, 3498, 2671, 2773, 3401, 3311, 2807, 8104, # 8054 + 3613, 2929, 4056, 1747, 2930, 2968, 8105, 8106, 207, 8107, 8108, 2672, 4581, 2514, 8109, 3015, # 8070 + 890, 3614, 3864, 8110, 1877, 3732, 3402, 8111, 2183, 2353, 3403, 1652, 8112, 8113, 8114, 941, # 8086 + 2294, 208, 3499, 4057, 2019, 330, 4294, 3865, 2892, 2492, 3733, 4295, 8115, 8116, 8117, 8118, # 8102 ) - +# fmt: on diff --git a/libs/common/chardet/euctwprober.py b/libs/common/chardet/euctwprober.py index 35669cc4..a37ab189 100644 --- a/libs/common/chardet/euctwprober.py +++ b/libs/common/chardet/euctwprober.py @@ -25,22 +25,23 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -from .mbcharsetprober import MultiByteCharSetProber -from .codingstatemachine import CodingStateMachine from .chardistribution import EUCTWDistributionAnalysis +from .codingstatemachine import CodingStateMachine +from .mbcharsetprober import MultiByteCharSetProber from .mbcssm import EUCTW_SM_MODEL + class EUCTWProber(MultiByteCharSetProber): - def __init__(self): - super(EUCTWProber, self).__init__() + def __init__(self) -> None: + super().__init__() self.coding_sm = CodingStateMachine(EUCTW_SM_MODEL) self.distribution_analyzer = EUCTWDistributionAnalysis() self.reset() @property - def charset_name(self): + def charset_name(self) -> str: return "EUC-TW" @property - def language(self): + def language(self) -> str: return "Taiwan" diff --git a/libs/common/chardet/gb2312freq.py b/libs/common/chardet/gb2312freq.py index 697837bd..b32bfc74 100644 --- a/libs/common/chardet/gb2312freq.py +++ b/libs/common/chardet/gb2312freq.py @@ -43,6 +43,7 @@ GB2312_TYPICAL_DISTRIBUTION_RATIO = 0.9 GB2312_TABLE_SIZE = 3760 +# fmt: off GB2312_CHAR_TO_FREQ_ORDER = ( 1671, 749,1443,2364,3924,3807,2330,3921,1704,3463,2691,1511,1515, 572,3191,2205, 2361, 224,2558, 479,1711, 963,3162, 440,4060,1905,2966,2947,3580,2647,3961,3842, @@ -280,4 +281,4 @@ GB2312_CHAR_TO_FREQ_ORDER = ( 381,1638,4592,1020, 516,3214, 458, 947,4575,1432, 211,1514,2926,1865,2142, 189, 852,1221,1400,1486, 882,2299,4036, 351, 28,1122, 700,6479,6480,6481,6482,6483, #last 512 ) - +# fmt: on diff --git a/libs/common/chardet/gb2312prober.py b/libs/common/chardet/gb2312prober.py index 8446d2dd..d423e731 100644 --- a/libs/common/chardet/gb2312prober.py +++ b/libs/common/chardet/gb2312prober.py @@ -25,22 +25,23 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -from .mbcharsetprober import MultiByteCharSetProber -from .codingstatemachine import CodingStateMachine from .chardistribution import GB2312DistributionAnalysis +from .codingstatemachine import CodingStateMachine +from .mbcharsetprober import MultiByteCharSetProber from .mbcssm import GB2312_SM_MODEL + class GB2312Prober(MultiByteCharSetProber): - def __init__(self): - super(GB2312Prober, self).__init__() + def __init__(self) -> None: + super().__init__() self.coding_sm = CodingStateMachine(GB2312_SM_MODEL) self.distribution_analyzer = GB2312DistributionAnalysis() self.reset() @property - def charset_name(self): + def charset_name(self) -> str: return "GB2312" @property - def language(self): + def language(self) -> str: return "Chinese" diff --git a/libs/common/chardet/hebrewprober.py b/libs/common/chardet/hebrewprober.py index b0e1bf49..785d0057 100644 --- a/libs/common/chardet/hebrewprober.py +++ b/libs/common/chardet/hebrewprober.py @@ -25,8 +25,11 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### +from typing import Optional, Union + from .charsetprober import CharSetProber from .enums import ProbingState +from .sbcharsetprober import SingleByteCharSetProber # This prober doesn't actually recognize a language or a charset. # It is a helper prober for the use of the Hebrew model probers @@ -125,18 +128,20 @@ from .enums import ProbingState # model probers scores. The answer is returned in the form of the name of the # charset identified, either "windows-1255" or "ISO-8859-8". + class HebrewProber(CharSetProber): + SPACE = 0x20 # windows-1255 / ISO-8859-8 code points of interest - FINAL_KAF = 0xea - NORMAL_KAF = 0xeb - FINAL_MEM = 0xed - NORMAL_MEM = 0xee - FINAL_NUN = 0xef - NORMAL_NUN = 0xf0 - FINAL_PE = 0xf3 - NORMAL_PE = 0xf4 - FINAL_TSADI = 0xf5 - NORMAL_TSADI = 0xf6 + FINAL_KAF = 0xEA + NORMAL_KAF = 0xEB + FINAL_MEM = 0xED + NORMAL_MEM = 0xEE + FINAL_NUN = 0xEF + NORMAL_NUN = 0xF0 + FINAL_PE = 0xF3 + NORMAL_PE = 0xF4 + FINAL_TSADI = 0xF5 + NORMAL_TSADI = 0xF6 # Minimum Visual vs Logical final letter score difference. # If the difference is below this, don't rely solely on the final letter score @@ -151,35 +156,44 @@ class HebrewProber(CharSetProber): VISUAL_HEBREW_NAME = "ISO-8859-8" LOGICAL_HEBREW_NAME = "windows-1255" - def __init__(self): - super(HebrewProber, self).__init__() - self._final_char_logical_score = None - self._final_char_visual_score = None - self._prev = None - self._before_prev = None - self._logical_prober = None - self._visual_prober = None + def __init__(self) -> None: + super().__init__() + self._final_char_logical_score = 0 + self._final_char_visual_score = 0 + self._prev = self.SPACE + self._before_prev = self.SPACE + self._logical_prober: Optional[SingleByteCharSetProber] = None + self._visual_prober: Optional[SingleByteCharSetProber] = None self.reset() - def reset(self): + def reset(self) -> None: self._final_char_logical_score = 0 self._final_char_visual_score = 0 # The two last characters seen in the previous buffer, # mPrev and mBeforePrev are initialized to space in order to simulate # a word delimiter at the beginning of the data - self._prev = ' ' - self._before_prev = ' ' + self._prev = self.SPACE + self._before_prev = self.SPACE # These probers are owned by the group prober. - def set_model_probers(self, logicalProber, visualProber): - self._logical_prober = logicalProber - self._visual_prober = visualProber + def set_model_probers( + self, + logical_prober: SingleByteCharSetProber, + visual_prober: SingleByteCharSetProber, + ) -> None: + self._logical_prober = logical_prober + self._visual_prober = visual_prober - def is_final(self, c): - return c in [self.FINAL_KAF, self.FINAL_MEM, self.FINAL_NUN, - self.FINAL_PE, self.FINAL_TSADI] + def is_final(self, c: int) -> bool: + return c in [ + self.FINAL_KAF, + self.FINAL_MEM, + self.FINAL_NUN, + self.FINAL_PE, + self.FINAL_TSADI, + ] - def is_non_final(self, c): + def is_non_final(self, c: int) -> bool: # The normal Tsadi is not a good Non-Final letter due to words like # 'lechotet' (to chat) containing an apostrophe after the tsadi. This # apostrophe is converted to a space in FilterWithoutEnglishLetters @@ -190,10 +204,9 @@ class HebrewProber(CharSetProber): # for example legally end with a Non-Final Pe or Kaf. However, the # benefit of these letters as Non-Final letters outweighs the damage # since these words are quite rare. - return c in [self.NORMAL_KAF, self.NORMAL_MEM, - self.NORMAL_NUN, self.NORMAL_PE] + return c in [self.NORMAL_KAF, self.NORMAL_MEM, self.NORMAL_NUN, self.NORMAL_PE] - def feed(self, byte_str): + def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState: # Final letter analysis for logical-visual decision. # Look for evidence that the received buffer is either logical Hebrew # or visual Hebrew. @@ -227,9 +240,9 @@ class HebrewProber(CharSetProber): byte_str = self.filter_high_byte_only(byte_str) for cur in byte_str: - if cur == ' ': + if cur == self.SPACE: # We stand on a space - a word just ended - if self._before_prev != ' ': + if self._before_prev != self.SPACE: # next-to-last char was not a space so self._prev is not a # 1 letter word if self.is_final(self._prev): @@ -241,8 +254,11 @@ class HebrewProber(CharSetProber): self._final_char_visual_score += 1 else: # Not standing on a space - if ((self._before_prev == ' ') and - (self.is_final(self._prev)) and (cur != ' ')): + if ( + (self._before_prev == self.SPACE) + and (self.is_final(self._prev)) + and (cur != self.SPACE) + ): # case (3) [-2:space][-1:final letter][cur:not space] self._final_char_visual_score += 1 self._before_prev = self._prev @@ -253,7 +269,10 @@ class HebrewProber(CharSetProber): return ProbingState.DETECTING @property - def charset_name(self): + def charset_name(self) -> str: + assert self._logical_prober is not None + assert self._visual_prober is not None + # Make the decision: is it Logical or Visual? # If the final letter score distance is dominant enough, rely on it. finalsub = self._final_char_logical_score - self._final_char_visual_score @@ -263,8 +282,9 @@ class HebrewProber(CharSetProber): return self.VISUAL_HEBREW_NAME # It's not dominant enough, try to rely on the model scores instead. - modelsub = (self._logical_prober.get_confidence() - - self._visual_prober.get_confidence()) + modelsub = ( + self._logical_prober.get_confidence() - self._visual_prober.get_confidence() + ) if modelsub > self.MIN_MODEL_DISTANCE: return self.LOGICAL_HEBREW_NAME if modelsub < -self.MIN_MODEL_DISTANCE: @@ -280,13 +300,17 @@ class HebrewProber(CharSetProber): return self.LOGICAL_HEBREW_NAME @property - def language(self): - return 'Hebrew' + def language(self) -> str: + return "Hebrew" @property - def state(self): + def state(self) -> ProbingState: + assert self._logical_prober is not None + assert self._visual_prober is not None + # Remain active as long as any of the model probers are active. - if (self._logical_prober.state == ProbingState.NOT_ME) and \ - (self._visual_prober.state == ProbingState.NOT_ME): + if (self._logical_prober.state == ProbingState.NOT_ME) and ( + self._visual_prober.state == ProbingState.NOT_ME + ): return ProbingState.NOT_ME return ProbingState.DETECTING diff --git a/libs/common/chardet/jisfreq.py b/libs/common/chardet/jisfreq.py index 83fc082b..3293576e 100644 --- a/libs/common/chardet/jisfreq.py +++ b/libs/common/chardet/jisfreq.py @@ -46,6 +46,7 @@ JIS_TYPICAL_DISTRIBUTION_RATIO = 3.0 # Char to FreqOrder table , JIS_TABLE_SIZE = 4368 +# fmt: off JIS_CHAR_TO_FREQ_ORDER = ( 40, 1, 6, 182, 152, 180, 295,2127, 285, 381,3295,4304,3068,4606,3165,3510, # 16 3511,1822,2785,4607,1193,2226,5070,4608, 171,2996,1247, 18, 179,5071, 856,1661, # 32 @@ -321,5 +322,4 @@ JIS_CHAR_TO_FREQ_ORDER = ( 1444,1698,2385,2251,3729,1365,2281,2235,1717,6188, 864,3841,2515, 444, 527,2767, # 4352 2922,3625, 544, 461,6189, 566, 209,2437,3398,2098,1065,2068,3331,3626,3257,2137, # 4368 #last 512 ) - - +# fmt: on diff --git a/libs/common/chardet/johabfreq.py b/libs/common/chardet/johabfreq.py new file mode 100644 index 00000000..c1296999 --- /dev/null +++ b/libs/common/chardet/johabfreq.py @@ -0,0 +1,2382 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +# The frequency data itself is the same as euc-kr. +# This is just a mapping table to euc-kr. + +JOHAB_TO_EUCKR_ORDER_TABLE = { + 0x8861: 0, + 0x8862: 1, + 0x8865: 2, + 0x8868: 3, + 0x8869: 4, + 0x886A: 5, + 0x886B: 6, + 0x8871: 7, + 0x8873: 8, + 0x8874: 9, + 0x8875: 10, + 0x8876: 11, + 0x8877: 12, + 0x8878: 13, + 0x8879: 14, + 0x887B: 15, + 0x887C: 16, + 0x887D: 17, + 0x8881: 18, + 0x8882: 19, + 0x8885: 20, + 0x8889: 21, + 0x8891: 22, + 0x8893: 23, + 0x8895: 24, + 0x8896: 25, + 0x8897: 26, + 0x88A1: 27, + 0x88A2: 28, + 0x88A5: 29, + 0x88A9: 30, + 0x88B5: 31, + 0x88B7: 32, + 0x88C1: 33, + 0x88C5: 34, + 0x88C9: 35, + 0x88E1: 36, + 0x88E2: 37, + 0x88E5: 38, + 0x88E8: 39, + 0x88E9: 40, + 0x88EB: 41, + 0x88F1: 42, + 0x88F3: 43, + 0x88F5: 44, + 0x88F6: 45, + 0x88F7: 46, + 0x88F8: 47, + 0x88FB: 48, + 0x88FC: 49, + 0x88FD: 50, + 0x8941: 51, + 0x8945: 52, + 0x8949: 53, + 0x8951: 54, + 0x8953: 55, + 0x8955: 56, + 0x8956: 57, + 0x8957: 58, + 0x8961: 59, + 0x8962: 60, + 0x8963: 61, + 0x8965: 62, + 0x8968: 63, + 0x8969: 64, + 0x8971: 65, + 0x8973: 66, + 0x8975: 67, + 0x8976: 68, + 0x8977: 69, + 0x897B: 70, + 0x8981: 71, + 0x8985: 72, + 0x8989: 73, + 0x8993: 74, + 0x8995: 75, + 0x89A1: 76, + 0x89A2: 77, + 0x89A5: 78, + 0x89A8: 79, + 0x89A9: 80, + 0x89AB: 81, + 0x89AD: 82, + 0x89B0: 83, + 0x89B1: 84, + 0x89B3: 85, + 0x89B5: 86, + 0x89B7: 87, + 0x89B8: 88, + 0x89C1: 89, + 0x89C2: 90, + 0x89C5: 91, + 0x89C9: 92, + 0x89CB: 93, + 0x89D1: 94, + 0x89D3: 95, + 0x89D5: 96, + 0x89D7: 97, + 0x89E1: 98, + 0x89E5: 99, + 0x89E9: 100, + 0x89F3: 101, + 0x89F6: 102, + 0x89F7: 103, + 0x8A41: 104, + 0x8A42: 105, + 0x8A45: 106, + 0x8A49: 107, + 0x8A51: 108, + 0x8A53: 109, + 0x8A55: 110, + 0x8A57: 111, + 0x8A61: 112, + 0x8A65: 113, + 0x8A69: 114, + 0x8A73: 115, + 0x8A75: 116, + 0x8A81: 117, + 0x8A82: 118, + 0x8A85: 119, + 0x8A88: 120, + 0x8A89: 121, + 0x8A8A: 122, + 0x8A8B: 123, + 0x8A90: 124, + 0x8A91: 125, + 0x8A93: 126, + 0x8A95: 127, + 0x8A97: 128, + 0x8A98: 129, + 0x8AA1: 130, + 0x8AA2: 131, + 0x8AA5: 132, + 0x8AA9: 133, + 0x8AB6: 134, + 0x8AB7: 135, + 0x8AC1: 136, + 0x8AD5: 137, + 0x8AE1: 138, + 0x8AE2: 139, + 0x8AE5: 140, + 0x8AE9: 141, + 0x8AF1: 142, + 0x8AF3: 143, + 0x8AF5: 144, + 0x8B41: 145, + 0x8B45: 146, + 0x8B49: 147, + 0x8B61: 148, + 0x8B62: 149, + 0x8B65: 150, + 0x8B68: 151, + 0x8B69: 152, + 0x8B6A: 153, + 0x8B71: 154, + 0x8B73: 155, + 0x8B75: 156, + 0x8B77: 157, + 0x8B81: 158, + 0x8BA1: 159, + 0x8BA2: 160, + 0x8BA5: 161, + 0x8BA8: 162, + 0x8BA9: 163, + 0x8BAB: 164, + 0x8BB1: 165, + 0x8BB3: 166, + 0x8BB5: 167, + 0x8BB7: 168, + 0x8BB8: 169, + 0x8BBC: 170, + 0x8C61: 171, + 0x8C62: 172, + 0x8C63: 173, + 0x8C65: 174, + 0x8C69: 175, + 0x8C6B: 176, + 0x8C71: 177, + 0x8C73: 178, + 0x8C75: 179, + 0x8C76: 180, + 0x8C77: 181, + 0x8C7B: 182, + 0x8C81: 183, + 0x8C82: 184, + 0x8C85: 185, + 0x8C89: 186, + 0x8C91: 187, + 0x8C93: 188, + 0x8C95: 189, + 0x8C96: 190, + 0x8C97: 191, + 0x8CA1: 192, + 0x8CA2: 193, + 0x8CA9: 194, + 0x8CE1: 195, + 0x8CE2: 196, + 0x8CE3: 197, + 0x8CE5: 198, + 0x8CE9: 199, + 0x8CF1: 200, + 0x8CF3: 201, + 0x8CF5: 202, + 0x8CF6: 203, + 0x8CF7: 204, + 0x8D41: 205, + 0x8D42: 206, + 0x8D45: 207, + 0x8D51: 208, + 0x8D55: 209, + 0x8D57: 210, + 0x8D61: 211, + 0x8D65: 212, + 0x8D69: 213, + 0x8D75: 214, + 0x8D76: 215, + 0x8D7B: 216, + 0x8D81: 217, + 0x8DA1: 218, + 0x8DA2: 219, + 0x8DA5: 220, + 0x8DA7: 221, + 0x8DA9: 222, + 0x8DB1: 223, + 0x8DB3: 224, + 0x8DB5: 225, + 0x8DB7: 226, + 0x8DB8: 227, + 0x8DB9: 228, + 0x8DC1: 229, + 0x8DC2: 230, + 0x8DC9: 231, + 0x8DD6: 232, + 0x8DD7: 233, + 0x8DE1: 234, + 0x8DE2: 235, + 0x8DF7: 236, + 0x8E41: 237, + 0x8E45: 238, + 0x8E49: 239, + 0x8E51: 240, + 0x8E53: 241, + 0x8E57: 242, + 0x8E61: 243, + 0x8E81: 244, + 0x8E82: 245, + 0x8E85: 246, + 0x8E89: 247, + 0x8E90: 248, + 0x8E91: 249, + 0x8E93: 250, + 0x8E95: 251, + 0x8E97: 252, + 0x8E98: 253, + 0x8EA1: 254, + 0x8EA9: 255, + 0x8EB6: 256, + 0x8EB7: 257, + 0x8EC1: 258, + 0x8EC2: 259, + 0x8EC5: 260, + 0x8EC9: 261, + 0x8ED1: 262, + 0x8ED3: 263, + 0x8ED6: 264, + 0x8EE1: 265, + 0x8EE5: 266, + 0x8EE9: 267, + 0x8EF1: 268, + 0x8EF3: 269, + 0x8F41: 270, + 0x8F61: 271, + 0x8F62: 272, + 0x8F65: 273, + 0x8F67: 274, + 0x8F69: 275, + 0x8F6B: 276, + 0x8F70: 277, + 0x8F71: 278, + 0x8F73: 279, + 0x8F75: 280, + 0x8F77: 281, + 0x8F7B: 282, + 0x8FA1: 283, + 0x8FA2: 284, + 0x8FA5: 285, + 0x8FA9: 286, + 0x8FB1: 287, + 0x8FB3: 288, + 0x8FB5: 289, + 0x8FB7: 290, + 0x9061: 291, + 0x9062: 292, + 0x9063: 293, + 0x9065: 294, + 0x9068: 295, + 0x9069: 296, + 0x906A: 297, + 0x906B: 298, + 0x9071: 299, + 0x9073: 300, + 0x9075: 301, + 0x9076: 302, + 0x9077: 303, + 0x9078: 304, + 0x9079: 305, + 0x907B: 306, + 0x907D: 307, + 0x9081: 308, + 0x9082: 309, + 0x9085: 310, + 0x9089: 311, + 0x9091: 312, + 0x9093: 313, + 0x9095: 314, + 0x9096: 315, + 0x9097: 316, + 0x90A1: 317, + 0x90A2: 318, + 0x90A5: 319, + 0x90A9: 320, + 0x90B1: 321, + 0x90B7: 322, + 0x90E1: 323, + 0x90E2: 324, + 0x90E4: 325, + 0x90E5: 326, + 0x90E9: 327, + 0x90EB: 328, + 0x90EC: 329, + 0x90F1: 330, + 0x90F3: 331, + 0x90F5: 332, + 0x90F6: 333, + 0x90F7: 334, + 0x90FD: 335, + 0x9141: 336, + 0x9142: 337, + 0x9145: 338, + 0x9149: 339, + 0x9151: 340, + 0x9153: 341, + 0x9155: 342, + 0x9156: 343, + 0x9157: 344, + 0x9161: 345, + 0x9162: 346, + 0x9165: 347, + 0x9169: 348, + 0x9171: 349, + 0x9173: 350, + 0x9176: 351, + 0x9177: 352, + 0x917A: 353, + 0x9181: 354, + 0x9185: 355, + 0x91A1: 356, + 0x91A2: 357, + 0x91A5: 358, + 0x91A9: 359, + 0x91AB: 360, + 0x91B1: 361, + 0x91B3: 362, + 0x91B5: 363, + 0x91B7: 364, + 0x91BC: 365, + 0x91BD: 366, + 0x91C1: 367, + 0x91C5: 368, + 0x91C9: 369, + 0x91D6: 370, + 0x9241: 371, + 0x9245: 372, + 0x9249: 373, + 0x9251: 374, + 0x9253: 375, + 0x9255: 376, + 0x9261: 377, + 0x9262: 378, + 0x9265: 379, + 0x9269: 380, + 0x9273: 381, + 0x9275: 382, + 0x9277: 383, + 0x9281: 384, + 0x9282: 385, + 0x9285: 386, + 0x9288: 387, + 0x9289: 388, + 0x9291: 389, + 0x9293: 390, + 0x9295: 391, + 0x9297: 392, + 0x92A1: 393, + 0x92B6: 394, + 0x92C1: 395, + 0x92E1: 396, + 0x92E5: 397, + 0x92E9: 398, + 0x92F1: 399, + 0x92F3: 400, + 0x9341: 401, + 0x9342: 402, + 0x9349: 403, + 0x9351: 404, + 0x9353: 405, + 0x9357: 406, + 0x9361: 407, + 0x9362: 408, + 0x9365: 409, + 0x9369: 410, + 0x936A: 411, + 0x936B: 412, + 0x9371: 413, + 0x9373: 414, + 0x9375: 415, + 0x9377: 416, + 0x9378: 417, + 0x937C: 418, + 0x9381: 419, + 0x9385: 420, + 0x9389: 421, + 0x93A1: 422, + 0x93A2: 423, + 0x93A5: 424, + 0x93A9: 425, + 0x93AB: 426, + 0x93B1: 427, + 0x93B3: 428, + 0x93B5: 429, + 0x93B7: 430, + 0x93BC: 431, + 0x9461: 432, + 0x9462: 433, + 0x9463: 434, + 0x9465: 435, + 0x9468: 436, + 0x9469: 437, + 0x946A: 438, + 0x946B: 439, + 0x946C: 440, + 0x9470: 441, + 0x9471: 442, + 0x9473: 443, + 0x9475: 444, + 0x9476: 445, + 0x9477: 446, + 0x9478: 447, + 0x9479: 448, + 0x947D: 449, + 0x9481: 450, + 0x9482: 451, + 0x9485: 452, + 0x9489: 453, + 0x9491: 454, + 0x9493: 455, + 0x9495: 456, + 0x9496: 457, + 0x9497: 458, + 0x94A1: 459, + 0x94E1: 460, + 0x94E2: 461, + 0x94E3: 462, + 0x94E5: 463, + 0x94E8: 464, + 0x94E9: 465, + 0x94EB: 466, + 0x94EC: 467, + 0x94F1: 468, + 0x94F3: 469, + 0x94F5: 470, + 0x94F7: 471, + 0x94F9: 472, + 0x94FC: 473, + 0x9541: 474, + 0x9542: 475, + 0x9545: 476, + 0x9549: 477, + 0x9551: 478, + 0x9553: 479, + 0x9555: 480, + 0x9556: 481, + 0x9557: 482, + 0x9561: 483, + 0x9565: 484, + 0x9569: 485, + 0x9576: 486, + 0x9577: 487, + 0x9581: 488, + 0x9585: 489, + 0x95A1: 490, + 0x95A2: 491, + 0x95A5: 492, + 0x95A8: 493, + 0x95A9: 494, + 0x95AB: 495, + 0x95AD: 496, + 0x95B1: 497, + 0x95B3: 498, + 0x95B5: 499, + 0x95B7: 500, + 0x95B9: 501, + 0x95BB: 502, + 0x95C1: 503, + 0x95C5: 504, + 0x95C9: 505, + 0x95E1: 506, + 0x95F6: 507, + 0x9641: 508, + 0x9645: 509, + 0x9649: 510, + 0x9651: 511, + 0x9653: 512, + 0x9655: 513, + 0x9661: 514, + 0x9681: 515, + 0x9682: 516, + 0x9685: 517, + 0x9689: 518, + 0x9691: 519, + 0x9693: 520, + 0x9695: 521, + 0x9697: 522, + 0x96A1: 523, + 0x96B6: 524, + 0x96C1: 525, + 0x96D7: 526, + 0x96E1: 527, + 0x96E5: 528, + 0x96E9: 529, + 0x96F3: 530, + 0x96F5: 531, + 0x96F7: 532, + 0x9741: 533, + 0x9745: 534, + 0x9749: 535, + 0x9751: 536, + 0x9757: 537, + 0x9761: 538, + 0x9762: 539, + 0x9765: 540, + 0x9768: 541, + 0x9769: 542, + 0x976B: 543, + 0x9771: 544, + 0x9773: 545, + 0x9775: 546, + 0x9777: 547, + 0x9781: 548, + 0x97A1: 549, + 0x97A2: 550, + 0x97A5: 551, + 0x97A8: 552, + 0x97A9: 553, + 0x97B1: 554, + 0x97B3: 555, + 0x97B5: 556, + 0x97B6: 557, + 0x97B7: 558, + 0x97B8: 559, + 0x9861: 560, + 0x9862: 561, + 0x9865: 562, + 0x9869: 563, + 0x9871: 564, + 0x9873: 565, + 0x9875: 566, + 0x9876: 567, + 0x9877: 568, + 0x987D: 569, + 0x9881: 570, + 0x9882: 571, + 0x9885: 572, + 0x9889: 573, + 0x9891: 574, + 0x9893: 575, + 0x9895: 576, + 0x9896: 577, + 0x9897: 578, + 0x98E1: 579, + 0x98E2: 580, + 0x98E5: 581, + 0x98E9: 582, + 0x98EB: 583, + 0x98EC: 584, + 0x98F1: 585, + 0x98F3: 586, + 0x98F5: 587, + 0x98F6: 588, + 0x98F7: 589, + 0x98FD: 590, + 0x9941: 591, + 0x9942: 592, + 0x9945: 593, + 0x9949: 594, + 0x9951: 595, + 0x9953: 596, + 0x9955: 597, + 0x9956: 598, + 0x9957: 599, + 0x9961: 600, + 0x9976: 601, + 0x99A1: 602, + 0x99A2: 603, + 0x99A5: 604, + 0x99A9: 605, + 0x99B7: 606, + 0x99C1: 607, + 0x99C9: 608, + 0x99E1: 609, + 0x9A41: 610, + 0x9A45: 611, + 0x9A81: 612, + 0x9A82: 613, + 0x9A85: 614, + 0x9A89: 615, + 0x9A90: 616, + 0x9A91: 617, + 0x9A97: 618, + 0x9AC1: 619, + 0x9AE1: 620, + 0x9AE5: 621, + 0x9AE9: 622, + 0x9AF1: 623, + 0x9AF3: 624, + 0x9AF7: 625, + 0x9B61: 626, + 0x9B62: 627, + 0x9B65: 628, + 0x9B68: 629, + 0x9B69: 630, + 0x9B71: 631, + 0x9B73: 632, + 0x9B75: 633, + 0x9B81: 634, + 0x9B85: 635, + 0x9B89: 636, + 0x9B91: 637, + 0x9B93: 638, + 0x9BA1: 639, + 0x9BA5: 640, + 0x9BA9: 641, + 0x9BB1: 642, + 0x9BB3: 643, + 0x9BB5: 644, + 0x9BB7: 645, + 0x9C61: 646, + 0x9C62: 647, + 0x9C65: 648, + 0x9C69: 649, + 0x9C71: 650, + 0x9C73: 651, + 0x9C75: 652, + 0x9C76: 653, + 0x9C77: 654, + 0x9C78: 655, + 0x9C7C: 656, + 0x9C7D: 657, + 0x9C81: 658, + 0x9C82: 659, + 0x9C85: 660, + 0x9C89: 661, + 0x9C91: 662, + 0x9C93: 663, + 0x9C95: 664, + 0x9C96: 665, + 0x9C97: 666, + 0x9CA1: 667, + 0x9CA2: 668, + 0x9CA5: 669, + 0x9CB5: 670, + 0x9CB7: 671, + 0x9CE1: 672, + 0x9CE2: 673, + 0x9CE5: 674, + 0x9CE9: 675, + 0x9CF1: 676, + 0x9CF3: 677, + 0x9CF5: 678, + 0x9CF6: 679, + 0x9CF7: 680, + 0x9CFD: 681, + 0x9D41: 682, + 0x9D42: 683, + 0x9D45: 684, + 0x9D49: 685, + 0x9D51: 686, + 0x9D53: 687, + 0x9D55: 688, + 0x9D57: 689, + 0x9D61: 690, + 0x9D62: 691, + 0x9D65: 692, + 0x9D69: 693, + 0x9D71: 694, + 0x9D73: 695, + 0x9D75: 696, + 0x9D76: 697, + 0x9D77: 698, + 0x9D81: 699, + 0x9D85: 700, + 0x9D93: 701, + 0x9D95: 702, + 0x9DA1: 703, + 0x9DA2: 704, + 0x9DA5: 705, + 0x9DA9: 706, + 0x9DB1: 707, + 0x9DB3: 708, + 0x9DB5: 709, + 0x9DB7: 710, + 0x9DC1: 711, + 0x9DC5: 712, + 0x9DD7: 713, + 0x9DF6: 714, + 0x9E41: 715, + 0x9E45: 716, + 0x9E49: 717, + 0x9E51: 718, + 0x9E53: 719, + 0x9E55: 720, + 0x9E57: 721, + 0x9E61: 722, + 0x9E65: 723, + 0x9E69: 724, + 0x9E73: 725, + 0x9E75: 726, + 0x9E77: 727, + 0x9E81: 728, + 0x9E82: 729, + 0x9E85: 730, + 0x9E89: 731, + 0x9E91: 732, + 0x9E93: 733, + 0x9E95: 734, + 0x9E97: 735, + 0x9EA1: 736, + 0x9EB6: 737, + 0x9EC1: 738, + 0x9EE1: 739, + 0x9EE2: 740, + 0x9EE5: 741, + 0x9EE9: 742, + 0x9EF1: 743, + 0x9EF5: 744, + 0x9EF7: 745, + 0x9F41: 746, + 0x9F42: 747, + 0x9F45: 748, + 0x9F49: 749, + 0x9F51: 750, + 0x9F53: 751, + 0x9F55: 752, + 0x9F57: 753, + 0x9F61: 754, + 0x9F62: 755, + 0x9F65: 756, + 0x9F69: 757, + 0x9F71: 758, + 0x9F73: 759, + 0x9F75: 760, + 0x9F77: 761, + 0x9F78: 762, + 0x9F7B: 763, + 0x9F7C: 764, + 0x9FA1: 765, + 0x9FA2: 766, + 0x9FA5: 767, + 0x9FA9: 768, + 0x9FB1: 769, + 0x9FB3: 770, + 0x9FB5: 771, + 0x9FB7: 772, + 0xA061: 773, + 0xA062: 774, + 0xA065: 775, + 0xA067: 776, + 0xA068: 777, + 0xA069: 778, + 0xA06A: 779, + 0xA06B: 780, + 0xA071: 781, + 0xA073: 782, + 0xA075: 783, + 0xA077: 784, + 0xA078: 785, + 0xA07B: 786, + 0xA07D: 787, + 0xA081: 788, + 0xA082: 789, + 0xA085: 790, + 0xA089: 791, + 0xA091: 792, + 0xA093: 793, + 0xA095: 794, + 0xA096: 795, + 0xA097: 796, + 0xA098: 797, + 0xA0A1: 798, + 0xA0A2: 799, + 0xA0A9: 800, + 0xA0B7: 801, + 0xA0E1: 802, + 0xA0E2: 803, + 0xA0E5: 804, + 0xA0E9: 805, + 0xA0EB: 806, + 0xA0F1: 807, + 0xA0F3: 808, + 0xA0F5: 809, + 0xA0F7: 810, + 0xA0F8: 811, + 0xA0FD: 812, + 0xA141: 813, + 0xA142: 814, + 0xA145: 815, + 0xA149: 816, + 0xA151: 817, + 0xA153: 818, + 0xA155: 819, + 0xA156: 820, + 0xA157: 821, + 0xA161: 822, + 0xA162: 823, + 0xA165: 824, + 0xA169: 825, + 0xA175: 826, + 0xA176: 827, + 0xA177: 828, + 0xA179: 829, + 0xA181: 830, + 0xA1A1: 831, + 0xA1A2: 832, + 0xA1A4: 833, + 0xA1A5: 834, + 0xA1A9: 835, + 0xA1AB: 836, + 0xA1B1: 837, + 0xA1B3: 838, + 0xA1B5: 839, + 0xA1B7: 840, + 0xA1C1: 841, + 0xA1C5: 842, + 0xA1D6: 843, + 0xA1D7: 844, + 0xA241: 845, + 0xA245: 846, + 0xA249: 847, + 0xA253: 848, + 0xA255: 849, + 0xA257: 850, + 0xA261: 851, + 0xA265: 852, + 0xA269: 853, + 0xA273: 854, + 0xA275: 855, + 0xA281: 856, + 0xA282: 857, + 0xA283: 858, + 0xA285: 859, + 0xA288: 860, + 0xA289: 861, + 0xA28A: 862, + 0xA28B: 863, + 0xA291: 864, + 0xA293: 865, + 0xA295: 866, + 0xA297: 867, + 0xA29B: 868, + 0xA29D: 869, + 0xA2A1: 870, + 0xA2A5: 871, + 0xA2A9: 872, + 0xA2B3: 873, + 0xA2B5: 874, + 0xA2C1: 875, + 0xA2E1: 876, + 0xA2E5: 877, + 0xA2E9: 878, + 0xA341: 879, + 0xA345: 880, + 0xA349: 881, + 0xA351: 882, + 0xA355: 883, + 0xA361: 884, + 0xA365: 885, + 0xA369: 886, + 0xA371: 887, + 0xA375: 888, + 0xA3A1: 889, + 0xA3A2: 890, + 0xA3A5: 891, + 0xA3A8: 892, + 0xA3A9: 893, + 0xA3AB: 894, + 0xA3B1: 895, + 0xA3B3: 896, + 0xA3B5: 897, + 0xA3B6: 898, + 0xA3B7: 899, + 0xA3B9: 900, + 0xA3BB: 901, + 0xA461: 902, + 0xA462: 903, + 0xA463: 904, + 0xA464: 905, + 0xA465: 906, + 0xA468: 907, + 0xA469: 908, + 0xA46A: 909, + 0xA46B: 910, + 0xA46C: 911, + 0xA471: 912, + 0xA473: 913, + 0xA475: 914, + 0xA477: 915, + 0xA47B: 916, + 0xA481: 917, + 0xA482: 918, + 0xA485: 919, + 0xA489: 920, + 0xA491: 921, + 0xA493: 922, + 0xA495: 923, + 0xA496: 924, + 0xA497: 925, + 0xA49B: 926, + 0xA4A1: 927, + 0xA4A2: 928, + 0xA4A5: 929, + 0xA4B3: 930, + 0xA4E1: 931, + 0xA4E2: 932, + 0xA4E5: 933, + 0xA4E8: 934, + 0xA4E9: 935, + 0xA4EB: 936, + 0xA4F1: 937, + 0xA4F3: 938, + 0xA4F5: 939, + 0xA4F7: 940, + 0xA4F8: 941, + 0xA541: 942, + 0xA542: 943, + 0xA545: 944, + 0xA548: 945, + 0xA549: 946, + 0xA551: 947, + 0xA553: 948, + 0xA555: 949, + 0xA556: 950, + 0xA557: 951, + 0xA561: 952, + 0xA562: 953, + 0xA565: 954, + 0xA569: 955, + 0xA573: 956, + 0xA575: 957, + 0xA576: 958, + 0xA577: 959, + 0xA57B: 960, + 0xA581: 961, + 0xA585: 962, + 0xA5A1: 963, + 0xA5A2: 964, + 0xA5A3: 965, + 0xA5A5: 966, + 0xA5A9: 967, + 0xA5B1: 968, + 0xA5B3: 969, + 0xA5B5: 970, + 0xA5B7: 971, + 0xA5C1: 972, + 0xA5C5: 973, + 0xA5D6: 974, + 0xA5E1: 975, + 0xA5F6: 976, + 0xA641: 977, + 0xA642: 978, + 0xA645: 979, + 0xA649: 980, + 0xA651: 981, + 0xA653: 982, + 0xA661: 983, + 0xA665: 984, + 0xA681: 985, + 0xA682: 986, + 0xA685: 987, + 0xA688: 988, + 0xA689: 989, + 0xA68A: 990, + 0xA68B: 991, + 0xA691: 992, + 0xA693: 993, + 0xA695: 994, + 0xA697: 995, + 0xA69B: 996, + 0xA69C: 997, + 0xA6A1: 998, + 0xA6A9: 999, + 0xA6B6: 1000, + 0xA6C1: 1001, + 0xA6E1: 1002, + 0xA6E2: 1003, + 0xA6E5: 1004, + 0xA6E9: 1005, + 0xA6F7: 1006, + 0xA741: 1007, + 0xA745: 1008, + 0xA749: 1009, + 0xA751: 1010, + 0xA755: 1011, + 0xA757: 1012, + 0xA761: 1013, + 0xA762: 1014, + 0xA765: 1015, + 0xA769: 1016, + 0xA771: 1017, + 0xA773: 1018, + 0xA775: 1019, + 0xA7A1: 1020, + 0xA7A2: 1021, + 0xA7A5: 1022, + 0xA7A9: 1023, + 0xA7AB: 1024, + 0xA7B1: 1025, + 0xA7B3: 1026, + 0xA7B5: 1027, + 0xA7B7: 1028, + 0xA7B8: 1029, + 0xA7B9: 1030, + 0xA861: 1031, + 0xA862: 1032, + 0xA865: 1033, + 0xA869: 1034, + 0xA86B: 1035, + 0xA871: 1036, + 0xA873: 1037, + 0xA875: 1038, + 0xA876: 1039, + 0xA877: 1040, + 0xA87D: 1041, + 0xA881: 1042, + 0xA882: 1043, + 0xA885: 1044, + 0xA889: 1045, + 0xA891: 1046, + 0xA893: 1047, + 0xA895: 1048, + 0xA896: 1049, + 0xA897: 1050, + 0xA8A1: 1051, + 0xA8A2: 1052, + 0xA8B1: 1053, + 0xA8E1: 1054, + 0xA8E2: 1055, + 0xA8E5: 1056, + 0xA8E8: 1057, + 0xA8E9: 1058, + 0xA8F1: 1059, + 0xA8F5: 1060, + 0xA8F6: 1061, + 0xA8F7: 1062, + 0xA941: 1063, + 0xA957: 1064, + 0xA961: 1065, + 0xA962: 1066, + 0xA971: 1067, + 0xA973: 1068, + 0xA975: 1069, + 0xA976: 1070, + 0xA977: 1071, + 0xA9A1: 1072, + 0xA9A2: 1073, + 0xA9A5: 1074, + 0xA9A9: 1075, + 0xA9B1: 1076, + 0xA9B3: 1077, + 0xA9B7: 1078, + 0xAA41: 1079, + 0xAA61: 1080, + 0xAA77: 1081, + 0xAA81: 1082, + 0xAA82: 1083, + 0xAA85: 1084, + 0xAA89: 1085, + 0xAA91: 1086, + 0xAA95: 1087, + 0xAA97: 1088, + 0xAB41: 1089, + 0xAB57: 1090, + 0xAB61: 1091, + 0xAB65: 1092, + 0xAB69: 1093, + 0xAB71: 1094, + 0xAB73: 1095, + 0xABA1: 1096, + 0xABA2: 1097, + 0xABA5: 1098, + 0xABA9: 1099, + 0xABB1: 1100, + 0xABB3: 1101, + 0xABB5: 1102, + 0xABB7: 1103, + 0xAC61: 1104, + 0xAC62: 1105, + 0xAC64: 1106, + 0xAC65: 1107, + 0xAC68: 1108, + 0xAC69: 1109, + 0xAC6A: 1110, + 0xAC6B: 1111, + 0xAC71: 1112, + 0xAC73: 1113, + 0xAC75: 1114, + 0xAC76: 1115, + 0xAC77: 1116, + 0xAC7B: 1117, + 0xAC81: 1118, + 0xAC82: 1119, + 0xAC85: 1120, + 0xAC89: 1121, + 0xAC91: 1122, + 0xAC93: 1123, + 0xAC95: 1124, + 0xAC96: 1125, + 0xAC97: 1126, + 0xACA1: 1127, + 0xACA2: 1128, + 0xACA5: 1129, + 0xACA9: 1130, + 0xACB1: 1131, + 0xACB3: 1132, + 0xACB5: 1133, + 0xACB7: 1134, + 0xACC1: 1135, + 0xACC5: 1136, + 0xACC9: 1137, + 0xACD1: 1138, + 0xACD7: 1139, + 0xACE1: 1140, + 0xACE2: 1141, + 0xACE3: 1142, + 0xACE4: 1143, + 0xACE5: 1144, + 0xACE8: 1145, + 0xACE9: 1146, + 0xACEB: 1147, + 0xACEC: 1148, + 0xACF1: 1149, + 0xACF3: 1150, + 0xACF5: 1151, + 0xACF6: 1152, + 0xACF7: 1153, + 0xACFC: 1154, + 0xAD41: 1155, + 0xAD42: 1156, + 0xAD45: 1157, + 0xAD49: 1158, + 0xAD51: 1159, + 0xAD53: 1160, + 0xAD55: 1161, + 0xAD56: 1162, + 0xAD57: 1163, + 0xAD61: 1164, + 0xAD62: 1165, + 0xAD65: 1166, + 0xAD69: 1167, + 0xAD71: 1168, + 0xAD73: 1169, + 0xAD75: 1170, + 0xAD76: 1171, + 0xAD77: 1172, + 0xAD81: 1173, + 0xAD85: 1174, + 0xAD89: 1175, + 0xAD97: 1176, + 0xADA1: 1177, + 0xADA2: 1178, + 0xADA3: 1179, + 0xADA5: 1180, + 0xADA9: 1181, + 0xADAB: 1182, + 0xADB1: 1183, + 0xADB3: 1184, + 0xADB5: 1185, + 0xADB7: 1186, + 0xADBB: 1187, + 0xADC1: 1188, + 0xADC2: 1189, + 0xADC5: 1190, + 0xADC9: 1191, + 0xADD7: 1192, + 0xADE1: 1193, + 0xADE5: 1194, + 0xADE9: 1195, + 0xADF1: 1196, + 0xADF5: 1197, + 0xADF6: 1198, + 0xAE41: 1199, + 0xAE45: 1200, + 0xAE49: 1201, + 0xAE51: 1202, + 0xAE53: 1203, + 0xAE55: 1204, + 0xAE61: 1205, + 0xAE62: 1206, + 0xAE65: 1207, + 0xAE69: 1208, + 0xAE71: 1209, + 0xAE73: 1210, + 0xAE75: 1211, + 0xAE77: 1212, + 0xAE81: 1213, + 0xAE82: 1214, + 0xAE85: 1215, + 0xAE88: 1216, + 0xAE89: 1217, + 0xAE91: 1218, + 0xAE93: 1219, + 0xAE95: 1220, + 0xAE97: 1221, + 0xAE99: 1222, + 0xAE9B: 1223, + 0xAE9C: 1224, + 0xAEA1: 1225, + 0xAEB6: 1226, + 0xAEC1: 1227, + 0xAEC2: 1228, + 0xAEC5: 1229, + 0xAEC9: 1230, + 0xAED1: 1231, + 0xAED7: 1232, + 0xAEE1: 1233, + 0xAEE2: 1234, + 0xAEE5: 1235, + 0xAEE9: 1236, + 0xAEF1: 1237, + 0xAEF3: 1238, + 0xAEF5: 1239, + 0xAEF7: 1240, + 0xAF41: 1241, + 0xAF42: 1242, + 0xAF49: 1243, + 0xAF51: 1244, + 0xAF55: 1245, + 0xAF57: 1246, + 0xAF61: 1247, + 0xAF62: 1248, + 0xAF65: 1249, + 0xAF69: 1250, + 0xAF6A: 1251, + 0xAF71: 1252, + 0xAF73: 1253, + 0xAF75: 1254, + 0xAF77: 1255, + 0xAFA1: 1256, + 0xAFA2: 1257, + 0xAFA5: 1258, + 0xAFA8: 1259, + 0xAFA9: 1260, + 0xAFB0: 1261, + 0xAFB1: 1262, + 0xAFB3: 1263, + 0xAFB5: 1264, + 0xAFB7: 1265, + 0xAFBC: 1266, + 0xB061: 1267, + 0xB062: 1268, + 0xB064: 1269, + 0xB065: 1270, + 0xB069: 1271, + 0xB071: 1272, + 0xB073: 1273, + 0xB076: 1274, + 0xB077: 1275, + 0xB07D: 1276, + 0xB081: 1277, + 0xB082: 1278, + 0xB085: 1279, + 0xB089: 1280, + 0xB091: 1281, + 0xB093: 1282, + 0xB096: 1283, + 0xB097: 1284, + 0xB0B7: 1285, + 0xB0E1: 1286, + 0xB0E2: 1287, + 0xB0E5: 1288, + 0xB0E9: 1289, + 0xB0EB: 1290, + 0xB0F1: 1291, + 0xB0F3: 1292, + 0xB0F6: 1293, + 0xB0F7: 1294, + 0xB141: 1295, + 0xB145: 1296, + 0xB149: 1297, + 0xB185: 1298, + 0xB1A1: 1299, + 0xB1A2: 1300, + 0xB1A5: 1301, + 0xB1A8: 1302, + 0xB1A9: 1303, + 0xB1AB: 1304, + 0xB1B1: 1305, + 0xB1B3: 1306, + 0xB1B7: 1307, + 0xB1C1: 1308, + 0xB1C2: 1309, + 0xB1C5: 1310, + 0xB1D6: 1311, + 0xB1E1: 1312, + 0xB1F6: 1313, + 0xB241: 1314, + 0xB245: 1315, + 0xB249: 1316, + 0xB251: 1317, + 0xB253: 1318, + 0xB261: 1319, + 0xB281: 1320, + 0xB282: 1321, + 0xB285: 1322, + 0xB289: 1323, + 0xB291: 1324, + 0xB293: 1325, + 0xB297: 1326, + 0xB2A1: 1327, + 0xB2B6: 1328, + 0xB2C1: 1329, + 0xB2E1: 1330, + 0xB2E5: 1331, + 0xB357: 1332, + 0xB361: 1333, + 0xB362: 1334, + 0xB365: 1335, + 0xB369: 1336, + 0xB36B: 1337, + 0xB370: 1338, + 0xB371: 1339, + 0xB373: 1340, + 0xB381: 1341, + 0xB385: 1342, + 0xB389: 1343, + 0xB391: 1344, + 0xB3A1: 1345, + 0xB3A2: 1346, + 0xB3A5: 1347, + 0xB3A9: 1348, + 0xB3B1: 1349, + 0xB3B3: 1350, + 0xB3B5: 1351, + 0xB3B7: 1352, + 0xB461: 1353, + 0xB462: 1354, + 0xB465: 1355, + 0xB466: 1356, + 0xB467: 1357, + 0xB469: 1358, + 0xB46A: 1359, + 0xB46B: 1360, + 0xB470: 1361, + 0xB471: 1362, + 0xB473: 1363, + 0xB475: 1364, + 0xB476: 1365, + 0xB477: 1366, + 0xB47B: 1367, + 0xB47C: 1368, + 0xB481: 1369, + 0xB482: 1370, + 0xB485: 1371, + 0xB489: 1372, + 0xB491: 1373, + 0xB493: 1374, + 0xB495: 1375, + 0xB496: 1376, + 0xB497: 1377, + 0xB4A1: 1378, + 0xB4A2: 1379, + 0xB4A5: 1380, + 0xB4A9: 1381, + 0xB4AC: 1382, + 0xB4B1: 1383, + 0xB4B3: 1384, + 0xB4B5: 1385, + 0xB4B7: 1386, + 0xB4BB: 1387, + 0xB4BD: 1388, + 0xB4C1: 1389, + 0xB4C5: 1390, + 0xB4C9: 1391, + 0xB4D3: 1392, + 0xB4E1: 1393, + 0xB4E2: 1394, + 0xB4E5: 1395, + 0xB4E6: 1396, + 0xB4E8: 1397, + 0xB4E9: 1398, + 0xB4EA: 1399, + 0xB4EB: 1400, + 0xB4F1: 1401, + 0xB4F3: 1402, + 0xB4F4: 1403, + 0xB4F5: 1404, + 0xB4F6: 1405, + 0xB4F7: 1406, + 0xB4F8: 1407, + 0xB4FA: 1408, + 0xB4FC: 1409, + 0xB541: 1410, + 0xB542: 1411, + 0xB545: 1412, + 0xB549: 1413, + 0xB551: 1414, + 0xB553: 1415, + 0xB555: 1416, + 0xB557: 1417, + 0xB561: 1418, + 0xB562: 1419, + 0xB563: 1420, + 0xB565: 1421, + 0xB569: 1422, + 0xB56B: 1423, + 0xB56C: 1424, + 0xB571: 1425, + 0xB573: 1426, + 0xB574: 1427, + 0xB575: 1428, + 0xB576: 1429, + 0xB577: 1430, + 0xB57B: 1431, + 0xB57C: 1432, + 0xB57D: 1433, + 0xB581: 1434, + 0xB585: 1435, + 0xB589: 1436, + 0xB591: 1437, + 0xB593: 1438, + 0xB595: 1439, + 0xB596: 1440, + 0xB5A1: 1441, + 0xB5A2: 1442, + 0xB5A5: 1443, + 0xB5A9: 1444, + 0xB5AA: 1445, + 0xB5AB: 1446, + 0xB5AD: 1447, + 0xB5B0: 1448, + 0xB5B1: 1449, + 0xB5B3: 1450, + 0xB5B5: 1451, + 0xB5B7: 1452, + 0xB5B9: 1453, + 0xB5C1: 1454, + 0xB5C2: 1455, + 0xB5C5: 1456, + 0xB5C9: 1457, + 0xB5D1: 1458, + 0xB5D3: 1459, + 0xB5D5: 1460, + 0xB5D6: 1461, + 0xB5D7: 1462, + 0xB5E1: 1463, + 0xB5E2: 1464, + 0xB5E5: 1465, + 0xB5F1: 1466, + 0xB5F5: 1467, + 0xB5F7: 1468, + 0xB641: 1469, + 0xB642: 1470, + 0xB645: 1471, + 0xB649: 1472, + 0xB651: 1473, + 0xB653: 1474, + 0xB655: 1475, + 0xB657: 1476, + 0xB661: 1477, + 0xB662: 1478, + 0xB665: 1479, + 0xB669: 1480, + 0xB671: 1481, + 0xB673: 1482, + 0xB675: 1483, + 0xB677: 1484, + 0xB681: 1485, + 0xB682: 1486, + 0xB685: 1487, + 0xB689: 1488, + 0xB68A: 1489, + 0xB68B: 1490, + 0xB691: 1491, + 0xB693: 1492, + 0xB695: 1493, + 0xB697: 1494, + 0xB6A1: 1495, + 0xB6A2: 1496, + 0xB6A5: 1497, + 0xB6A9: 1498, + 0xB6B1: 1499, + 0xB6B3: 1500, + 0xB6B6: 1501, + 0xB6B7: 1502, + 0xB6C1: 1503, + 0xB6C2: 1504, + 0xB6C5: 1505, + 0xB6C9: 1506, + 0xB6D1: 1507, + 0xB6D3: 1508, + 0xB6D7: 1509, + 0xB6E1: 1510, + 0xB6E2: 1511, + 0xB6E5: 1512, + 0xB6E9: 1513, + 0xB6F1: 1514, + 0xB6F3: 1515, + 0xB6F5: 1516, + 0xB6F7: 1517, + 0xB741: 1518, + 0xB742: 1519, + 0xB745: 1520, + 0xB749: 1521, + 0xB751: 1522, + 0xB753: 1523, + 0xB755: 1524, + 0xB757: 1525, + 0xB759: 1526, + 0xB761: 1527, + 0xB762: 1528, + 0xB765: 1529, + 0xB769: 1530, + 0xB76F: 1531, + 0xB771: 1532, + 0xB773: 1533, + 0xB775: 1534, + 0xB777: 1535, + 0xB778: 1536, + 0xB779: 1537, + 0xB77A: 1538, + 0xB77B: 1539, + 0xB77C: 1540, + 0xB77D: 1541, + 0xB781: 1542, + 0xB785: 1543, + 0xB789: 1544, + 0xB791: 1545, + 0xB795: 1546, + 0xB7A1: 1547, + 0xB7A2: 1548, + 0xB7A5: 1549, + 0xB7A9: 1550, + 0xB7AA: 1551, + 0xB7AB: 1552, + 0xB7B0: 1553, + 0xB7B1: 1554, + 0xB7B3: 1555, + 0xB7B5: 1556, + 0xB7B6: 1557, + 0xB7B7: 1558, + 0xB7B8: 1559, + 0xB7BC: 1560, + 0xB861: 1561, + 0xB862: 1562, + 0xB865: 1563, + 0xB867: 1564, + 0xB868: 1565, + 0xB869: 1566, + 0xB86B: 1567, + 0xB871: 1568, + 0xB873: 1569, + 0xB875: 1570, + 0xB876: 1571, + 0xB877: 1572, + 0xB878: 1573, + 0xB881: 1574, + 0xB882: 1575, + 0xB885: 1576, + 0xB889: 1577, + 0xB891: 1578, + 0xB893: 1579, + 0xB895: 1580, + 0xB896: 1581, + 0xB897: 1582, + 0xB8A1: 1583, + 0xB8A2: 1584, + 0xB8A5: 1585, + 0xB8A7: 1586, + 0xB8A9: 1587, + 0xB8B1: 1588, + 0xB8B7: 1589, + 0xB8C1: 1590, + 0xB8C5: 1591, + 0xB8C9: 1592, + 0xB8E1: 1593, + 0xB8E2: 1594, + 0xB8E5: 1595, + 0xB8E9: 1596, + 0xB8EB: 1597, + 0xB8F1: 1598, + 0xB8F3: 1599, + 0xB8F5: 1600, + 0xB8F7: 1601, + 0xB8F8: 1602, + 0xB941: 1603, + 0xB942: 1604, + 0xB945: 1605, + 0xB949: 1606, + 0xB951: 1607, + 0xB953: 1608, + 0xB955: 1609, + 0xB957: 1610, + 0xB961: 1611, + 0xB965: 1612, + 0xB969: 1613, + 0xB971: 1614, + 0xB973: 1615, + 0xB976: 1616, + 0xB977: 1617, + 0xB981: 1618, + 0xB9A1: 1619, + 0xB9A2: 1620, + 0xB9A5: 1621, + 0xB9A9: 1622, + 0xB9AB: 1623, + 0xB9B1: 1624, + 0xB9B3: 1625, + 0xB9B5: 1626, + 0xB9B7: 1627, + 0xB9B8: 1628, + 0xB9B9: 1629, + 0xB9BD: 1630, + 0xB9C1: 1631, + 0xB9C2: 1632, + 0xB9C9: 1633, + 0xB9D3: 1634, + 0xB9D5: 1635, + 0xB9D7: 1636, + 0xB9E1: 1637, + 0xB9F6: 1638, + 0xB9F7: 1639, + 0xBA41: 1640, + 0xBA45: 1641, + 0xBA49: 1642, + 0xBA51: 1643, + 0xBA53: 1644, + 0xBA55: 1645, + 0xBA57: 1646, + 0xBA61: 1647, + 0xBA62: 1648, + 0xBA65: 1649, + 0xBA77: 1650, + 0xBA81: 1651, + 0xBA82: 1652, + 0xBA85: 1653, + 0xBA89: 1654, + 0xBA8A: 1655, + 0xBA8B: 1656, + 0xBA91: 1657, + 0xBA93: 1658, + 0xBA95: 1659, + 0xBA97: 1660, + 0xBAA1: 1661, + 0xBAB6: 1662, + 0xBAC1: 1663, + 0xBAE1: 1664, + 0xBAE2: 1665, + 0xBAE5: 1666, + 0xBAE9: 1667, + 0xBAF1: 1668, + 0xBAF3: 1669, + 0xBAF5: 1670, + 0xBB41: 1671, + 0xBB45: 1672, + 0xBB49: 1673, + 0xBB51: 1674, + 0xBB61: 1675, + 0xBB62: 1676, + 0xBB65: 1677, + 0xBB69: 1678, + 0xBB71: 1679, + 0xBB73: 1680, + 0xBB75: 1681, + 0xBB77: 1682, + 0xBBA1: 1683, + 0xBBA2: 1684, + 0xBBA5: 1685, + 0xBBA8: 1686, + 0xBBA9: 1687, + 0xBBAB: 1688, + 0xBBB1: 1689, + 0xBBB3: 1690, + 0xBBB5: 1691, + 0xBBB7: 1692, + 0xBBB8: 1693, + 0xBBBB: 1694, + 0xBBBC: 1695, + 0xBC61: 1696, + 0xBC62: 1697, + 0xBC65: 1698, + 0xBC67: 1699, + 0xBC69: 1700, + 0xBC6C: 1701, + 0xBC71: 1702, + 0xBC73: 1703, + 0xBC75: 1704, + 0xBC76: 1705, + 0xBC77: 1706, + 0xBC81: 1707, + 0xBC82: 1708, + 0xBC85: 1709, + 0xBC89: 1710, + 0xBC91: 1711, + 0xBC93: 1712, + 0xBC95: 1713, + 0xBC96: 1714, + 0xBC97: 1715, + 0xBCA1: 1716, + 0xBCA5: 1717, + 0xBCB7: 1718, + 0xBCE1: 1719, + 0xBCE2: 1720, + 0xBCE5: 1721, + 0xBCE9: 1722, + 0xBCF1: 1723, + 0xBCF3: 1724, + 0xBCF5: 1725, + 0xBCF6: 1726, + 0xBCF7: 1727, + 0xBD41: 1728, + 0xBD57: 1729, + 0xBD61: 1730, + 0xBD76: 1731, + 0xBDA1: 1732, + 0xBDA2: 1733, + 0xBDA5: 1734, + 0xBDA9: 1735, + 0xBDB1: 1736, + 0xBDB3: 1737, + 0xBDB5: 1738, + 0xBDB7: 1739, + 0xBDB9: 1740, + 0xBDC1: 1741, + 0xBDC2: 1742, + 0xBDC9: 1743, + 0xBDD6: 1744, + 0xBDE1: 1745, + 0xBDF6: 1746, + 0xBE41: 1747, + 0xBE45: 1748, + 0xBE49: 1749, + 0xBE51: 1750, + 0xBE53: 1751, + 0xBE77: 1752, + 0xBE81: 1753, + 0xBE82: 1754, + 0xBE85: 1755, + 0xBE89: 1756, + 0xBE91: 1757, + 0xBE93: 1758, + 0xBE97: 1759, + 0xBEA1: 1760, + 0xBEB6: 1761, + 0xBEB7: 1762, + 0xBEE1: 1763, + 0xBF41: 1764, + 0xBF61: 1765, + 0xBF71: 1766, + 0xBF75: 1767, + 0xBF77: 1768, + 0xBFA1: 1769, + 0xBFA2: 1770, + 0xBFA5: 1771, + 0xBFA9: 1772, + 0xBFB1: 1773, + 0xBFB3: 1774, + 0xBFB7: 1775, + 0xBFB8: 1776, + 0xBFBD: 1777, + 0xC061: 1778, + 0xC062: 1779, + 0xC065: 1780, + 0xC067: 1781, + 0xC069: 1782, + 0xC071: 1783, + 0xC073: 1784, + 0xC075: 1785, + 0xC076: 1786, + 0xC077: 1787, + 0xC078: 1788, + 0xC081: 1789, + 0xC082: 1790, + 0xC085: 1791, + 0xC089: 1792, + 0xC091: 1793, + 0xC093: 1794, + 0xC095: 1795, + 0xC096: 1796, + 0xC097: 1797, + 0xC0A1: 1798, + 0xC0A5: 1799, + 0xC0A7: 1800, + 0xC0A9: 1801, + 0xC0B1: 1802, + 0xC0B7: 1803, + 0xC0E1: 1804, + 0xC0E2: 1805, + 0xC0E5: 1806, + 0xC0E9: 1807, + 0xC0F1: 1808, + 0xC0F3: 1809, + 0xC0F5: 1810, + 0xC0F6: 1811, + 0xC0F7: 1812, + 0xC141: 1813, + 0xC142: 1814, + 0xC145: 1815, + 0xC149: 1816, + 0xC151: 1817, + 0xC153: 1818, + 0xC155: 1819, + 0xC157: 1820, + 0xC161: 1821, + 0xC165: 1822, + 0xC176: 1823, + 0xC181: 1824, + 0xC185: 1825, + 0xC197: 1826, + 0xC1A1: 1827, + 0xC1A2: 1828, + 0xC1A5: 1829, + 0xC1A9: 1830, + 0xC1B1: 1831, + 0xC1B3: 1832, + 0xC1B5: 1833, + 0xC1B7: 1834, + 0xC1C1: 1835, + 0xC1C5: 1836, + 0xC1C9: 1837, + 0xC1D7: 1838, + 0xC241: 1839, + 0xC245: 1840, + 0xC249: 1841, + 0xC251: 1842, + 0xC253: 1843, + 0xC255: 1844, + 0xC257: 1845, + 0xC261: 1846, + 0xC271: 1847, + 0xC281: 1848, + 0xC282: 1849, + 0xC285: 1850, + 0xC289: 1851, + 0xC291: 1852, + 0xC293: 1853, + 0xC295: 1854, + 0xC297: 1855, + 0xC2A1: 1856, + 0xC2B6: 1857, + 0xC2C1: 1858, + 0xC2C5: 1859, + 0xC2E1: 1860, + 0xC2E5: 1861, + 0xC2E9: 1862, + 0xC2F1: 1863, + 0xC2F3: 1864, + 0xC2F5: 1865, + 0xC2F7: 1866, + 0xC341: 1867, + 0xC345: 1868, + 0xC349: 1869, + 0xC351: 1870, + 0xC357: 1871, + 0xC361: 1872, + 0xC362: 1873, + 0xC365: 1874, + 0xC369: 1875, + 0xC371: 1876, + 0xC373: 1877, + 0xC375: 1878, + 0xC377: 1879, + 0xC3A1: 1880, + 0xC3A2: 1881, + 0xC3A5: 1882, + 0xC3A8: 1883, + 0xC3A9: 1884, + 0xC3AA: 1885, + 0xC3B1: 1886, + 0xC3B3: 1887, + 0xC3B5: 1888, + 0xC3B7: 1889, + 0xC461: 1890, + 0xC462: 1891, + 0xC465: 1892, + 0xC469: 1893, + 0xC471: 1894, + 0xC473: 1895, + 0xC475: 1896, + 0xC477: 1897, + 0xC481: 1898, + 0xC482: 1899, + 0xC485: 1900, + 0xC489: 1901, + 0xC491: 1902, + 0xC493: 1903, + 0xC495: 1904, + 0xC496: 1905, + 0xC497: 1906, + 0xC4A1: 1907, + 0xC4A2: 1908, + 0xC4B7: 1909, + 0xC4E1: 1910, + 0xC4E2: 1911, + 0xC4E5: 1912, + 0xC4E8: 1913, + 0xC4E9: 1914, + 0xC4F1: 1915, + 0xC4F3: 1916, + 0xC4F5: 1917, + 0xC4F6: 1918, + 0xC4F7: 1919, + 0xC541: 1920, + 0xC542: 1921, + 0xC545: 1922, + 0xC549: 1923, + 0xC551: 1924, + 0xC553: 1925, + 0xC555: 1926, + 0xC557: 1927, + 0xC561: 1928, + 0xC565: 1929, + 0xC569: 1930, + 0xC571: 1931, + 0xC573: 1932, + 0xC575: 1933, + 0xC576: 1934, + 0xC577: 1935, + 0xC581: 1936, + 0xC5A1: 1937, + 0xC5A2: 1938, + 0xC5A5: 1939, + 0xC5A9: 1940, + 0xC5B1: 1941, + 0xC5B3: 1942, + 0xC5B5: 1943, + 0xC5B7: 1944, + 0xC5C1: 1945, + 0xC5C2: 1946, + 0xC5C5: 1947, + 0xC5C9: 1948, + 0xC5D1: 1949, + 0xC5D7: 1950, + 0xC5E1: 1951, + 0xC5F7: 1952, + 0xC641: 1953, + 0xC649: 1954, + 0xC661: 1955, + 0xC681: 1956, + 0xC682: 1957, + 0xC685: 1958, + 0xC689: 1959, + 0xC691: 1960, + 0xC693: 1961, + 0xC695: 1962, + 0xC697: 1963, + 0xC6A1: 1964, + 0xC6A5: 1965, + 0xC6A9: 1966, + 0xC6B7: 1967, + 0xC6C1: 1968, + 0xC6D7: 1969, + 0xC6E1: 1970, + 0xC6E2: 1971, + 0xC6E5: 1972, + 0xC6E9: 1973, + 0xC6F1: 1974, + 0xC6F3: 1975, + 0xC6F5: 1976, + 0xC6F7: 1977, + 0xC741: 1978, + 0xC745: 1979, + 0xC749: 1980, + 0xC751: 1981, + 0xC761: 1982, + 0xC762: 1983, + 0xC765: 1984, + 0xC769: 1985, + 0xC771: 1986, + 0xC773: 1987, + 0xC777: 1988, + 0xC7A1: 1989, + 0xC7A2: 1990, + 0xC7A5: 1991, + 0xC7A9: 1992, + 0xC7B1: 1993, + 0xC7B3: 1994, + 0xC7B5: 1995, + 0xC7B7: 1996, + 0xC861: 1997, + 0xC862: 1998, + 0xC865: 1999, + 0xC869: 2000, + 0xC86A: 2001, + 0xC871: 2002, + 0xC873: 2003, + 0xC875: 2004, + 0xC876: 2005, + 0xC877: 2006, + 0xC881: 2007, + 0xC882: 2008, + 0xC885: 2009, + 0xC889: 2010, + 0xC891: 2011, + 0xC893: 2012, + 0xC895: 2013, + 0xC896: 2014, + 0xC897: 2015, + 0xC8A1: 2016, + 0xC8B7: 2017, + 0xC8E1: 2018, + 0xC8E2: 2019, + 0xC8E5: 2020, + 0xC8E9: 2021, + 0xC8EB: 2022, + 0xC8F1: 2023, + 0xC8F3: 2024, + 0xC8F5: 2025, + 0xC8F6: 2026, + 0xC8F7: 2027, + 0xC941: 2028, + 0xC942: 2029, + 0xC945: 2030, + 0xC949: 2031, + 0xC951: 2032, + 0xC953: 2033, + 0xC955: 2034, + 0xC957: 2035, + 0xC961: 2036, + 0xC965: 2037, + 0xC976: 2038, + 0xC981: 2039, + 0xC985: 2040, + 0xC9A1: 2041, + 0xC9A2: 2042, + 0xC9A5: 2043, + 0xC9A9: 2044, + 0xC9B1: 2045, + 0xC9B3: 2046, + 0xC9B5: 2047, + 0xC9B7: 2048, + 0xC9BC: 2049, + 0xC9C1: 2050, + 0xC9C5: 2051, + 0xC9E1: 2052, + 0xCA41: 2053, + 0xCA45: 2054, + 0xCA55: 2055, + 0xCA57: 2056, + 0xCA61: 2057, + 0xCA81: 2058, + 0xCA82: 2059, + 0xCA85: 2060, + 0xCA89: 2061, + 0xCA91: 2062, + 0xCA93: 2063, + 0xCA95: 2064, + 0xCA97: 2065, + 0xCAA1: 2066, + 0xCAB6: 2067, + 0xCAC1: 2068, + 0xCAE1: 2069, + 0xCAE2: 2070, + 0xCAE5: 2071, + 0xCAE9: 2072, + 0xCAF1: 2073, + 0xCAF3: 2074, + 0xCAF7: 2075, + 0xCB41: 2076, + 0xCB45: 2077, + 0xCB49: 2078, + 0xCB51: 2079, + 0xCB57: 2080, + 0xCB61: 2081, + 0xCB62: 2082, + 0xCB65: 2083, + 0xCB68: 2084, + 0xCB69: 2085, + 0xCB6B: 2086, + 0xCB71: 2087, + 0xCB73: 2088, + 0xCB75: 2089, + 0xCB81: 2090, + 0xCB85: 2091, + 0xCB89: 2092, + 0xCB91: 2093, + 0xCB93: 2094, + 0xCBA1: 2095, + 0xCBA2: 2096, + 0xCBA5: 2097, + 0xCBA9: 2098, + 0xCBB1: 2099, + 0xCBB3: 2100, + 0xCBB5: 2101, + 0xCBB7: 2102, + 0xCC61: 2103, + 0xCC62: 2104, + 0xCC63: 2105, + 0xCC65: 2106, + 0xCC69: 2107, + 0xCC6B: 2108, + 0xCC71: 2109, + 0xCC73: 2110, + 0xCC75: 2111, + 0xCC76: 2112, + 0xCC77: 2113, + 0xCC7B: 2114, + 0xCC81: 2115, + 0xCC82: 2116, + 0xCC85: 2117, + 0xCC89: 2118, + 0xCC91: 2119, + 0xCC93: 2120, + 0xCC95: 2121, + 0xCC96: 2122, + 0xCC97: 2123, + 0xCCA1: 2124, + 0xCCA2: 2125, + 0xCCE1: 2126, + 0xCCE2: 2127, + 0xCCE5: 2128, + 0xCCE9: 2129, + 0xCCF1: 2130, + 0xCCF3: 2131, + 0xCCF5: 2132, + 0xCCF6: 2133, + 0xCCF7: 2134, + 0xCD41: 2135, + 0xCD42: 2136, + 0xCD45: 2137, + 0xCD49: 2138, + 0xCD51: 2139, + 0xCD53: 2140, + 0xCD55: 2141, + 0xCD57: 2142, + 0xCD61: 2143, + 0xCD65: 2144, + 0xCD69: 2145, + 0xCD71: 2146, + 0xCD73: 2147, + 0xCD76: 2148, + 0xCD77: 2149, + 0xCD81: 2150, + 0xCD89: 2151, + 0xCD93: 2152, + 0xCD95: 2153, + 0xCDA1: 2154, + 0xCDA2: 2155, + 0xCDA5: 2156, + 0xCDA9: 2157, + 0xCDB1: 2158, + 0xCDB3: 2159, + 0xCDB5: 2160, + 0xCDB7: 2161, + 0xCDC1: 2162, + 0xCDD7: 2163, + 0xCE41: 2164, + 0xCE45: 2165, + 0xCE61: 2166, + 0xCE65: 2167, + 0xCE69: 2168, + 0xCE73: 2169, + 0xCE75: 2170, + 0xCE81: 2171, + 0xCE82: 2172, + 0xCE85: 2173, + 0xCE88: 2174, + 0xCE89: 2175, + 0xCE8B: 2176, + 0xCE91: 2177, + 0xCE93: 2178, + 0xCE95: 2179, + 0xCE97: 2180, + 0xCEA1: 2181, + 0xCEB7: 2182, + 0xCEE1: 2183, + 0xCEE5: 2184, + 0xCEE9: 2185, + 0xCEF1: 2186, + 0xCEF5: 2187, + 0xCF41: 2188, + 0xCF45: 2189, + 0xCF49: 2190, + 0xCF51: 2191, + 0xCF55: 2192, + 0xCF57: 2193, + 0xCF61: 2194, + 0xCF65: 2195, + 0xCF69: 2196, + 0xCF71: 2197, + 0xCF73: 2198, + 0xCF75: 2199, + 0xCFA1: 2200, + 0xCFA2: 2201, + 0xCFA5: 2202, + 0xCFA9: 2203, + 0xCFB1: 2204, + 0xCFB3: 2205, + 0xCFB5: 2206, + 0xCFB7: 2207, + 0xD061: 2208, + 0xD062: 2209, + 0xD065: 2210, + 0xD069: 2211, + 0xD06E: 2212, + 0xD071: 2213, + 0xD073: 2214, + 0xD075: 2215, + 0xD077: 2216, + 0xD081: 2217, + 0xD082: 2218, + 0xD085: 2219, + 0xD089: 2220, + 0xD091: 2221, + 0xD093: 2222, + 0xD095: 2223, + 0xD096: 2224, + 0xD097: 2225, + 0xD0A1: 2226, + 0xD0B7: 2227, + 0xD0E1: 2228, + 0xD0E2: 2229, + 0xD0E5: 2230, + 0xD0E9: 2231, + 0xD0EB: 2232, + 0xD0F1: 2233, + 0xD0F3: 2234, + 0xD0F5: 2235, + 0xD0F7: 2236, + 0xD141: 2237, + 0xD142: 2238, + 0xD145: 2239, + 0xD149: 2240, + 0xD151: 2241, + 0xD153: 2242, + 0xD155: 2243, + 0xD157: 2244, + 0xD161: 2245, + 0xD162: 2246, + 0xD165: 2247, + 0xD169: 2248, + 0xD171: 2249, + 0xD173: 2250, + 0xD175: 2251, + 0xD176: 2252, + 0xD177: 2253, + 0xD181: 2254, + 0xD185: 2255, + 0xD189: 2256, + 0xD193: 2257, + 0xD1A1: 2258, + 0xD1A2: 2259, + 0xD1A5: 2260, + 0xD1A9: 2261, + 0xD1AE: 2262, + 0xD1B1: 2263, + 0xD1B3: 2264, + 0xD1B5: 2265, + 0xD1B7: 2266, + 0xD1BB: 2267, + 0xD1C1: 2268, + 0xD1C2: 2269, + 0xD1C5: 2270, + 0xD1C9: 2271, + 0xD1D5: 2272, + 0xD1D7: 2273, + 0xD1E1: 2274, + 0xD1E2: 2275, + 0xD1E5: 2276, + 0xD1F5: 2277, + 0xD1F7: 2278, + 0xD241: 2279, + 0xD242: 2280, + 0xD245: 2281, + 0xD249: 2282, + 0xD253: 2283, + 0xD255: 2284, + 0xD257: 2285, + 0xD261: 2286, + 0xD265: 2287, + 0xD269: 2288, + 0xD273: 2289, + 0xD275: 2290, + 0xD281: 2291, + 0xD282: 2292, + 0xD285: 2293, + 0xD289: 2294, + 0xD28E: 2295, + 0xD291: 2296, + 0xD295: 2297, + 0xD297: 2298, + 0xD2A1: 2299, + 0xD2A5: 2300, + 0xD2A9: 2301, + 0xD2B1: 2302, + 0xD2B7: 2303, + 0xD2C1: 2304, + 0xD2C2: 2305, + 0xD2C5: 2306, + 0xD2C9: 2307, + 0xD2D7: 2308, + 0xD2E1: 2309, + 0xD2E2: 2310, + 0xD2E5: 2311, + 0xD2E9: 2312, + 0xD2F1: 2313, + 0xD2F3: 2314, + 0xD2F5: 2315, + 0xD2F7: 2316, + 0xD341: 2317, + 0xD342: 2318, + 0xD345: 2319, + 0xD349: 2320, + 0xD351: 2321, + 0xD355: 2322, + 0xD357: 2323, + 0xD361: 2324, + 0xD362: 2325, + 0xD365: 2326, + 0xD367: 2327, + 0xD368: 2328, + 0xD369: 2329, + 0xD36A: 2330, + 0xD371: 2331, + 0xD373: 2332, + 0xD375: 2333, + 0xD377: 2334, + 0xD37B: 2335, + 0xD381: 2336, + 0xD385: 2337, + 0xD389: 2338, + 0xD391: 2339, + 0xD393: 2340, + 0xD397: 2341, + 0xD3A1: 2342, + 0xD3A2: 2343, + 0xD3A5: 2344, + 0xD3A9: 2345, + 0xD3B1: 2346, + 0xD3B3: 2347, + 0xD3B5: 2348, + 0xD3B7: 2349, +} diff --git a/libs/common/chardet/compat.py b/libs/common/chardet/johabprober.py similarity index 50% rename from libs/common/chardet/compat.py rename to libs/common/chardet/johabprober.py index 8941572b..d7364ba6 100644 --- a/libs/common/chardet/compat.py +++ b/libs/common/chardet/johabprober.py @@ -1,7 +1,13 @@ ######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# # Contributor(s): -# Dan Blanchard -# Ian Cordasco +# Mark Pilgrim - port to Python # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public @@ -19,18 +25,23 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -import sys +from .chardistribution import JOHABDistributionAnalysis +from .codingstatemachine import CodingStateMachine +from .mbcharsetprober import MultiByteCharSetProber +from .mbcssm import JOHAB_SM_MODEL -if sys.version_info < (3, 0): - PY2 = True - PY3 = False - string_types = (str, unicode) - text_type = unicode - iteritems = dict.iteritems -else: - PY2 = False - PY3 = True - string_types = (bytes, str) - text_type = str - iteritems = dict.items +class JOHABProber(MultiByteCharSetProber): + def __init__(self) -> None: + super().__init__() + self.coding_sm = CodingStateMachine(JOHAB_SM_MODEL) + self.distribution_analyzer = JOHABDistributionAnalysis() + self.reset() + + @property + def charset_name(self) -> str: + return "Johab" + + @property + def language(self) -> str: + return "Korean" diff --git a/libs/common/chardet/jpcntx.py b/libs/common/chardet/jpcntx.py index 20044e4b..2f53bdda 100644 --- a/libs/common/chardet/jpcntx.py +++ b/libs/common/chardet/jpcntx.py @@ -25,110 +25,114 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### +from typing import List, Tuple, Union # This is hiragana 2-char sequence table, the number in each cell represents its frequency category -jp2CharContext = ( -(0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1), -(2,4,0,4,0,3,0,4,0,3,4,4,4,2,4,3,3,4,3,2,3,3,4,2,3,3,3,2,4,1,4,3,3,1,5,4,3,4,3,4,3,5,3,0,3,5,4,2,0,3,1,0,3,3,0,3,3,0,1,1,0,4,3,0,3,3,0,4,0,2,0,3,5,5,5,5,4,0,4,1,0,3,4), -(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2), -(0,4,0,5,0,5,0,4,0,4,5,4,4,3,5,3,5,1,5,3,4,3,4,4,3,4,3,3,4,3,5,4,4,3,5,5,3,5,5,5,3,5,5,3,4,5,5,3,1,3,2,0,3,4,0,4,2,0,4,2,1,5,3,2,3,5,0,4,0,2,0,5,4,4,5,4,5,0,4,0,0,4,4), -(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), -(0,3,0,4,0,3,0,3,0,4,5,4,3,3,3,3,4,3,5,4,4,3,5,4,4,3,4,3,4,4,4,4,5,3,4,4,3,4,5,5,4,5,5,1,4,5,4,3,0,3,3,1,3,3,0,4,4,0,3,3,1,5,3,3,3,5,0,4,0,3,0,4,4,3,4,3,3,0,4,1,1,3,4), -(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), -(0,4,0,3,0,3,0,4,0,3,4,4,3,2,2,1,2,1,3,1,3,3,3,3,3,4,3,1,3,3,5,3,3,0,4,3,0,5,4,3,3,5,4,4,3,4,4,5,0,1,2,0,1,2,0,2,2,0,1,0,0,5,2,2,1,4,0,3,0,1,0,4,4,3,5,4,3,0,2,1,0,4,3), -(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), -(0,3,0,5,0,4,0,2,1,4,4,2,4,1,4,2,4,2,4,3,3,3,4,3,3,3,3,1,4,2,3,3,3,1,4,4,1,1,1,4,3,3,2,0,2,4,3,2,0,3,3,0,3,1,1,0,0,0,3,3,0,4,2,2,3,4,0,4,0,3,0,4,4,5,3,4,4,0,3,0,0,1,4), -(1,4,0,4,0,4,0,4,0,3,5,4,4,3,4,3,5,4,3,3,4,3,5,4,4,4,4,3,4,2,4,3,3,1,5,4,3,2,4,5,4,5,5,4,4,5,4,4,0,3,2,2,3,3,0,4,3,1,3,2,1,4,3,3,4,5,0,3,0,2,0,4,5,5,4,5,4,0,4,0,0,5,4), -(0,5,0,5,0,4,0,3,0,4,4,3,4,3,3,3,4,0,4,4,4,3,4,3,4,3,3,1,4,2,4,3,4,0,5,4,1,4,5,4,4,5,3,2,4,3,4,3,2,4,1,3,3,3,2,3,2,0,4,3,3,4,3,3,3,4,0,4,0,3,0,4,5,4,4,4,3,0,4,1,0,1,3), -(0,3,1,4,0,3,0,2,0,3,4,4,3,1,4,2,3,3,4,3,4,3,4,3,4,4,3,2,3,1,5,4,4,1,4,4,3,5,4,4,3,5,5,4,3,4,4,3,1,2,3,1,2,2,0,3,2,0,3,1,0,5,3,3,3,4,3,3,3,3,4,4,4,4,5,4,2,0,3,3,2,4,3), -(0,2,0,3,0,1,0,1,0,0,3,2,0,0,2,0,1,0,2,1,3,3,3,1,2,3,1,0,1,0,4,2,1,1,3,3,0,4,3,3,1,4,3,3,0,3,3,2,0,0,0,0,1,0,0,2,0,0,0,0,0,4,1,0,2,3,2,2,2,1,3,3,3,4,4,3,2,0,3,1,0,3,3), -(0,4,0,4,0,3,0,3,0,4,4,4,3,3,3,3,3,3,4,3,4,2,4,3,4,3,3,2,4,3,4,5,4,1,4,5,3,5,4,5,3,5,4,0,3,5,5,3,1,3,3,2,2,3,0,3,4,1,3,3,2,4,3,3,3,4,0,4,0,3,0,4,5,4,4,5,3,0,4,1,0,3,4), -(0,2,0,3,0,3,0,0,0,2,2,2,1,0,1,0,0,0,3,0,3,0,3,0,1,3,1,0,3,1,3,3,3,1,3,3,3,0,1,3,1,3,4,0,0,3,1,1,0,3,2,0,0,0,0,1,3,0,1,0,0,3,3,2,0,3,0,0,0,0,0,3,4,3,4,3,3,0,3,0,0,2,3), -(2,3,0,3,0,2,0,1,0,3,3,4,3,1,3,1,1,1,3,1,4,3,4,3,3,3,0,0,3,1,5,4,3,1,4,3,2,5,5,4,4,4,4,3,3,4,4,4,0,2,1,1,3,2,0,1,2,0,0,1,0,4,1,3,3,3,0,3,0,1,0,4,4,4,5,5,3,0,2,0,0,4,4), -(0,2,0,1,0,3,1,3,0,2,3,3,3,0,3,1,0,0,3,0,3,2,3,1,3,2,1,1,0,0,4,2,1,0,2,3,1,4,3,2,0,4,4,3,1,3,1,3,0,1,0,0,1,0,0,0,1,0,0,0,0,4,1,1,1,2,0,3,0,0,0,3,4,2,4,3,2,0,1,0,0,3,3), -(0,1,0,4,0,5,0,4,0,2,4,4,2,3,3,2,3,3,5,3,3,3,4,3,4,2,3,0,4,3,3,3,4,1,4,3,2,1,5,5,3,4,5,1,3,5,4,2,0,3,3,0,1,3,0,4,2,0,1,3,1,4,3,3,3,3,0,3,0,1,0,3,4,4,4,5,5,0,3,0,1,4,5), -(0,2,0,3,0,3,0,0,0,2,3,1,3,0,4,0,1,1,3,0,3,4,3,2,3,1,0,3,3,2,3,1,3,0,2,3,0,2,1,4,1,2,2,0,0,3,3,0,0,2,0,0,0,1,0,0,0,0,2,2,0,3,2,1,3,3,0,2,0,2,0,0,3,3,1,2,4,0,3,0,2,2,3), -(2,4,0,5,0,4,0,4,0,2,4,4,4,3,4,3,3,3,1,2,4,3,4,3,4,4,5,0,3,3,3,3,2,0,4,3,1,4,3,4,1,4,4,3,3,4,4,3,1,2,3,0,4,2,0,4,1,0,3,3,0,4,3,3,3,4,0,4,0,2,0,3,5,3,4,5,2,0,3,0,0,4,5), -(0,3,0,4,0,1,0,1,0,1,3,2,2,1,3,0,3,0,2,0,2,0,3,0,2,0,0,0,1,0,1,1,0,0,3,1,0,0,0,4,0,3,1,0,2,1,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,4,2,2,3,1,0,3,0,0,0,1,4,4,4,3,0,0,4,0,0,1,4), -(1,4,1,5,0,3,0,3,0,4,5,4,4,3,5,3,3,4,4,3,4,1,3,3,3,3,2,1,4,1,5,4,3,1,4,4,3,5,4,4,3,5,4,3,3,4,4,4,0,3,3,1,2,3,0,3,1,0,3,3,0,5,4,4,4,4,4,4,3,3,5,4,4,3,3,5,4,0,3,2,0,4,4), -(0,2,0,3,0,1,0,0,0,1,3,3,3,2,4,1,3,0,3,1,3,0,2,2,1,1,0,0,2,0,4,3,1,0,4,3,0,4,4,4,1,4,3,1,1,3,3,1,0,2,0,0,1,3,0,0,0,0,2,0,0,4,3,2,4,3,5,4,3,3,3,4,3,3,4,3,3,0,2,1,0,3,3), -(0,2,0,4,0,3,0,2,0,2,5,5,3,4,4,4,4,1,4,3,3,0,4,3,4,3,1,3,3,2,4,3,0,3,4,3,0,3,4,4,2,4,4,0,4,5,3,3,2,2,1,1,1,2,0,1,5,0,3,3,2,4,3,3,3,4,0,3,0,2,0,4,4,3,5,5,0,0,3,0,2,3,3), -(0,3,0,4,0,3,0,1,0,3,4,3,3,1,3,3,3,0,3,1,3,0,4,3,3,1,1,0,3,0,3,3,0,0,4,4,0,1,5,4,3,3,5,0,3,3,4,3,0,2,0,1,1,1,0,1,3,0,1,2,1,3,3,2,3,3,0,3,0,1,0,1,3,3,4,4,1,0,1,2,2,1,3), -(0,1,0,4,0,4,0,3,0,1,3,3,3,2,3,1,1,0,3,0,3,3,4,3,2,4,2,0,1,0,4,3,2,0,4,3,0,5,3,3,2,4,4,4,3,3,3,4,0,1,3,0,0,1,0,0,1,0,0,0,0,4,2,3,3,3,0,3,0,0,0,4,4,4,5,3,2,0,3,3,0,3,5), -(0,2,0,3,0,0,0,3,0,1,3,0,2,0,0,0,1,0,3,1,1,3,3,0,0,3,0,0,3,0,2,3,1,0,3,1,0,3,3,2,0,4,2,2,0,2,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,2,1,2,0,1,0,1,0,0,0,1,3,1,2,0,0,0,1,0,0,1,4), -(0,3,0,3,0,5,0,1,0,2,4,3,1,3,3,2,1,1,5,2,1,0,5,1,2,0,0,0,3,3,2,2,3,2,4,3,0,0,3,3,1,3,3,0,2,5,3,4,0,3,3,0,1,2,0,2,2,0,3,2,0,2,2,3,3,3,0,2,0,1,0,3,4,4,2,5,4,0,3,0,0,3,5), -(0,3,0,3,0,3,0,1,0,3,3,3,3,0,3,0,2,0,2,1,1,0,2,0,1,0,0,0,2,1,0,0,1,0,3,2,0,0,3,3,1,2,3,1,0,3,3,0,0,1,0,0,0,0,0,2,0,0,0,0,0,2,3,1,2,3,0,3,0,1,0,3,2,1,0,4,3,0,1,1,0,3,3), -(0,4,0,5,0,3,0,3,0,4,5,5,4,3,5,3,4,3,5,3,3,2,5,3,4,4,4,3,4,3,4,5,5,3,4,4,3,4,4,5,4,4,4,3,4,5,5,4,2,3,4,2,3,4,0,3,3,1,4,3,2,4,3,3,5,5,0,3,0,3,0,5,5,5,5,4,4,0,4,0,1,4,4), -(0,4,0,4,0,3,0,3,0,3,5,4,4,2,3,2,5,1,3,2,5,1,4,2,3,2,3,3,4,3,3,3,3,2,5,4,1,3,3,5,3,4,4,0,4,4,3,1,1,3,1,0,2,3,0,2,3,0,3,0,0,4,3,1,3,4,0,3,0,2,0,4,4,4,3,4,5,0,4,0,0,3,4), -(0,3,0,3,0,3,1,2,0,3,4,4,3,3,3,0,2,2,4,3,3,1,3,3,3,1,1,0,3,1,4,3,2,3,4,4,2,4,4,4,3,4,4,3,2,4,4,3,1,3,3,1,3,3,0,4,1,0,2,2,1,4,3,2,3,3,5,4,3,3,5,4,4,3,3,0,4,0,3,2,2,4,4), -(0,2,0,1,0,0,0,0,0,1,2,1,3,0,0,0,0,0,2,0,1,2,1,0,0,1,0,0,0,0,3,0,0,1,0,1,1,3,1,0,0,0,1,1,0,1,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,1,2,2,0,3,4,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1), -(0,1,0,0,0,1,0,0,0,0,4,0,4,1,4,0,3,0,4,0,3,0,4,0,3,0,3,0,4,1,5,1,4,0,0,3,0,5,0,5,2,0,1,0,0,0,2,1,4,0,1,3,0,0,3,0,0,3,1,1,4,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0), -(1,4,0,5,0,3,0,2,0,3,5,4,4,3,4,3,5,3,4,3,3,0,4,3,3,3,3,3,3,2,4,4,3,1,3,4,4,5,4,4,3,4,4,1,3,5,4,3,3,3,1,2,2,3,3,1,3,1,3,3,3,5,3,3,4,5,0,3,0,3,0,3,4,3,4,4,3,0,3,0,2,4,3), -(0,1,0,4,0,0,0,0,0,1,4,0,4,1,4,2,4,0,3,0,1,0,1,0,0,0,0,0,2,0,3,1,1,1,0,3,0,0,0,1,2,1,0,0,1,1,1,1,0,1,0,0,0,1,0,0,3,0,0,0,0,3,2,0,2,2,0,1,0,0,0,2,3,2,3,3,0,0,0,0,2,1,0), -(0,5,1,5,0,3,0,3,0,5,4,4,5,1,5,3,3,0,4,3,4,3,5,3,4,3,3,2,4,3,4,3,3,0,3,3,1,4,4,3,4,4,4,3,4,5,5,3,2,3,1,1,3,3,1,3,1,1,3,3,2,4,5,3,3,5,0,4,0,3,0,4,4,3,5,3,3,0,3,4,0,4,3), -(0,5,0,5,0,3,0,2,0,4,4,3,5,2,4,3,3,3,4,4,4,3,5,3,5,3,3,1,4,0,4,3,3,0,3,3,0,4,4,4,4,5,4,3,3,5,5,3,2,3,1,2,3,2,0,1,0,0,3,2,2,4,4,3,1,5,0,4,0,3,0,4,3,1,3,2,1,0,3,3,0,3,3), -(0,4,0,5,0,5,0,4,0,4,5,5,5,3,4,3,3,2,5,4,4,3,5,3,5,3,4,0,4,3,4,4,3,2,4,4,3,4,5,4,4,5,5,0,3,5,5,4,1,3,3,2,3,3,1,3,1,0,4,3,1,4,4,3,4,5,0,4,0,2,0,4,3,4,4,3,3,0,4,0,0,5,5), -(0,4,0,4,0,5,0,1,1,3,3,4,4,3,4,1,3,0,5,1,3,0,3,1,3,1,1,0,3,0,3,3,4,0,4,3,0,4,4,4,3,4,4,0,3,5,4,1,0,3,0,0,2,3,0,3,1,0,3,1,0,3,2,1,3,5,0,3,0,1,0,3,2,3,3,4,4,0,2,2,0,4,4), -(2,4,0,5,0,4,0,3,0,4,5,5,4,3,5,3,5,3,5,3,5,2,5,3,4,3,3,4,3,4,5,3,2,1,5,4,3,2,3,4,5,3,4,1,2,5,4,3,0,3,3,0,3,2,0,2,3,0,4,1,0,3,4,3,3,5,0,3,0,1,0,4,5,5,5,4,3,0,4,2,0,3,5), -(0,5,0,4,0,4,0,2,0,5,4,3,4,3,4,3,3,3,4,3,4,2,5,3,5,3,4,1,4,3,4,4,4,0,3,5,0,4,4,4,4,5,3,1,3,4,5,3,3,3,3,3,3,3,0,2,2,0,3,3,2,4,3,3,3,5,3,4,1,3,3,5,3,2,0,0,0,0,4,3,1,3,3), -(0,1,0,3,0,3,0,1,0,1,3,3,3,2,3,3,3,0,3,0,0,0,3,1,3,0,0,0,2,2,2,3,0,0,3,2,0,1,2,4,1,3,3,0,0,3,3,3,0,1,0,0,2,1,0,0,3,0,3,1,0,3,0,0,1,3,0,2,0,1,0,3,3,1,3,3,0,0,1,1,0,3,3), -(0,2,0,3,0,2,1,4,0,2,2,3,1,1,3,1,1,0,2,0,3,1,2,3,1,3,0,0,1,0,4,3,2,3,3,3,1,4,2,3,3,3,3,1,0,3,1,4,0,1,1,0,1,2,0,1,1,0,1,1,0,3,1,3,2,2,0,1,0,0,0,2,3,3,3,1,0,0,0,0,0,2,3), -(0,5,0,4,0,5,0,2,0,4,5,5,3,3,4,3,3,1,5,4,4,2,4,4,4,3,4,2,4,3,5,5,4,3,3,4,3,3,5,5,4,5,5,1,3,4,5,3,1,4,3,1,3,3,0,3,3,1,4,3,1,4,5,3,3,5,0,4,0,3,0,5,3,3,1,4,3,0,4,0,1,5,3), -(0,5,0,5,0,4,0,2,0,4,4,3,4,3,3,3,3,3,5,4,4,4,4,4,4,5,3,3,5,2,4,4,4,3,4,4,3,3,4,4,5,5,3,3,4,3,4,3,3,4,3,3,3,3,1,2,2,1,4,3,3,5,4,4,3,4,0,4,0,3,0,4,4,4,4,4,1,0,4,2,0,2,4), -(0,4,0,4,0,3,0,1,0,3,5,2,3,0,3,0,2,1,4,2,3,3,4,1,4,3,3,2,4,1,3,3,3,0,3,3,0,0,3,3,3,5,3,3,3,3,3,2,0,2,0,0,2,0,0,2,0,0,1,0,0,3,1,2,2,3,0,3,0,2,0,4,4,3,3,4,1,0,3,0,0,2,4), -(0,0,0,4,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,1,0,2,0,1,0,0,0,0,0,3,1,3,0,3,2,0,0,0,1,0,3,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,4,0,2,0,0,0,0,0,0,2), -(0,2,1,3,0,2,0,2,0,3,3,3,3,1,3,1,3,3,3,3,3,3,4,2,2,1,2,1,4,0,4,3,1,3,3,3,2,4,3,5,4,3,3,3,3,3,3,3,0,1,3,0,2,0,0,1,0,0,1,0,0,4,2,0,2,3,0,3,3,0,3,3,4,2,3,1,4,0,1,2,0,2,3), -(0,3,0,3,0,1,0,3,0,2,3,3,3,0,3,1,2,0,3,3,2,3,3,2,3,2,3,1,3,0,4,3,2,0,3,3,1,4,3,3,2,3,4,3,1,3,3,1,1,0,1,1,0,1,0,1,0,1,0,0,0,4,1,1,0,3,0,3,1,0,2,3,3,3,3,3,1,0,0,2,0,3,3), -(0,0,0,0,0,0,0,0,0,0,3,0,2,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,3,0,3,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,2,3,0,0,0,0,0,0,0,0,3), -(0,2,0,3,1,3,0,3,0,2,3,3,3,1,3,1,3,1,3,1,3,3,3,1,3,0,2,3,1,1,4,3,3,2,3,3,1,2,2,4,1,3,3,0,1,4,2,3,0,1,3,0,3,0,0,1,3,0,2,0,0,3,3,2,1,3,0,3,0,2,0,3,4,4,4,3,1,0,3,0,0,3,3), -(0,2,0,1,0,2,0,0,0,1,3,2,2,1,3,0,1,1,3,0,3,2,3,1,2,0,2,0,1,1,3,3,3,0,3,3,1,1,2,3,2,3,3,1,2,3,2,0,0,1,0,0,0,0,0,0,3,0,1,0,0,2,1,2,1,3,0,3,0,0,0,3,4,4,4,3,2,0,2,0,0,2,4), -(0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,3,1,0,0,0,0,0,0,0,3), -(0,3,0,3,0,2,0,3,0,3,3,3,2,3,2,2,2,0,3,1,3,3,3,2,3,3,0,0,3,0,3,2,2,0,2,3,1,4,3,4,3,3,2,3,1,5,4,4,0,3,1,2,1,3,0,3,1,1,2,0,2,3,1,3,1,3,0,3,0,1,0,3,3,4,4,2,1,0,2,1,0,2,4), -(0,1,0,3,0,1,0,2,0,1,4,2,5,1,4,0,2,0,2,1,3,1,4,0,2,1,0,0,2,1,4,1,1,0,3,3,0,5,1,3,2,3,3,1,0,3,2,3,0,1,0,0,0,0,0,0,1,0,0,0,0,4,0,1,0,3,0,2,0,1,0,3,3,3,4,3,3,0,0,0,0,2,3), -(0,0,0,1,0,0,0,0,0,0,2,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,1,0,0,0,0,0,3), -(0,1,0,3,0,4,0,3,0,2,4,3,1,0,3,2,2,1,3,1,2,2,3,1,1,1,2,1,3,0,1,2,0,1,3,2,1,3,0,5,5,1,0,0,1,3,2,1,0,3,0,0,1,0,0,0,0,0,3,4,0,1,1,1,3,2,0,2,0,1,0,2,3,3,1,2,3,0,1,0,1,0,4), -(0,0,0,1,0,3,0,3,0,2,2,1,0,0,4,0,3,0,3,1,3,0,3,0,3,0,1,0,3,0,3,1,3,0,3,3,0,0,1,2,1,1,1,0,1,2,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,2,2,1,2,0,0,2,0,0,0,0,2,3,3,3,3,0,0,0,0,1,4), -(0,0,0,3,0,3,0,0,0,0,3,1,1,0,3,0,1,0,2,0,1,0,0,0,0,0,0,0,1,0,3,0,2,0,2,3,0,0,2,2,3,1,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,2,3), -(2,4,0,5,0,5,0,4,0,3,4,3,3,3,4,3,3,3,4,3,4,4,5,4,5,5,5,2,3,0,5,5,4,1,5,4,3,1,5,4,3,4,4,3,3,4,3,3,0,3,2,0,2,3,0,3,0,0,3,3,0,5,3,2,3,3,0,3,0,3,0,3,4,5,4,5,3,0,4,3,0,3,4), -(0,3,0,3,0,3,0,3,0,3,3,4,3,2,3,2,3,0,4,3,3,3,3,3,3,3,3,0,3,2,4,3,3,1,3,4,3,4,4,4,3,4,4,3,2,4,4,1,0,2,0,0,1,1,0,2,0,0,3,1,0,5,3,2,1,3,0,3,0,1,2,4,3,2,4,3,3,0,3,2,0,4,4), -(0,3,0,3,0,1,0,0,0,1,4,3,3,2,3,1,3,1,4,2,3,2,4,2,3,4,3,0,2,2,3,3,3,0,3,3,3,0,3,4,1,3,3,0,3,4,3,3,0,1,1,0,1,0,0,0,4,0,3,0,0,3,1,2,1,3,0,4,0,1,0,4,3,3,4,3,3,0,2,0,0,3,3), -(0,3,0,4,0,1,0,3,0,3,4,3,3,0,3,3,3,1,3,1,3,3,4,3,3,3,0,0,3,1,5,3,3,1,3,3,2,5,4,3,3,4,5,3,2,5,3,4,0,1,0,0,0,0,0,2,0,0,1,1,0,4,2,2,1,3,0,3,0,2,0,4,4,3,5,3,2,0,1,1,0,3,4), -(0,5,0,4,0,5,0,2,0,4,4,3,3,2,3,3,3,1,4,3,4,1,5,3,4,3,4,0,4,2,4,3,4,1,5,4,0,4,4,4,4,5,4,1,3,5,4,2,1,4,1,1,3,2,0,3,1,0,3,2,1,4,3,3,3,4,0,4,0,3,0,4,4,4,3,3,3,0,4,2,0,3,4), -(1,4,0,4,0,3,0,1,0,3,3,3,1,1,3,3,2,2,3,3,1,0,3,2,2,1,2,0,3,1,2,1,2,0,3,2,0,2,2,3,3,4,3,0,3,3,1,2,0,1,1,3,1,2,0,0,3,0,1,1,0,3,2,2,3,3,0,3,0,0,0,2,3,3,4,3,3,0,1,0,0,1,4), -(0,4,0,4,0,4,0,0,0,3,4,4,3,1,4,2,3,2,3,3,3,1,4,3,4,0,3,0,4,2,3,3,2,2,5,4,2,1,3,4,3,4,3,1,3,3,4,2,0,2,1,0,3,3,0,0,2,0,3,1,0,4,4,3,4,3,0,4,0,1,0,2,4,4,4,4,4,0,3,2,0,3,3), -(0,0,0,1,0,4,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,3,2,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2), -(0,2,0,3,0,4,0,4,0,1,3,3,3,0,4,0,2,1,2,1,1,1,2,0,3,1,1,0,1,0,3,1,0,0,3,3,2,0,1,1,0,0,0,0,0,1,0,2,0,2,2,0,3,1,0,0,1,0,1,1,0,1,2,0,3,0,0,0,0,1,0,0,3,3,4,3,1,0,1,0,3,0,2), -(0,0,0,3,0,5,0,0,0,0,1,0,2,0,3,1,0,1,3,0,0,0,2,0,0,0,1,0,0,0,1,1,0,0,4,0,0,0,2,3,0,1,4,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,3,0,0,0,0,0,3), -(0,2,0,5,0,5,0,1,0,2,4,3,3,2,5,1,3,2,3,3,3,0,4,1,2,0,3,0,4,0,2,2,1,1,5,3,0,0,1,4,2,3,2,0,3,3,3,2,0,2,4,1,1,2,0,1,1,0,3,1,0,1,3,1,2,3,0,2,0,0,0,1,3,5,4,4,4,0,3,0,0,1,3), -(0,4,0,5,0,4,0,4,0,4,5,4,3,3,4,3,3,3,4,3,4,4,5,3,4,5,4,2,4,2,3,4,3,1,4,4,1,3,5,4,4,5,5,4,4,5,5,5,2,3,3,1,4,3,1,3,3,0,3,3,1,4,3,4,4,4,0,3,0,4,0,3,3,4,4,5,0,0,4,3,0,4,5), -(0,4,0,4,0,3,0,3,0,3,4,4,4,3,3,2,4,3,4,3,4,3,5,3,4,3,2,1,4,2,4,4,3,1,3,4,2,4,5,5,3,4,5,4,1,5,4,3,0,3,2,2,3,2,1,3,1,0,3,3,3,5,3,3,3,5,4,4,2,3,3,4,3,3,3,2,1,0,3,2,1,4,3), -(0,4,0,5,0,4,0,3,0,3,5,5,3,2,4,3,4,0,5,4,4,1,4,4,4,3,3,3,4,3,5,5,2,3,3,4,1,2,5,5,3,5,5,2,3,5,5,4,0,3,2,0,3,3,1,1,5,1,4,1,0,4,3,2,3,5,0,4,0,3,0,5,4,3,4,3,0,0,4,1,0,4,4), -(1,3,0,4,0,2,0,2,0,2,5,5,3,3,3,3,3,0,4,2,3,4,4,4,3,4,0,0,3,4,5,4,3,3,3,3,2,5,5,4,5,5,5,4,3,5,5,5,1,3,1,0,1,0,0,3,2,0,4,2,0,5,2,3,2,4,1,3,0,3,0,4,5,4,5,4,3,0,4,2,0,5,4), -(0,3,0,4,0,5,0,3,0,3,4,4,3,2,3,2,3,3,3,3,3,2,4,3,3,2,2,0,3,3,3,3,3,1,3,3,3,0,4,4,3,4,4,1,1,4,4,2,0,3,1,0,1,1,0,4,1,0,2,3,1,3,3,1,3,4,0,3,0,1,0,3,1,3,0,0,1,0,2,0,0,4,4), -(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), -(0,3,0,3,0,2,0,3,0,1,5,4,3,3,3,1,4,2,1,2,3,4,4,2,4,4,5,0,3,1,4,3,4,0,4,3,3,3,2,3,2,5,3,4,3,2,2,3,0,0,3,0,2,1,0,1,2,0,0,0,0,2,1,1,3,1,0,2,0,4,0,3,4,4,4,5,2,0,2,0,0,1,3), -(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,0,4,2,1,1,0,1,0,3,2,0,0,3,1,1,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,0,2,0,0,0,1,4,0,4,2,1,0,0,0,0,0,1), -(0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,3,1,0,0,0,2,0,2,1,0,0,1,2,1,0,1,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,3,1,0,0,0,0,0,1,0,0,2,1,0,0,0,0,0,0,0,0,2), -(0,4,0,4,0,4,0,3,0,4,4,3,4,2,4,3,2,0,4,4,4,3,5,3,5,3,3,2,4,2,4,3,4,3,1,4,0,2,3,4,4,4,3,3,3,4,4,4,3,4,1,3,4,3,2,1,2,1,3,3,3,4,4,3,3,5,0,4,0,3,0,4,3,3,3,2,1,0,3,0,0,3,3), -(0,4,0,3,0,3,0,3,0,3,5,5,3,3,3,3,4,3,4,3,3,3,4,4,4,3,3,3,3,4,3,5,3,3,1,3,2,4,5,5,5,5,4,3,4,5,5,3,2,2,3,3,3,3,2,3,3,1,2,3,2,4,3,3,3,4,0,4,0,2,0,4,3,2,2,1,2,0,3,0,0,4,1), +# fmt: off +jp2_char_context = ( + (0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1), + (2, 4, 0, 4, 0, 3, 0, 4, 0, 3, 4, 4, 4, 2, 4, 3, 3, 4, 3, 2, 3, 3, 4, 2, 3, 3, 3, 2, 4, 1, 4, 3, 3, 1, 5, 4, 3, 4, 3, 4, 3, 5, 3, 0, 3, 5, 4, 2, 0, 3, 1, 0, 3, 3, 0, 3, 3, 0, 1, 1, 0, 4, 3, 0, 3, 3, 0, 4, 0, 2, 0, 3, 5, 5, 5, 5, 4, 0, 4, 1, 0, 3, 4), + (0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2), + (0, 4, 0, 5, 0, 5, 0, 4, 0, 4, 5, 4, 4, 3, 5, 3, 5, 1, 5, 3, 4, 3, 4, 4, 3, 4, 3, 3, 4, 3, 5, 4, 4, 3, 5, 5, 3, 5, 5, 5, 3, 5, 5, 3, 4, 5, 5, 3, 1, 3, 2, 0, 3, 4, 0, 4, 2, 0, 4, 2, 1, 5, 3, 2, 3, 5, 0, 4, 0, 2, 0, 5, 4, 4, 5, 4, 5, 0, 4, 0, 0, 4, 4), + (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + (0, 3, 0, 4, 0, 3, 0, 3, 0, 4, 5, 4, 3, 3, 3, 3, 4, 3, 5, 4, 4, 3, 5, 4, 4, 3, 4, 3, 4, 4, 4, 4, 5, 3, 4, 4, 3, 4, 5, 5, 4, 5, 5, 1, 4, 5, 4, 3, 0, 3, 3, 1, 3, 3, 0, 4, 4, 0, 3, 3, 1, 5, 3, 3, 3, 5, 0, 4, 0, 3, 0, 4, 4, 3, 4, 3, 3, 0, 4, 1, 1, 3, 4), + (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + (0, 4, 0, 3, 0, 3, 0, 4, 0, 3, 4, 4, 3, 2, 2, 1, 2, 1, 3, 1, 3, 3, 3, 3, 3, 4, 3, 1, 3, 3, 5, 3, 3, 0, 4, 3, 0, 5, 4, 3, 3, 5, 4, 4, 3, 4, 4, 5, 0, 1, 2, 0, 1, 2, 0, 2, 2, 0, 1, 0, 0, 5, 2, 2, 1, 4, 0, 3, 0, 1, 0, 4, 4, 3, 5, 4, 3, 0, 2, 1, 0, 4, 3), + (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + (0, 3, 0, 5, 0, 4, 0, 2, 1, 4, 4, 2, 4, 1, 4, 2, 4, 2, 4, 3, 3, 3, 4, 3, 3, 3, 3, 1, 4, 2, 3, 3, 3, 1, 4, 4, 1, 1, 1, 4, 3, 3, 2, 0, 2, 4, 3, 2, 0, 3, 3, 0, 3, 1, 1, 0, 0, 0, 3, 3, 0, 4, 2, 2, 3, 4, 0, 4, 0, 3, 0, 4, 4, 5, 3, 4, 4, 0, 3, 0, 0, 1, 4), + (1, 4, 0, 4, 0, 4, 0, 4, 0, 3, 5, 4, 4, 3, 4, 3, 5, 4, 3, 3, 4, 3, 5, 4, 4, 4, 4, 3, 4, 2, 4, 3, 3, 1, 5, 4, 3, 2, 4, 5, 4, 5, 5, 4, 4, 5, 4, 4, 0, 3, 2, 2, 3, 3, 0, 4, 3, 1, 3, 2, 1, 4, 3, 3, 4, 5, 0, 3, 0, 2, 0, 4, 5, 5, 4, 5, 4, 0, 4, 0, 0, 5, 4), + (0, 5, 0, 5, 0, 4, 0, 3, 0, 4, 4, 3, 4, 3, 3, 3, 4, 0, 4, 4, 4, 3, 4, 3, 4, 3, 3, 1, 4, 2, 4, 3, 4, 0, 5, 4, 1, 4, 5, 4, 4, 5, 3, 2, 4, 3, 4, 3, 2, 4, 1, 3, 3, 3, 2, 3, 2, 0, 4, 3, 3, 4, 3, 3, 3, 4, 0, 4, 0, 3, 0, 4, 5, 4, 4, 4, 3, 0, 4, 1, 0, 1, 3), + (0, 3, 1, 4, 0, 3, 0, 2, 0, 3, 4, 4, 3, 1, 4, 2, 3, 3, 4, 3, 4, 3, 4, 3, 4, 4, 3, 2, 3, 1, 5, 4, 4, 1, 4, 4, 3, 5, 4, 4, 3, 5, 5, 4, 3, 4, 4, 3, 1, 2, 3, 1, 2, 2, 0, 3, 2, 0, 3, 1, 0, 5, 3, 3, 3, 4, 3, 3, 3, 3, 4, 4, 4, 4, 5, 4, 2, 0, 3, 3, 2, 4, 3), + (0, 2, 0, 3, 0, 1, 0, 1, 0, 0, 3, 2, 0, 0, 2, 0, 1, 0, 2, 1, 3, 3, 3, 1, 2, 3, 1, 0, 1, 0, 4, 2, 1, 1, 3, 3, 0, 4, 3, 3, 1, 4, 3, 3, 0, 3, 3, 2, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 4, 1, 0, 2, 3, 2, 2, 2, 1, 3, 3, 3, 4, 4, 3, 2, 0, 3, 1, 0, 3, 3), + (0, 4, 0, 4, 0, 3, 0, 3, 0, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 3, 4, 2, 4, 3, 4, 3, 3, 2, 4, 3, 4, 5, 4, 1, 4, 5, 3, 5, 4, 5, 3, 5, 4, 0, 3, 5, 5, 3, 1, 3, 3, 2, 2, 3, 0, 3, 4, 1, 3, 3, 2, 4, 3, 3, 3, 4, 0, 4, 0, 3, 0, 4, 5, 4, 4, 5, 3, 0, 4, 1, 0, 3, 4), + (0, 2, 0, 3, 0, 3, 0, 0, 0, 2, 2, 2, 1, 0, 1, 0, 0, 0, 3, 0, 3, 0, 3, 0, 1, 3, 1, 0, 3, 1, 3, 3, 3, 1, 3, 3, 3, 0, 1, 3, 1, 3, 4, 0, 0, 3, 1, 1, 0, 3, 2, 0, 0, 0, 0, 1, 3, 0, 1, 0, 0, 3, 3, 2, 0, 3, 0, 0, 0, 0, 0, 3, 4, 3, 4, 3, 3, 0, 3, 0, 0, 2, 3), + (2, 3, 0, 3, 0, 2, 0, 1, 0, 3, 3, 4, 3, 1, 3, 1, 1, 1, 3, 1, 4, 3, 4, 3, 3, 3, 0, 0, 3, 1, 5, 4, 3, 1, 4, 3, 2, 5, 5, 4, 4, 4, 4, 3, 3, 4, 4, 4, 0, 2, 1, 1, 3, 2, 0, 1, 2, 0, 0, 1, 0, 4, 1, 3, 3, 3, 0, 3, 0, 1, 0, 4, 4, 4, 5, 5, 3, 0, 2, 0, 0, 4, 4), + (0, 2, 0, 1, 0, 3, 1, 3, 0, 2, 3, 3, 3, 0, 3, 1, 0, 0, 3, 0, 3, 2, 3, 1, 3, 2, 1, 1, 0, 0, 4, 2, 1, 0, 2, 3, 1, 4, 3, 2, 0, 4, 4, 3, 1, 3, 1, 3, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 4, 1, 1, 1, 2, 0, 3, 0, 0, 0, 3, 4, 2, 4, 3, 2, 0, 1, 0, 0, 3, 3), + (0, 1, 0, 4, 0, 5, 0, 4, 0, 2, 4, 4, 2, 3, 3, 2, 3, 3, 5, 3, 3, 3, 4, 3, 4, 2, 3, 0, 4, 3, 3, 3, 4, 1, 4, 3, 2, 1, 5, 5, 3, 4, 5, 1, 3, 5, 4, 2, 0, 3, 3, 0, 1, 3, 0, 4, 2, 0, 1, 3, 1, 4, 3, 3, 3, 3, 0, 3, 0, 1, 0, 3, 4, 4, 4, 5, 5, 0, 3, 0, 1, 4, 5), + (0, 2, 0, 3, 0, 3, 0, 0, 0, 2, 3, 1, 3, 0, 4, 0, 1, 1, 3, 0, 3, 4, 3, 2, 3, 1, 0, 3, 3, 2, 3, 1, 3, 0, 2, 3, 0, 2, 1, 4, 1, 2, 2, 0, 0, 3, 3, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 2, 2, 0, 3, 2, 1, 3, 3, 0, 2, 0, 2, 0, 0, 3, 3, 1, 2, 4, 0, 3, 0, 2, 2, 3), + (2, 4, 0, 5, 0, 4, 0, 4, 0, 2, 4, 4, 4, 3, 4, 3, 3, 3, 1, 2, 4, 3, 4, 3, 4, 4, 5, 0, 3, 3, 3, 3, 2, 0, 4, 3, 1, 4, 3, 4, 1, 4, 4, 3, 3, 4, 4, 3, 1, 2, 3, 0, 4, 2, 0, 4, 1, 0, 3, 3, 0, 4, 3, 3, 3, 4, 0, 4, 0, 2, 0, 3, 5, 3, 4, 5, 2, 0, 3, 0, 0, 4, 5), + (0, 3, 0, 4, 0, 1, 0, 1, 0, 1, 3, 2, 2, 1, 3, 0, 3, 0, 2, 0, 2, 0, 3, 0, 2, 0, 0, 0, 1, 0, 1, 1, 0, 0, 3, 1, 0, 0, 0, 4, 0, 3, 1, 0, 2, 1, 3, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4, 2, 2, 3, 1, 0, 3, 0, 0, 0, 1, 4, 4, 4, 3, 0, 0, 4, 0, 0, 1, 4), + (1, 4, 1, 5, 0, 3, 0, 3, 0, 4, 5, 4, 4, 3, 5, 3, 3, 4, 4, 3, 4, 1, 3, 3, 3, 3, 2, 1, 4, 1, 5, 4, 3, 1, 4, 4, 3, 5, 4, 4, 3, 5, 4, 3, 3, 4, 4, 4, 0, 3, 3, 1, 2, 3, 0, 3, 1, 0, 3, 3, 0, 5, 4, 4, 4, 4, 4, 4, 3, 3, 5, 4, 4, 3, 3, 5, 4, 0, 3, 2, 0, 4, 4), + (0, 2, 0, 3, 0, 1, 0, 0, 0, 1, 3, 3, 3, 2, 4, 1, 3, 0, 3, 1, 3, 0, 2, 2, 1, 1, 0, 0, 2, 0, 4, 3, 1, 0, 4, 3, 0, 4, 4, 4, 1, 4, 3, 1, 1, 3, 3, 1, 0, 2, 0, 0, 1, 3, 0, 0, 0, 0, 2, 0, 0, 4, 3, 2, 4, 3, 5, 4, 3, 3, 3, 4, 3, 3, 4, 3, 3, 0, 2, 1, 0, 3, 3), + (0, 2, 0, 4, 0, 3, 0, 2, 0, 2, 5, 5, 3, 4, 4, 4, 4, 1, 4, 3, 3, 0, 4, 3, 4, 3, 1, 3, 3, 2, 4, 3, 0, 3, 4, 3, 0, 3, 4, 4, 2, 4, 4, 0, 4, 5, 3, 3, 2, 2, 1, 1, 1, 2, 0, 1, 5, 0, 3, 3, 2, 4, 3, 3, 3, 4, 0, 3, 0, 2, 0, 4, 4, 3, 5, 5, 0, 0, 3, 0, 2, 3, 3), + (0, 3, 0, 4, 0, 3, 0, 1, 0, 3, 4, 3, 3, 1, 3, 3, 3, 0, 3, 1, 3, 0, 4, 3, 3, 1, 1, 0, 3, 0, 3, 3, 0, 0, 4, 4, 0, 1, 5, 4, 3, 3, 5, 0, 3, 3, 4, 3, 0, 2, 0, 1, 1, 1, 0, 1, 3, 0, 1, 2, 1, 3, 3, 2, 3, 3, 0, 3, 0, 1, 0, 1, 3, 3, 4, 4, 1, 0, 1, 2, 2, 1, 3), + (0, 1, 0, 4, 0, 4, 0, 3, 0, 1, 3, 3, 3, 2, 3, 1, 1, 0, 3, 0, 3, 3, 4, 3, 2, 4, 2, 0, 1, 0, 4, 3, 2, 0, 4, 3, 0, 5, 3, 3, 2, 4, 4, 4, 3, 3, 3, 4, 0, 1, 3, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 4, 2, 3, 3, 3, 0, 3, 0, 0, 0, 4, 4, 4, 5, 3, 2, 0, 3, 3, 0, 3, 5), + (0, 2, 0, 3, 0, 0, 0, 3, 0, 1, 3, 0, 2, 0, 0, 0, 1, 0, 3, 1, 1, 3, 3, 0, 0, 3, 0, 0, 3, 0, 2, 3, 1, 0, 3, 1, 0, 3, 3, 2, 0, 4, 2, 2, 0, 2, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 2, 0, 1, 0, 1, 0, 0, 0, 1, 3, 1, 2, 0, 0, 0, 1, 0, 0, 1, 4), + (0, 3, 0, 3, 0, 5, 0, 1, 0, 2, 4, 3, 1, 3, 3, 2, 1, 1, 5, 2, 1, 0, 5, 1, 2, 0, 0, 0, 3, 3, 2, 2, 3, 2, 4, 3, 0, 0, 3, 3, 1, 3, 3, 0, 2, 5, 3, 4, 0, 3, 3, 0, 1, 2, 0, 2, 2, 0, 3, 2, 0, 2, 2, 3, 3, 3, 0, 2, 0, 1, 0, 3, 4, 4, 2, 5, 4, 0, 3, 0, 0, 3, 5), + (0, 3, 0, 3, 0, 3, 0, 1, 0, 3, 3, 3, 3, 0, 3, 0, 2, 0, 2, 1, 1, 0, 2, 0, 1, 0, 0, 0, 2, 1, 0, 0, 1, 0, 3, 2, 0, 0, 3, 3, 1, 2, 3, 1, 0, 3, 3, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 3, 1, 2, 3, 0, 3, 0, 1, 0, 3, 2, 1, 0, 4, 3, 0, 1, 1, 0, 3, 3), + (0, 4, 0, 5, 0, 3, 0, 3, 0, 4, 5, 5, 4, 3, 5, 3, 4, 3, 5, 3, 3, 2, 5, 3, 4, 4, 4, 3, 4, 3, 4, 5, 5, 3, 4, 4, 3, 4, 4, 5, 4, 4, 4, 3, 4, 5, 5, 4, 2, 3, 4, 2, 3, 4, 0, 3, 3, 1, 4, 3, 2, 4, 3, 3, 5, 5, 0, 3, 0, 3, 0, 5, 5, 5, 5, 4, 4, 0, 4, 0, 1, 4, 4), + (0, 4, 0, 4, 0, 3, 0, 3, 0, 3, 5, 4, 4, 2, 3, 2, 5, 1, 3, 2, 5, 1, 4, 2, 3, 2, 3, 3, 4, 3, 3, 3, 3, 2, 5, 4, 1, 3, 3, 5, 3, 4, 4, 0, 4, 4, 3, 1, 1, 3, 1, 0, 2, 3, 0, 2, 3, 0, 3, 0, 0, 4, 3, 1, 3, 4, 0, 3, 0, 2, 0, 4, 4, 4, 3, 4, 5, 0, 4, 0, 0, 3, 4), + (0, 3, 0, 3, 0, 3, 1, 2, 0, 3, 4, 4, 3, 3, 3, 0, 2, 2, 4, 3, 3, 1, 3, 3, 3, 1, 1, 0, 3, 1, 4, 3, 2, 3, 4, 4, 2, 4, 4, 4, 3, 4, 4, 3, 2, 4, 4, 3, 1, 3, 3, 1, 3, 3, 0, 4, 1, 0, 2, 2, 1, 4, 3, 2, 3, 3, 5, 4, 3, 3, 5, 4, 4, 3, 3, 0, 4, 0, 3, 2, 2, 4, 4), + (0, 2, 0, 1, 0, 0, 0, 0, 0, 1, 2, 1, 3, 0, 0, 0, 0, 0, 2, 0, 1, 2, 1, 0, 0, 1, 0, 0, 0, 0, 3, 0, 0, 1, 0, 1, 1, 3, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 0, 3, 4, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1), + (0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 4, 0, 4, 1, 4, 0, 3, 0, 4, 0, 3, 0, 4, 0, 3, 0, 3, 0, 4, 1, 5, 1, 4, 0, 0, 3, 0, 5, 0, 5, 2, 0, 1, 0, 0, 0, 2, 1, 4, 0, 1, 3, 0, 0, 3, 0, 0, 3, 1, 1, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0), + (1, 4, 0, 5, 0, 3, 0, 2, 0, 3, 5, 4, 4, 3, 4, 3, 5, 3, 4, 3, 3, 0, 4, 3, 3, 3, 3, 3, 3, 2, 4, 4, 3, 1, 3, 4, 4, 5, 4, 4, 3, 4, 4, 1, 3, 5, 4, 3, 3, 3, 1, 2, 2, 3, 3, 1, 3, 1, 3, 3, 3, 5, 3, 3, 4, 5, 0, 3, 0, 3, 0, 3, 4, 3, 4, 4, 3, 0, 3, 0, 2, 4, 3), + (0, 1, 0, 4, 0, 0, 0, 0, 0, 1, 4, 0, 4, 1, 4, 2, 4, 0, 3, 0, 1, 0, 1, 0, 0, 0, 0, 0, 2, 0, 3, 1, 1, 1, 0, 3, 0, 0, 0, 1, 2, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 3, 0, 0, 0, 0, 3, 2, 0, 2, 2, 0, 1, 0, 0, 0, 2, 3, 2, 3, 3, 0, 0, 0, 0, 2, 1, 0), + (0, 5, 1, 5, 0, 3, 0, 3, 0, 5, 4, 4, 5, 1, 5, 3, 3, 0, 4, 3, 4, 3, 5, 3, 4, 3, 3, 2, 4, 3, 4, 3, 3, 0, 3, 3, 1, 4, 4, 3, 4, 4, 4, 3, 4, 5, 5, 3, 2, 3, 1, 1, 3, 3, 1, 3, 1, 1, 3, 3, 2, 4, 5, 3, 3, 5, 0, 4, 0, 3, 0, 4, 4, 3, 5, 3, 3, 0, 3, 4, 0, 4, 3), + (0, 5, 0, 5, 0, 3, 0, 2, 0, 4, 4, 3, 5, 2, 4, 3, 3, 3, 4, 4, 4, 3, 5, 3, 5, 3, 3, 1, 4, 0, 4, 3, 3, 0, 3, 3, 0, 4, 4, 4, 4, 5, 4, 3, 3, 5, 5, 3, 2, 3, 1, 2, 3, 2, 0, 1, 0, 0, 3, 2, 2, 4, 4, 3, 1, 5, 0, 4, 0, 3, 0, 4, 3, 1, 3, 2, 1, 0, 3, 3, 0, 3, 3), + (0, 4, 0, 5, 0, 5, 0, 4, 0, 4, 5, 5, 5, 3, 4, 3, 3, 2, 5, 4, 4, 3, 5, 3, 5, 3, 4, 0, 4, 3, 4, 4, 3, 2, 4, 4, 3, 4, 5, 4, 4, 5, 5, 0, 3, 5, 5, 4, 1, 3, 3, 2, 3, 3, 1, 3, 1, 0, 4, 3, 1, 4, 4, 3, 4, 5, 0, 4, 0, 2, 0, 4, 3, 4, 4, 3, 3, 0, 4, 0, 0, 5, 5), + (0, 4, 0, 4, 0, 5, 0, 1, 1, 3, 3, 4, 4, 3, 4, 1, 3, 0, 5, 1, 3, 0, 3, 1, 3, 1, 1, 0, 3, 0, 3, 3, 4, 0, 4, 3, 0, 4, 4, 4, 3, 4, 4, 0, 3, 5, 4, 1, 0, 3, 0, 0, 2, 3, 0, 3, 1, 0, 3, 1, 0, 3, 2, 1, 3, 5, 0, 3, 0, 1, 0, 3, 2, 3, 3, 4, 4, 0, 2, 2, 0, 4, 4), + (2, 4, 0, 5, 0, 4, 0, 3, 0, 4, 5, 5, 4, 3, 5, 3, 5, 3, 5, 3, 5, 2, 5, 3, 4, 3, 3, 4, 3, 4, 5, 3, 2, 1, 5, 4, 3, 2, 3, 4, 5, 3, 4, 1, 2, 5, 4, 3, 0, 3, 3, 0, 3, 2, 0, 2, 3, 0, 4, 1, 0, 3, 4, 3, 3, 5, 0, 3, 0, 1, 0, 4, 5, 5, 5, 4, 3, 0, 4, 2, 0, 3, 5), + (0, 5, 0, 4, 0, 4, 0, 2, 0, 5, 4, 3, 4, 3, 4, 3, 3, 3, 4, 3, 4, 2, 5, 3, 5, 3, 4, 1, 4, 3, 4, 4, 4, 0, 3, 5, 0, 4, 4, 4, 4, 5, 3, 1, 3, 4, 5, 3, 3, 3, 3, 3, 3, 3, 0, 2, 2, 0, 3, 3, 2, 4, 3, 3, 3, 5, 3, 4, 1, 3, 3, 5, 3, 2, 0, 0, 0, 0, 4, 3, 1, 3, 3), + (0, 1, 0, 3, 0, 3, 0, 1, 0, 1, 3, 3, 3, 2, 3, 3, 3, 0, 3, 0, 0, 0, 3, 1, 3, 0, 0, 0, 2, 2, 2, 3, 0, 0, 3, 2, 0, 1, 2, 4, 1, 3, 3, 0, 0, 3, 3, 3, 0, 1, 0, 0, 2, 1, 0, 0, 3, 0, 3, 1, 0, 3, 0, 0, 1, 3, 0, 2, 0, 1, 0, 3, 3, 1, 3, 3, 0, 0, 1, 1, 0, 3, 3), + (0, 2, 0, 3, 0, 2, 1, 4, 0, 2, 2, 3, 1, 1, 3, 1, 1, 0, 2, 0, 3, 1, 2, 3, 1, 3, 0, 0, 1, 0, 4, 3, 2, 3, 3, 3, 1, 4, 2, 3, 3, 3, 3, 1, 0, 3, 1, 4, 0, 1, 1, 0, 1, 2, 0, 1, 1, 0, 1, 1, 0, 3, 1, 3, 2, 2, 0, 1, 0, 0, 0, 2, 3, 3, 3, 1, 0, 0, 0, 0, 0, 2, 3), + (0, 5, 0, 4, 0, 5, 0, 2, 0, 4, 5, 5, 3, 3, 4, 3, 3, 1, 5, 4, 4, 2, 4, 4, 4, 3, 4, 2, 4, 3, 5, 5, 4, 3, 3, 4, 3, 3, 5, 5, 4, 5, 5, 1, 3, 4, 5, 3, 1, 4, 3, 1, 3, 3, 0, 3, 3, 1, 4, 3, 1, 4, 5, 3, 3, 5, 0, 4, 0, 3, 0, 5, 3, 3, 1, 4, 3, 0, 4, 0, 1, 5, 3), + (0, 5, 0, 5, 0, 4, 0, 2, 0, 4, 4, 3, 4, 3, 3, 3, 3, 3, 5, 4, 4, 4, 4, 4, 4, 5, 3, 3, 5, 2, 4, 4, 4, 3, 4, 4, 3, 3, 4, 4, 5, 5, 3, 3, 4, 3, 4, 3, 3, 4, 3, 3, 3, 3, 1, 2, 2, 1, 4, 3, 3, 5, 4, 4, 3, 4, 0, 4, 0, 3, 0, 4, 4, 4, 4, 4, 1, 0, 4, 2, 0, 2, 4), + (0, 4, 0, 4, 0, 3, 0, 1, 0, 3, 5, 2, 3, 0, 3, 0, 2, 1, 4, 2, 3, 3, 4, 1, 4, 3, 3, 2, 4, 1, 3, 3, 3, 0, 3, 3, 0, 0, 3, 3, 3, 5, 3, 3, 3, 3, 3, 2, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 1, 0, 0, 3, 1, 2, 2, 3, 0, 3, 0, 2, 0, 4, 4, 3, 3, 4, 1, 0, 3, 0, 0, 2, 4), + (0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 2, 0, 0, 0, 0, 0, 1, 0, 2, 0, 1, 0, 0, 0, 0, 0, 3, 1, 3, 0, 3, 2, 0, 0, 0, 1, 0, 3, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 4, 0, 2, 0, 0, 0, 0, 0, 0, 2), + (0, 2, 1, 3, 0, 2, 0, 2, 0, 3, 3, 3, 3, 1, 3, 1, 3, 3, 3, 3, 3, 3, 4, 2, 2, 1, 2, 1, 4, 0, 4, 3, 1, 3, 3, 3, 2, 4, 3, 5, 4, 3, 3, 3, 3, 3, 3, 3, 0, 1, 3, 0, 2, 0, 0, 1, 0, 0, 1, 0, 0, 4, 2, 0, 2, 3, 0, 3, 3, 0, 3, 3, 4, 2, 3, 1, 4, 0, 1, 2, 0, 2, 3), + (0, 3, 0, 3, 0, 1, 0, 3, 0, 2, 3, 3, 3, 0, 3, 1, 2, 0, 3, 3, 2, 3, 3, 2, 3, 2, 3, 1, 3, 0, 4, 3, 2, 0, 3, 3, 1, 4, 3, 3, 2, 3, 4, 3, 1, 3, 3, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 4, 1, 1, 0, 3, 0, 3, 1, 0, 2, 3, 3, 3, 3, 3, 1, 0, 0, 2, 0, 3, 3), + (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 2, 0, 3, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, 3, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 2, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 3), + (0, 2, 0, 3, 1, 3, 0, 3, 0, 2, 3, 3, 3, 1, 3, 1, 3, 1, 3, 1, 3, 3, 3, 1, 3, 0, 2, 3, 1, 1, 4, 3, 3, 2, 3, 3, 1, 2, 2, 4, 1, 3, 3, 0, 1, 4, 2, 3, 0, 1, 3, 0, 3, 0, 0, 1, 3, 0, 2, 0, 0, 3, 3, 2, 1, 3, 0, 3, 0, 2, 0, 3, 4, 4, 4, 3, 1, 0, 3, 0, 0, 3, 3), + (0, 2, 0, 1, 0, 2, 0, 0, 0, 1, 3, 2, 2, 1, 3, 0, 1, 1, 3, 0, 3, 2, 3, 1, 2, 0, 2, 0, 1, 1, 3, 3, 3, 0, 3, 3, 1, 1, 2, 3, 2, 3, 3, 1, 2, 3, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 3, 0, 1, 0, 0, 2, 1, 2, 1, 3, 0, 3, 0, 0, 0, 3, 4, 4, 4, 3, 2, 0, 2, 0, 0, 2, 4), + (0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 3, 1, 0, 0, 0, 0, 0, 0, 0, 3), + (0, 3, 0, 3, 0, 2, 0, 3, 0, 3, 3, 3, 2, 3, 2, 2, 2, 0, 3, 1, 3, 3, 3, 2, 3, 3, 0, 0, 3, 0, 3, 2, 2, 0, 2, 3, 1, 4, 3, 4, 3, 3, 2, 3, 1, 5, 4, 4, 0, 3, 1, 2, 1, 3, 0, 3, 1, 1, 2, 0, 2, 3, 1, 3, 1, 3, 0, 3, 0, 1, 0, 3, 3, 4, 4, 2, 1, 0, 2, 1, 0, 2, 4), + (0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 4, 2, 5, 1, 4, 0, 2, 0, 2, 1, 3, 1, 4, 0, 2, 1, 0, 0, 2, 1, 4, 1, 1, 0, 3, 3, 0, 5, 1, 3, 2, 3, 3, 1, 0, 3, 2, 3, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 4, 0, 1, 0, 3, 0, 2, 0, 1, 0, 3, 3, 3, 4, 3, 3, 0, 0, 0, 0, 2, 3), + (0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0, 0, 1, 0, 0, 0, 0, 0, 3), + (0, 1, 0, 3, 0, 4, 0, 3, 0, 2, 4, 3, 1, 0, 3, 2, 2, 1, 3, 1, 2, 2, 3, 1, 1, 1, 2, 1, 3, 0, 1, 2, 0, 1, 3, 2, 1, 3, 0, 5, 5, 1, 0, 0, 1, 3, 2, 1, 0, 3, 0, 0, 1, 0, 0, 0, 0, 0, 3, 4, 0, 1, 1, 1, 3, 2, 0, 2, 0, 1, 0, 2, 3, 3, 1, 2, 3, 0, 1, 0, 1, 0, 4), + (0, 0, 0, 1, 0, 3, 0, 3, 0, 2, 2, 1, 0, 0, 4, 0, 3, 0, 3, 1, 3, 0, 3, 0, 3, 0, 1, 0, 3, 0, 3, 1, 3, 0, 3, 3, 0, 0, 1, 2, 1, 1, 1, 0, 1, 2, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 1, 2, 0, 0, 2, 0, 0, 0, 0, 2, 3, 3, 3, 3, 0, 0, 0, 0, 1, 4), + (0, 0, 0, 3, 0, 3, 0, 0, 0, 0, 3, 1, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 0, 2, 0, 2, 3, 0, 0, 2, 2, 3, 1, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 2, 0, 0, 0, 0, 2, 3), + (2, 4, 0, 5, 0, 5, 0, 4, 0, 3, 4, 3, 3, 3, 4, 3, 3, 3, 4, 3, 4, 4, 5, 4, 5, 5, 5, 2, 3, 0, 5, 5, 4, 1, 5, 4, 3, 1, 5, 4, 3, 4, 4, 3, 3, 4, 3, 3, 0, 3, 2, 0, 2, 3, 0, 3, 0, 0, 3, 3, 0, 5, 3, 2, 3, 3, 0, 3, 0, 3, 0, 3, 4, 5, 4, 5, 3, 0, 4, 3, 0, 3, 4), + (0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 3, 4, 3, 2, 3, 2, 3, 0, 4, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 2, 4, 3, 3, 1, 3, 4, 3, 4, 4, 4, 3, 4, 4, 3, 2, 4, 4, 1, 0, 2, 0, 0, 1, 1, 0, 2, 0, 0, 3, 1, 0, 5, 3, 2, 1, 3, 0, 3, 0, 1, 2, 4, 3, 2, 4, 3, 3, 0, 3, 2, 0, 4, 4), + (0, 3, 0, 3, 0, 1, 0, 0, 0, 1, 4, 3, 3, 2, 3, 1, 3, 1, 4, 2, 3, 2, 4, 2, 3, 4, 3, 0, 2, 2, 3, 3, 3, 0, 3, 3, 3, 0, 3, 4, 1, 3, 3, 0, 3, 4, 3, 3, 0, 1, 1, 0, 1, 0, 0, 0, 4, 0, 3, 0, 0, 3, 1, 2, 1, 3, 0, 4, 0, 1, 0, 4, 3, 3, 4, 3, 3, 0, 2, 0, 0, 3, 3), + (0, 3, 0, 4, 0, 1, 0, 3, 0, 3, 4, 3, 3, 0, 3, 3, 3, 1, 3, 1, 3, 3, 4, 3, 3, 3, 0, 0, 3, 1, 5, 3, 3, 1, 3, 3, 2, 5, 4, 3, 3, 4, 5, 3, 2, 5, 3, 4, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 1, 1, 0, 4, 2, 2, 1, 3, 0, 3, 0, 2, 0, 4, 4, 3, 5, 3, 2, 0, 1, 1, 0, 3, 4), + (0, 5, 0, 4, 0, 5, 0, 2, 0, 4, 4, 3, 3, 2, 3, 3, 3, 1, 4, 3, 4, 1, 5, 3, 4, 3, 4, 0, 4, 2, 4, 3, 4, 1, 5, 4, 0, 4, 4, 4, 4, 5, 4, 1, 3, 5, 4, 2, 1, 4, 1, 1, 3, 2, 0, 3, 1, 0, 3, 2, 1, 4, 3, 3, 3, 4, 0, 4, 0, 3, 0, 4, 4, 4, 3, 3, 3, 0, 4, 2, 0, 3, 4), + (1, 4, 0, 4, 0, 3, 0, 1, 0, 3, 3, 3, 1, 1, 3, 3, 2, 2, 3, 3, 1, 0, 3, 2, 2, 1, 2, 0, 3, 1, 2, 1, 2, 0, 3, 2, 0, 2, 2, 3, 3, 4, 3, 0, 3, 3, 1, 2, 0, 1, 1, 3, 1, 2, 0, 0, 3, 0, 1, 1, 0, 3, 2, 2, 3, 3, 0, 3, 0, 0, 0, 2, 3, 3, 4, 3, 3, 0, 1, 0, 0, 1, 4), + (0, 4, 0, 4, 0, 4, 0, 0, 0, 3, 4, 4, 3, 1, 4, 2, 3, 2, 3, 3, 3, 1, 4, 3, 4, 0, 3, 0, 4, 2, 3, 3, 2, 2, 5, 4, 2, 1, 3, 4, 3, 4, 3, 1, 3, 3, 4, 2, 0, 2, 1, 0, 3, 3, 0, 0, 2, 0, 3, 1, 0, 4, 4, 3, 4, 3, 0, 4, 0, 1, 0, 2, 4, 4, 4, 4, 4, 0, 3, 2, 0, 3, 3), + (0, 0, 0, 1, 0, 4, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2), + (0, 2, 0, 3, 0, 4, 0, 4, 0, 1, 3, 3, 3, 0, 4, 0, 2, 1, 2, 1, 1, 1, 2, 0, 3, 1, 1, 0, 1, 0, 3, 1, 0, 0, 3, 3, 2, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 2, 0, 2, 2, 0, 3, 1, 0, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 0, 0, 0, 0, 1, 0, 0, 3, 3, 4, 3, 1, 0, 1, 0, 3, 0, 2), + (0, 0, 0, 3, 0, 5, 0, 0, 0, 0, 1, 0, 2, 0, 3, 1, 0, 1, 3, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 4, 0, 0, 0, 2, 3, 0, 1, 4, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 3, 0, 0, 0, 0, 0, 3), + (0, 2, 0, 5, 0, 5, 0, 1, 0, 2, 4, 3, 3, 2, 5, 1, 3, 2, 3, 3, 3, 0, 4, 1, 2, 0, 3, 0, 4, 0, 2, 2, 1, 1, 5, 3, 0, 0, 1, 4, 2, 3, 2, 0, 3, 3, 3, 2, 0, 2, 4, 1, 1, 2, 0, 1, 1, 0, 3, 1, 0, 1, 3, 1, 2, 3, 0, 2, 0, 0, 0, 1, 3, 5, 4, 4, 4, 0, 3, 0, 0, 1, 3), + (0, 4, 0, 5, 0, 4, 0, 4, 0, 4, 5, 4, 3, 3, 4, 3, 3, 3, 4, 3, 4, 4, 5, 3, 4, 5, 4, 2, 4, 2, 3, 4, 3, 1, 4, 4, 1, 3, 5, 4, 4, 5, 5, 4, 4, 5, 5, 5, 2, 3, 3, 1, 4, 3, 1, 3, 3, 0, 3, 3, 1, 4, 3, 4, 4, 4, 0, 3, 0, 4, 0, 3, 3, 4, 4, 5, 0, 0, 4, 3, 0, 4, 5), + (0, 4, 0, 4, 0, 3, 0, 3, 0, 3, 4, 4, 4, 3, 3, 2, 4, 3, 4, 3, 4, 3, 5, 3, 4, 3, 2, 1, 4, 2, 4, 4, 3, 1, 3, 4, 2, 4, 5, 5, 3, 4, 5, 4, 1, 5, 4, 3, 0, 3, 2, 2, 3, 2, 1, 3, 1, 0, 3, 3, 3, 5, 3, 3, 3, 5, 4, 4, 2, 3, 3, 4, 3, 3, 3, 2, 1, 0, 3, 2, 1, 4, 3), + (0, 4, 0, 5, 0, 4, 0, 3, 0, 3, 5, 5, 3, 2, 4, 3, 4, 0, 5, 4, 4, 1, 4, 4, 4, 3, 3, 3, 4, 3, 5, 5, 2, 3, 3, 4, 1, 2, 5, 5, 3, 5, 5, 2, 3, 5, 5, 4, 0, 3, 2, 0, 3, 3, 1, 1, 5, 1, 4, 1, 0, 4, 3, 2, 3, 5, 0, 4, 0, 3, 0, 5, 4, 3, 4, 3, 0, 0, 4, 1, 0, 4, 4), + (1, 3, 0, 4, 0, 2, 0, 2, 0, 2, 5, 5, 3, 3, 3, 3, 3, 0, 4, 2, 3, 4, 4, 4, 3, 4, 0, 0, 3, 4, 5, 4, 3, 3, 3, 3, 2, 5, 5, 4, 5, 5, 5, 4, 3, 5, 5, 5, 1, 3, 1, 0, 1, 0, 0, 3, 2, 0, 4, 2, 0, 5, 2, 3, 2, 4, 1, 3, 0, 3, 0, 4, 5, 4, 5, 4, 3, 0, 4, 2, 0, 5, 4), + (0, 3, 0, 4, 0, 5, 0, 3, 0, 3, 4, 4, 3, 2, 3, 2, 3, 3, 3, 3, 3, 2, 4, 3, 3, 2, 2, 0, 3, 3, 3, 3, 3, 1, 3, 3, 3, 0, 4, 4, 3, 4, 4, 1, 1, 4, 4, 2, 0, 3, 1, 0, 1, 1, 0, 4, 1, 0, 2, 3, 1, 3, 3, 1, 3, 4, 0, 3, 0, 1, 0, 3, 1, 3, 0, 0, 1, 0, 2, 0, 0, 4, 4), + (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + (0, 3, 0, 3, 0, 2, 0, 3, 0, 1, 5, 4, 3, 3, 3, 1, 4, 2, 1, 2, 3, 4, 4, 2, 4, 4, 5, 0, 3, 1, 4, 3, 4, 0, 4, 3, 3, 3, 2, 3, 2, 5, 3, 4, 3, 2, 2, 3, 0, 0, 3, 0, 2, 1, 0, 1, 2, 0, 0, 0, 0, 2, 1, 1, 3, 1, 0, 2, 0, 4, 0, 3, 4, 4, 4, 5, 2, 0, 2, 0, 0, 1, 3), + (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 4, 2, 1, 1, 0, 1, 0, 3, 2, 0, 0, 3, 1, 1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 1, 0, 0, 0, 2, 0, 0, 0, 1, 4, 0, 4, 2, 1, 0, 0, 0, 0, 0, 1), + (0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 3, 1, 0, 0, 0, 2, 0, 2, 1, 0, 0, 1, 2, 1, 0, 1, 1, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 0, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2), + (0, 4, 0, 4, 0, 4, 0, 3, 0, 4, 4, 3, 4, 2, 4, 3, 2, 0, 4, 4, 4, 3, 5, 3, 5, 3, 3, 2, 4, 2, 4, 3, 4, 3, 1, 4, 0, 2, 3, 4, 4, 4, 3, 3, 3, 4, 4, 4, 3, 4, 1, 3, 4, 3, 2, 1, 2, 1, 3, 3, 3, 4, 4, 3, 3, 5, 0, 4, 0, 3, 0, 4, 3, 3, 3, 2, 1, 0, 3, 0, 0, 3, 3), + (0, 4, 0, 3, 0, 3, 0, 3, 0, 3, 5, 5, 3, 3, 3, 3, 4, 3, 4, 3, 3, 3, 4, 4, 4, 3, 3, 3, 3, 4, 3, 5, 3, 3, 1, 3, 2, 4, 5, 5, 5, 5, 4, 3, 4, 5, 5, 3, 2, 2, 3, 3, 3, 3, 2, 3, 3, 1, 2, 3, 2, 4, 3, 3, 3, 4, 0, 4, 0, 2, 0, 4, 3, 2, 2, 1, 2, 0, 3, 0, 0, 4, 1), ) +# fmt: on -class JapaneseContextAnalysis(object): + +class JapaneseContextAnalysis: NUM_OF_CATEGORY = 6 DONT_KNOW = -1 ENOUGH_REL_THRESHOLD = 100 MAX_REL_THRESHOLD = 1000 MINIMUM_DATA_THRESHOLD = 4 - def __init__(self): - self._total_rel = None - self._rel_sample = None - self._need_to_skip_char_num = None - self._last_char_order = None - self._done = None + def __init__(self) -> None: + self._total_rel = 0 + self._rel_sample: List[int] = [] + self._need_to_skip_char_num = 0 + self._last_char_order = -1 + self._done = False self.reset() - def reset(self): + def reset(self) -> None: self._total_rel = 0 # total sequence received # category counters, each integer counts sequence in its category self._rel_sample = [0] * self.NUM_OF_CATEGORY @@ -140,7 +144,7 @@ class JapaneseContextAnalysis(object): # been made self._done = False - def feed(self, byte_str, num_bytes): + def feed(self, byte_str: Union[bytes, bytearray], num_bytes: int) -> None: if self._done: return @@ -153,7 +157,7 @@ class JapaneseContextAnalysis(object): # this character will simply our logic and improve performance. i = self._need_to_skip_char_num while i < num_bytes: - order, char_len = self.get_order(byte_str[i:i + 2]) + order, char_len = self.get_order(byte_str[i : i + 2]) i += char_len if i > num_bytes: self._need_to_skip_char_num = i - num_bytes @@ -164,32 +168,34 @@ class JapaneseContextAnalysis(object): if self._total_rel > self.MAX_REL_THRESHOLD: self._done = True break - self._rel_sample[jp2CharContext[self._last_char_order][order]] += 1 + self._rel_sample[ + jp2_char_context[self._last_char_order][order] + ] += 1 self._last_char_order = order - def got_enough_data(self): + def got_enough_data(self) -> bool: return self._total_rel > self.ENOUGH_REL_THRESHOLD - def get_confidence(self): + def get_confidence(self) -> float: # This is just one way to calculate confidence. It works well for me. if self._total_rel > self.MINIMUM_DATA_THRESHOLD: return (self._total_rel - self._rel_sample[0]) / self._total_rel - else: - return self.DONT_KNOW + return self.DONT_KNOW - def get_order(self, byte_str): + def get_order(self, _: Union[bytes, bytearray]) -> Tuple[int, int]: return -1, 1 + class SJISContextAnalysis(JapaneseContextAnalysis): - def __init__(self): - super(SJISContextAnalysis, self).__init__() + def __init__(self) -> None: + super().__init__() self._charset_name = "SHIFT_JIS" @property - def charset_name(self): + def charset_name(self) -> str: return self._charset_name - def get_order(self, byte_str): + def get_order(self, byte_str: Union[bytes, bytearray]) -> Tuple[int, int]: if not byte_str: return -1, 1 # find out current char's byte length @@ -209,8 +215,9 @@ class SJISContextAnalysis(JapaneseContextAnalysis): return -1, char_len + class EUCJPContextAnalysis(JapaneseContextAnalysis): - def get_order(self, byte_str): + def get_order(self, byte_str: Union[bytes, bytearray]) -> Tuple[int, int]: if not byte_str: return -1, 1 # find out current char's byte length @@ -229,5 +236,3 @@ class EUCJPContextAnalysis(JapaneseContextAnalysis): return second_char - 0xA1, char_len return -1, char_len - - diff --git a/libs/common/chardet/langbulgarianmodel.py b/libs/common/chardet/langbulgarianmodel.py index 561bfd90..2f771bb8 100644 --- a/libs/common/chardet/langbulgarianmodel.py +++ b/libs/common/chardet/langbulgarianmodel.py @@ -1,9 +1,5 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - from chardet.sbcharsetprober import SingleByteCharSetModel - # 3: Positive # 2: Likely # 1: Unlikely @@ -4115,536 +4111,539 @@ BULGARIAN_LANG_MODEL = { # Character Mapping Table(s): ISO_8859_5_BULGARIAN_CHAR_TO_ORDER = { - 0: 255, # '\x00' - 1: 255, # '\x01' - 2: 255, # '\x02' - 3: 255, # '\x03' - 4: 255, # '\x04' - 5: 255, # '\x05' - 6: 255, # '\x06' - 7: 255, # '\x07' - 8: 255, # '\x08' - 9: 255, # '\t' - 10: 254, # '\n' - 11: 255, # '\x0b' - 12: 255, # '\x0c' - 13: 254, # '\r' - 14: 255, # '\x0e' - 15: 255, # '\x0f' - 16: 255, # '\x10' - 17: 255, # '\x11' - 18: 255, # '\x12' - 19: 255, # '\x13' - 20: 255, # '\x14' - 21: 255, # '\x15' - 22: 255, # '\x16' - 23: 255, # '\x17' - 24: 255, # '\x18' - 25: 255, # '\x19' - 26: 255, # '\x1a' - 27: 255, # '\x1b' - 28: 255, # '\x1c' - 29: 255, # '\x1d' - 30: 255, # '\x1e' - 31: 255, # '\x1f' - 32: 253, # ' ' - 33: 253, # '!' - 34: 253, # '"' - 35: 253, # '#' - 36: 253, # '$' - 37: 253, # '%' - 38: 253, # '&' - 39: 253, # "'" - 40: 253, # '(' - 41: 253, # ')' - 42: 253, # '*' - 43: 253, # '+' - 44: 253, # ',' - 45: 253, # '-' - 46: 253, # '.' - 47: 253, # '/' - 48: 252, # '0' - 49: 252, # '1' - 50: 252, # '2' - 51: 252, # '3' - 52: 252, # '4' - 53: 252, # '5' - 54: 252, # '6' - 55: 252, # '7' - 56: 252, # '8' - 57: 252, # '9' - 58: 253, # ':' - 59: 253, # ';' - 60: 253, # '<' - 61: 253, # '=' - 62: 253, # '>' - 63: 253, # '?' - 64: 253, # '@' - 65: 77, # 'A' - 66: 90, # 'B' - 67: 99, # 'C' - 68: 100, # 'D' - 69: 72, # 'E' - 70: 109, # 'F' - 71: 107, # 'G' - 72: 101, # 'H' - 73: 79, # 'I' - 74: 185, # 'J' - 75: 81, # 'K' - 76: 102, # 'L' - 77: 76, # 'M' - 78: 94, # 'N' - 79: 82, # 'O' - 80: 110, # 'P' - 81: 186, # 'Q' - 82: 108, # 'R' - 83: 91, # 'S' - 84: 74, # 'T' - 85: 119, # 'U' - 86: 84, # 'V' - 87: 96, # 'W' - 88: 111, # 'X' - 89: 187, # 'Y' - 90: 115, # 'Z' - 91: 253, # '[' - 92: 253, # '\\' - 93: 253, # ']' - 94: 253, # '^' - 95: 253, # '_' - 96: 253, # '`' - 97: 65, # 'a' - 98: 69, # 'b' - 99: 70, # 'c' - 100: 66, # 'd' - 101: 63, # 'e' - 102: 68, # 'f' - 103: 112, # 'g' - 104: 103, # 'h' - 105: 92, # 'i' - 106: 194, # 'j' - 107: 104, # 'k' - 108: 95, # 'l' - 109: 86, # 'm' - 110: 87, # 'n' - 111: 71, # 'o' - 112: 116, # 'p' - 113: 195, # 'q' - 114: 85, # 'r' - 115: 93, # 's' - 116: 97, # 't' - 117: 113, # 'u' - 118: 196, # 'v' - 119: 197, # 'w' - 120: 198, # 'x' - 121: 199, # 'y' - 122: 200, # 'z' - 123: 253, # '{' - 124: 253, # '|' - 125: 253, # '}' - 126: 253, # '~' - 127: 253, # '\x7f' - 128: 194, # '\x80' - 129: 195, # '\x81' - 130: 196, # '\x82' - 131: 197, # '\x83' - 132: 198, # '\x84' - 133: 199, # '\x85' - 134: 200, # '\x86' - 135: 201, # '\x87' - 136: 202, # '\x88' - 137: 203, # '\x89' - 138: 204, # '\x8a' - 139: 205, # '\x8b' - 140: 206, # '\x8c' - 141: 207, # '\x8d' - 142: 208, # '\x8e' - 143: 209, # '\x8f' - 144: 210, # '\x90' - 145: 211, # '\x91' - 146: 212, # '\x92' - 147: 213, # '\x93' - 148: 214, # '\x94' - 149: 215, # '\x95' - 150: 216, # '\x96' - 151: 217, # '\x97' - 152: 218, # '\x98' - 153: 219, # '\x99' - 154: 220, # '\x9a' - 155: 221, # '\x9b' - 156: 222, # '\x9c' - 157: 223, # '\x9d' - 158: 224, # '\x9e' - 159: 225, # '\x9f' - 160: 81, # '\xa0' - 161: 226, # 'Ё' - 162: 227, # 'Ђ' - 163: 228, # 'Ѓ' - 164: 229, # 'Є' - 165: 230, # 'Ѕ' - 166: 105, # 'І' - 167: 231, # 'Ї' - 168: 232, # 'Ј' - 169: 233, # 'Љ' - 170: 234, # 'Њ' - 171: 235, # 'Ћ' - 172: 236, # 'Ќ' - 173: 45, # '\xad' - 174: 237, # 'Ў' - 175: 238, # 'Џ' - 176: 31, # 'А' - 177: 32, # 'Б' - 178: 35, # 'В' - 179: 43, # 'Г' - 180: 37, # 'Д' - 181: 44, # 'Е' - 182: 55, # 'Ж' - 183: 47, # 'З' - 184: 40, # 'И' - 185: 59, # 'Й' - 186: 33, # 'К' - 187: 46, # 'Л' - 188: 38, # 'М' - 189: 36, # 'Н' - 190: 41, # 'О' - 191: 30, # 'П' - 192: 39, # 'Р' - 193: 28, # 'С' - 194: 34, # 'Т' - 195: 51, # 'У' - 196: 48, # 'Ф' - 197: 49, # 'Х' - 198: 53, # 'Ц' - 199: 50, # 'Ч' - 200: 54, # 'Ш' - 201: 57, # 'Щ' - 202: 61, # 'Ъ' - 203: 239, # 'Ы' - 204: 67, # 'Ь' - 205: 240, # 'Э' - 206: 60, # 'Ю' - 207: 56, # 'Я' - 208: 1, # 'а' - 209: 18, # 'б' - 210: 9, # 'в' - 211: 20, # 'г' - 212: 11, # 'д' - 213: 3, # 'е' - 214: 23, # 'ж' - 215: 15, # 'з' - 216: 2, # 'и' - 217: 26, # 'й' - 218: 12, # 'к' - 219: 10, # 'л' - 220: 14, # 'м' - 221: 6, # 'н' - 222: 4, # 'о' - 223: 13, # 'п' - 224: 7, # 'р' - 225: 8, # 'с' - 226: 5, # 'т' - 227: 19, # 'у' - 228: 29, # 'ф' - 229: 25, # 'х' - 230: 22, # 'ц' - 231: 21, # 'ч' - 232: 27, # 'ш' - 233: 24, # 'щ' - 234: 17, # 'ъ' - 235: 75, # 'ы' - 236: 52, # 'ь' - 237: 241, # 'э' - 238: 42, # 'ю' - 239: 16, # 'я' - 240: 62, # '№' - 241: 242, # 'ё' - 242: 243, # 'ђ' - 243: 244, # 'ѓ' - 244: 58, # 'є' - 245: 245, # 'ѕ' - 246: 98, # 'і' - 247: 246, # 'ї' - 248: 247, # 'ј' - 249: 248, # 'љ' - 250: 249, # 'њ' - 251: 250, # 'ћ' - 252: 251, # 'ќ' - 253: 91, # '§' - 254: 252, # 'ў' - 255: 253, # 'џ' + 0: 255, # '\x00' + 1: 255, # '\x01' + 2: 255, # '\x02' + 3: 255, # '\x03' + 4: 255, # '\x04' + 5: 255, # '\x05' + 6: 255, # '\x06' + 7: 255, # '\x07' + 8: 255, # '\x08' + 9: 255, # '\t' + 10: 254, # '\n' + 11: 255, # '\x0b' + 12: 255, # '\x0c' + 13: 254, # '\r' + 14: 255, # '\x0e' + 15: 255, # '\x0f' + 16: 255, # '\x10' + 17: 255, # '\x11' + 18: 255, # '\x12' + 19: 255, # '\x13' + 20: 255, # '\x14' + 21: 255, # '\x15' + 22: 255, # '\x16' + 23: 255, # '\x17' + 24: 255, # '\x18' + 25: 255, # '\x19' + 26: 255, # '\x1a' + 27: 255, # '\x1b' + 28: 255, # '\x1c' + 29: 255, # '\x1d' + 30: 255, # '\x1e' + 31: 255, # '\x1f' + 32: 253, # ' ' + 33: 253, # '!' + 34: 253, # '"' + 35: 253, # '#' + 36: 253, # '$' + 37: 253, # '%' + 38: 253, # '&' + 39: 253, # "'" + 40: 253, # '(' + 41: 253, # ')' + 42: 253, # '*' + 43: 253, # '+' + 44: 253, # ',' + 45: 253, # '-' + 46: 253, # '.' + 47: 253, # '/' + 48: 252, # '0' + 49: 252, # '1' + 50: 252, # '2' + 51: 252, # '3' + 52: 252, # '4' + 53: 252, # '5' + 54: 252, # '6' + 55: 252, # '7' + 56: 252, # '8' + 57: 252, # '9' + 58: 253, # ':' + 59: 253, # ';' + 60: 253, # '<' + 61: 253, # '=' + 62: 253, # '>' + 63: 253, # '?' + 64: 253, # '@' + 65: 77, # 'A' + 66: 90, # 'B' + 67: 99, # 'C' + 68: 100, # 'D' + 69: 72, # 'E' + 70: 109, # 'F' + 71: 107, # 'G' + 72: 101, # 'H' + 73: 79, # 'I' + 74: 185, # 'J' + 75: 81, # 'K' + 76: 102, # 'L' + 77: 76, # 'M' + 78: 94, # 'N' + 79: 82, # 'O' + 80: 110, # 'P' + 81: 186, # 'Q' + 82: 108, # 'R' + 83: 91, # 'S' + 84: 74, # 'T' + 85: 119, # 'U' + 86: 84, # 'V' + 87: 96, # 'W' + 88: 111, # 'X' + 89: 187, # 'Y' + 90: 115, # 'Z' + 91: 253, # '[' + 92: 253, # '\\' + 93: 253, # ']' + 94: 253, # '^' + 95: 253, # '_' + 96: 253, # '`' + 97: 65, # 'a' + 98: 69, # 'b' + 99: 70, # 'c' + 100: 66, # 'd' + 101: 63, # 'e' + 102: 68, # 'f' + 103: 112, # 'g' + 104: 103, # 'h' + 105: 92, # 'i' + 106: 194, # 'j' + 107: 104, # 'k' + 108: 95, # 'l' + 109: 86, # 'm' + 110: 87, # 'n' + 111: 71, # 'o' + 112: 116, # 'p' + 113: 195, # 'q' + 114: 85, # 'r' + 115: 93, # 's' + 116: 97, # 't' + 117: 113, # 'u' + 118: 196, # 'v' + 119: 197, # 'w' + 120: 198, # 'x' + 121: 199, # 'y' + 122: 200, # 'z' + 123: 253, # '{' + 124: 253, # '|' + 125: 253, # '}' + 126: 253, # '~' + 127: 253, # '\x7f' + 128: 194, # '\x80' + 129: 195, # '\x81' + 130: 196, # '\x82' + 131: 197, # '\x83' + 132: 198, # '\x84' + 133: 199, # '\x85' + 134: 200, # '\x86' + 135: 201, # '\x87' + 136: 202, # '\x88' + 137: 203, # '\x89' + 138: 204, # '\x8a' + 139: 205, # '\x8b' + 140: 206, # '\x8c' + 141: 207, # '\x8d' + 142: 208, # '\x8e' + 143: 209, # '\x8f' + 144: 210, # '\x90' + 145: 211, # '\x91' + 146: 212, # '\x92' + 147: 213, # '\x93' + 148: 214, # '\x94' + 149: 215, # '\x95' + 150: 216, # '\x96' + 151: 217, # '\x97' + 152: 218, # '\x98' + 153: 219, # '\x99' + 154: 220, # '\x9a' + 155: 221, # '\x9b' + 156: 222, # '\x9c' + 157: 223, # '\x9d' + 158: 224, # '\x9e' + 159: 225, # '\x9f' + 160: 81, # '\xa0' + 161: 226, # 'Ё' + 162: 227, # 'Ђ' + 163: 228, # 'Ѓ' + 164: 229, # 'Є' + 165: 230, # 'Ѕ' + 166: 105, # 'І' + 167: 231, # 'Ї' + 168: 232, # 'Ј' + 169: 233, # 'Љ' + 170: 234, # 'Њ' + 171: 235, # 'Ћ' + 172: 236, # 'Ќ' + 173: 45, # '\xad' + 174: 237, # 'Ў' + 175: 238, # 'Џ' + 176: 31, # 'А' + 177: 32, # 'Б' + 178: 35, # 'В' + 179: 43, # 'Г' + 180: 37, # 'Д' + 181: 44, # 'Е' + 182: 55, # 'Ж' + 183: 47, # 'З' + 184: 40, # 'И' + 185: 59, # 'Й' + 186: 33, # 'К' + 187: 46, # 'Л' + 188: 38, # 'М' + 189: 36, # 'Н' + 190: 41, # 'О' + 191: 30, # 'П' + 192: 39, # 'Р' + 193: 28, # 'С' + 194: 34, # 'Т' + 195: 51, # 'У' + 196: 48, # 'Ф' + 197: 49, # 'Х' + 198: 53, # 'Ц' + 199: 50, # 'Ч' + 200: 54, # 'Ш' + 201: 57, # 'Щ' + 202: 61, # 'Ъ' + 203: 239, # 'Ы' + 204: 67, # 'Ь' + 205: 240, # 'Э' + 206: 60, # 'Ю' + 207: 56, # 'Я' + 208: 1, # 'а' + 209: 18, # 'б' + 210: 9, # 'в' + 211: 20, # 'г' + 212: 11, # 'д' + 213: 3, # 'е' + 214: 23, # 'ж' + 215: 15, # 'з' + 216: 2, # 'и' + 217: 26, # 'й' + 218: 12, # 'к' + 219: 10, # 'л' + 220: 14, # 'м' + 221: 6, # 'н' + 222: 4, # 'о' + 223: 13, # 'п' + 224: 7, # 'р' + 225: 8, # 'с' + 226: 5, # 'т' + 227: 19, # 'у' + 228: 29, # 'ф' + 229: 25, # 'х' + 230: 22, # 'ц' + 231: 21, # 'ч' + 232: 27, # 'ш' + 233: 24, # 'щ' + 234: 17, # 'ъ' + 235: 75, # 'ы' + 236: 52, # 'ь' + 237: 241, # 'э' + 238: 42, # 'ю' + 239: 16, # 'я' + 240: 62, # '№' + 241: 242, # 'ё' + 242: 243, # 'ђ' + 243: 244, # 'ѓ' + 244: 58, # 'є' + 245: 245, # 'ѕ' + 246: 98, # 'і' + 247: 246, # 'ї' + 248: 247, # 'ј' + 249: 248, # 'љ' + 250: 249, # 'њ' + 251: 250, # 'ћ' + 252: 251, # 'ќ' + 253: 91, # '§' + 254: 252, # 'ў' + 255: 253, # 'џ' } -ISO_8859_5_BULGARIAN_MODEL = SingleByteCharSetModel(charset_name='ISO-8859-5', - language='Bulgarian', - char_to_order_map=ISO_8859_5_BULGARIAN_CHAR_TO_ORDER, - language_model=BULGARIAN_LANG_MODEL, - typical_positive_ratio=0.969392, - keep_ascii_letters=False, - alphabet='АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЬЮЯабвгдежзийклмнопрстуфхцчшщъьюя') +ISO_8859_5_BULGARIAN_MODEL = SingleByteCharSetModel( + charset_name="ISO-8859-5", + language="Bulgarian", + char_to_order_map=ISO_8859_5_BULGARIAN_CHAR_TO_ORDER, + language_model=BULGARIAN_LANG_MODEL, + typical_positive_ratio=0.969392, + keep_ascii_letters=False, + alphabet="АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЬЮЯабвгдежзийклмнопрстуфхцчшщъьюя", +) WINDOWS_1251_BULGARIAN_CHAR_TO_ORDER = { - 0: 255, # '\x00' - 1: 255, # '\x01' - 2: 255, # '\x02' - 3: 255, # '\x03' - 4: 255, # '\x04' - 5: 255, # '\x05' - 6: 255, # '\x06' - 7: 255, # '\x07' - 8: 255, # '\x08' - 9: 255, # '\t' - 10: 254, # '\n' - 11: 255, # '\x0b' - 12: 255, # '\x0c' - 13: 254, # '\r' - 14: 255, # '\x0e' - 15: 255, # '\x0f' - 16: 255, # '\x10' - 17: 255, # '\x11' - 18: 255, # '\x12' - 19: 255, # '\x13' - 20: 255, # '\x14' - 21: 255, # '\x15' - 22: 255, # '\x16' - 23: 255, # '\x17' - 24: 255, # '\x18' - 25: 255, # '\x19' - 26: 255, # '\x1a' - 27: 255, # '\x1b' - 28: 255, # '\x1c' - 29: 255, # '\x1d' - 30: 255, # '\x1e' - 31: 255, # '\x1f' - 32: 253, # ' ' - 33: 253, # '!' - 34: 253, # '"' - 35: 253, # '#' - 36: 253, # '$' - 37: 253, # '%' - 38: 253, # '&' - 39: 253, # "'" - 40: 253, # '(' - 41: 253, # ')' - 42: 253, # '*' - 43: 253, # '+' - 44: 253, # ',' - 45: 253, # '-' - 46: 253, # '.' - 47: 253, # '/' - 48: 252, # '0' - 49: 252, # '1' - 50: 252, # '2' - 51: 252, # '3' - 52: 252, # '4' - 53: 252, # '5' - 54: 252, # '6' - 55: 252, # '7' - 56: 252, # '8' - 57: 252, # '9' - 58: 253, # ':' - 59: 253, # ';' - 60: 253, # '<' - 61: 253, # '=' - 62: 253, # '>' - 63: 253, # '?' - 64: 253, # '@' - 65: 77, # 'A' - 66: 90, # 'B' - 67: 99, # 'C' - 68: 100, # 'D' - 69: 72, # 'E' - 70: 109, # 'F' - 71: 107, # 'G' - 72: 101, # 'H' - 73: 79, # 'I' - 74: 185, # 'J' - 75: 81, # 'K' - 76: 102, # 'L' - 77: 76, # 'M' - 78: 94, # 'N' - 79: 82, # 'O' - 80: 110, # 'P' - 81: 186, # 'Q' - 82: 108, # 'R' - 83: 91, # 'S' - 84: 74, # 'T' - 85: 119, # 'U' - 86: 84, # 'V' - 87: 96, # 'W' - 88: 111, # 'X' - 89: 187, # 'Y' - 90: 115, # 'Z' - 91: 253, # '[' - 92: 253, # '\\' - 93: 253, # ']' - 94: 253, # '^' - 95: 253, # '_' - 96: 253, # '`' - 97: 65, # 'a' - 98: 69, # 'b' - 99: 70, # 'c' - 100: 66, # 'd' - 101: 63, # 'e' - 102: 68, # 'f' - 103: 112, # 'g' - 104: 103, # 'h' - 105: 92, # 'i' - 106: 194, # 'j' - 107: 104, # 'k' - 108: 95, # 'l' - 109: 86, # 'm' - 110: 87, # 'n' - 111: 71, # 'o' - 112: 116, # 'p' - 113: 195, # 'q' - 114: 85, # 'r' - 115: 93, # 's' - 116: 97, # 't' - 117: 113, # 'u' - 118: 196, # 'v' - 119: 197, # 'w' - 120: 198, # 'x' - 121: 199, # 'y' - 122: 200, # 'z' - 123: 253, # '{' - 124: 253, # '|' - 125: 253, # '}' - 126: 253, # '~' - 127: 253, # '\x7f' - 128: 206, # 'Ђ' - 129: 207, # 'Ѓ' - 130: 208, # '‚' - 131: 209, # 'ѓ' - 132: 210, # '„' - 133: 211, # '…' - 134: 212, # '†' - 135: 213, # '‡' - 136: 120, # '€' - 137: 214, # '‰' - 138: 215, # 'Љ' - 139: 216, # '‹' - 140: 217, # 'Њ' - 141: 218, # 'Ќ' - 142: 219, # 'Ћ' - 143: 220, # 'Џ' - 144: 221, # 'ђ' - 145: 78, # '‘' - 146: 64, # '’' - 147: 83, # '“' - 148: 121, # '”' - 149: 98, # '•' - 150: 117, # '–' - 151: 105, # '—' - 152: 222, # None - 153: 223, # '™' - 154: 224, # 'љ' - 155: 225, # '›' - 156: 226, # 'њ' - 157: 227, # 'ќ' - 158: 228, # 'ћ' - 159: 229, # 'џ' - 160: 88, # '\xa0' - 161: 230, # 'Ў' - 162: 231, # 'ў' - 163: 232, # 'Ј' - 164: 233, # '¤' - 165: 122, # 'Ґ' - 166: 89, # '¦' - 167: 106, # '§' - 168: 234, # 'Ё' - 169: 235, # '©' - 170: 236, # 'Є' - 171: 237, # '«' - 172: 238, # '¬' - 173: 45, # '\xad' - 174: 239, # '®' - 175: 240, # 'Ї' - 176: 73, # '°' - 177: 80, # '±' - 178: 118, # 'І' - 179: 114, # 'і' - 180: 241, # 'ґ' - 181: 242, # 'µ' - 182: 243, # '¶' - 183: 244, # '·' - 184: 245, # 'ё' - 185: 62, # '№' - 186: 58, # 'є' - 187: 246, # '»' - 188: 247, # 'ј' - 189: 248, # 'Ѕ' - 190: 249, # 'ѕ' - 191: 250, # 'ї' - 192: 31, # 'А' - 193: 32, # 'Б' - 194: 35, # 'В' - 195: 43, # 'Г' - 196: 37, # 'Д' - 197: 44, # 'Е' - 198: 55, # 'Ж' - 199: 47, # 'З' - 200: 40, # 'И' - 201: 59, # 'Й' - 202: 33, # 'К' - 203: 46, # 'Л' - 204: 38, # 'М' - 205: 36, # 'Н' - 206: 41, # 'О' - 207: 30, # 'П' - 208: 39, # 'Р' - 209: 28, # 'С' - 210: 34, # 'Т' - 211: 51, # 'У' - 212: 48, # 'Ф' - 213: 49, # 'Х' - 214: 53, # 'Ц' - 215: 50, # 'Ч' - 216: 54, # 'Ш' - 217: 57, # 'Щ' - 218: 61, # 'Ъ' - 219: 251, # 'Ы' - 220: 67, # 'Ь' - 221: 252, # 'Э' - 222: 60, # 'Ю' - 223: 56, # 'Я' - 224: 1, # 'а' - 225: 18, # 'б' - 226: 9, # 'в' - 227: 20, # 'г' - 228: 11, # 'д' - 229: 3, # 'е' - 230: 23, # 'ж' - 231: 15, # 'з' - 232: 2, # 'и' - 233: 26, # 'й' - 234: 12, # 'к' - 235: 10, # 'л' - 236: 14, # 'м' - 237: 6, # 'н' - 238: 4, # 'о' - 239: 13, # 'п' - 240: 7, # 'р' - 241: 8, # 'с' - 242: 5, # 'т' - 243: 19, # 'у' - 244: 29, # 'ф' - 245: 25, # 'х' - 246: 22, # 'ц' - 247: 21, # 'ч' - 248: 27, # 'ш' - 249: 24, # 'щ' - 250: 17, # 'ъ' - 251: 75, # 'ы' - 252: 52, # 'ь' - 253: 253, # 'э' - 254: 42, # 'ю' - 255: 16, # 'я' + 0: 255, # '\x00' + 1: 255, # '\x01' + 2: 255, # '\x02' + 3: 255, # '\x03' + 4: 255, # '\x04' + 5: 255, # '\x05' + 6: 255, # '\x06' + 7: 255, # '\x07' + 8: 255, # '\x08' + 9: 255, # '\t' + 10: 254, # '\n' + 11: 255, # '\x0b' + 12: 255, # '\x0c' + 13: 254, # '\r' + 14: 255, # '\x0e' + 15: 255, # '\x0f' + 16: 255, # '\x10' + 17: 255, # '\x11' + 18: 255, # '\x12' + 19: 255, # '\x13' + 20: 255, # '\x14' + 21: 255, # '\x15' + 22: 255, # '\x16' + 23: 255, # '\x17' + 24: 255, # '\x18' + 25: 255, # '\x19' + 26: 255, # '\x1a' + 27: 255, # '\x1b' + 28: 255, # '\x1c' + 29: 255, # '\x1d' + 30: 255, # '\x1e' + 31: 255, # '\x1f' + 32: 253, # ' ' + 33: 253, # '!' + 34: 253, # '"' + 35: 253, # '#' + 36: 253, # '$' + 37: 253, # '%' + 38: 253, # '&' + 39: 253, # "'" + 40: 253, # '(' + 41: 253, # ')' + 42: 253, # '*' + 43: 253, # '+' + 44: 253, # ',' + 45: 253, # '-' + 46: 253, # '.' + 47: 253, # '/' + 48: 252, # '0' + 49: 252, # '1' + 50: 252, # '2' + 51: 252, # '3' + 52: 252, # '4' + 53: 252, # '5' + 54: 252, # '6' + 55: 252, # '7' + 56: 252, # '8' + 57: 252, # '9' + 58: 253, # ':' + 59: 253, # ';' + 60: 253, # '<' + 61: 253, # '=' + 62: 253, # '>' + 63: 253, # '?' + 64: 253, # '@' + 65: 77, # 'A' + 66: 90, # 'B' + 67: 99, # 'C' + 68: 100, # 'D' + 69: 72, # 'E' + 70: 109, # 'F' + 71: 107, # 'G' + 72: 101, # 'H' + 73: 79, # 'I' + 74: 185, # 'J' + 75: 81, # 'K' + 76: 102, # 'L' + 77: 76, # 'M' + 78: 94, # 'N' + 79: 82, # 'O' + 80: 110, # 'P' + 81: 186, # 'Q' + 82: 108, # 'R' + 83: 91, # 'S' + 84: 74, # 'T' + 85: 119, # 'U' + 86: 84, # 'V' + 87: 96, # 'W' + 88: 111, # 'X' + 89: 187, # 'Y' + 90: 115, # 'Z' + 91: 253, # '[' + 92: 253, # '\\' + 93: 253, # ']' + 94: 253, # '^' + 95: 253, # '_' + 96: 253, # '`' + 97: 65, # 'a' + 98: 69, # 'b' + 99: 70, # 'c' + 100: 66, # 'd' + 101: 63, # 'e' + 102: 68, # 'f' + 103: 112, # 'g' + 104: 103, # 'h' + 105: 92, # 'i' + 106: 194, # 'j' + 107: 104, # 'k' + 108: 95, # 'l' + 109: 86, # 'm' + 110: 87, # 'n' + 111: 71, # 'o' + 112: 116, # 'p' + 113: 195, # 'q' + 114: 85, # 'r' + 115: 93, # 's' + 116: 97, # 't' + 117: 113, # 'u' + 118: 196, # 'v' + 119: 197, # 'w' + 120: 198, # 'x' + 121: 199, # 'y' + 122: 200, # 'z' + 123: 253, # '{' + 124: 253, # '|' + 125: 253, # '}' + 126: 253, # '~' + 127: 253, # '\x7f' + 128: 206, # 'Ђ' + 129: 207, # 'Ѓ' + 130: 208, # '‚' + 131: 209, # 'ѓ' + 132: 210, # '„' + 133: 211, # '…' + 134: 212, # '†' + 135: 213, # '‡' + 136: 120, # '€' + 137: 214, # '‰' + 138: 215, # 'Љ' + 139: 216, # '‹' + 140: 217, # 'Њ' + 141: 218, # 'Ќ' + 142: 219, # 'Ћ' + 143: 220, # 'Џ' + 144: 221, # 'ђ' + 145: 78, # '‘' + 146: 64, # '’' + 147: 83, # '“' + 148: 121, # '”' + 149: 98, # '•' + 150: 117, # '–' + 151: 105, # '—' + 152: 222, # None + 153: 223, # '™' + 154: 224, # 'љ' + 155: 225, # '›' + 156: 226, # 'њ' + 157: 227, # 'ќ' + 158: 228, # 'ћ' + 159: 229, # 'џ' + 160: 88, # '\xa0' + 161: 230, # 'Ў' + 162: 231, # 'ў' + 163: 232, # 'Ј' + 164: 233, # '¤' + 165: 122, # 'Ґ' + 166: 89, # '¦' + 167: 106, # '§' + 168: 234, # 'Ё' + 169: 235, # '©' + 170: 236, # 'Є' + 171: 237, # '«' + 172: 238, # '¬' + 173: 45, # '\xad' + 174: 239, # '®' + 175: 240, # 'Ї' + 176: 73, # '°' + 177: 80, # '±' + 178: 118, # 'І' + 179: 114, # 'і' + 180: 241, # 'ґ' + 181: 242, # 'µ' + 182: 243, # '¶' + 183: 244, # '·' + 184: 245, # 'ё' + 185: 62, # '№' + 186: 58, # 'є' + 187: 246, # '»' + 188: 247, # 'ј' + 189: 248, # 'Ѕ' + 190: 249, # 'ѕ' + 191: 250, # 'ї' + 192: 31, # 'А' + 193: 32, # 'Б' + 194: 35, # 'В' + 195: 43, # 'Г' + 196: 37, # 'Д' + 197: 44, # 'Е' + 198: 55, # 'Ж' + 199: 47, # 'З' + 200: 40, # 'И' + 201: 59, # 'Й' + 202: 33, # 'К' + 203: 46, # 'Л' + 204: 38, # 'М' + 205: 36, # 'Н' + 206: 41, # 'О' + 207: 30, # 'П' + 208: 39, # 'Р' + 209: 28, # 'С' + 210: 34, # 'Т' + 211: 51, # 'У' + 212: 48, # 'Ф' + 213: 49, # 'Х' + 214: 53, # 'Ц' + 215: 50, # 'Ч' + 216: 54, # 'Ш' + 217: 57, # 'Щ' + 218: 61, # 'Ъ' + 219: 251, # 'Ы' + 220: 67, # 'Ь' + 221: 252, # 'Э' + 222: 60, # 'Ю' + 223: 56, # 'Я' + 224: 1, # 'а' + 225: 18, # 'б' + 226: 9, # 'в' + 227: 20, # 'г' + 228: 11, # 'д' + 229: 3, # 'е' + 230: 23, # 'ж' + 231: 15, # 'з' + 232: 2, # 'и' + 233: 26, # 'й' + 234: 12, # 'к' + 235: 10, # 'л' + 236: 14, # 'м' + 237: 6, # 'н' + 238: 4, # 'о' + 239: 13, # 'п' + 240: 7, # 'р' + 241: 8, # 'с' + 242: 5, # 'т' + 243: 19, # 'у' + 244: 29, # 'ф' + 245: 25, # 'х' + 246: 22, # 'ц' + 247: 21, # 'ч' + 248: 27, # 'ш' + 249: 24, # 'щ' + 250: 17, # 'ъ' + 251: 75, # 'ы' + 252: 52, # 'ь' + 253: 253, # 'э' + 254: 42, # 'ю' + 255: 16, # 'я' } -WINDOWS_1251_BULGARIAN_MODEL = SingleByteCharSetModel(charset_name='windows-1251', - language='Bulgarian', - char_to_order_map=WINDOWS_1251_BULGARIAN_CHAR_TO_ORDER, - language_model=BULGARIAN_LANG_MODEL, - typical_positive_ratio=0.969392, - keep_ascii_letters=False, - alphabet='АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЬЮЯабвгдежзийклмнопрстуфхцчшщъьюя') - +WINDOWS_1251_BULGARIAN_MODEL = SingleByteCharSetModel( + charset_name="windows-1251", + language="Bulgarian", + char_to_order_map=WINDOWS_1251_BULGARIAN_CHAR_TO_ORDER, + language_model=BULGARIAN_LANG_MODEL, + typical_positive_ratio=0.969392, + keep_ascii_letters=False, + alphabet="АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЬЮЯабвгдежзийклмнопрстуфхцчшщъьюя", +) diff --git a/libs/common/chardet/langgreekmodel.py b/libs/common/chardet/langgreekmodel.py index 02b94de6..0471d8bb 100644 --- a/libs/common/chardet/langgreekmodel.py +++ b/libs/common/chardet/langgreekmodel.py @@ -1,9 +1,5 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - from chardet.sbcharsetprober import SingleByteCharSetModel - # 3: Positive # 2: Likely # 1: Unlikely @@ -3863,536 +3859,539 @@ GREEK_LANG_MODEL = { # Character Mapping Table(s): WINDOWS_1253_GREEK_CHAR_TO_ORDER = { - 0: 255, # '\x00' - 1: 255, # '\x01' - 2: 255, # '\x02' - 3: 255, # '\x03' - 4: 255, # '\x04' - 5: 255, # '\x05' - 6: 255, # '\x06' - 7: 255, # '\x07' - 8: 255, # '\x08' - 9: 255, # '\t' - 10: 254, # '\n' - 11: 255, # '\x0b' - 12: 255, # '\x0c' - 13: 254, # '\r' - 14: 255, # '\x0e' - 15: 255, # '\x0f' - 16: 255, # '\x10' - 17: 255, # '\x11' - 18: 255, # '\x12' - 19: 255, # '\x13' - 20: 255, # '\x14' - 21: 255, # '\x15' - 22: 255, # '\x16' - 23: 255, # '\x17' - 24: 255, # '\x18' - 25: 255, # '\x19' - 26: 255, # '\x1a' - 27: 255, # '\x1b' - 28: 255, # '\x1c' - 29: 255, # '\x1d' - 30: 255, # '\x1e' - 31: 255, # '\x1f' - 32: 253, # ' ' - 33: 253, # '!' - 34: 253, # '"' - 35: 253, # '#' - 36: 253, # '$' - 37: 253, # '%' - 38: 253, # '&' - 39: 253, # "'" - 40: 253, # '(' - 41: 253, # ')' - 42: 253, # '*' - 43: 253, # '+' - 44: 253, # ',' - 45: 253, # '-' - 46: 253, # '.' - 47: 253, # '/' - 48: 252, # '0' - 49: 252, # '1' - 50: 252, # '2' - 51: 252, # '3' - 52: 252, # '4' - 53: 252, # '5' - 54: 252, # '6' - 55: 252, # '7' - 56: 252, # '8' - 57: 252, # '9' - 58: 253, # ':' - 59: 253, # ';' - 60: 253, # '<' - 61: 253, # '=' - 62: 253, # '>' - 63: 253, # '?' - 64: 253, # '@' - 65: 82, # 'A' - 66: 100, # 'B' - 67: 104, # 'C' - 68: 94, # 'D' - 69: 98, # 'E' - 70: 101, # 'F' - 71: 116, # 'G' - 72: 102, # 'H' - 73: 111, # 'I' - 74: 187, # 'J' - 75: 117, # 'K' - 76: 92, # 'L' - 77: 88, # 'M' - 78: 113, # 'N' - 79: 85, # 'O' - 80: 79, # 'P' - 81: 118, # 'Q' - 82: 105, # 'R' - 83: 83, # 'S' - 84: 67, # 'T' - 85: 114, # 'U' - 86: 119, # 'V' - 87: 95, # 'W' - 88: 99, # 'X' - 89: 109, # 'Y' - 90: 188, # 'Z' - 91: 253, # '[' - 92: 253, # '\\' - 93: 253, # ']' - 94: 253, # '^' - 95: 253, # '_' - 96: 253, # '`' - 97: 72, # 'a' - 98: 70, # 'b' - 99: 80, # 'c' - 100: 81, # 'd' - 101: 60, # 'e' - 102: 96, # 'f' - 103: 93, # 'g' - 104: 89, # 'h' - 105: 68, # 'i' - 106: 120, # 'j' - 107: 97, # 'k' - 108: 77, # 'l' - 109: 86, # 'm' - 110: 69, # 'n' - 111: 55, # 'o' - 112: 78, # 'p' - 113: 115, # 'q' - 114: 65, # 'r' - 115: 66, # 's' - 116: 58, # 't' - 117: 76, # 'u' - 118: 106, # 'v' - 119: 103, # 'w' - 120: 87, # 'x' - 121: 107, # 'y' - 122: 112, # 'z' - 123: 253, # '{' - 124: 253, # '|' - 125: 253, # '}' - 126: 253, # '~' - 127: 253, # '\x7f' - 128: 255, # '€' - 129: 255, # None - 130: 255, # '‚' - 131: 255, # 'ƒ' - 132: 255, # '„' - 133: 255, # '…' - 134: 255, # '†' - 135: 255, # '‡' - 136: 255, # None - 137: 255, # '‰' - 138: 255, # None - 139: 255, # '‹' - 140: 255, # None - 141: 255, # None - 142: 255, # None - 143: 255, # None - 144: 255, # None - 145: 255, # '‘' - 146: 255, # '’' - 147: 255, # '“' - 148: 255, # '”' - 149: 255, # '•' - 150: 255, # '–' - 151: 255, # '—' - 152: 255, # None - 153: 255, # '™' - 154: 255, # None - 155: 255, # '›' - 156: 255, # None - 157: 255, # None - 158: 255, # None - 159: 255, # None - 160: 253, # '\xa0' - 161: 233, # '΅' - 162: 61, # 'Ά' - 163: 253, # '£' - 164: 253, # '¤' - 165: 253, # '¥' - 166: 253, # '¦' - 167: 253, # '§' - 168: 253, # '¨' - 169: 253, # '©' - 170: 253, # None - 171: 253, # '«' - 172: 253, # '¬' - 173: 74, # '\xad' - 174: 253, # '®' - 175: 253, # '―' - 176: 253, # '°' - 177: 253, # '±' - 178: 253, # '²' - 179: 253, # '³' - 180: 247, # '΄' - 181: 253, # 'µ' - 182: 253, # '¶' - 183: 36, # '·' - 184: 46, # 'Έ' - 185: 71, # 'Ή' - 186: 73, # 'Ί' - 187: 253, # '»' - 188: 54, # 'Ό' - 189: 253, # '½' - 190: 108, # 'Ύ' - 191: 123, # 'Ώ' - 192: 110, # 'ΐ' - 193: 31, # 'Α' - 194: 51, # 'Β' - 195: 43, # 'Γ' - 196: 41, # 'Δ' - 197: 34, # 'Ε' - 198: 91, # 'Ζ' - 199: 40, # 'Η' - 200: 52, # 'Θ' - 201: 47, # 'Ι' - 202: 44, # 'Κ' - 203: 53, # 'Λ' - 204: 38, # 'Μ' - 205: 49, # 'Ν' - 206: 59, # 'Ξ' - 207: 39, # 'Ο' - 208: 35, # 'Π' - 209: 48, # 'Ρ' - 210: 250, # None - 211: 37, # 'Σ' - 212: 33, # 'Τ' - 213: 45, # 'Υ' - 214: 56, # 'Φ' - 215: 50, # 'Χ' - 216: 84, # 'Ψ' - 217: 57, # 'Ω' - 218: 120, # 'Ϊ' - 219: 121, # 'Ϋ' - 220: 17, # 'ά' - 221: 18, # 'έ' - 222: 22, # 'ή' - 223: 15, # 'ί' - 224: 124, # 'ΰ' - 225: 1, # 'α' - 226: 29, # 'β' - 227: 20, # 'γ' - 228: 21, # 'δ' - 229: 3, # 'ε' - 230: 32, # 'ζ' - 231: 13, # 'η' - 232: 25, # 'θ' - 233: 5, # 'ι' - 234: 11, # 'κ' - 235: 16, # 'λ' - 236: 10, # 'μ' - 237: 6, # 'ν' - 238: 30, # 'ξ' - 239: 4, # 'ο' - 240: 9, # 'π' - 241: 8, # 'ρ' - 242: 14, # 'ς' - 243: 7, # 'σ' - 244: 2, # 'τ' - 245: 12, # 'υ' - 246: 28, # 'φ' - 247: 23, # 'χ' - 248: 42, # 'ψ' - 249: 24, # 'ω' - 250: 64, # 'ϊ' - 251: 75, # 'ϋ' - 252: 19, # 'ό' - 253: 26, # 'ύ' - 254: 27, # 'ώ' - 255: 253, # None + 0: 255, # '\x00' + 1: 255, # '\x01' + 2: 255, # '\x02' + 3: 255, # '\x03' + 4: 255, # '\x04' + 5: 255, # '\x05' + 6: 255, # '\x06' + 7: 255, # '\x07' + 8: 255, # '\x08' + 9: 255, # '\t' + 10: 254, # '\n' + 11: 255, # '\x0b' + 12: 255, # '\x0c' + 13: 254, # '\r' + 14: 255, # '\x0e' + 15: 255, # '\x0f' + 16: 255, # '\x10' + 17: 255, # '\x11' + 18: 255, # '\x12' + 19: 255, # '\x13' + 20: 255, # '\x14' + 21: 255, # '\x15' + 22: 255, # '\x16' + 23: 255, # '\x17' + 24: 255, # '\x18' + 25: 255, # '\x19' + 26: 255, # '\x1a' + 27: 255, # '\x1b' + 28: 255, # '\x1c' + 29: 255, # '\x1d' + 30: 255, # '\x1e' + 31: 255, # '\x1f' + 32: 253, # ' ' + 33: 253, # '!' + 34: 253, # '"' + 35: 253, # '#' + 36: 253, # '$' + 37: 253, # '%' + 38: 253, # '&' + 39: 253, # "'" + 40: 253, # '(' + 41: 253, # ')' + 42: 253, # '*' + 43: 253, # '+' + 44: 253, # ',' + 45: 253, # '-' + 46: 253, # '.' + 47: 253, # '/' + 48: 252, # '0' + 49: 252, # '1' + 50: 252, # '2' + 51: 252, # '3' + 52: 252, # '4' + 53: 252, # '5' + 54: 252, # '6' + 55: 252, # '7' + 56: 252, # '8' + 57: 252, # '9' + 58: 253, # ':' + 59: 253, # ';' + 60: 253, # '<' + 61: 253, # '=' + 62: 253, # '>' + 63: 253, # '?' + 64: 253, # '@' + 65: 82, # 'A' + 66: 100, # 'B' + 67: 104, # 'C' + 68: 94, # 'D' + 69: 98, # 'E' + 70: 101, # 'F' + 71: 116, # 'G' + 72: 102, # 'H' + 73: 111, # 'I' + 74: 187, # 'J' + 75: 117, # 'K' + 76: 92, # 'L' + 77: 88, # 'M' + 78: 113, # 'N' + 79: 85, # 'O' + 80: 79, # 'P' + 81: 118, # 'Q' + 82: 105, # 'R' + 83: 83, # 'S' + 84: 67, # 'T' + 85: 114, # 'U' + 86: 119, # 'V' + 87: 95, # 'W' + 88: 99, # 'X' + 89: 109, # 'Y' + 90: 188, # 'Z' + 91: 253, # '[' + 92: 253, # '\\' + 93: 253, # ']' + 94: 253, # '^' + 95: 253, # '_' + 96: 253, # '`' + 97: 72, # 'a' + 98: 70, # 'b' + 99: 80, # 'c' + 100: 81, # 'd' + 101: 60, # 'e' + 102: 96, # 'f' + 103: 93, # 'g' + 104: 89, # 'h' + 105: 68, # 'i' + 106: 120, # 'j' + 107: 97, # 'k' + 108: 77, # 'l' + 109: 86, # 'm' + 110: 69, # 'n' + 111: 55, # 'o' + 112: 78, # 'p' + 113: 115, # 'q' + 114: 65, # 'r' + 115: 66, # 's' + 116: 58, # 't' + 117: 76, # 'u' + 118: 106, # 'v' + 119: 103, # 'w' + 120: 87, # 'x' + 121: 107, # 'y' + 122: 112, # 'z' + 123: 253, # '{' + 124: 253, # '|' + 125: 253, # '}' + 126: 253, # '~' + 127: 253, # '\x7f' + 128: 255, # '€' + 129: 255, # None + 130: 255, # '‚' + 131: 255, # 'ƒ' + 132: 255, # '„' + 133: 255, # '…' + 134: 255, # '†' + 135: 255, # '‡' + 136: 255, # None + 137: 255, # '‰' + 138: 255, # None + 139: 255, # '‹' + 140: 255, # None + 141: 255, # None + 142: 255, # None + 143: 255, # None + 144: 255, # None + 145: 255, # '‘' + 146: 255, # '’' + 147: 255, # '“' + 148: 255, # '”' + 149: 255, # '•' + 150: 255, # '–' + 151: 255, # '—' + 152: 255, # None + 153: 255, # '™' + 154: 255, # None + 155: 255, # '›' + 156: 255, # None + 157: 255, # None + 158: 255, # None + 159: 255, # None + 160: 253, # '\xa0' + 161: 233, # '΅' + 162: 61, # 'Ά' + 163: 253, # '£' + 164: 253, # '¤' + 165: 253, # '¥' + 166: 253, # '¦' + 167: 253, # '§' + 168: 253, # '¨' + 169: 253, # '©' + 170: 253, # None + 171: 253, # '«' + 172: 253, # '¬' + 173: 74, # '\xad' + 174: 253, # '®' + 175: 253, # '―' + 176: 253, # '°' + 177: 253, # '±' + 178: 253, # '²' + 179: 253, # '³' + 180: 247, # '΄' + 181: 253, # 'µ' + 182: 253, # '¶' + 183: 36, # '·' + 184: 46, # 'Έ' + 185: 71, # 'Ή' + 186: 73, # 'Ί' + 187: 253, # '»' + 188: 54, # 'Ό' + 189: 253, # '½' + 190: 108, # 'Ύ' + 191: 123, # 'Ώ' + 192: 110, # 'ΐ' + 193: 31, # 'Α' + 194: 51, # 'Β' + 195: 43, # 'Γ' + 196: 41, # 'Δ' + 197: 34, # 'Ε' + 198: 91, # 'Ζ' + 199: 40, # 'Η' + 200: 52, # 'Θ' + 201: 47, # 'Ι' + 202: 44, # 'Κ' + 203: 53, # 'Λ' + 204: 38, # 'Μ' + 205: 49, # 'Ν' + 206: 59, # 'Ξ' + 207: 39, # 'Ο' + 208: 35, # 'Π' + 209: 48, # 'Ρ' + 210: 250, # None + 211: 37, # 'Σ' + 212: 33, # 'Τ' + 213: 45, # 'Υ' + 214: 56, # 'Φ' + 215: 50, # 'Χ' + 216: 84, # 'Ψ' + 217: 57, # 'Ω' + 218: 120, # 'Ϊ' + 219: 121, # 'Ϋ' + 220: 17, # 'ά' + 221: 18, # 'έ' + 222: 22, # 'ή' + 223: 15, # 'ί' + 224: 124, # 'ΰ' + 225: 1, # 'α' + 226: 29, # 'β' + 227: 20, # 'γ' + 228: 21, # 'δ' + 229: 3, # 'ε' + 230: 32, # 'ζ' + 231: 13, # 'η' + 232: 25, # 'θ' + 233: 5, # 'ι' + 234: 11, # 'κ' + 235: 16, # 'λ' + 236: 10, # 'μ' + 237: 6, # 'ν' + 238: 30, # 'ξ' + 239: 4, # 'ο' + 240: 9, # 'π' + 241: 8, # 'ρ' + 242: 14, # 'ς' + 243: 7, # 'σ' + 244: 2, # 'τ' + 245: 12, # 'υ' + 246: 28, # 'φ' + 247: 23, # 'χ' + 248: 42, # 'ψ' + 249: 24, # 'ω' + 250: 64, # 'ϊ' + 251: 75, # 'ϋ' + 252: 19, # 'ό' + 253: 26, # 'ύ' + 254: 27, # 'ώ' + 255: 253, # None } -WINDOWS_1253_GREEK_MODEL = SingleByteCharSetModel(charset_name='windows-1253', - language='Greek', - char_to_order_map=WINDOWS_1253_GREEK_CHAR_TO_ORDER, - language_model=GREEK_LANG_MODEL, - typical_positive_ratio=0.982851, - keep_ascii_letters=False, - alphabet='ΆΈΉΊΌΎΏΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩάέήίαβγδεζηθικλμνξοπρςστυφχψωόύώ') +WINDOWS_1253_GREEK_MODEL = SingleByteCharSetModel( + charset_name="windows-1253", + language="Greek", + char_to_order_map=WINDOWS_1253_GREEK_CHAR_TO_ORDER, + language_model=GREEK_LANG_MODEL, + typical_positive_ratio=0.982851, + keep_ascii_letters=False, + alphabet="ΆΈΉΊΌΎΏΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩάέήίαβγδεζηθικλμνξοπρςστυφχψωόύώ", +) ISO_8859_7_GREEK_CHAR_TO_ORDER = { - 0: 255, # '\x00' - 1: 255, # '\x01' - 2: 255, # '\x02' - 3: 255, # '\x03' - 4: 255, # '\x04' - 5: 255, # '\x05' - 6: 255, # '\x06' - 7: 255, # '\x07' - 8: 255, # '\x08' - 9: 255, # '\t' - 10: 254, # '\n' - 11: 255, # '\x0b' - 12: 255, # '\x0c' - 13: 254, # '\r' - 14: 255, # '\x0e' - 15: 255, # '\x0f' - 16: 255, # '\x10' - 17: 255, # '\x11' - 18: 255, # '\x12' - 19: 255, # '\x13' - 20: 255, # '\x14' - 21: 255, # '\x15' - 22: 255, # '\x16' - 23: 255, # '\x17' - 24: 255, # '\x18' - 25: 255, # '\x19' - 26: 255, # '\x1a' - 27: 255, # '\x1b' - 28: 255, # '\x1c' - 29: 255, # '\x1d' - 30: 255, # '\x1e' - 31: 255, # '\x1f' - 32: 253, # ' ' - 33: 253, # '!' - 34: 253, # '"' - 35: 253, # '#' - 36: 253, # '$' - 37: 253, # '%' - 38: 253, # '&' - 39: 253, # "'" - 40: 253, # '(' - 41: 253, # ')' - 42: 253, # '*' - 43: 253, # '+' - 44: 253, # ',' - 45: 253, # '-' - 46: 253, # '.' - 47: 253, # '/' - 48: 252, # '0' - 49: 252, # '1' - 50: 252, # '2' - 51: 252, # '3' - 52: 252, # '4' - 53: 252, # '5' - 54: 252, # '6' - 55: 252, # '7' - 56: 252, # '8' - 57: 252, # '9' - 58: 253, # ':' - 59: 253, # ';' - 60: 253, # '<' - 61: 253, # '=' - 62: 253, # '>' - 63: 253, # '?' - 64: 253, # '@' - 65: 82, # 'A' - 66: 100, # 'B' - 67: 104, # 'C' - 68: 94, # 'D' - 69: 98, # 'E' - 70: 101, # 'F' - 71: 116, # 'G' - 72: 102, # 'H' - 73: 111, # 'I' - 74: 187, # 'J' - 75: 117, # 'K' - 76: 92, # 'L' - 77: 88, # 'M' - 78: 113, # 'N' - 79: 85, # 'O' - 80: 79, # 'P' - 81: 118, # 'Q' - 82: 105, # 'R' - 83: 83, # 'S' - 84: 67, # 'T' - 85: 114, # 'U' - 86: 119, # 'V' - 87: 95, # 'W' - 88: 99, # 'X' - 89: 109, # 'Y' - 90: 188, # 'Z' - 91: 253, # '[' - 92: 253, # '\\' - 93: 253, # ']' - 94: 253, # '^' - 95: 253, # '_' - 96: 253, # '`' - 97: 72, # 'a' - 98: 70, # 'b' - 99: 80, # 'c' - 100: 81, # 'd' - 101: 60, # 'e' - 102: 96, # 'f' - 103: 93, # 'g' - 104: 89, # 'h' - 105: 68, # 'i' - 106: 120, # 'j' - 107: 97, # 'k' - 108: 77, # 'l' - 109: 86, # 'm' - 110: 69, # 'n' - 111: 55, # 'o' - 112: 78, # 'p' - 113: 115, # 'q' - 114: 65, # 'r' - 115: 66, # 's' - 116: 58, # 't' - 117: 76, # 'u' - 118: 106, # 'v' - 119: 103, # 'w' - 120: 87, # 'x' - 121: 107, # 'y' - 122: 112, # 'z' - 123: 253, # '{' - 124: 253, # '|' - 125: 253, # '}' - 126: 253, # '~' - 127: 253, # '\x7f' - 128: 255, # '\x80' - 129: 255, # '\x81' - 130: 255, # '\x82' - 131: 255, # '\x83' - 132: 255, # '\x84' - 133: 255, # '\x85' - 134: 255, # '\x86' - 135: 255, # '\x87' - 136: 255, # '\x88' - 137: 255, # '\x89' - 138: 255, # '\x8a' - 139: 255, # '\x8b' - 140: 255, # '\x8c' - 141: 255, # '\x8d' - 142: 255, # '\x8e' - 143: 255, # '\x8f' - 144: 255, # '\x90' - 145: 255, # '\x91' - 146: 255, # '\x92' - 147: 255, # '\x93' - 148: 255, # '\x94' - 149: 255, # '\x95' - 150: 255, # '\x96' - 151: 255, # '\x97' - 152: 255, # '\x98' - 153: 255, # '\x99' - 154: 255, # '\x9a' - 155: 255, # '\x9b' - 156: 255, # '\x9c' - 157: 255, # '\x9d' - 158: 255, # '\x9e' - 159: 255, # '\x9f' - 160: 253, # '\xa0' - 161: 233, # '‘' - 162: 90, # '’' - 163: 253, # '£' - 164: 253, # '€' - 165: 253, # '₯' - 166: 253, # '¦' - 167: 253, # '§' - 168: 253, # '¨' - 169: 253, # '©' - 170: 253, # 'ͺ' - 171: 253, # '«' - 172: 253, # '¬' - 173: 74, # '\xad' - 174: 253, # None - 175: 253, # '―' - 176: 253, # '°' - 177: 253, # '±' - 178: 253, # '²' - 179: 253, # '³' - 180: 247, # '΄' - 181: 248, # '΅' - 182: 61, # 'Ά' - 183: 36, # '·' - 184: 46, # 'Έ' - 185: 71, # 'Ή' - 186: 73, # 'Ί' - 187: 253, # '»' - 188: 54, # 'Ό' - 189: 253, # '½' - 190: 108, # 'Ύ' - 191: 123, # 'Ώ' - 192: 110, # 'ΐ' - 193: 31, # 'Α' - 194: 51, # 'Β' - 195: 43, # 'Γ' - 196: 41, # 'Δ' - 197: 34, # 'Ε' - 198: 91, # 'Ζ' - 199: 40, # 'Η' - 200: 52, # 'Θ' - 201: 47, # 'Ι' - 202: 44, # 'Κ' - 203: 53, # 'Λ' - 204: 38, # 'Μ' - 205: 49, # 'Ν' - 206: 59, # 'Ξ' - 207: 39, # 'Ο' - 208: 35, # 'Π' - 209: 48, # 'Ρ' - 210: 250, # None - 211: 37, # 'Σ' - 212: 33, # 'Τ' - 213: 45, # 'Υ' - 214: 56, # 'Φ' - 215: 50, # 'Χ' - 216: 84, # 'Ψ' - 217: 57, # 'Ω' - 218: 120, # 'Ϊ' - 219: 121, # 'Ϋ' - 220: 17, # 'ά' - 221: 18, # 'έ' - 222: 22, # 'ή' - 223: 15, # 'ί' - 224: 124, # 'ΰ' - 225: 1, # 'α' - 226: 29, # 'β' - 227: 20, # 'γ' - 228: 21, # 'δ' - 229: 3, # 'ε' - 230: 32, # 'ζ' - 231: 13, # 'η' - 232: 25, # 'θ' - 233: 5, # 'ι' - 234: 11, # 'κ' - 235: 16, # 'λ' - 236: 10, # 'μ' - 237: 6, # 'ν' - 238: 30, # 'ξ' - 239: 4, # 'ο' - 240: 9, # 'π' - 241: 8, # 'ρ' - 242: 14, # 'ς' - 243: 7, # 'σ' - 244: 2, # 'τ' - 245: 12, # 'υ' - 246: 28, # 'φ' - 247: 23, # 'χ' - 248: 42, # 'ψ' - 249: 24, # 'ω' - 250: 64, # 'ϊ' - 251: 75, # 'ϋ' - 252: 19, # 'ό' - 253: 26, # 'ύ' - 254: 27, # 'ώ' - 255: 253, # None + 0: 255, # '\x00' + 1: 255, # '\x01' + 2: 255, # '\x02' + 3: 255, # '\x03' + 4: 255, # '\x04' + 5: 255, # '\x05' + 6: 255, # '\x06' + 7: 255, # '\x07' + 8: 255, # '\x08' + 9: 255, # '\t' + 10: 254, # '\n' + 11: 255, # '\x0b' + 12: 255, # '\x0c' + 13: 254, # '\r' + 14: 255, # '\x0e' + 15: 255, # '\x0f' + 16: 255, # '\x10' + 17: 255, # '\x11' + 18: 255, # '\x12' + 19: 255, # '\x13' + 20: 255, # '\x14' + 21: 255, # '\x15' + 22: 255, # '\x16' + 23: 255, # '\x17' + 24: 255, # '\x18' + 25: 255, # '\x19' + 26: 255, # '\x1a' + 27: 255, # '\x1b' + 28: 255, # '\x1c' + 29: 255, # '\x1d' + 30: 255, # '\x1e' + 31: 255, # '\x1f' + 32: 253, # ' ' + 33: 253, # '!' + 34: 253, # '"' + 35: 253, # '#' + 36: 253, # '$' + 37: 253, # '%' + 38: 253, # '&' + 39: 253, # "'" + 40: 253, # '(' + 41: 253, # ')' + 42: 253, # '*' + 43: 253, # '+' + 44: 253, # ',' + 45: 253, # '-' + 46: 253, # '.' + 47: 253, # '/' + 48: 252, # '0' + 49: 252, # '1' + 50: 252, # '2' + 51: 252, # '3' + 52: 252, # '4' + 53: 252, # '5' + 54: 252, # '6' + 55: 252, # '7' + 56: 252, # '8' + 57: 252, # '9' + 58: 253, # ':' + 59: 253, # ';' + 60: 253, # '<' + 61: 253, # '=' + 62: 253, # '>' + 63: 253, # '?' + 64: 253, # '@' + 65: 82, # 'A' + 66: 100, # 'B' + 67: 104, # 'C' + 68: 94, # 'D' + 69: 98, # 'E' + 70: 101, # 'F' + 71: 116, # 'G' + 72: 102, # 'H' + 73: 111, # 'I' + 74: 187, # 'J' + 75: 117, # 'K' + 76: 92, # 'L' + 77: 88, # 'M' + 78: 113, # 'N' + 79: 85, # 'O' + 80: 79, # 'P' + 81: 118, # 'Q' + 82: 105, # 'R' + 83: 83, # 'S' + 84: 67, # 'T' + 85: 114, # 'U' + 86: 119, # 'V' + 87: 95, # 'W' + 88: 99, # 'X' + 89: 109, # 'Y' + 90: 188, # 'Z' + 91: 253, # '[' + 92: 253, # '\\' + 93: 253, # ']' + 94: 253, # '^' + 95: 253, # '_' + 96: 253, # '`' + 97: 72, # 'a' + 98: 70, # 'b' + 99: 80, # 'c' + 100: 81, # 'd' + 101: 60, # 'e' + 102: 96, # 'f' + 103: 93, # 'g' + 104: 89, # 'h' + 105: 68, # 'i' + 106: 120, # 'j' + 107: 97, # 'k' + 108: 77, # 'l' + 109: 86, # 'm' + 110: 69, # 'n' + 111: 55, # 'o' + 112: 78, # 'p' + 113: 115, # 'q' + 114: 65, # 'r' + 115: 66, # 's' + 116: 58, # 't' + 117: 76, # 'u' + 118: 106, # 'v' + 119: 103, # 'w' + 120: 87, # 'x' + 121: 107, # 'y' + 122: 112, # 'z' + 123: 253, # '{' + 124: 253, # '|' + 125: 253, # '}' + 126: 253, # '~' + 127: 253, # '\x7f' + 128: 255, # '\x80' + 129: 255, # '\x81' + 130: 255, # '\x82' + 131: 255, # '\x83' + 132: 255, # '\x84' + 133: 255, # '\x85' + 134: 255, # '\x86' + 135: 255, # '\x87' + 136: 255, # '\x88' + 137: 255, # '\x89' + 138: 255, # '\x8a' + 139: 255, # '\x8b' + 140: 255, # '\x8c' + 141: 255, # '\x8d' + 142: 255, # '\x8e' + 143: 255, # '\x8f' + 144: 255, # '\x90' + 145: 255, # '\x91' + 146: 255, # '\x92' + 147: 255, # '\x93' + 148: 255, # '\x94' + 149: 255, # '\x95' + 150: 255, # '\x96' + 151: 255, # '\x97' + 152: 255, # '\x98' + 153: 255, # '\x99' + 154: 255, # '\x9a' + 155: 255, # '\x9b' + 156: 255, # '\x9c' + 157: 255, # '\x9d' + 158: 255, # '\x9e' + 159: 255, # '\x9f' + 160: 253, # '\xa0' + 161: 233, # '‘' + 162: 90, # '’' + 163: 253, # '£' + 164: 253, # '€' + 165: 253, # '₯' + 166: 253, # '¦' + 167: 253, # '§' + 168: 253, # '¨' + 169: 253, # '©' + 170: 253, # 'ͺ' + 171: 253, # '«' + 172: 253, # '¬' + 173: 74, # '\xad' + 174: 253, # None + 175: 253, # '―' + 176: 253, # '°' + 177: 253, # '±' + 178: 253, # '²' + 179: 253, # '³' + 180: 247, # '΄' + 181: 248, # '΅' + 182: 61, # 'Ά' + 183: 36, # '·' + 184: 46, # 'Έ' + 185: 71, # 'Ή' + 186: 73, # 'Ί' + 187: 253, # '»' + 188: 54, # 'Ό' + 189: 253, # '½' + 190: 108, # 'Ύ' + 191: 123, # 'Ώ' + 192: 110, # 'ΐ' + 193: 31, # 'Α' + 194: 51, # 'Β' + 195: 43, # 'Γ' + 196: 41, # 'Δ' + 197: 34, # 'Ε' + 198: 91, # 'Ζ' + 199: 40, # 'Η' + 200: 52, # 'Θ' + 201: 47, # 'Ι' + 202: 44, # 'Κ' + 203: 53, # 'Λ' + 204: 38, # 'Μ' + 205: 49, # 'Ν' + 206: 59, # 'Ξ' + 207: 39, # 'Ο' + 208: 35, # 'Π' + 209: 48, # 'Ρ' + 210: 250, # None + 211: 37, # 'Σ' + 212: 33, # 'Τ' + 213: 45, # 'Υ' + 214: 56, # 'Φ' + 215: 50, # 'Χ' + 216: 84, # 'Ψ' + 217: 57, # 'Ω' + 218: 120, # 'Ϊ' + 219: 121, # 'Ϋ' + 220: 17, # 'ά' + 221: 18, # 'έ' + 222: 22, # 'ή' + 223: 15, # 'ί' + 224: 124, # 'ΰ' + 225: 1, # 'α' + 226: 29, # 'β' + 227: 20, # 'γ' + 228: 21, # 'δ' + 229: 3, # 'ε' + 230: 32, # 'ζ' + 231: 13, # 'η' + 232: 25, # 'θ' + 233: 5, # 'ι' + 234: 11, # 'κ' + 235: 16, # 'λ' + 236: 10, # 'μ' + 237: 6, # 'ν' + 238: 30, # 'ξ' + 239: 4, # 'ο' + 240: 9, # 'π' + 241: 8, # 'ρ' + 242: 14, # 'ς' + 243: 7, # 'σ' + 244: 2, # 'τ' + 245: 12, # 'υ' + 246: 28, # 'φ' + 247: 23, # 'χ' + 248: 42, # 'ψ' + 249: 24, # 'ω' + 250: 64, # 'ϊ' + 251: 75, # 'ϋ' + 252: 19, # 'ό' + 253: 26, # 'ύ' + 254: 27, # 'ώ' + 255: 253, # None } -ISO_8859_7_GREEK_MODEL = SingleByteCharSetModel(charset_name='ISO-8859-7', - language='Greek', - char_to_order_map=ISO_8859_7_GREEK_CHAR_TO_ORDER, - language_model=GREEK_LANG_MODEL, - typical_positive_ratio=0.982851, - keep_ascii_letters=False, - alphabet='ΆΈΉΊΌΎΏΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩάέήίαβγδεζηθικλμνξοπρςστυφχψωόύώ') - +ISO_8859_7_GREEK_MODEL = SingleByteCharSetModel( + charset_name="ISO-8859-7", + language="Greek", + char_to_order_map=ISO_8859_7_GREEK_CHAR_TO_ORDER, + language_model=GREEK_LANG_MODEL, + typical_positive_ratio=0.982851, + keep_ascii_letters=False, + alphabet="ΆΈΉΊΌΎΏΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩάέήίαβγδεζηθικλμνξοπρςστυφχψωόύώ", +) diff --git a/libs/common/chardet/langhebrewmodel.py b/libs/common/chardet/langhebrewmodel.py index 40fd674c..86b3c5e6 100644 --- a/libs/common/chardet/langhebrewmodel.py +++ b/libs/common/chardet/langhebrewmodel.py @@ -1,9 +1,5 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - from chardet.sbcharsetprober import SingleByteCharSetModel - # 3: Positive # 2: Likely # 1: Unlikely @@ -4115,269 +4111,270 @@ HEBREW_LANG_MODEL = { # Character Mapping Table(s): WINDOWS_1255_HEBREW_CHAR_TO_ORDER = { - 0: 255, # '\x00' - 1: 255, # '\x01' - 2: 255, # '\x02' - 3: 255, # '\x03' - 4: 255, # '\x04' - 5: 255, # '\x05' - 6: 255, # '\x06' - 7: 255, # '\x07' - 8: 255, # '\x08' - 9: 255, # '\t' - 10: 254, # '\n' - 11: 255, # '\x0b' - 12: 255, # '\x0c' - 13: 254, # '\r' - 14: 255, # '\x0e' - 15: 255, # '\x0f' - 16: 255, # '\x10' - 17: 255, # '\x11' - 18: 255, # '\x12' - 19: 255, # '\x13' - 20: 255, # '\x14' - 21: 255, # '\x15' - 22: 255, # '\x16' - 23: 255, # '\x17' - 24: 255, # '\x18' - 25: 255, # '\x19' - 26: 255, # '\x1a' - 27: 255, # '\x1b' - 28: 255, # '\x1c' - 29: 255, # '\x1d' - 30: 255, # '\x1e' - 31: 255, # '\x1f' - 32: 253, # ' ' - 33: 253, # '!' - 34: 253, # '"' - 35: 253, # '#' - 36: 253, # '$' - 37: 253, # '%' - 38: 253, # '&' - 39: 253, # "'" - 40: 253, # '(' - 41: 253, # ')' - 42: 253, # '*' - 43: 253, # '+' - 44: 253, # ',' - 45: 253, # '-' - 46: 253, # '.' - 47: 253, # '/' - 48: 252, # '0' - 49: 252, # '1' - 50: 252, # '2' - 51: 252, # '3' - 52: 252, # '4' - 53: 252, # '5' - 54: 252, # '6' - 55: 252, # '7' - 56: 252, # '8' - 57: 252, # '9' - 58: 253, # ':' - 59: 253, # ';' - 60: 253, # '<' - 61: 253, # '=' - 62: 253, # '>' - 63: 253, # '?' - 64: 253, # '@' - 65: 69, # 'A' - 66: 91, # 'B' - 67: 79, # 'C' - 68: 80, # 'D' - 69: 92, # 'E' - 70: 89, # 'F' - 71: 97, # 'G' - 72: 90, # 'H' - 73: 68, # 'I' - 74: 111, # 'J' - 75: 112, # 'K' - 76: 82, # 'L' - 77: 73, # 'M' - 78: 95, # 'N' - 79: 85, # 'O' - 80: 78, # 'P' - 81: 121, # 'Q' - 82: 86, # 'R' - 83: 71, # 'S' - 84: 67, # 'T' - 85: 102, # 'U' - 86: 107, # 'V' - 87: 84, # 'W' - 88: 114, # 'X' - 89: 103, # 'Y' - 90: 115, # 'Z' - 91: 253, # '[' - 92: 253, # '\\' - 93: 253, # ']' - 94: 253, # '^' - 95: 253, # '_' - 96: 253, # '`' - 97: 50, # 'a' - 98: 74, # 'b' - 99: 60, # 'c' - 100: 61, # 'd' - 101: 42, # 'e' - 102: 76, # 'f' - 103: 70, # 'g' - 104: 64, # 'h' - 105: 53, # 'i' - 106: 105, # 'j' - 107: 93, # 'k' - 108: 56, # 'l' - 109: 65, # 'm' - 110: 54, # 'n' - 111: 49, # 'o' - 112: 66, # 'p' - 113: 110, # 'q' - 114: 51, # 'r' - 115: 43, # 's' - 116: 44, # 't' - 117: 63, # 'u' - 118: 81, # 'v' - 119: 77, # 'w' - 120: 98, # 'x' - 121: 75, # 'y' - 122: 108, # 'z' - 123: 253, # '{' - 124: 253, # '|' - 125: 253, # '}' - 126: 253, # '~' - 127: 253, # '\x7f' - 128: 124, # '€' - 129: 202, # None - 130: 203, # '‚' - 131: 204, # 'ƒ' - 132: 205, # '„' - 133: 40, # '…' - 134: 58, # '†' - 135: 206, # '‡' - 136: 207, # 'ˆ' - 137: 208, # '‰' - 138: 209, # None - 139: 210, # '‹' - 140: 211, # None - 141: 212, # None - 142: 213, # None - 143: 214, # None - 144: 215, # None - 145: 83, # '‘' - 146: 52, # '’' - 147: 47, # '“' - 148: 46, # '”' - 149: 72, # '•' - 150: 32, # '–' - 151: 94, # '—' - 152: 216, # '˜' - 153: 113, # '™' - 154: 217, # None - 155: 109, # '›' - 156: 218, # None - 157: 219, # None - 158: 220, # None - 159: 221, # None - 160: 34, # '\xa0' - 161: 116, # '¡' - 162: 222, # '¢' - 163: 118, # '£' - 164: 100, # '₪' - 165: 223, # '¥' - 166: 224, # '¦' - 167: 117, # '§' - 168: 119, # '¨' - 169: 104, # '©' - 170: 125, # '×' - 171: 225, # '«' - 172: 226, # '¬' - 173: 87, # '\xad' - 174: 99, # '®' - 175: 227, # '¯' - 176: 106, # '°' - 177: 122, # '±' - 178: 123, # '²' - 179: 228, # '³' - 180: 55, # '´' - 181: 229, # 'µ' - 182: 230, # '¶' - 183: 101, # '·' - 184: 231, # '¸' - 185: 232, # '¹' - 186: 120, # '÷' - 187: 233, # '»' - 188: 48, # '¼' - 189: 39, # '½' - 190: 57, # '¾' - 191: 234, # '¿' - 192: 30, # 'ְ' - 193: 59, # 'ֱ' - 194: 41, # 'ֲ' - 195: 88, # 'ֳ' - 196: 33, # 'ִ' - 197: 37, # 'ֵ' - 198: 36, # 'ֶ' - 199: 31, # 'ַ' - 200: 29, # 'ָ' - 201: 35, # 'ֹ' - 202: 235, # None - 203: 62, # 'ֻ' - 204: 28, # 'ּ' - 205: 236, # 'ֽ' - 206: 126, # '־' - 207: 237, # 'ֿ' - 208: 238, # '׀' - 209: 38, # 'ׁ' - 210: 45, # 'ׂ' - 211: 239, # '׃' - 212: 240, # 'װ' - 213: 241, # 'ױ' - 214: 242, # 'ײ' - 215: 243, # '׳' - 216: 127, # '״' - 217: 244, # None - 218: 245, # None - 219: 246, # None - 220: 247, # None - 221: 248, # None - 222: 249, # None - 223: 250, # None - 224: 9, # 'א' - 225: 8, # 'ב' - 226: 20, # 'ג' - 227: 16, # 'ד' - 228: 3, # 'ה' - 229: 2, # 'ו' - 230: 24, # 'ז' - 231: 14, # 'ח' - 232: 22, # 'ט' - 233: 1, # 'י' - 234: 25, # 'ך' - 235: 15, # 'כ' - 236: 4, # 'ל' - 237: 11, # 'ם' - 238: 6, # 'מ' - 239: 23, # 'ן' - 240: 12, # 'נ' - 241: 19, # 'ס' - 242: 13, # 'ע' - 243: 26, # 'ף' - 244: 18, # 'פ' - 245: 27, # 'ץ' - 246: 21, # 'צ' - 247: 17, # 'ק' - 248: 7, # 'ר' - 249: 10, # 'ש' - 250: 5, # 'ת' - 251: 251, # None - 252: 252, # None - 253: 128, # '\u200e' - 254: 96, # '\u200f' - 255: 253, # None + 0: 255, # '\x00' + 1: 255, # '\x01' + 2: 255, # '\x02' + 3: 255, # '\x03' + 4: 255, # '\x04' + 5: 255, # '\x05' + 6: 255, # '\x06' + 7: 255, # '\x07' + 8: 255, # '\x08' + 9: 255, # '\t' + 10: 254, # '\n' + 11: 255, # '\x0b' + 12: 255, # '\x0c' + 13: 254, # '\r' + 14: 255, # '\x0e' + 15: 255, # '\x0f' + 16: 255, # '\x10' + 17: 255, # '\x11' + 18: 255, # '\x12' + 19: 255, # '\x13' + 20: 255, # '\x14' + 21: 255, # '\x15' + 22: 255, # '\x16' + 23: 255, # '\x17' + 24: 255, # '\x18' + 25: 255, # '\x19' + 26: 255, # '\x1a' + 27: 255, # '\x1b' + 28: 255, # '\x1c' + 29: 255, # '\x1d' + 30: 255, # '\x1e' + 31: 255, # '\x1f' + 32: 253, # ' ' + 33: 253, # '!' + 34: 253, # '"' + 35: 253, # '#' + 36: 253, # '$' + 37: 253, # '%' + 38: 253, # '&' + 39: 253, # "'" + 40: 253, # '(' + 41: 253, # ')' + 42: 253, # '*' + 43: 253, # '+' + 44: 253, # ',' + 45: 253, # '-' + 46: 253, # '.' + 47: 253, # '/' + 48: 252, # '0' + 49: 252, # '1' + 50: 252, # '2' + 51: 252, # '3' + 52: 252, # '4' + 53: 252, # '5' + 54: 252, # '6' + 55: 252, # '7' + 56: 252, # '8' + 57: 252, # '9' + 58: 253, # ':' + 59: 253, # ';' + 60: 253, # '<' + 61: 253, # '=' + 62: 253, # '>' + 63: 253, # '?' + 64: 253, # '@' + 65: 69, # 'A' + 66: 91, # 'B' + 67: 79, # 'C' + 68: 80, # 'D' + 69: 92, # 'E' + 70: 89, # 'F' + 71: 97, # 'G' + 72: 90, # 'H' + 73: 68, # 'I' + 74: 111, # 'J' + 75: 112, # 'K' + 76: 82, # 'L' + 77: 73, # 'M' + 78: 95, # 'N' + 79: 85, # 'O' + 80: 78, # 'P' + 81: 121, # 'Q' + 82: 86, # 'R' + 83: 71, # 'S' + 84: 67, # 'T' + 85: 102, # 'U' + 86: 107, # 'V' + 87: 84, # 'W' + 88: 114, # 'X' + 89: 103, # 'Y' + 90: 115, # 'Z' + 91: 253, # '[' + 92: 253, # '\\' + 93: 253, # ']' + 94: 253, # '^' + 95: 253, # '_' + 96: 253, # '`' + 97: 50, # 'a' + 98: 74, # 'b' + 99: 60, # 'c' + 100: 61, # 'd' + 101: 42, # 'e' + 102: 76, # 'f' + 103: 70, # 'g' + 104: 64, # 'h' + 105: 53, # 'i' + 106: 105, # 'j' + 107: 93, # 'k' + 108: 56, # 'l' + 109: 65, # 'm' + 110: 54, # 'n' + 111: 49, # 'o' + 112: 66, # 'p' + 113: 110, # 'q' + 114: 51, # 'r' + 115: 43, # 's' + 116: 44, # 't' + 117: 63, # 'u' + 118: 81, # 'v' + 119: 77, # 'w' + 120: 98, # 'x' + 121: 75, # 'y' + 122: 108, # 'z' + 123: 253, # '{' + 124: 253, # '|' + 125: 253, # '}' + 126: 253, # '~' + 127: 253, # '\x7f' + 128: 124, # '€' + 129: 202, # None + 130: 203, # '‚' + 131: 204, # 'ƒ' + 132: 205, # '„' + 133: 40, # '…' + 134: 58, # '†' + 135: 206, # '‡' + 136: 207, # 'ˆ' + 137: 208, # '‰' + 138: 209, # None + 139: 210, # '‹' + 140: 211, # None + 141: 212, # None + 142: 213, # None + 143: 214, # None + 144: 215, # None + 145: 83, # '‘' + 146: 52, # '’' + 147: 47, # '“' + 148: 46, # '”' + 149: 72, # '•' + 150: 32, # '–' + 151: 94, # '—' + 152: 216, # '˜' + 153: 113, # '™' + 154: 217, # None + 155: 109, # '›' + 156: 218, # None + 157: 219, # None + 158: 220, # None + 159: 221, # None + 160: 34, # '\xa0' + 161: 116, # '¡' + 162: 222, # '¢' + 163: 118, # '£' + 164: 100, # '₪' + 165: 223, # '¥' + 166: 224, # '¦' + 167: 117, # '§' + 168: 119, # '¨' + 169: 104, # '©' + 170: 125, # '×' + 171: 225, # '«' + 172: 226, # '¬' + 173: 87, # '\xad' + 174: 99, # '®' + 175: 227, # '¯' + 176: 106, # '°' + 177: 122, # '±' + 178: 123, # '²' + 179: 228, # '³' + 180: 55, # '´' + 181: 229, # 'µ' + 182: 230, # '¶' + 183: 101, # '·' + 184: 231, # '¸' + 185: 232, # '¹' + 186: 120, # '÷' + 187: 233, # '»' + 188: 48, # '¼' + 189: 39, # '½' + 190: 57, # '¾' + 191: 234, # '¿' + 192: 30, # 'ְ' + 193: 59, # 'ֱ' + 194: 41, # 'ֲ' + 195: 88, # 'ֳ' + 196: 33, # 'ִ' + 197: 37, # 'ֵ' + 198: 36, # 'ֶ' + 199: 31, # 'ַ' + 200: 29, # 'ָ' + 201: 35, # 'ֹ' + 202: 235, # None + 203: 62, # 'ֻ' + 204: 28, # 'ּ' + 205: 236, # 'ֽ' + 206: 126, # '־' + 207: 237, # 'ֿ' + 208: 238, # '׀' + 209: 38, # 'ׁ' + 210: 45, # 'ׂ' + 211: 239, # '׃' + 212: 240, # 'װ' + 213: 241, # 'ױ' + 214: 242, # 'ײ' + 215: 243, # '׳' + 216: 127, # '״' + 217: 244, # None + 218: 245, # None + 219: 246, # None + 220: 247, # None + 221: 248, # None + 222: 249, # None + 223: 250, # None + 224: 9, # 'א' + 225: 8, # 'ב' + 226: 20, # 'ג' + 227: 16, # 'ד' + 228: 3, # 'ה' + 229: 2, # 'ו' + 230: 24, # 'ז' + 231: 14, # 'ח' + 232: 22, # 'ט' + 233: 1, # 'י' + 234: 25, # 'ך' + 235: 15, # 'כ' + 236: 4, # 'ל' + 237: 11, # 'ם' + 238: 6, # 'מ' + 239: 23, # 'ן' + 240: 12, # 'נ' + 241: 19, # 'ס' + 242: 13, # 'ע' + 243: 26, # 'ף' + 244: 18, # 'פ' + 245: 27, # 'ץ' + 246: 21, # 'צ' + 247: 17, # 'ק' + 248: 7, # 'ר' + 249: 10, # 'ש' + 250: 5, # 'ת' + 251: 251, # None + 252: 252, # None + 253: 128, # '\u200e' + 254: 96, # '\u200f' + 255: 253, # None } -WINDOWS_1255_HEBREW_MODEL = SingleByteCharSetModel(charset_name='windows-1255', - language='Hebrew', - char_to_order_map=WINDOWS_1255_HEBREW_CHAR_TO_ORDER, - language_model=HEBREW_LANG_MODEL, - typical_positive_ratio=0.984004, - keep_ascii_letters=False, - alphabet='אבגדהוזחטיךכלםמןנסעףפץצקרשתװױײ') - +WINDOWS_1255_HEBREW_MODEL = SingleByteCharSetModel( + charset_name="windows-1255", + language="Hebrew", + char_to_order_map=WINDOWS_1255_HEBREW_CHAR_TO_ORDER, + language_model=HEBREW_LANG_MODEL, + typical_positive_ratio=0.984004, + keep_ascii_letters=False, + alphabet="אבגדהוזחטיךכלםמןנסעףפץצקרשתװױײ", +) diff --git a/libs/common/chardet/langhungarianmodel.py b/libs/common/chardet/langhungarianmodel.py index 24a097f5..bd6630a0 100644 --- a/libs/common/chardet/langhungarianmodel.py +++ b/libs/common/chardet/langhungarianmodel.py @@ -1,9 +1,5 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - from chardet.sbcharsetprober import SingleByteCharSetModel - # 3: Positive # 2: Likely # 1: Unlikely @@ -4115,536 +4111,539 @@ HUNGARIAN_LANG_MODEL = { # Character Mapping Table(s): WINDOWS_1250_HUNGARIAN_CHAR_TO_ORDER = { - 0: 255, # '\x00' - 1: 255, # '\x01' - 2: 255, # '\x02' - 3: 255, # '\x03' - 4: 255, # '\x04' - 5: 255, # '\x05' - 6: 255, # '\x06' - 7: 255, # '\x07' - 8: 255, # '\x08' - 9: 255, # '\t' - 10: 254, # '\n' - 11: 255, # '\x0b' - 12: 255, # '\x0c' - 13: 254, # '\r' - 14: 255, # '\x0e' - 15: 255, # '\x0f' - 16: 255, # '\x10' - 17: 255, # '\x11' - 18: 255, # '\x12' - 19: 255, # '\x13' - 20: 255, # '\x14' - 21: 255, # '\x15' - 22: 255, # '\x16' - 23: 255, # '\x17' - 24: 255, # '\x18' - 25: 255, # '\x19' - 26: 255, # '\x1a' - 27: 255, # '\x1b' - 28: 255, # '\x1c' - 29: 255, # '\x1d' - 30: 255, # '\x1e' - 31: 255, # '\x1f' - 32: 253, # ' ' - 33: 253, # '!' - 34: 253, # '"' - 35: 253, # '#' - 36: 253, # '$' - 37: 253, # '%' - 38: 253, # '&' - 39: 253, # "'" - 40: 253, # '(' - 41: 253, # ')' - 42: 253, # '*' - 43: 253, # '+' - 44: 253, # ',' - 45: 253, # '-' - 46: 253, # '.' - 47: 253, # '/' - 48: 252, # '0' - 49: 252, # '1' - 50: 252, # '2' - 51: 252, # '3' - 52: 252, # '4' - 53: 252, # '5' - 54: 252, # '6' - 55: 252, # '7' - 56: 252, # '8' - 57: 252, # '9' - 58: 253, # ':' - 59: 253, # ';' - 60: 253, # '<' - 61: 253, # '=' - 62: 253, # '>' - 63: 253, # '?' - 64: 253, # '@' - 65: 28, # 'A' - 66: 40, # 'B' - 67: 54, # 'C' - 68: 45, # 'D' - 69: 32, # 'E' - 70: 50, # 'F' - 71: 49, # 'G' - 72: 38, # 'H' - 73: 39, # 'I' - 74: 53, # 'J' - 75: 36, # 'K' - 76: 41, # 'L' - 77: 34, # 'M' - 78: 35, # 'N' - 79: 47, # 'O' - 80: 46, # 'P' - 81: 72, # 'Q' - 82: 43, # 'R' - 83: 33, # 'S' - 84: 37, # 'T' - 85: 57, # 'U' - 86: 48, # 'V' - 87: 64, # 'W' - 88: 68, # 'X' - 89: 55, # 'Y' - 90: 52, # 'Z' - 91: 253, # '[' - 92: 253, # '\\' - 93: 253, # ']' - 94: 253, # '^' - 95: 253, # '_' - 96: 253, # '`' - 97: 2, # 'a' - 98: 18, # 'b' - 99: 26, # 'c' - 100: 17, # 'd' - 101: 1, # 'e' - 102: 27, # 'f' - 103: 12, # 'g' - 104: 20, # 'h' - 105: 9, # 'i' - 106: 22, # 'j' - 107: 7, # 'k' - 108: 6, # 'l' - 109: 13, # 'm' - 110: 4, # 'n' - 111: 8, # 'o' - 112: 23, # 'p' - 113: 67, # 'q' - 114: 10, # 'r' - 115: 5, # 's' - 116: 3, # 't' - 117: 21, # 'u' - 118: 19, # 'v' - 119: 65, # 'w' - 120: 62, # 'x' - 121: 16, # 'y' - 122: 11, # 'z' - 123: 253, # '{' - 124: 253, # '|' - 125: 253, # '}' - 126: 253, # '~' - 127: 253, # '\x7f' - 128: 161, # '€' - 129: 162, # None - 130: 163, # '‚' - 131: 164, # None - 132: 165, # '„' - 133: 166, # '…' - 134: 167, # '†' - 135: 168, # '‡' - 136: 169, # None - 137: 170, # '‰' - 138: 171, # 'Š' - 139: 172, # '‹' - 140: 173, # 'Ś' - 141: 174, # 'Ť' - 142: 175, # 'Ž' - 143: 176, # 'Ź' - 144: 177, # None - 145: 178, # '‘' - 146: 179, # '’' - 147: 180, # '“' - 148: 78, # '”' - 149: 181, # '•' - 150: 69, # '–' - 151: 182, # '—' - 152: 183, # None - 153: 184, # '™' - 154: 185, # 'š' - 155: 186, # '›' - 156: 187, # 'ś' - 157: 188, # 'ť' - 158: 189, # 'ž' - 159: 190, # 'ź' - 160: 191, # '\xa0' - 161: 192, # 'ˇ' - 162: 193, # '˘' - 163: 194, # 'Ł' - 164: 195, # '¤' - 165: 196, # 'Ą' - 166: 197, # '¦' - 167: 76, # '§' - 168: 198, # '¨' - 169: 199, # '©' - 170: 200, # 'Ş' - 171: 201, # '«' - 172: 202, # '¬' - 173: 203, # '\xad' - 174: 204, # '®' - 175: 205, # 'Ż' - 176: 81, # '°' - 177: 206, # '±' - 178: 207, # '˛' - 179: 208, # 'ł' - 180: 209, # '´' - 181: 210, # 'µ' - 182: 211, # '¶' - 183: 212, # '·' - 184: 213, # '¸' - 185: 214, # 'ą' - 186: 215, # 'ş' - 187: 216, # '»' - 188: 217, # 'Ľ' - 189: 218, # '˝' - 190: 219, # 'ľ' - 191: 220, # 'ż' - 192: 221, # 'Ŕ' - 193: 51, # 'Á' - 194: 83, # 'Â' - 195: 222, # 'Ă' - 196: 80, # 'Ä' - 197: 223, # 'Ĺ' - 198: 224, # 'Ć' - 199: 225, # 'Ç' - 200: 226, # 'Č' - 201: 44, # 'É' - 202: 227, # 'Ę' - 203: 228, # 'Ë' - 204: 229, # 'Ě' - 205: 61, # 'Í' - 206: 230, # 'Î' - 207: 231, # 'Ď' - 208: 232, # 'Đ' - 209: 233, # 'Ń' - 210: 234, # 'Ň' - 211: 58, # 'Ó' - 212: 235, # 'Ô' - 213: 66, # 'Ő' - 214: 59, # 'Ö' - 215: 236, # '×' - 216: 237, # 'Ř' - 217: 238, # 'Ů' - 218: 60, # 'Ú' - 219: 70, # 'Ű' - 220: 63, # 'Ü' - 221: 239, # 'Ý' - 222: 240, # 'Ţ' - 223: 241, # 'ß' - 224: 84, # 'ŕ' - 225: 14, # 'á' - 226: 75, # 'â' - 227: 242, # 'ă' - 228: 71, # 'ä' - 229: 82, # 'ĺ' - 230: 243, # 'ć' - 231: 73, # 'ç' - 232: 244, # 'č' - 233: 15, # 'é' - 234: 85, # 'ę' - 235: 79, # 'ë' - 236: 86, # 'ě' - 237: 30, # 'í' - 238: 77, # 'î' - 239: 87, # 'ď' - 240: 245, # 'đ' - 241: 246, # 'ń' - 242: 247, # 'ň' - 243: 25, # 'ó' - 244: 74, # 'ô' - 245: 42, # 'ő' - 246: 24, # 'ö' - 247: 248, # '÷' - 248: 249, # 'ř' - 249: 250, # 'ů' - 250: 31, # 'ú' - 251: 56, # 'ű' - 252: 29, # 'ü' - 253: 251, # 'ý' - 254: 252, # 'ţ' - 255: 253, # '˙' + 0: 255, # '\x00' + 1: 255, # '\x01' + 2: 255, # '\x02' + 3: 255, # '\x03' + 4: 255, # '\x04' + 5: 255, # '\x05' + 6: 255, # '\x06' + 7: 255, # '\x07' + 8: 255, # '\x08' + 9: 255, # '\t' + 10: 254, # '\n' + 11: 255, # '\x0b' + 12: 255, # '\x0c' + 13: 254, # '\r' + 14: 255, # '\x0e' + 15: 255, # '\x0f' + 16: 255, # '\x10' + 17: 255, # '\x11' + 18: 255, # '\x12' + 19: 255, # '\x13' + 20: 255, # '\x14' + 21: 255, # '\x15' + 22: 255, # '\x16' + 23: 255, # '\x17' + 24: 255, # '\x18' + 25: 255, # '\x19' + 26: 255, # '\x1a' + 27: 255, # '\x1b' + 28: 255, # '\x1c' + 29: 255, # '\x1d' + 30: 255, # '\x1e' + 31: 255, # '\x1f' + 32: 253, # ' ' + 33: 253, # '!' + 34: 253, # '"' + 35: 253, # '#' + 36: 253, # '$' + 37: 253, # '%' + 38: 253, # '&' + 39: 253, # "'" + 40: 253, # '(' + 41: 253, # ')' + 42: 253, # '*' + 43: 253, # '+' + 44: 253, # ',' + 45: 253, # '-' + 46: 253, # '.' + 47: 253, # '/' + 48: 252, # '0' + 49: 252, # '1' + 50: 252, # '2' + 51: 252, # '3' + 52: 252, # '4' + 53: 252, # '5' + 54: 252, # '6' + 55: 252, # '7' + 56: 252, # '8' + 57: 252, # '9' + 58: 253, # ':' + 59: 253, # ';' + 60: 253, # '<' + 61: 253, # '=' + 62: 253, # '>' + 63: 253, # '?' + 64: 253, # '@' + 65: 28, # 'A' + 66: 40, # 'B' + 67: 54, # 'C' + 68: 45, # 'D' + 69: 32, # 'E' + 70: 50, # 'F' + 71: 49, # 'G' + 72: 38, # 'H' + 73: 39, # 'I' + 74: 53, # 'J' + 75: 36, # 'K' + 76: 41, # 'L' + 77: 34, # 'M' + 78: 35, # 'N' + 79: 47, # 'O' + 80: 46, # 'P' + 81: 72, # 'Q' + 82: 43, # 'R' + 83: 33, # 'S' + 84: 37, # 'T' + 85: 57, # 'U' + 86: 48, # 'V' + 87: 64, # 'W' + 88: 68, # 'X' + 89: 55, # 'Y' + 90: 52, # 'Z' + 91: 253, # '[' + 92: 253, # '\\' + 93: 253, # ']' + 94: 253, # '^' + 95: 253, # '_' + 96: 253, # '`' + 97: 2, # 'a' + 98: 18, # 'b' + 99: 26, # 'c' + 100: 17, # 'd' + 101: 1, # 'e' + 102: 27, # 'f' + 103: 12, # 'g' + 104: 20, # 'h' + 105: 9, # 'i' + 106: 22, # 'j' + 107: 7, # 'k' + 108: 6, # 'l' + 109: 13, # 'm' + 110: 4, # 'n' + 111: 8, # 'o' + 112: 23, # 'p' + 113: 67, # 'q' + 114: 10, # 'r' + 115: 5, # 's' + 116: 3, # 't' + 117: 21, # 'u' + 118: 19, # 'v' + 119: 65, # 'w' + 120: 62, # 'x' + 121: 16, # 'y' + 122: 11, # 'z' + 123: 253, # '{' + 124: 253, # '|' + 125: 253, # '}' + 126: 253, # '~' + 127: 253, # '\x7f' + 128: 161, # '€' + 129: 162, # None + 130: 163, # '‚' + 131: 164, # None + 132: 165, # '„' + 133: 166, # '…' + 134: 167, # '†' + 135: 168, # '‡' + 136: 169, # None + 137: 170, # '‰' + 138: 171, # 'Š' + 139: 172, # '‹' + 140: 173, # 'Ś' + 141: 174, # 'Ť' + 142: 175, # 'Ž' + 143: 176, # 'Ź' + 144: 177, # None + 145: 178, # '‘' + 146: 179, # '’' + 147: 180, # '“' + 148: 78, # '”' + 149: 181, # '•' + 150: 69, # '–' + 151: 182, # '—' + 152: 183, # None + 153: 184, # '™' + 154: 185, # 'š' + 155: 186, # '›' + 156: 187, # 'ś' + 157: 188, # 'ť' + 158: 189, # 'ž' + 159: 190, # 'ź' + 160: 191, # '\xa0' + 161: 192, # 'ˇ' + 162: 193, # '˘' + 163: 194, # 'Ł' + 164: 195, # '¤' + 165: 196, # 'Ą' + 166: 197, # '¦' + 167: 76, # '§' + 168: 198, # '¨' + 169: 199, # '©' + 170: 200, # 'Ş' + 171: 201, # '«' + 172: 202, # '¬' + 173: 203, # '\xad' + 174: 204, # '®' + 175: 205, # 'Ż' + 176: 81, # '°' + 177: 206, # '±' + 178: 207, # '˛' + 179: 208, # 'ł' + 180: 209, # '´' + 181: 210, # 'µ' + 182: 211, # '¶' + 183: 212, # '·' + 184: 213, # '¸' + 185: 214, # 'ą' + 186: 215, # 'ş' + 187: 216, # '»' + 188: 217, # 'Ľ' + 189: 218, # '˝' + 190: 219, # 'ľ' + 191: 220, # 'ż' + 192: 221, # 'Ŕ' + 193: 51, # 'Á' + 194: 83, # 'Â' + 195: 222, # 'Ă' + 196: 80, # 'Ä' + 197: 223, # 'Ĺ' + 198: 224, # 'Ć' + 199: 225, # 'Ç' + 200: 226, # 'Č' + 201: 44, # 'É' + 202: 227, # 'Ę' + 203: 228, # 'Ë' + 204: 229, # 'Ě' + 205: 61, # 'Í' + 206: 230, # 'Î' + 207: 231, # 'Ď' + 208: 232, # 'Đ' + 209: 233, # 'Ń' + 210: 234, # 'Ň' + 211: 58, # 'Ó' + 212: 235, # 'Ô' + 213: 66, # 'Ő' + 214: 59, # 'Ö' + 215: 236, # '×' + 216: 237, # 'Ř' + 217: 238, # 'Ů' + 218: 60, # 'Ú' + 219: 70, # 'Ű' + 220: 63, # 'Ü' + 221: 239, # 'Ý' + 222: 240, # 'Ţ' + 223: 241, # 'ß' + 224: 84, # 'ŕ' + 225: 14, # 'á' + 226: 75, # 'â' + 227: 242, # 'ă' + 228: 71, # 'ä' + 229: 82, # 'ĺ' + 230: 243, # 'ć' + 231: 73, # 'ç' + 232: 244, # 'č' + 233: 15, # 'é' + 234: 85, # 'ę' + 235: 79, # 'ë' + 236: 86, # 'ě' + 237: 30, # 'í' + 238: 77, # 'î' + 239: 87, # 'ď' + 240: 245, # 'đ' + 241: 246, # 'ń' + 242: 247, # 'ň' + 243: 25, # 'ó' + 244: 74, # 'ô' + 245: 42, # 'ő' + 246: 24, # 'ö' + 247: 248, # '÷' + 248: 249, # 'ř' + 249: 250, # 'ů' + 250: 31, # 'ú' + 251: 56, # 'ű' + 252: 29, # 'ü' + 253: 251, # 'ý' + 254: 252, # 'ţ' + 255: 253, # '˙' } -WINDOWS_1250_HUNGARIAN_MODEL = SingleByteCharSetModel(charset_name='windows-1250', - language='Hungarian', - char_to_order_map=WINDOWS_1250_HUNGARIAN_CHAR_TO_ORDER, - language_model=HUNGARIAN_LANG_MODEL, - typical_positive_ratio=0.947368, - keep_ascii_letters=True, - alphabet='ABCDEFGHIJKLMNOPRSTUVZabcdefghijklmnoprstuvzÁÉÍÓÖÚÜáéíóöúüŐőŰű') +WINDOWS_1250_HUNGARIAN_MODEL = SingleByteCharSetModel( + charset_name="windows-1250", + language="Hungarian", + char_to_order_map=WINDOWS_1250_HUNGARIAN_CHAR_TO_ORDER, + language_model=HUNGARIAN_LANG_MODEL, + typical_positive_ratio=0.947368, + keep_ascii_letters=True, + alphabet="ABCDEFGHIJKLMNOPRSTUVZabcdefghijklmnoprstuvzÁÉÍÓÖÚÜáéíóöúüŐőŰű", +) ISO_8859_2_HUNGARIAN_CHAR_TO_ORDER = { - 0: 255, # '\x00' - 1: 255, # '\x01' - 2: 255, # '\x02' - 3: 255, # '\x03' - 4: 255, # '\x04' - 5: 255, # '\x05' - 6: 255, # '\x06' - 7: 255, # '\x07' - 8: 255, # '\x08' - 9: 255, # '\t' - 10: 254, # '\n' - 11: 255, # '\x0b' - 12: 255, # '\x0c' - 13: 254, # '\r' - 14: 255, # '\x0e' - 15: 255, # '\x0f' - 16: 255, # '\x10' - 17: 255, # '\x11' - 18: 255, # '\x12' - 19: 255, # '\x13' - 20: 255, # '\x14' - 21: 255, # '\x15' - 22: 255, # '\x16' - 23: 255, # '\x17' - 24: 255, # '\x18' - 25: 255, # '\x19' - 26: 255, # '\x1a' - 27: 255, # '\x1b' - 28: 255, # '\x1c' - 29: 255, # '\x1d' - 30: 255, # '\x1e' - 31: 255, # '\x1f' - 32: 253, # ' ' - 33: 253, # '!' - 34: 253, # '"' - 35: 253, # '#' - 36: 253, # '$' - 37: 253, # '%' - 38: 253, # '&' - 39: 253, # "'" - 40: 253, # '(' - 41: 253, # ')' - 42: 253, # '*' - 43: 253, # '+' - 44: 253, # ',' - 45: 253, # '-' - 46: 253, # '.' - 47: 253, # '/' - 48: 252, # '0' - 49: 252, # '1' - 50: 252, # '2' - 51: 252, # '3' - 52: 252, # '4' - 53: 252, # '5' - 54: 252, # '6' - 55: 252, # '7' - 56: 252, # '8' - 57: 252, # '9' - 58: 253, # ':' - 59: 253, # ';' - 60: 253, # '<' - 61: 253, # '=' - 62: 253, # '>' - 63: 253, # '?' - 64: 253, # '@' - 65: 28, # 'A' - 66: 40, # 'B' - 67: 54, # 'C' - 68: 45, # 'D' - 69: 32, # 'E' - 70: 50, # 'F' - 71: 49, # 'G' - 72: 38, # 'H' - 73: 39, # 'I' - 74: 53, # 'J' - 75: 36, # 'K' - 76: 41, # 'L' - 77: 34, # 'M' - 78: 35, # 'N' - 79: 47, # 'O' - 80: 46, # 'P' - 81: 71, # 'Q' - 82: 43, # 'R' - 83: 33, # 'S' - 84: 37, # 'T' - 85: 57, # 'U' - 86: 48, # 'V' - 87: 64, # 'W' - 88: 68, # 'X' - 89: 55, # 'Y' - 90: 52, # 'Z' - 91: 253, # '[' - 92: 253, # '\\' - 93: 253, # ']' - 94: 253, # '^' - 95: 253, # '_' - 96: 253, # '`' - 97: 2, # 'a' - 98: 18, # 'b' - 99: 26, # 'c' - 100: 17, # 'd' - 101: 1, # 'e' - 102: 27, # 'f' - 103: 12, # 'g' - 104: 20, # 'h' - 105: 9, # 'i' - 106: 22, # 'j' - 107: 7, # 'k' - 108: 6, # 'l' - 109: 13, # 'm' - 110: 4, # 'n' - 111: 8, # 'o' - 112: 23, # 'p' - 113: 67, # 'q' - 114: 10, # 'r' - 115: 5, # 's' - 116: 3, # 't' - 117: 21, # 'u' - 118: 19, # 'v' - 119: 65, # 'w' - 120: 62, # 'x' - 121: 16, # 'y' - 122: 11, # 'z' - 123: 253, # '{' - 124: 253, # '|' - 125: 253, # '}' - 126: 253, # '~' - 127: 253, # '\x7f' - 128: 159, # '\x80' - 129: 160, # '\x81' - 130: 161, # '\x82' - 131: 162, # '\x83' - 132: 163, # '\x84' - 133: 164, # '\x85' - 134: 165, # '\x86' - 135: 166, # '\x87' - 136: 167, # '\x88' - 137: 168, # '\x89' - 138: 169, # '\x8a' - 139: 170, # '\x8b' - 140: 171, # '\x8c' - 141: 172, # '\x8d' - 142: 173, # '\x8e' - 143: 174, # '\x8f' - 144: 175, # '\x90' - 145: 176, # '\x91' - 146: 177, # '\x92' - 147: 178, # '\x93' - 148: 179, # '\x94' - 149: 180, # '\x95' - 150: 181, # '\x96' - 151: 182, # '\x97' - 152: 183, # '\x98' - 153: 184, # '\x99' - 154: 185, # '\x9a' - 155: 186, # '\x9b' - 156: 187, # '\x9c' - 157: 188, # '\x9d' - 158: 189, # '\x9e' - 159: 190, # '\x9f' - 160: 191, # '\xa0' - 161: 192, # 'Ą' - 162: 193, # '˘' - 163: 194, # 'Ł' - 164: 195, # '¤' - 165: 196, # 'Ľ' - 166: 197, # 'Ś' - 167: 75, # '§' - 168: 198, # '¨' - 169: 199, # 'Š' - 170: 200, # 'Ş' - 171: 201, # 'Ť' - 172: 202, # 'Ź' - 173: 203, # '\xad' - 174: 204, # 'Ž' - 175: 205, # 'Ż' - 176: 79, # '°' - 177: 206, # 'ą' - 178: 207, # '˛' - 179: 208, # 'ł' - 180: 209, # '´' - 181: 210, # 'ľ' - 182: 211, # 'ś' - 183: 212, # 'ˇ' - 184: 213, # '¸' - 185: 214, # 'š' - 186: 215, # 'ş' - 187: 216, # 'ť' - 188: 217, # 'ź' - 189: 218, # '˝' - 190: 219, # 'ž' - 191: 220, # 'ż' - 192: 221, # 'Ŕ' - 193: 51, # 'Á' - 194: 81, # 'Â' - 195: 222, # 'Ă' - 196: 78, # 'Ä' - 197: 223, # 'Ĺ' - 198: 224, # 'Ć' - 199: 225, # 'Ç' - 200: 226, # 'Č' - 201: 44, # 'É' - 202: 227, # 'Ę' - 203: 228, # 'Ë' - 204: 229, # 'Ě' - 205: 61, # 'Í' - 206: 230, # 'Î' - 207: 231, # 'Ď' - 208: 232, # 'Đ' - 209: 233, # 'Ń' - 210: 234, # 'Ň' - 211: 58, # 'Ó' - 212: 235, # 'Ô' - 213: 66, # 'Ő' - 214: 59, # 'Ö' - 215: 236, # '×' - 216: 237, # 'Ř' - 217: 238, # 'Ů' - 218: 60, # 'Ú' - 219: 69, # 'Ű' - 220: 63, # 'Ü' - 221: 239, # 'Ý' - 222: 240, # 'Ţ' - 223: 241, # 'ß' - 224: 82, # 'ŕ' - 225: 14, # 'á' - 226: 74, # 'â' - 227: 242, # 'ă' - 228: 70, # 'ä' - 229: 80, # 'ĺ' - 230: 243, # 'ć' - 231: 72, # 'ç' - 232: 244, # 'č' - 233: 15, # 'é' - 234: 83, # 'ę' - 235: 77, # 'ë' - 236: 84, # 'ě' - 237: 30, # 'í' - 238: 76, # 'î' - 239: 85, # 'ď' - 240: 245, # 'đ' - 241: 246, # 'ń' - 242: 247, # 'ň' - 243: 25, # 'ó' - 244: 73, # 'ô' - 245: 42, # 'ő' - 246: 24, # 'ö' - 247: 248, # '÷' - 248: 249, # 'ř' - 249: 250, # 'ů' - 250: 31, # 'ú' - 251: 56, # 'ű' - 252: 29, # 'ü' - 253: 251, # 'ý' - 254: 252, # 'ţ' - 255: 253, # '˙' + 0: 255, # '\x00' + 1: 255, # '\x01' + 2: 255, # '\x02' + 3: 255, # '\x03' + 4: 255, # '\x04' + 5: 255, # '\x05' + 6: 255, # '\x06' + 7: 255, # '\x07' + 8: 255, # '\x08' + 9: 255, # '\t' + 10: 254, # '\n' + 11: 255, # '\x0b' + 12: 255, # '\x0c' + 13: 254, # '\r' + 14: 255, # '\x0e' + 15: 255, # '\x0f' + 16: 255, # '\x10' + 17: 255, # '\x11' + 18: 255, # '\x12' + 19: 255, # '\x13' + 20: 255, # '\x14' + 21: 255, # '\x15' + 22: 255, # '\x16' + 23: 255, # '\x17' + 24: 255, # '\x18' + 25: 255, # '\x19' + 26: 255, # '\x1a' + 27: 255, # '\x1b' + 28: 255, # '\x1c' + 29: 255, # '\x1d' + 30: 255, # '\x1e' + 31: 255, # '\x1f' + 32: 253, # ' ' + 33: 253, # '!' + 34: 253, # '"' + 35: 253, # '#' + 36: 253, # '$' + 37: 253, # '%' + 38: 253, # '&' + 39: 253, # "'" + 40: 253, # '(' + 41: 253, # ')' + 42: 253, # '*' + 43: 253, # '+' + 44: 253, # ',' + 45: 253, # '-' + 46: 253, # '.' + 47: 253, # '/' + 48: 252, # '0' + 49: 252, # '1' + 50: 252, # '2' + 51: 252, # '3' + 52: 252, # '4' + 53: 252, # '5' + 54: 252, # '6' + 55: 252, # '7' + 56: 252, # '8' + 57: 252, # '9' + 58: 253, # ':' + 59: 253, # ';' + 60: 253, # '<' + 61: 253, # '=' + 62: 253, # '>' + 63: 253, # '?' + 64: 253, # '@' + 65: 28, # 'A' + 66: 40, # 'B' + 67: 54, # 'C' + 68: 45, # 'D' + 69: 32, # 'E' + 70: 50, # 'F' + 71: 49, # 'G' + 72: 38, # 'H' + 73: 39, # 'I' + 74: 53, # 'J' + 75: 36, # 'K' + 76: 41, # 'L' + 77: 34, # 'M' + 78: 35, # 'N' + 79: 47, # 'O' + 80: 46, # 'P' + 81: 71, # 'Q' + 82: 43, # 'R' + 83: 33, # 'S' + 84: 37, # 'T' + 85: 57, # 'U' + 86: 48, # 'V' + 87: 64, # 'W' + 88: 68, # 'X' + 89: 55, # 'Y' + 90: 52, # 'Z' + 91: 253, # '[' + 92: 253, # '\\' + 93: 253, # ']' + 94: 253, # '^' + 95: 253, # '_' + 96: 253, # '`' + 97: 2, # 'a' + 98: 18, # 'b' + 99: 26, # 'c' + 100: 17, # 'd' + 101: 1, # 'e' + 102: 27, # 'f' + 103: 12, # 'g' + 104: 20, # 'h' + 105: 9, # 'i' + 106: 22, # 'j' + 107: 7, # 'k' + 108: 6, # 'l' + 109: 13, # 'm' + 110: 4, # 'n' + 111: 8, # 'o' + 112: 23, # 'p' + 113: 67, # 'q' + 114: 10, # 'r' + 115: 5, # 's' + 116: 3, # 't' + 117: 21, # 'u' + 118: 19, # 'v' + 119: 65, # 'w' + 120: 62, # 'x' + 121: 16, # 'y' + 122: 11, # 'z' + 123: 253, # '{' + 124: 253, # '|' + 125: 253, # '}' + 126: 253, # '~' + 127: 253, # '\x7f' + 128: 159, # '\x80' + 129: 160, # '\x81' + 130: 161, # '\x82' + 131: 162, # '\x83' + 132: 163, # '\x84' + 133: 164, # '\x85' + 134: 165, # '\x86' + 135: 166, # '\x87' + 136: 167, # '\x88' + 137: 168, # '\x89' + 138: 169, # '\x8a' + 139: 170, # '\x8b' + 140: 171, # '\x8c' + 141: 172, # '\x8d' + 142: 173, # '\x8e' + 143: 174, # '\x8f' + 144: 175, # '\x90' + 145: 176, # '\x91' + 146: 177, # '\x92' + 147: 178, # '\x93' + 148: 179, # '\x94' + 149: 180, # '\x95' + 150: 181, # '\x96' + 151: 182, # '\x97' + 152: 183, # '\x98' + 153: 184, # '\x99' + 154: 185, # '\x9a' + 155: 186, # '\x9b' + 156: 187, # '\x9c' + 157: 188, # '\x9d' + 158: 189, # '\x9e' + 159: 190, # '\x9f' + 160: 191, # '\xa0' + 161: 192, # 'Ą' + 162: 193, # '˘' + 163: 194, # 'Ł' + 164: 195, # '¤' + 165: 196, # 'Ľ' + 166: 197, # 'Ś' + 167: 75, # '§' + 168: 198, # '¨' + 169: 199, # 'Š' + 170: 200, # 'Ş' + 171: 201, # 'Ť' + 172: 202, # 'Ź' + 173: 203, # '\xad' + 174: 204, # 'Ž' + 175: 205, # 'Ż' + 176: 79, # '°' + 177: 206, # 'ą' + 178: 207, # '˛' + 179: 208, # 'ł' + 180: 209, # '´' + 181: 210, # 'ľ' + 182: 211, # 'ś' + 183: 212, # 'ˇ' + 184: 213, # '¸' + 185: 214, # 'š' + 186: 215, # 'ş' + 187: 216, # 'ť' + 188: 217, # 'ź' + 189: 218, # '˝' + 190: 219, # 'ž' + 191: 220, # 'ż' + 192: 221, # 'Ŕ' + 193: 51, # 'Á' + 194: 81, # 'Â' + 195: 222, # 'Ă' + 196: 78, # 'Ä' + 197: 223, # 'Ĺ' + 198: 224, # 'Ć' + 199: 225, # 'Ç' + 200: 226, # 'Č' + 201: 44, # 'É' + 202: 227, # 'Ę' + 203: 228, # 'Ë' + 204: 229, # 'Ě' + 205: 61, # 'Í' + 206: 230, # 'Î' + 207: 231, # 'Ď' + 208: 232, # 'Đ' + 209: 233, # 'Ń' + 210: 234, # 'Ň' + 211: 58, # 'Ó' + 212: 235, # 'Ô' + 213: 66, # 'Ő' + 214: 59, # 'Ö' + 215: 236, # '×' + 216: 237, # 'Ř' + 217: 238, # 'Ů' + 218: 60, # 'Ú' + 219: 69, # 'Ű' + 220: 63, # 'Ü' + 221: 239, # 'Ý' + 222: 240, # 'Ţ' + 223: 241, # 'ß' + 224: 82, # 'ŕ' + 225: 14, # 'á' + 226: 74, # 'â' + 227: 242, # 'ă' + 228: 70, # 'ä' + 229: 80, # 'ĺ' + 230: 243, # 'ć' + 231: 72, # 'ç' + 232: 244, # 'č' + 233: 15, # 'é' + 234: 83, # 'ę' + 235: 77, # 'ë' + 236: 84, # 'ě' + 237: 30, # 'í' + 238: 76, # 'î' + 239: 85, # 'ď' + 240: 245, # 'đ' + 241: 246, # 'ń' + 242: 247, # 'ň' + 243: 25, # 'ó' + 244: 73, # 'ô' + 245: 42, # 'ő' + 246: 24, # 'ö' + 247: 248, # '÷' + 248: 249, # 'ř' + 249: 250, # 'ů' + 250: 31, # 'ú' + 251: 56, # 'ű' + 252: 29, # 'ü' + 253: 251, # 'ý' + 254: 252, # 'ţ' + 255: 253, # '˙' } -ISO_8859_2_HUNGARIAN_MODEL = SingleByteCharSetModel(charset_name='ISO-8859-2', - language='Hungarian', - char_to_order_map=ISO_8859_2_HUNGARIAN_CHAR_TO_ORDER, - language_model=HUNGARIAN_LANG_MODEL, - typical_positive_ratio=0.947368, - keep_ascii_letters=True, - alphabet='ABCDEFGHIJKLMNOPRSTUVZabcdefghijklmnoprstuvzÁÉÍÓÖÚÜáéíóöúüŐőŰű') - +ISO_8859_2_HUNGARIAN_MODEL = SingleByteCharSetModel( + charset_name="ISO-8859-2", + language="Hungarian", + char_to_order_map=ISO_8859_2_HUNGARIAN_CHAR_TO_ORDER, + language_model=HUNGARIAN_LANG_MODEL, + typical_positive_ratio=0.947368, + keep_ascii_letters=True, + alphabet="ABCDEFGHIJKLMNOPRSTUVZabcdefghijklmnoprstuvzÁÉÍÓÖÚÜáéíóöúüŐőŰű", +) diff --git a/libs/common/chardet/langrussianmodel.py b/libs/common/chardet/langrussianmodel.py index 569689d0..0d5b1784 100644 --- a/libs/common/chardet/langrussianmodel.py +++ b/libs/common/chardet/langrussianmodel.py @@ -1,9 +1,5 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - from chardet.sbcharsetprober import SingleByteCharSetModel - # 3: Positive # 2: Likely # 1: Unlikely @@ -4115,1604 +4111,1615 @@ RUSSIAN_LANG_MODEL = { # Character Mapping Table(s): IBM866_RUSSIAN_CHAR_TO_ORDER = { - 0: 255, # '\x00' - 1: 255, # '\x01' - 2: 255, # '\x02' - 3: 255, # '\x03' - 4: 255, # '\x04' - 5: 255, # '\x05' - 6: 255, # '\x06' - 7: 255, # '\x07' - 8: 255, # '\x08' - 9: 255, # '\t' - 10: 254, # '\n' - 11: 255, # '\x0b' - 12: 255, # '\x0c' - 13: 254, # '\r' - 14: 255, # '\x0e' - 15: 255, # '\x0f' - 16: 255, # '\x10' - 17: 255, # '\x11' - 18: 255, # '\x12' - 19: 255, # '\x13' - 20: 255, # '\x14' - 21: 255, # '\x15' - 22: 255, # '\x16' - 23: 255, # '\x17' - 24: 255, # '\x18' - 25: 255, # '\x19' - 26: 255, # '\x1a' - 27: 255, # '\x1b' - 28: 255, # '\x1c' - 29: 255, # '\x1d' - 30: 255, # '\x1e' - 31: 255, # '\x1f' - 32: 253, # ' ' - 33: 253, # '!' - 34: 253, # '"' - 35: 253, # '#' - 36: 253, # '$' - 37: 253, # '%' - 38: 253, # '&' - 39: 253, # "'" - 40: 253, # '(' - 41: 253, # ')' - 42: 253, # '*' - 43: 253, # '+' - 44: 253, # ',' - 45: 253, # '-' - 46: 253, # '.' - 47: 253, # '/' - 48: 252, # '0' - 49: 252, # '1' - 50: 252, # '2' - 51: 252, # '3' - 52: 252, # '4' - 53: 252, # '5' - 54: 252, # '6' - 55: 252, # '7' - 56: 252, # '8' - 57: 252, # '9' - 58: 253, # ':' - 59: 253, # ';' - 60: 253, # '<' - 61: 253, # '=' - 62: 253, # '>' - 63: 253, # '?' - 64: 253, # '@' - 65: 142, # 'A' - 66: 143, # 'B' - 67: 144, # 'C' - 68: 145, # 'D' - 69: 146, # 'E' - 70: 147, # 'F' - 71: 148, # 'G' - 72: 149, # 'H' - 73: 150, # 'I' - 74: 151, # 'J' - 75: 152, # 'K' - 76: 74, # 'L' - 77: 153, # 'M' - 78: 75, # 'N' - 79: 154, # 'O' - 80: 155, # 'P' - 81: 156, # 'Q' - 82: 157, # 'R' - 83: 158, # 'S' - 84: 159, # 'T' - 85: 160, # 'U' - 86: 161, # 'V' - 87: 162, # 'W' - 88: 163, # 'X' - 89: 164, # 'Y' - 90: 165, # 'Z' - 91: 253, # '[' - 92: 253, # '\\' - 93: 253, # ']' - 94: 253, # '^' - 95: 253, # '_' - 96: 253, # '`' - 97: 71, # 'a' - 98: 172, # 'b' - 99: 66, # 'c' - 100: 173, # 'd' - 101: 65, # 'e' - 102: 174, # 'f' - 103: 76, # 'g' - 104: 175, # 'h' - 105: 64, # 'i' - 106: 176, # 'j' - 107: 177, # 'k' - 108: 77, # 'l' - 109: 72, # 'm' - 110: 178, # 'n' - 111: 69, # 'o' - 112: 67, # 'p' - 113: 179, # 'q' - 114: 78, # 'r' - 115: 73, # 's' - 116: 180, # 't' - 117: 181, # 'u' - 118: 79, # 'v' - 119: 182, # 'w' - 120: 183, # 'x' - 121: 184, # 'y' - 122: 185, # 'z' - 123: 253, # '{' - 124: 253, # '|' - 125: 253, # '}' - 126: 253, # '~' - 127: 253, # '\x7f' - 128: 37, # 'А' - 129: 44, # 'Б' - 130: 33, # 'В' - 131: 46, # 'Г' - 132: 41, # 'Д' - 133: 48, # 'Е' - 134: 56, # 'Ж' - 135: 51, # 'З' - 136: 42, # 'И' - 137: 60, # 'Й' - 138: 36, # 'К' - 139: 49, # 'Л' - 140: 38, # 'М' - 141: 31, # 'Н' - 142: 34, # 'О' - 143: 35, # 'П' - 144: 45, # 'Р' - 145: 32, # 'С' - 146: 40, # 'Т' - 147: 52, # 'У' - 148: 53, # 'Ф' - 149: 55, # 'Х' - 150: 58, # 'Ц' - 151: 50, # 'Ч' - 152: 57, # 'Ш' - 153: 63, # 'Щ' - 154: 70, # 'Ъ' - 155: 62, # 'Ы' - 156: 61, # 'Ь' - 157: 47, # 'Э' - 158: 59, # 'Ю' - 159: 43, # 'Я' - 160: 3, # 'а' - 161: 21, # 'б' - 162: 10, # 'в' - 163: 19, # 'г' - 164: 13, # 'д' - 165: 2, # 'е' - 166: 24, # 'ж' - 167: 20, # 'з' - 168: 4, # 'и' - 169: 23, # 'й' - 170: 11, # 'к' - 171: 8, # 'л' - 172: 12, # 'м' - 173: 5, # 'н' - 174: 1, # 'о' - 175: 15, # 'п' - 176: 191, # '░' - 177: 192, # '▒' - 178: 193, # '▓' - 179: 194, # '│' - 180: 195, # '┤' - 181: 196, # '╡' - 182: 197, # '╢' - 183: 198, # '╖' - 184: 199, # '╕' - 185: 200, # '╣' - 186: 201, # '║' - 187: 202, # '╗' - 188: 203, # '╝' - 189: 204, # '╜' - 190: 205, # '╛' - 191: 206, # '┐' - 192: 207, # '└' - 193: 208, # '┴' - 194: 209, # '┬' - 195: 210, # '├' - 196: 211, # '─' - 197: 212, # '┼' - 198: 213, # '╞' - 199: 214, # '╟' - 200: 215, # '╚' - 201: 216, # '╔' - 202: 217, # '╩' - 203: 218, # '╦' - 204: 219, # '╠' - 205: 220, # '═' - 206: 221, # '╬' - 207: 222, # '╧' - 208: 223, # '╨' - 209: 224, # '╤' - 210: 225, # '╥' - 211: 226, # '╙' - 212: 227, # '╘' - 213: 228, # '╒' - 214: 229, # '╓' - 215: 230, # '╫' - 216: 231, # '╪' - 217: 232, # '┘' - 218: 233, # '┌' - 219: 234, # '█' - 220: 235, # '▄' - 221: 236, # '▌' - 222: 237, # '▐' - 223: 238, # '▀' - 224: 9, # 'р' - 225: 7, # 'с' - 226: 6, # 'т' - 227: 14, # 'у' - 228: 39, # 'ф' - 229: 26, # 'х' - 230: 28, # 'ц' - 231: 22, # 'ч' - 232: 25, # 'ш' - 233: 29, # 'щ' - 234: 54, # 'ъ' - 235: 18, # 'ы' - 236: 17, # 'ь' - 237: 30, # 'э' - 238: 27, # 'ю' - 239: 16, # 'я' - 240: 239, # 'Ё' - 241: 68, # 'ё' - 242: 240, # 'Є' - 243: 241, # 'є' - 244: 242, # 'Ї' - 245: 243, # 'ї' - 246: 244, # 'Ў' - 247: 245, # 'ў' - 248: 246, # '°' - 249: 247, # '∙' - 250: 248, # '·' - 251: 249, # '√' - 252: 250, # '№' - 253: 251, # '¤' - 254: 252, # '■' - 255: 255, # '\xa0' + 0: 255, # '\x00' + 1: 255, # '\x01' + 2: 255, # '\x02' + 3: 255, # '\x03' + 4: 255, # '\x04' + 5: 255, # '\x05' + 6: 255, # '\x06' + 7: 255, # '\x07' + 8: 255, # '\x08' + 9: 255, # '\t' + 10: 254, # '\n' + 11: 255, # '\x0b' + 12: 255, # '\x0c' + 13: 254, # '\r' + 14: 255, # '\x0e' + 15: 255, # '\x0f' + 16: 255, # '\x10' + 17: 255, # '\x11' + 18: 255, # '\x12' + 19: 255, # '\x13' + 20: 255, # '\x14' + 21: 255, # '\x15' + 22: 255, # '\x16' + 23: 255, # '\x17' + 24: 255, # '\x18' + 25: 255, # '\x19' + 26: 255, # '\x1a' + 27: 255, # '\x1b' + 28: 255, # '\x1c' + 29: 255, # '\x1d' + 30: 255, # '\x1e' + 31: 255, # '\x1f' + 32: 253, # ' ' + 33: 253, # '!' + 34: 253, # '"' + 35: 253, # '#' + 36: 253, # '$' + 37: 253, # '%' + 38: 253, # '&' + 39: 253, # "'" + 40: 253, # '(' + 41: 253, # ')' + 42: 253, # '*' + 43: 253, # '+' + 44: 253, # ',' + 45: 253, # '-' + 46: 253, # '.' + 47: 253, # '/' + 48: 252, # '0' + 49: 252, # '1' + 50: 252, # '2' + 51: 252, # '3' + 52: 252, # '4' + 53: 252, # '5' + 54: 252, # '6' + 55: 252, # '7' + 56: 252, # '8' + 57: 252, # '9' + 58: 253, # ':' + 59: 253, # ';' + 60: 253, # '<' + 61: 253, # '=' + 62: 253, # '>' + 63: 253, # '?' + 64: 253, # '@' + 65: 142, # 'A' + 66: 143, # 'B' + 67: 144, # 'C' + 68: 145, # 'D' + 69: 146, # 'E' + 70: 147, # 'F' + 71: 148, # 'G' + 72: 149, # 'H' + 73: 150, # 'I' + 74: 151, # 'J' + 75: 152, # 'K' + 76: 74, # 'L' + 77: 153, # 'M' + 78: 75, # 'N' + 79: 154, # 'O' + 80: 155, # 'P' + 81: 156, # 'Q' + 82: 157, # 'R' + 83: 158, # 'S' + 84: 159, # 'T' + 85: 160, # 'U' + 86: 161, # 'V' + 87: 162, # 'W' + 88: 163, # 'X' + 89: 164, # 'Y' + 90: 165, # 'Z' + 91: 253, # '[' + 92: 253, # '\\' + 93: 253, # ']' + 94: 253, # '^' + 95: 253, # '_' + 96: 253, # '`' + 97: 71, # 'a' + 98: 172, # 'b' + 99: 66, # 'c' + 100: 173, # 'd' + 101: 65, # 'e' + 102: 174, # 'f' + 103: 76, # 'g' + 104: 175, # 'h' + 105: 64, # 'i' + 106: 176, # 'j' + 107: 177, # 'k' + 108: 77, # 'l' + 109: 72, # 'm' + 110: 178, # 'n' + 111: 69, # 'o' + 112: 67, # 'p' + 113: 179, # 'q' + 114: 78, # 'r' + 115: 73, # 's' + 116: 180, # 't' + 117: 181, # 'u' + 118: 79, # 'v' + 119: 182, # 'w' + 120: 183, # 'x' + 121: 184, # 'y' + 122: 185, # 'z' + 123: 253, # '{' + 124: 253, # '|' + 125: 253, # '}' + 126: 253, # '~' + 127: 253, # '\x7f' + 128: 37, # 'А' + 129: 44, # 'Б' + 130: 33, # 'В' + 131: 46, # 'Г' + 132: 41, # 'Д' + 133: 48, # 'Е' + 134: 56, # 'Ж' + 135: 51, # 'З' + 136: 42, # 'И' + 137: 60, # 'Й' + 138: 36, # 'К' + 139: 49, # 'Л' + 140: 38, # 'М' + 141: 31, # 'Н' + 142: 34, # 'О' + 143: 35, # 'П' + 144: 45, # 'Р' + 145: 32, # 'С' + 146: 40, # 'Т' + 147: 52, # 'У' + 148: 53, # 'Ф' + 149: 55, # 'Х' + 150: 58, # 'Ц' + 151: 50, # 'Ч' + 152: 57, # 'Ш' + 153: 63, # 'Щ' + 154: 70, # 'Ъ' + 155: 62, # 'Ы' + 156: 61, # 'Ь' + 157: 47, # 'Э' + 158: 59, # 'Ю' + 159: 43, # 'Я' + 160: 3, # 'а' + 161: 21, # 'б' + 162: 10, # 'в' + 163: 19, # 'г' + 164: 13, # 'д' + 165: 2, # 'е' + 166: 24, # 'ж' + 167: 20, # 'з' + 168: 4, # 'и' + 169: 23, # 'й' + 170: 11, # 'к' + 171: 8, # 'л' + 172: 12, # 'м' + 173: 5, # 'н' + 174: 1, # 'о' + 175: 15, # 'п' + 176: 191, # '░' + 177: 192, # '▒' + 178: 193, # '▓' + 179: 194, # '│' + 180: 195, # '┤' + 181: 196, # '╡' + 182: 197, # '╢' + 183: 198, # '╖' + 184: 199, # '╕' + 185: 200, # '╣' + 186: 201, # '║' + 187: 202, # '╗' + 188: 203, # '╝' + 189: 204, # '╜' + 190: 205, # '╛' + 191: 206, # '┐' + 192: 207, # '└' + 193: 208, # '┴' + 194: 209, # '┬' + 195: 210, # '├' + 196: 211, # '─' + 197: 212, # '┼' + 198: 213, # '╞' + 199: 214, # '╟' + 200: 215, # '╚' + 201: 216, # '╔' + 202: 217, # '╩' + 203: 218, # '╦' + 204: 219, # '╠' + 205: 220, # '═' + 206: 221, # '╬' + 207: 222, # '╧' + 208: 223, # '╨' + 209: 224, # '╤' + 210: 225, # '╥' + 211: 226, # '╙' + 212: 227, # '╘' + 213: 228, # '╒' + 214: 229, # '╓' + 215: 230, # '╫' + 216: 231, # '╪' + 217: 232, # '┘' + 218: 233, # '┌' + 219: 234, # '█' + 220: 235, # '▄' + 221: 236, # '▌' + 222: 237, # '▐' + 223: 238, # '▀' + 224: 9, # 'р' + 225: 7, # 'с' + 226: 6, # 'т' + 227: 14, # 'у' + 228: 39, # 'ф' + 229: 26, # 'х' + 230: 28, # 'ц' + 231: 22, # 'ч' + 232: 25, # 'ш' + 233: 29, # 'щ' + 234: 54, # 'ъ' + 235: 18, # 'ы' + 236: 17, # 'ь' + 237: 30, # 'э' + 238: 27, # 'ю' + 239: 16, # 'я' + 240: 239, # 'Ё' + 241: 68, # 'ё' + 242: 240, # 'Є' + 243: 241, # 'є' + 244: 242, # 'Ї' + 245: 243, # 'ї' + 246: 244, # 'Ў' + 247: 245, # 'ў' + 248: 246, # '°' + 249: 247, # '∙' + 250: 248, # '·' + 251: 249, # '√' + 252: 250, # '№' + 253: 251, # '¤' + 254: 252, # '■' + 255: 255, # '\xa0' } -IBM866_RUSSIAN_MODEL = SingleByteCharSetModel(charset_name='IBM866', - language='Russian', - char_to_order_map=IBM866_RUSSIAN_CHAR_TO_ORDER, - language_model=RUSSIAN_LANG_MODEL, - typical_positive_ratio=0.976601, - keep_ascii_letters=False, - alphabet='ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё') +IBM866_RUSSIAN_MODEL = SingleByteCharSetModel( + charset_name="IBM866", + language="Russian", + char_to_order_map=IBM866_RUSSIAN_CHAR_TO_ORDER, + language_model=RUSSIAN_LANG_MODEL, + typical_positive_ratio=0.976601, + keep_ascii_letters=False, + alphabet="ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё", +) WINDOWS_1251_RUSSIAN_CHAR_TO_ORDER = { - 0: 255, # '\x00' - 1: 255, # '\x01' - 2: 255, # '\x02' - 3: 255, # '\x03' - 4: 255, # '\x04' - 5: 255, # '\x05' - 6: 255, # '\x06' - 7: 255, # '\x07' - 8: 255, # '\x08' - 9: 255, # '\t' - 10: 254, # '\n' - 11: 255, # '\x0b' - 12: 255, # '\x0c' - 13: 254, # '\r' - 14: 255, # '\x0e' - 15: 255, # '\x0f' - 16: 255, # '\x10' - 17: 255, # '\x11' - 18: 255, # '\x12' - 19: 255, # '\x13' - 20: 255, # '\x14' - 21: 255, # '\x15' - 22: 255, # '\x16' - 23: 255, # '\x17' - 24: 255, # '\x18' - 25: 255, # '\x19' - 26: 255, # '\x1a' - 27: 255, # '\x1b' - 28: 255, # '\x1c' - 29: 255, # '\x1d' - 30: 255, # '\x1e' - 31: 255, # '\x1f' - 32: 253, # ' ' - 33: 253, # '!' - 34: 253, # '"' - 35: 253, # '#' - 36: 253, # '$' - 37: 253, # '%' - 38: 253, # '&' - 39: 253, # "'" - 40: 253, # '(' - 41: 253, # ')' - 42: 253, # '*' - 43: 253, # '+' - 44: 253, # ',' - 45: 253, # '-' - 46: 253, # '.' - 47: 253, # '/' - 48: 252, # '0' - 49: 252, # '1' - 50: 252, # '2' - 51: 252, # '3' - 52: 252, # '4' - 53: 252, # '5' - 54: 252, # '6' - 55: 252, # '7' - 56: 252, # '8' - 57: 252, # '9' - 58: 253, # ':' - 59: 253, # ';' - 60: 253, # '<' - 61: 253, # '=' - 62: 253, # '>' - 63: 253, # '?' - 64: 253, # '@' - 65: 142, # 'A' - 66: 143, # 'B' - 67: 144, # 'C' - 68: 145, # 'D' - 69: 146, # 'E' - 70: 147, # 'F' - 71: 148, # 'G' - 72: 149, # 'H' - 73: 150, # 'I' - 74: 151, # 'J' - 75: 152, # 'K' - 76: 74, # 'L' - 77: 153, # 'M' - 78: 75, # 'N' - 79: 154, # 'O' - 80: 155, # 'P' - 81: 156, # 'Q' - 82: 157, # 'R' - 83: 158, # 'S' - 84: 159, # 'T' - 85: 160, # 'U' - 86: 161, # 'V' - 87: 162, # 'W' - 88: 163, # 'X' - 89: 164, # 'Y' - 90: 165, # 'Z' - 91: 253, # '[' - 92: 253, # '\\' - 93: 253, # ']' - 94: 253, # '^' - 95: 253, # '_' - 96: 253, # '`' - 97: 71, # 'a' - 98: 172, # 'b' - 99: 66, # 'c' - 100: 173, # 'd' - 101: 65, # 'e' - 102: 174, # 'f' - 103: 76, # 'g' - 104: 175, # 'h' - 105: 64, # 'i' - 106: 176, # 'j' - 107: 177, # 'k' - 108: 77, # 'l' - 109: 72, # 'm' - 110: 178, # 'n' - 111: 69, # 'o' - 112: 67, # 'p' - 113: 179, # 'q' - 114: 78, # 'r' - 115: 73, # 's' - 116: 180, # 't' - 117: 181, # 'u' - 118: 79, # 'v' - 119: 182, # 'w' - 120: 183, # 'x' - 121: 184, # 'y' - 122: 185, # 'z' - 123: 253, # '{' - 124: 253, # '|' - 125: 253, # '}' - 126: 253, # '~' - 127: 253, # '\x7f' - 128: 191, # 'Ђ' - 129: 192, # 'Ѓ' - 130: 193, # '‚' - 131: 194, # 'ѓ' - 132: 195, # '„' - 133: 196, # '…' - 134: 197, # '†' - 135: 198, # '‡' - 136: 199, # '€' - 137: 200, # '‰' - 138: 201, # 'Љ' - 139: 202, # '‹' - 140: 203, # 'Њ' - 141: 204, # 'Ќ' - 142: 205, # 'Ћ' - 143: 206, # 'Џ' - 144: 207, # 'ђ' - 145: 208, # '‘' - 146: 209, # '’' - 147: 210, # '“' - 148: 211, # '”' - 149: 212, # '•' - 150: 213, # '–' - 151: 214, # '—' - 152: 215, # None - 153: 216, # '™' - 154: 217, # 'љ' - 155: 218, # '›' - 156: 219, # 'њ' - 157: 220, # 'ќ' - 158: 221, # 'ћ' - 159: 222, # 'џ' - 160: 223, # '\xa0' - 161: 224, # 'Ў' - 162: 225, # 'ў' - 163: 226, # 'Ј' - 164: 227, # '¤' - 165: 228, # 'Ґ' - 166: 229, # '¦' - 167: 230, # '§' - 168: 231, # 'Ё' - 169: 232, # '©' - 170: 233, # 'Є' - 171: 234, # '«' - 172: 235, # '¬' - 173: 236, # '\xad' - 174: 237, # '®' - 175: 238, # 'Ї' - 176: 239, # '°' - 177: 240, # '±' - 178: 241, # 'І' - 179: 242, # 'і' - 180: 243, # 'ґ' - 181: 244, # 'µ' - 182: 245, # '¶' - 183: 246, # '·' - 184: 68, # 'ё' - 185: 247, # '№' - 186: 248, # 'є' - 187: 249, # '»' - 188: 250, # 'ј' - 189: 251, # 'Ѕ' - 190: 252, # 'ѕ' - 191: 253, # 'ї' - 192: 37, # 'А' - 193: 44, # 'Б' - 194: 33, # 'В' - 195: 46, # 'Г' - 196: 41, # 'Д' - 197: 48, # 'Е' - 198: 56, # 'Ж' - 199: 51, # 'З' - 200: 42, # 'И' - 201: 60, # 'Й' - 202: 36, # 'К' - 203: 49, # 'Л' - 204: 38, # 'М' - 205: 31, # 'Н' - 206: 34, # 'О' - 207: 35, # 'П' - 208: 45, # 'Р' - 209: 32, # 'С' - 210: 40, # 'Т' - 211: 52, # 'У' - 212: 53, # 'Ф' - 213: 55, # 'Х' - 214: 58, # 'Ц' - 215: 50, # 'Ч' - 216: 57, # 'Ш' - 217: 63, # 'Щ' - 218: 70, # 'Ъ' - 219: 62, # 'Ы' - 220: 61, # 'Ь' - 221: 47, # 'Э' - 222: 59, # 'Ю' - 223: 43, # 'Я' - 224: 3, # 'а' - 225: 21, # 'б' - 226: 10, # 'в' - 227: 19, # 'г' - 228: 13, # 'д' - 229: 2, # 'е' - 230: 24, # 'ж' - 231: 20, # 'з' - 232: 4, # 'и' - 233: 23, # 'й' - 234: 11, # 'к' - 235: 8, # 'л' - 236: 12, # 'м' - 237: 5, # 'н' - 238: 1, # 'о' - 239: 15, # 'п' - 240: 9, # 'р' - 241: 7, # 'с' - 242: 6, # 'т' - 243: 14, # 'у' - 244: 39, # 'ф' - 245: 26, # 'х' - 246: 28, # 'ц' - 247: 22, # 'ч' - 248: 25, # 'ш' - 249: 29, # 'щ' - 250: 54, # 'ъ' - 251: 18, # 'ы' - 252: 17, # 'ь' - 253: 30, # 'э' - 254: 27, # 'ю' - 255: 16, # 'я' + 0: 255, # '\x00' + 1: 255, # '\x01' + 2: 255, # '\x02' + 3: 255, # '\x03' + 4: 255, # '\x04' + 5: 255, # '\x05' + 6: 255, # '\x06' + 7: 255, # '\x07' + 8: 255, # '\x08' + 9: 255, # '\t' + 10: 254, # '\n' + 11: 255, # '\x0b' + 12: 255, # '\x0c' + 13: 254, # '\r' + 14: 255, # '\x0e' + 15: 255, # '\x0f' + 16: 255, # '\x10' + 17: 255, # '\x11' + 18: 255, # '\x12' + 19: 255, # '\x13' + 20: 255, # '\x14' + 21: 255, # '\x15' + 22: 255, # '\x16' + 23: 255, # '\x17' + 24: 255, # '\x18' + 25: 255, # '\x19' + 26: 255, # '\x1a' + 27: 255, # '\x1b' + 28: 255, # '\x1c' + 29: 255, # '\x1d' + 30: 255, # '\x1e' + 31: 255, # '\x1f' + 32: 253, # ' ' + 33: 253, # '!' + 34: 253, # '"' + 35: 253, # '#' + 36: 253, # '$' + 37: 253, # '%' + 38: 253, # '&' + 39: 253, # "'" + 40: 253, # '(' + 41: 253, # ')' + 42: 253, # '*' + 43: 253, # '+' + 44: 253, # ',' + 45: 253, # '-' + 46: 253, # '.' + 47: 253, # '/' + 48: 252, # '0' + 49: 252, # '1' + 50: 252, # '2' + 51: 252, # '3' + 52: 252, # '4' + 53: 252, # '5' + 54: 252, # '6' + 55: 252, # '7' + 56: 252, # '8' + 57: 252, # '9' + 58: 253, # ':' + 59: 253, # ';' + 60: 253, # '<' + 61: 253, # '=' + 62: 253, # '>' + 63: 253, # '?' + 64: 253, # '@' + 65: 142, # 'A' + 66: 143, # 'B' + 67: 144, # 'C' + 68: 145, # 'D' + 69: 146, # 'E' + 70: 147, # 'F' + 71: 148, # 'G' + 72: 149, # 'H' + 73: 150, # 'I' + 74: 151, # 'J' + 75: 152, # 'K' + 76: 74, # 'L' + 77: 153, # 'M' + 78: 75, # 'N' + 79: 154, # 'O' + 80: 155, # 'P' + 81: 156, # 'Q' + 82: 157, # 'R' + 83: 158, # 'S' + 84: 159, # 'T' + 85: 160, # 'U' + 86: 161, # 'V' + 87: 162, # 'W' + 88: 163, # 'X' + 89: 164, # 'Y' + 90: 165, # 'Z' + 91: 253, # '[' + 92: 253, # '\\' + 93: 253, # ']' + 94: 253, # '^' + 95: 253, # '_' + 96: 253, # '`' + 97: 71, # 'a' + 98: 172, # 'b' + 99: 66, # 'c' + 100: 173, # 'd' + 101: 65, # 'e' + 102: 174, # 'f' + 103: 76, # 'g' + 104: 175, # 'h' + 105: 64, # 'i' + 106: 176, # 'j' + 107: 177, # 'k' + 108: 77, # 'l' + 109: 72, # 'm' + 110: 178, # 'n' + 111: 69, # 'o' + 112: 67, # 'p' + 113: 179, # 'q' + 114: 78, # 'r' + 115: 73, # 's' + 116: 180, # 't' + 117: 181, # 'u' + 118: 79, # 'v' + 119: 182, # 'w' + 120: 183, # 'x' + 121: 184, # 'y' + 122: 185, # 'z' + 123: 253, # '{' + 124: 253, # '|' + 125: 253, # '}' + 126: 253, # '~' + 127: 253, # '\x7f' + 128: 191, # 'Ђ' + 129: 192, # 'Ѓ' + 130: 193, # '‚' + 131: 194, # 'ѓ' + 132: 195, # '„' + 133: 196, # '…' + 134: 197, # '†' + 135: 198, # '‡' + 136: 199, # '€' + 137: 200, # '‰' + 138: 201, # 'Љ' + 139: 202, # '‹' + 140: 203, # 'Њ' + 141: 204, # 'Ќ' + 142: 205, # 'Ћ' + 143: 206, # 'Џ' + 144: 207, # 'ђ' + 145: 208, # '‘' + 146: 209, # '’' + 147: 210, # '“' + 148: 211, # '”' + 149: 212, # '•' + 150: 213, # '–' + 151: 214, # '—' + 152: 215, # None + 153: 216, # '™' + 154: 217, # 'љ' + 155: 218, # '›' + 156: 219, # 'њ' + 157: 220, # 'ќ' + 158: 221, # 'ћ' + 159: 222, # 'џ' + 160: 223, # '\xa0' + 161: 224, # 'Ў' + 162: 225, # 'ў' + 163: 226, # 'Ј' + 164: 227, # '¤' + 165: 228, # 'Ґ' + 166: 229, # '¦' + 167: 230, # '§' + 168: 231, # 'Ё' + 169: 232, # '©' + 170: 233, # 'Є' + 171: 234, # '«' + 172: 235, # '¬' + 173: 236, # '\xad' + 174: 237, # '®' + 175: 238, # 'Ї' + 176: 239, # '°' + 177: 240, # '±' + 178: 241, # 'І' + 179: 242, # 'і' + 180: 243, # 'ґ' + 181: 244, # 'µ' + 182: 245, # '¶' + 183: 246, # '·' + 184: 68, # 'ё' + 185: 247, # '№' + 186: 248, # 'є' + 187: 249, # '»' + 188: 250, # 'ј' + 189: 251, # 'Ѕ' + 190: 252, # 'ѕ' + 191: 253, # 'ї' + 192: 37, # 'А' + 193: 44, # 'Б' + 194: 33, # 'В' + 195: 46, # 'Г' + 196: 41, # 'Д' + 197: 48, # 'Е' + 198: 56, # 'Ж' + 199: 51, # 'З' + 200: 42, # 'И' + 201: 60, # 'Й' + 202: 36, # 'К' + 203: 49, # 'Л' + 204: 38, # 'М' + 205: 31, # 'Н' + 206: 34, # 'О' + 207: 35, # 'П' + 208: 45, # 'Р' + 209: 32, # 'С' + 210: 40, # 'Т' + 211: 52, # 'У' + 212: 53, # 'Ф' + 213: 55, # 'Х' + 214: 58, # 'Ц' + 215: 50, # 'Ч' + 216: 57, # 'Ш' + 217: 63, # 'Щ' + 218: 70, # 'Ъ' + 219: 62, # 'Ы' + 220: 61, # 'Ь' + 221: 47, # 'Э' + 222: 59, # 'Ю' + 223: 43, # 'Я' + 224: 3, # 'а' + 225: 21, # 'б' + 226: 10, # 'в' + 227: 19, # 'г' + 228: 13, # 'д' + 229: 2, # 'е' + 230: 24, # 'ж' + 231: 20, # 'з' + 232: 4, # 'и' + 233: 23, # 'й' + 234: 11, # 'к' + 235: 8, # 'л' + 236: 12, # 'м' + 237: 5, # 'н' + 238: 1, # 'о' + 239: 15, # 'п' + 240: 9, # 'р' + 241: 7, # 'с' + 242: 6, # 'т' + 243: 14, # 'у' + 244: 39, # 'ф' + 245: 26, # 'х' + 246: 28, # 'ц' + 247: 22, # 'ч' + 248: 25, # 'ш' + 249: 29, # 'щ' + 250: 54, # 'ъ' + 251: 18, # 'ы' + 252: 17, # 'ь' + 253: 30, # 'э' + 254: 27, # 'ю' + 255: 16, # 'я' } -WINDOWS_1251_RUSSIAN_MODEL = SingleByteCharSetModel(charset_name='windows-1251', - language='Russian', - char_to_order_map=WINDOWS_1251_RUSSIAN_CHAR_TO_ORDER, - language_model=RUSSIAN_LANG_MODEL, - typical_positive_ratio=0.976601, - keep_ascii_letters=False, - alphabet='ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё') +WINDOWS_1251_RUSSIAN_MODEL = SingleByteCharSetModel( + charset_name="windows-1251", + language="Russian", + char_to_order_map=WINDOWS_1251_RUSSIAN_CHAR_TO_ORDER, + language_model=RUSSIAN_LANG_MODEL, + typical_positive_ratio=0.976601, + keep_ascii_letters=False, + alphabet="ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё", +) IBM855_RUSSIAN_CHAR_TO_ORDER = { - 0: 255, # '\x00' - 1: 255, # '\x01' - 2: 255, # '\x02' - 3: 255, # '\x03' - 4: 255, # '\x04' - 5: 255, # '\x05' - 6: 255, # '\x06' - 7: 255, # '\x07' - 8: 255, # '\x08' - 9: 255, # '\t' - 10: 254, # '\n' - 11: 255, # '\x0b' - 12: 255, # '\x0c' - 13: 254, # '\r' - 14: 255, # '\x0e' - 15: 255, # '\x0f' - 16: 255, # '\x10' - 17: 255, # '\x11' - 18: 255, # '\x12' - 19: 255, # '\x13' - 20: 255, # '\x14' - 21: 255, # '\x15' - 22: 255, # '\x16' - 23: 255, # '\x17' - 24: 255, # '\x18' - 25: 255, # '\x19' - 26: 255, # '\x1a' - 27: 255, # '\x1b' - 28: 255, # '\x1c' - 29: 255, # '\x1d' - 30: 255, # '\x1e' - 31: 255, # '\x1f' - 32: 253, # ' ' - 33: 253, # '!' - 34: 253, # '"' - 35: 253, # '#' - 36: 253, # '$' - 37: 253, # '%' - 38: 253, # '&' - 39: 253, # "'" - 40: 253, # '(' - 41: 253, # ')' - 42: 253, # '*' - 43: 253, # '+' - 44: 253, # ',' - 45: 253, # '-' - 46: 253, # '.' - 47: 253, # '/' - 48: 252, # '0' - 49: 252, # '1' - 50: 252, # '2' - 51: 252, # '3' - 52: 252, # '4' - 53: 252, # '5' - 54: 252, # '6' - 55: 252, # '7' - 56: 252, # '8' - 57: 252, # '9' - 58: 253, # ':' - 59: 253, # ';' - 60: 253, # '<' - 61: 253, # '=' - 62: 253, # '>' - 63: 253, # '?' - 64: 253, # '@' - 65: 142, # 'A' - 66: 143, # 'B' - 67: 144, # 'C' - 68: 145, # 'D' - 69: 146, # 'E' - 70: 147, # 'F' - 71: 148, # 'G' - 72: 149, # 'H' - 73: 150, # 'I' - 74: 151, # 'J' - 75: 152, # 'K' - 76: 74, # 'L' - 77: 153, # 'M' - 78: 75, # 'N' - 79: 154, # 'O' - 80: 155, # 'P' - 81: 156, # 'Q' - 82: 157, # 'R' - 83: 158, # 'S' - 84: 159, # 'T' - 85: 160, # 'U' - 86: 161, # 'V' - 87: 162, # 'W' - 88: 163, # 'X' - 89: 164, # 'Y' - 90: 165, # 'Z' - 91: 253, # '[' - 92: 253, # '\\' - 93: 253, # ']' - 94: 253, # '^' - 95: 253, # '_' - 96: 253, # '`' - 97: 71, # 'a' - 98: 172, # 'b' - 99: 66, # 'c' - 100: 173, # 'd' - 101: 65, # 'e' - 102: 174, # 'f' - 103: 76, # 'g' - 104: 175, # 'h' - 105: 64, # 'i' - 106: 176, # 'j' - 107: 177, # 'k' - 108: 77, # 'l' - 109: 72, # 'm' - 110: 178, # 'n' - 111: 69, # 'o' - 112: 67, # 'p' - 113: 179, # 'q' - 114: 78, # 'r' - 115: 73, # 's' - 116: 180, # 't' - 117: 181, # 'u' - 118: 79, # 'v' - 119: 182, # 'w' - 120: 183, # 'x' - 121: 184, # 'y' - 122: 185, # 'z' - 123: 253, # '{' - 124: 253, # '|' - 125: 253, # '}' - 126: 253, # '~' - 127: 253, # '\x7f' - 128: 191, # 'ђ' - 129: 192, # 'Ђ' - 130: 193, # 'ѓ' - 131: 194, # 'Ѓ' - 132: 68, # 'ё' - 133: 195, # 'Ё' - 134: 196, # 'є' - 135: 197, # 'Є' - 136: 198, # 'ѕ' - 137: 199, # 'Ѕ' - 138: 200, # 'і' - 139: 201, # 'І' - 140: 202, # 'ї' - 141: 203, # 'Ї' - 142: 204, # 'ј' - 143: 205, # 'Ј' - 144: 206, # 'љ' - 145: 207, # 'Љ' - 146: 208, # 'њ' - 147: 209, # 'Њ' - 148: 210, # 'ћ' - 149: 211, # 'Ћ' - 150: 212, # 'ќ' - 151: 213, # 'Ќ' - 152: 214, # 'ў' - 153: 215, # 'Ў' - 154: 216, # 'џ' - 155: 217, # 'Џ' - 156: 27, # 'ю' - 157: 59, # 'Ю' - 158: 54, # 'ъ' - 159: 70, # 'Ъ' - 160: 3, # 'а' - 161: 37, # 'А' - 162: 21, # 'б' - 163: 44, # 'Б' - 164: 28, # 'ц' - 165: 58, # 'Ц' - 166: 13, # 'д' - 167: 41, # 'Д' - 168: 2, # 'е' - 169: 48, # 'Е' - 170: 39, # 'ф' - 171: 53, # 'Ф' - 172: 19, # 'г' - 173: 46, # 'Г' - 174: 218, # '«' - 175: 219, # '»' - 176: 220, # '░' - 177: 221, # '▒' - 178: 222, # '▓' - 179: 223, # '│' - 180: 224, # '┤' - 181: 26, # 'х' - 182: 55, # 'Х' - 183: 4, # 'и' - 184: 42, # 'И' - 185: 225, # '╣' - 186: 226, # '║' - 187: 227, # '╗' - 188: 228, # '╝' - 189: 23, # 'й' - 190: 60, # 'Й' - 191: 229, # '┐' - 192: 230, # '└' - 193: 231, # '┴' - 194: 232, # '┬' - 195: 233, # '├' - 196: 234, # '─' - 197: 235, # '┼' - 198: 11, # 'к' - 199: 36, # 'К' - 200: 236, # '╚' - 201: 237, # '╔' - 202: 238, # '╩' - 203: 239, # '╦' - 204: 240, # '╠' - 205: 241, # '═' - 206: 242, # '╬' - 207: 243, # '¤' - 208: 8, # 'л' - 209: 49, # 'Л' - 210: 12, # 'м' - 211: 38, # 'М' - 212: 5, # 'н' - 213: 31, # 'Н' - 214: 1, # 'о' - 215: 34, # 'О' - 216: 15, # 'п' - 217: 244, # '┘' - 218: 245, # '┌' - 219: 246, # '█' - 220: 247, # '▄' - 221: 35, # 'П' - 222: 16, # 'я' - 223: 248, # '▀' - 224: 43, # 'Я' - 225: 9, # 'р' - 226: 45, # 'Р' - 227: 7, # 'с' - 228: 32, # 'С' - 229: 6, # 'т' - 230: 40, # 'Т' - 231: 14, # 'у' - 232: 52, # 'У' - 233: 24, # 'ж' - 234: 56, # 'Ж' - 235: 10, # 'в' - 236: 33, # 'В' - 237: 17, # 'ь' - 238: 61, # 'Ь' - 239: 249, # '№' - 240: 250, # '\xad' - 241: 18, # 'ы' - 242: 62, # 'Ы' - 243: 20, # 'з' - 244: 51, # 'З' - 245: 25, # 'ш' - 246: 57, # 'Ш' - 247: 30, # 'э' - 248: 47, # 'Э' - 249: 29, # 'щ' - 250: 63, # 'Щ' - 251: 22, # 'ч' - 252: 50, # 'Ч' - 253: 251, # '§' - 254: 252, # '■' - 255: 255, # '\xa0' + 0: 255, # '\x00' + 1: 255, # '\x01' + 2: 255, # '\x02' + 3: 255, # '\x03' + 4: 255, # '\x04' + 5: 255, # '\x05' + 6: 255, # '\x06' + 7: 255, # '\x07' + 8: 255, # '\x08' + 9: 255, # '\t' + 10: 254, # '\n' + 11: 255, # '\x0b' + 12: 255, # '\x0c' + 13: 254, # '\r' + 14: 255, # '\x0e' + 15: 255, # '\x0f' + 16: 255, # '\x10' + 17: 255, # '\x11' + 18: 255, # '\x12' + 19: 255, # '\x13' + 20: 255, # '\x14' + 21: 255, # '\x15' + 22: 255, # '\x16' + 23: 255, # '\x17' + 24: 255, # '\x18' + 25: 255, # '\x19' + 26: 255, # '\x1a' + 27: 255, # '\x1b' + 28: 255, # '\x1c' + 29: 255, # '\x1d' + 30: 255, # '\x1e' + 31: 255, # '\x1f' + 32: 253, # ' ' + 33: 253, # '!' + 34: 253, # '"' + 35: 253, # '#' + 36: 253, # '$' + 37: 253, # '%' + 38: 253, # '&' + 39: 253, # "'" + 40: 253, # '(' + 41: 253, # ')' + 42: 253, # '*' + 43: 253, # '+' + 44: 253, # ',' + 45: 253, # '-' + 46: 253, # '.' + 47: 253, # '/' + 48: 252, # '0' + 49: 252, # '1' + 50: 252, # '2' + 51: 252, # '3' + 52: 252, # '4' + 53: 252, # '5' + 54: 252, # '6' + 55: 252, # '7' + 56: 252, # '8' + 57: 252, # '9' + 58: 253, # ':' + 59: 253, # ';' + 60: 253, # '<' + 61: 253, # '=' + 62: 253, # '>' + 63: 253, # '?' + 64: 253, # '@' + 65: 142, # 'A' + 66: 143, # 'B' + 67: 144, # 'C' + 68: 145, # 'D' + 69: 146, # 'E' + 70: 147, # 'F' + 71: 148, # 'G' + 72: 149, # 'H' + 73: 150, # 'I' + 74: 151, # 'J' + 75: 152, # 'K' + 76: 74, # 'L' + 77: 153, # 'M' + 78: 75, # 'N' + 79: 154, # 'O' + 80: 155, # 'P' + 81: 156, # 'Q' + 82: 157, # 'R' + 83: 158, # 'S' + 84: 159, # 'T' + 85: 160, # 'U' + 86: 161, # 'V' + 87: 162, # 'W' + 88: 163, # 'X' + 89: 164, # 'Y' + 90: 165, # 'Z' + 91: 253, # '[' + 92: 253, # '\\' + 93: 253, # ']' + 94: 253, # '^' + 95: 253, # '_' + 96: 253, # '`' + 97: 71, # 'a' + 98: 172, # 'b' + 99: 66, # 'c' + 100: 173, # 'd' + 101: 65, # 'e' + 102: 174, # 'f' + 103: 76, # 'g' + 104: 175, # 'h' + 105: 64, # 'i' + 106: 176, # 'j' + 107: 177, # 'k' + 108: 77, # 'l' + 109: 72, # 'm' + 110: 178, # 'n' + 111: 69, # 'o' + 112: 67, # 'p' + 113: 179, # 'q' + 114: 78, # 'r' + 115: 73, # 's' + 116: 180, # 't' + 117: 181, # 'u' + 118: 79, # 'v' + 119: 182, # 'w' + 120: 183, # 'x' + 121: 184, # 'y' + 122: 185, # 'z' + 123: 253, # '{' + 124: 253, # '|' + 125: 253, # '}' + 126: 253, # '~' + 127: 253, # '\x7f' + 128: 191, # 'ђ' + 129: 192, # 'Ђ' + 130: 193, # 'ѓ' + 131: 194, # 'Ѓ' + 132: 68, # 'ё' + 133: 195, # 'Ё' + 134: 196, # 'є' + 135: 197, # 'Є' + 136: 198, # 'ѕ' + 137: 199, # 'Ѕ' + 138: 200, # 'і' + 139: 201, # 'І' + 140: 202, # 'ї' + 141: 203, # 'Ї' + 142: 204, # 'ј' + 143: 205, # 'Ј' + 144: 206, # 'љ' + 145: 207, # 'Љ' + 146: 208, # 'њ' + 147: 209, # 'Њ' + 148: 210, # 'ћ' + 149: 211, # 'Ћ' + 150: 212, # 'ќ' + 151: 213, # 'Ќ' + 152: 214, # 'ў' + 153: 215, # 'Ў' + 154: 216, # 'џ' + 155: 217, # 'Џ' + 156: 27, # 'ю' + 157: 59, # 'Ю' + 158: 54, # 'ъ' + 159: 70, # 'Ъ' + 160: 3, # 'а' + 161: 37, # 'А' + 162: 21, # 'б' + 163: 44, # 'Б' + 164: 28, # 'ц' + 165: 58, # 'Ц' + 166: 13, # 'д' + 167: 41, # 'Д' + 168: 2, # 'е' + 169: 48, # 'Е' + 170: 39, # 'ф' + 171: 53, # 'Ф' + 172: 19, # 'г' + 173: 46, # 'Г' + 174: 218, # '«' + 175: 219, # '»' + 176: 220, # '░' + 177: 221, # '▒' + 178: 222, # '▓' + 179: 223, # '│' + 180: 224, # '┤' + 181: 26, # 'х' + 182: 55, # 'Х' + 183: 4, # 'и' + 184: 42, # 'И' + 185: 225, # '╣' + 186: 226, # '║' + 187: 227, # '╗' + 188: 228, # '╝' + 189: 23, # 'й' + 190: 60, # 'Й' + 191: 229, # '┐' + 192: 230, # '└' + 193: 231, # '┴' + 194: 232, # '┬' + 195: 233, # '├' + 196: 234, # '─' + 197: 235, # '┼' + 198: 11, # 'к' + 199: 36, # 'К' + 200: 236, # '╚' + 201: 237, # '╔' + 202: 238, # '╩' + 203: 239, # '╦' + 204: 240, # '╠' + 205: 241, # '═' + 206: 242, # '╬' + 207: 243, # '¤' + 208: 8, # 'л' + 209: 49, # 'Л' + 210: 12, # 'м' + 211: 38, # 'М' + 212: 5, # 'н' + 213: 31, # 'Н' + 214: 1, # 'о' + 215: 34, # 'О' + 216: 15, # 'п' + 217: 244, # '┘' + 218: 245, # '┌' + 219: 246, # '█' + 220: 247, # '▄' + 221: 35, # 'П' + 222: 16, # 'я' + 223: 248, # '▀' + 224: 43, # 'Я' + 225: 9, # 'р' + 226: 45, # 'Р' + 227: 7, # 'с' + 228: 32, # 'С' + 229: 6, # 'т' + 230: 40, # 'Т' + 231: 14, # 'у' + 232: 52, # 'У' + 233: 24, # 'ж' + 234: 56, # 'Ж' + 235: 10, # 'в' + 236: 33, # 'В' + 237: 17, # 'ь' + 238: 61, # 'Ь' + 239: 249, # '№' + 240: 250, # '\xad' + 241: 18, # 'ы' + 242: 62, # 'Ы' + 243: 20, # 'з' + 244: 51, # 'З' + 245: 25, # 'ш' + 246: 57, # 'Ш' + 247: 30, # 'э' + 248: 47, # 'Э' + 249: 29, # 'щ' + 250: 63, # 'Щ' + 251: 22, # 'ч' + 252: 50, # 'Ч' + 253: 251, # '§' + 254: 252, # '■' + 255: 255, # '\xa0' } -IBM855_RUSSIAN_MODEL = SingleByteCharSetModel(charset_name='IBM855', - language='Russian', - char_to_order_map=IBM855_RUSSIAN_CHAR_TO_ORDER, - language_model=RUSSIAN_LANG_MODEL, - typical_positive_ratio=0.976601, - keep_ascii_letters=False, - alphabet='ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё') +IBM855_RUSSIAN_MODEL = SingleByteCharSetModel( + charset_name="IBM855", + language="Russian", + char_to_order_map=IBM855_RUSSIAN_CHAR_TO_ORDER, + language_model=RUSSIAN_LANG_MODEL, + typical_positive_ratio=0.976601, + keep_ascii_letters=False, + alphabet="ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё", +) KOI8_R_RUSSIAN_CHAR_TO_ORDER = { - 0: 255, # '\x00' - 1: 255, # '\x01' - 2: 255, # '\x02' - 3: 255, # '\x03' - 4: 255, # '\x04' - 5: 255, # '\x05' - 6: 255, # '\x06' - 7: 255, # '\x07' - 8: 255, # '\x08' - 9: 255, # '\t' - 10: 254, # '\n' - 11: 255, # '\x0b' - 12: 255, # '\x0c' - 13: 254, # '\r' - 14: 255, # '\x0e' - 15: 255, # '\x0f' - 16: 255, # '\x10' - 17: 255, # '\x11' - 18: 255, # '\x12' - 19: 255, # '\x13' - 20: 255, # '\x14' - 21: 255, # '\x15' - 22: 255, # '\x16' - 23: 255, # '\x17' - 24: 255, # '\x18' - 25: 255, # '\x19' - 26: 255, # '\x1a' - 27: 255, # '\x1b' - 28: 255, # '\x1c' - 29: 255, # '\x1d' - 30: 255, # '\x1e' - 31: 255, # '\x1f' - 32: 253, # ' ' - 33: 253, # '!' - 34: 253, # '"' - 35: 253, # '#' - 36: 253, # '$' - 37: 253, # '%' - 38: 253, # '&' - 39: 253, # "'" - 40: 253, # '(' - 41: 253, # ')' - 42: 253, # '*' - 43: 253, # '+' - 44: 253, # ',' - 45: 253, # '-' - 46: 253, # '.' - 47: 253, # '/' - 48: 252, # '0' - 49: 252, # '1' - 50: 252, # '2' - 51: 252, # '3' - 52: 252, # '4' - 53: 252, # '5' - 54: 252, # '6' - 55: 252, # '7' - 56: 252, # '8' - 57: 252, # '9' - 58: 253, # ':' - 59: 253, # ';' - 60: 253, # '<' - 61: 253, # '=' - 62: 253, # '>' - 63: 253, # '?' - 64: 253, # '@' - 65: 142, # 'A' - 66: 143, # 'B' - 67: 144, # 'C' - 68: 145, # 'D' - 69: 146, # 'E' - 70: 147, # 'F' - 71: 148, # 'G' - 72: 149, # 'H' - 73: 150, # 'I' - 74: 151, # 'J' - 75: 152, # 'K' - 76: 74, # 'L' - 77: 153, # 'M' - 78: 75, # 'N' - 79: 154, # 'O' - 80: 155, # 'P' - 81: 156, # 'Q' - 82: 157, # 'R' - 83: 158, # 'S' - 84: 159, # 'T' - 85: 160, # 'U' - 86: 161, # 'V' - 87: 162, # 'W' - 88: 163, # 'X' - 89: 164, # 'Y' - 90: 165, # 'Z' - 91: 253, # '[' - 92: 253, # '\\' - 93: 253, # ']' - 94: 253, # '^' - 95: 253, # '_' - 96: 253, # '`' - 97: 71, # 'a' - 98: 172, # 'b' - 99: 66, # 'c' - 100: 173, # 'd' - 101: 65, # 'e' - 102: 174, # 'f' - 103: 76, # 'g' - 104: 175, # 'h' - 105: 64, # 'i' - 106: 176, # 'j' - 107: 177, # 'k' - 108: 77, # 'l' - 109: 72, # 'm' - 110: 178, # 'n' - 111: 69, # 'o' - 112: 67, # 'p' - 113: 179, # 'q' - 114: 78, # 'r' - 115: 73, # 's' - 116: 180, # 't' - 117: 181, # 'u' - 118: 79, # 'v' - 119: 182, # 'w' - 120: 183, # 'x' - 121: 184, # 'y' - 122: 185, # 'z' - 123: 253, # '{' - 124: 253, # '|' - 125: 253, # '}' - 126: 253, # '~' - 127: 253, # '\x7f' - 128: 191, # '─' - 129: 192, # '│' - 130: 193, # '┌' - 131: 194, # '┐' - 132: 195, # '└' - 133: 196, # '┘' - 134: 197, # '├' - 135: 198, # '┤' - 136: 199, # '┬' - 137: 200, # '┴' - 138: 201, # '┼' - 139: 202, # '▀' - 140: 203, # '▄' - 141: 204, # '█' - 142: 205, # '▌' - 143: 206, # '▐' - 144: 207, # '░' - 145: 208, # '▒' - 146: 209, # '▓' - 147: 210, # '⌠' - 148: 211, # '■' - 149: 212, # '∙' - 150: 213, # '√' - 151: 214, # '≈' - 152: 215, # '≤' - 153: 216, # '≥' - 154: 217, # '\xa0' - 155: 218, # '⌡' - 156: 219, # '°' - 157: 220, # '²' - 158: 221, # '·' - 159: 222, # '÷' - 160: 223, # '═' - 161: 224, # '║' - 162: 225, # '╒' - 163: 68, # 'ё' - 164: 226, # '╓' - 165: 227, # '╔' - 166: 228, # '╕' - 167: 229, # '╖' - 168: 230, # '╗' - 169: 231, # '╘' - 170: 232, # '╙' - 171: 233, # '╚' - 172: 234, # '╛' - 173: 235, # '╜' - 174: 236, # '╝' - 175: 237, # '╞' - 176: 238, # '╟' - 177: 239, # '╠' - 178: 240, # '╡' - 179: 241, # 'Ё' - 180: 242, # '╢' - 181: 243, # '╣' - 182: 244, # '╤' - 183: 245, # '╥' - 184: 246, # '╦' - 185: 247, # '╧' - 186: 248, # '╨' - 187: 249, # '╩' - 188: 250, # '╪' - 189: 251, # '╫' - 190: 252, # '╬' - 191: 253, # '©' - 192: 27, # 'ю' - 193: 3, # 'а' - 194: 21, # 'б' - 195: 28, # 'ц' - 196: 13, # 'д' - 197: 2, # 'е' - 198: 39, # 'ф' - 199: 19, # 'г' - 200: 26, # 'х' - 201: 4, # 'и' - 202: 23, # 'й' - 203: 11, # 'к' - 204: 8, # 'л' - 205: 12, # 'м' - 206: 5, # 'н' - 207: 1, # 'о' - 208: 15, # 'п' - 209: 16, # 'я' - 210: 9, # 'р' - 211: 7, # 'с' - 212: 6, # 'т' - 213: 14, # 'у' - 214: 24, # 'ж' - 215: 10, # 'в' - 216: 17, # 'ь' - 217: 18, # 'ы' - 218: 20, # 'з' - 219: 25, # 'ш' - 220: 30, # 'э' - 221: 29, # 'щ' - 222: 22, # 'ч' - 223: 54, # 'ъ' - 224: 59, # 'Ю' - 225: 37, # 'А' - 226: 44, # 'Б' - 227: 58, # 'Ц' - 228: 41, # 'Д' - 229: 48, # 'Е' - 230: 53, # 'Ф' - 231: 46, # 'Г' - 232: 55, # 'Х' - 233: 42, # 'И' - 234: 60, # 'Й' - 235: 36, # 'К' - 236: 49, # 'Л' - 237: 38, # 'М' - 238: 31, # 'Н' - 239: 34, # 'О' - 240: 35, # 'П' - 241: 43, # 'Я' - 242: 45, # 'Р' - 243: 32, # 'С' - 244: 40, # 'Т' - 245: 52, # 'У' - 246: 56, # 'Ж' - 247: 33, # 'В' - 248: 61, # 'Ь' - 249: 62, # 'Ы' - 250: 51, # 'З' - 251: 57, # 'Ш' - 252: 47, # 'Э' - 253: 63, # 'Щ' - 254: 50, # 'Ч' - 255: 70, # 'Ъ' + 0: 255, # '\x00' + 1: 255, # '\x01' + 2: 255, # '\x02' + 3: 255, # '\x03' + 4: 255, # '\x04' + 5: 255, # '\x05' + 6: 255, # '\x06' + 7: 255, # '\x07' + 8: 255, # '\x08' + 9: 255, # '\t' + 10: 254, # '\n' + 11: 255, # '\x0b' + 12: 255, # '\x0c' + 13: 254, # '\r' + 14: 255, # '\x0e' + 15: 255, # '\x0f' + 16: 255, # '\x10' + 17: 255, # '\x11' + 18: 255, # '\x12' + 19: 255, # '\x13' + 20: 255, # '\x14' + 21: 255, # '\x15' + 22: 255, # '\x16' + 23: 255, # '\x17' + 24: 255, # '\x18' + 25: 255, # '\x19' + 26: 255, # '\x1a' + 27: 255, # '\x1b' + 28: 255, # '\x1c' + 29: 255, # '\x1d' + 30: 255, # '\x1e' + 31: 255, # '\x1f' + 32: 253, # ' ' + 33: 253, # '!' + 34: 253, # '"' + 35: 253, # '#' + 36: 253, # '$' + 37: 253, # '%' + 38: 253, # '&' + 39: 253, # "'" + 40: 253, # '(' + 41: 253, # ')' + 42: 253, # '*' + 43: 253, # '+' + 44: 253, # ',' + 45: 253, # '-' + 46: 253, # '.' + 47: 253, # '/' + 48: 252, # '0' + 49: 252, # '1' + 50: 252, # '2' + 51: 252, # '3' + 52: 252, # '4' + 53: 252, # '5' + 54: 252, # '6' + 55: 252, # '7' + 56: 252, # '8' + 57: 252, # '9' + 58: 253, # ':' + 59: 253, # ';' + 60: 253, # '<' + 61: 253, # '=' + 62: 253, # '>' + 63: 253, # '?' + 64: 253, # '@' + 65: 142, # 'A' + 66: 143, # 'B' + 67: 144, # 'C' + 68: 145, # 'D' + 69: 146, # 'E' + 70: 147, # 'F' + 71: 148, # 'G' + 72: 149, # 'H' + 73: 150, # 'I' + 74: 151, # 'J' + 75: 152, # 'K' + 76: 74, # 'L' + 77: 153, # 'M' + 78: 75, # 'N' + 79: 154, # 'O' + 80: 155, # 'P' + 81: 156, # 'Q' + 82: 157, # 'R' + 83: 158, # 'S' + 84: 159, # 'T' + 85: 160, # 'U' + 86: 161, # 'V' + 87: 162, # 'W' + 88: 163, # 'X' + 89: 164, # 'Y' + 90: 165, # 'Z' + 91: 253, # '[' + 92: 253, # '\\' + 93: 253, # ']' + 94: 253, # '^' + 95: 253, # '_' + 96: 253, # '`' + 97: 71, # 'a' + 98: 172, # 'b' + 99: 66, # 'c' + 100: 173, # 'd' + 101: 65, # 'e' + 102: 174, # 'f' + 103: 76, # 'g' + 104: 175, # 'h' + 105: 64, # 'i' + 106: 176, # 'j' + 107: 177, # 'k' + 108: 77, # 'l' + 109: 72, # 'm' + 110: 178, # 'n' + 111: 69, # 'o' + 112: 67, # 'p' + 113: 179, # 'q' + 114: 78, # 'r' + 115: 73, # 's' + 116: 180, # 't' + 117: 181, # 'u' + 118: 79, # 'v' + 119: 182, # 'w' + 120: 183, # 'x' + 121: 184, # 'y' + 122: 185, # 'z' + 123: 253, # '{' + 124: 253, # '|' + 125: 253, # '}' + 126: 253, # '~' + 127: 253, # '\x7f' + 128: 191, # '─' + 129: 192, # '│' + 130: 193, # '┌' + 131: 194, # '┐' + 132: 195, # '└' + 133: 196, # '┘' + 134: 197, # '├' + 135: 198, # '┤' + 136: 199, # '┬' + 137: 200, # '┴' + 138: 201, # '┼' + 139: 202, # '▀' + 140: 203, # '▄' + 141: 204, # '█' + 142: 205, # '▌' + 143: 206, # '▐' + 144: 207, # '░' + 145: 208, # '▒' + 146: 209, # '▓' + 147: 210, # '⌠' + 148: 211, # '■' + 149: 212, # '∙' + 150: 213, # '√' + 151: 214, # '≈' + 152: 215, # '≤' + 153: 216, # '≥' + 154: 217, # '\xa0' + 155: 218, # '⌡' + 156: 219, # '°' + 157: 220, # '²' + 158: 221, # '·' + 159: 222, # '÷' + 160: 223, # '═' + 161: 224, # '║' + 162: 225, # '╒' + 163: 68, # 'ё' + 164: 226, # '╓' + 165: 227, # '╔' + 166: 228, # '╕' + 167: 229, # '╖' + 168: 230, # '╗' + 169: 231, # '╘' + 170: 232, # '╙' + 171: 233, # '╚' + 172: 234, # '╛' + 173: 235, # '╜' + 174: 236, # '╝' + 175: 237, # '╞' + 176: 238, # '╟' + 177: 239, # '╠' + 178: 240, # '╡' + 179: 241, # 'Ё' + 180: 242, # '╢' + 181: 243, # '╣' + 182: 244, # '╤' + 183: 245, # '╥' + 184: 246, # '╦' + 185: 247, # '╧' + 186: 248, # '╨' + 187: 249, # '╩' + 188: 250, # '╪' + 189: 251, # '╫' + 190: 252, # '╬' + 191: 253, # '©' + 192: 27, # 'ю' + 193: 3, # 'а' + 194: 21, # 'б' + 195: 28, # 'ц' + 196: 13, # 'д' + 197: 2, # 'е' + 198: 39, # 'ф' + 199: 19, # 'г' + 200: 26, # 'х' + 201: 4, # 'и' + 202: 23, # 'й' + 203: 11, # 'к' + 204: 8, # 'л' + 205: 12, # 'м' + 206: 5, # 'н' + 207: 1, # 'о' + 208: 15, # 'п' + 209: 16, # 'я' + 210: 9, # 'р' + 211: 7, # 'с' + 212: 6, # 'т' + 213: 14, # 'у' + 214: 24, # 'ж' + 215: 10, # 'в' + 216: 17, # 'ь' + 217: 18, # 'ы' + 218: 20, # 'з' + 219: 25, # 'ш' + 220: 30, # 'э' + 221: 29, # 'щ' + 222: 22, # 'ч' + 223: 54, # 'ъ' + 224: 59, # 'Ю' + 225: 37, # 'А' + 226: 44, # 'Б' + 227: 58, # 'Ц' + 228: 41, # 'Д' + 229: 48, # 'Е' + 230: 53, # 'Ф' + 231: 46, # 'Г' + 232: 55, # 'Х' + 233: 42, # 'И' + 234: 60, # 'Й' + 235: 36, # 'К' + 236: 49, # 'Л' + 237: 38, # 'М' + 238: 31, # 'Н' + 239: 34, # 'О' + 240: 35, # 'П' + 241: 43, # 'Я' + 242: 45, # 'Р' + 243: 32, # 'С' + 244: 40, # 'Т' + 245: 52, # 'У' + 246: 56, # 'Ж' + 247: 33, # 'В' + 248: 61, # 'Ь' + 249: 62, # 'Ы' + 250: 51, # 'З' + 251: 57, # 'Ш' + 252: 47, # 'Э' + 253: 63, # 'Щ' + 254: 50, # 'Ч' + 255: 70, # 'Ъ' } -KOI8_R_RUSSIAN_MODEL = SingleByteCharSetModel(charset_name='KOI8-R', - language='Russian', - char_to_order_map=KOI8_R_RUSSIAN_CHAR_TO_ORDER, - language_model=RUSSIAN_LANG_MODEL, - typical_positive_ratio=0.976601, - keep_ascii_letters=False, - alphabet='ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё') +KOI8_R_RUSSIAN_MODEL = SingleByteCharSetModel( + charset_name="KOI8-R", + language="Russian", + char_to_order_map=KOI8_R_RUSSIAN_CHAR_TO_ORDER, + language_model=RUSSIAN_LANG_MODEL, + typical_positive_ratio=0.976601, + keep_ascii_letters=False, + alphabet="ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё", +) MACCYRILLIC_RUSSIAN_CHAR_TO_ORDER = { - 0: 255, # '\x00' - 1: 255, # '\x01' - 2: 255, # '\x02' - 3: 255, # '\x03' - 4: 255, # '\x04' - 5: 255, # '\x05' - 6: 255, # '\x06' - 7: 255, # '\x07' - 8: 255, # '\x08' - 9: 255, # '\t' - 10: 254, # '\n' - 11: 255, # '\x0b' - 12: 255, # '\x0c' - 13: 254, # '\r' - 14: 255, # '\x0e' - 15: 255, # '\x0f' - 16: 255, # '\x10' - 17: 255, # '\x11' - 18: 255, # '\x12' - 19: 255, # '\x13' - 20: 255, # '\x14' - 21: 255, # '\x15' - 22: 255, # '\x16' - 23: 255, # '\x17' - 24: 255, # '\x18' - 25: 255, # '\x19' - 26: 255, # '\x1a' - 27: 255, # '\x1b' - 28: 255, # '\x1c' - 29: 255, # '\x1d' - 30: 255, # '\x1e' - 31: 255, # '\x1f' - 32: 253, # ' ' - 33: 253, # '!' - 34: 253, # '"' - 35: 253, # '#' - 36: 253, # '$' - 37: 253, # '%' - 38: 253, # '&' - 39: 253, # "'" - 40: 253, # '(' - 41: 253, # ')' - 42: 253, # '*' - 43: 253, # '+' - 44: 253, # ',' - 45: 253, # '-' - 46: 253, # '.' - 47: 253, # '/' - 48: 252, # '0' - 49: 252, # '1' - 50: 252, # '2' - 51: 252, # '3' - 52: 252, # '4' - 53: 252, # '5' - 54: 252, # '6' - 55: 252, # '7' - 56: 252, # '8' - 57: 252, # '9' - 58: 253, # ':' - 59: 253, # ';' - 60: 253, # '<' - 61: 253, # '=' - 62: 253, # '>' - 63: 253, # '?' - 64: 253, # '@' - 65: 142, # 'A' - 66: 143, # 'B' - 67: 144, # 'C' - 68: 145, # 'D' - 69: 146, # 'E' - 70: 147, # 'F' - 71: 148, # 'G' - 72: 149, # 'H' - 73: 150, # 'I' - 74: 151, # 'J' - 75: 152, # 'K' - 76: 74, # 'L' - 77: 153, # 'M' - 78: 75, # 'N' - 79: 154, # 'O' - 80: 155, # 'P' - 81: 156, # 'Q' - 82: 157, # 'R' - 83: 158, # 'S' - 84: 159, # 'T' - 85: 160, # 'U' - 86: 161, # 'V' - 87: 162, # 'W' - 88: 163, # 'X' - 89: 164, # 'Y' - 90: 165, # 'Z' - 91: 253, # '[' - 92: 253, # '\\' - 93: 253, # ']' - 94: 253, # '^' - 95: 253, # '_' - 96: 253, # '`' - 97: 71, # 'a' - 98: 172, # 'b' - 99: 66, # 'c' - 100: 173, # 'd' - 101: 65, # 'e' - 102: 174, # 'f' - 103: 76, # 'g' - 104: 175, # 'h' - 105: 64, # 'i' - 106: 176, # 'j' - 107: 177, # 'k' - 108: 77, # 'l' - 109: 72, # 'm' - 110: 178, # 'n' - 111: 69, # 'o' - 112: 67, # 'p' - 113: 179, # 'q' - 114: 78, # 'r' - 115: 73, # 's' - 116: 180, # 't' - 117: 181, # 'u' - 118: 79, # 'v' - 119: 182, # 'w' - 120: 183, # 'x' - 121: 184, # 'y' - 122: 185, # 'z' - 123: 253, # '{' - 124: 253, # '|' - 125: 253, # '}' - 126: 253, # '~' - 127: 253, # '\x7f' - 128: 37, # 'А' - 129: 44, # 'Б' - 130: 33, # 'В' - 131: 46, # 'Г' - 132: 41, # 'Д' - 133: 48, # 'Е' - 134: 56, # 'Ж' - 135: 51, # 'З' - 136: 42, # 'И' - 137: 60, # 'Й' - 138: 36, # 'К' - 139: 49, # 'Л' - 140: 38, # 'М' - 141: 31, # 'Н' - 142: 34, # 'О' - 143: 35, # 'П' - 144: 45, # 'Р' - 145: 32, # 'С' - 146: 40, # 'Т' - 147: 52, # 'У' - 148: 53, # 'Ф' - 149: 55, # 'Х' - 150: 58, # 'Ц' - 151: 50, # 'Ч' - 152: 57, # 'Ш' - 153: 63, # 'Щ' - 154: 70, # 'Ъ' - 155: 62, # 'Ы' - 156: 61, # 'Ь' - 157: 47, # 'Э' - 158: 59, # 'Ю' - 159: 43, # 'Я' - 160: 191, # '†' - 161: 192, # '°' - 162: 193, # 'Ґ' - 163: 194, # '£' - 164: 195, # '§' - 165: 196, # '•' - 166: 197, # '¶' - 167: 198, # 'І' - 168: 199, # '®' - 169: 200, # '©' - 170: 201, # '™' - 171: 202, # 'Ђ' - 172: 203, # 'ђ' - 173: 204, # '≠' - 174: 205, # 'Ѓ' - 175: 206, # 'ѓ' - 176: 207, # '∞' - 177: 208, # '±' - 178: 209, # '≤' - 179: 210, # '≥' - 180: 211, # 'і' - 181: 212, # 'µ' - 182: 213, # 'ґ' - 183: 214, # 'Ј' - 184: 215, # 'Є' - 185: 216, # 'є' - 186: 217, # 'Ї' - 187: 218, # 'ї' - 188: 219, # 'Љ' - 189: 220, # 'љ' - 190: 221, # 'Њ' - 191: 222, # 'њ' - 192: 223, # 'ј' - 193: 224, # 'Ѕ' - 194: 225, # '¬' - 195: 226, # '√' - 196: 227, # 'ƒ' - 197: 228, # '≈' - 198: 229, # '∆' - 199: 230, # '«' - 200: 231, # '»' - 201: 232, # '…' - 202: 233, # '\xa0' - 203: 234, # 'Ћ' - 204: 235, # 'ћ' - 205: 236, # 'Ќ' - 206: 237, # 'ќ' - 207: 238, # 'ѕ' - 208: 239, # '–' - 209: 240, # '—' - 210: 241, # '“' - 211: 242, # '”' - 212: 243, # '‘' - 213: 244, # '’' - 214: 245, # '÷' - 215: 246, # '„' - 216: 247, # 'Ў' - 217: 248, # 'ў' - 218: 249, # 'Џ' - 219: 250, # 'џ' - 220: 251, # '№' - 221: 252, # 'Ё' - 222: 68, # 'ё' - 223: 16, # 'я' - 224: 3, # 'а' - 225: 21, # 'б' - 226: 10, # 'в' - 227: 19, # 'г' - 228: 13, # 'д' - 229: 2, # 'е' - 230: 24, # 'ж' - 231: 20, # 'з' - 232: 4, # 'и' - 233: 23, # 'й' - 234: 11, # 'к' - 235: 8, # 'л' - 236: 12, # 'м' - 237: 5, # 'н' - 238: 1, # 'о' - 239: 15, # 'п' - 240: 9, # 'р' - 241: 7, # 'с' - 242: 6, # 'т' - 243: 14, # 'у' - 244: 39, # 'ф' - 245: 26, # 'х' - 246: 28, # 'ц' - 247: 22, # 'ч' - 248: 25, # 'ш' - 249: 29, # 'щ' - 250: 54, # 'ъ' - 251: 18, # 'ы' - 252: 17, # 'ь' - 253: 30, # 'э' - 254: 27, # 'ю' - 255: 255, # '€' + 0: 255, # '\x00' + 1: 255, # '\x01' + 2: 255, # '\x02' + 3: 255, # '\x03' + 4: 255, # '\x04' + 5: 255, # '\x05' + 6: 255, # '\x06' + 7: 255, # '\x07' + 8: 255, # '\x08' + 9: 255, # '\t' + 10: 254, # '\n' + 11: 255, # '\x0b' + 12: 255, # '\x0c' + 13: 254, # '\r' + 14: 255, # '\x0e' + 15: 255, # '\x0f' + 16: 255, # '\x10' + 17: 255, # '\x11' + 18: 255, # '\x12' + 19: 255, # '\x13' + 20: 255, # '\x14' + 21: 255, # '\x15' + 22: 255, # '\x16' + 23: 255, # '\x17' + 24: 255, # '\x18' + 25: 255, # '\x19' + 26: 255, # '\x1a' + 27: 255, # '\x1b' + 28: 255, # '\x1c' + 29: 255, # '\x1d' + 30: 255, # '\x1e' + 31: 255, # '\x1f' + 32: 253, # ' ' + 33: 253, # '!' + 34: 253, # '"' + 35: 253, # '#' + 36: 253, # '$' + 37: 253, # '%' + 38: 253, # '&' + 39: 253, # "'" + 40: 253, # '(' + 41: 253, # ')' + 42: 253, # '*' + 43: 253, # '+' + 44: 253, # ',' + 45: 253, # '-' + 46: 253, # '.' + 47: 253, # '/' + 48: 252, # '0' + 49: 252, # '1' + 50: 252, # '2' + 51: 252, # '3' + 52: 252, # '4' + 53: 252, # '5' + 54: 252, # '6' + 55: 252, # '7' + 56: 252, # '8' + 57: 252, # '9' + 58: 253, # ':' + 59: 253, # ';' + 60: 253, # '<' + 61: 253, # '=' + 62: 253, # '>' + 63: 253, # '?' + 64: 253, # '@' + 65: 142, # 'A' + 66: 143, # 'B' + 67: 144, # 'C' + 68: 145, # 'D' + 69: 146, # 'E' + 70: 147, # 'F' + 71: 148, # 'G' + 72: 149, # 'H' + 73: 150, # 'I' + 74: 151, # 'J' + 75: 152, # 'K' + 76: 74, # 'L' + 77: 153, # 'M' + 78: 75, # 'N' + 79: 154, # 'O' + 80: 155, # 'P' + 81: 156, # 'Q' + 82: 157, # 'R' + 83: 158, # 'S' + 84: 159, # 'T' + 85: 160, # 'U' + 86: 161, # 'V' + 87: 162, # 'W' + 88: 163, # 'X' + 89: 164, # 'Y' + 90: 165, # 'Z' + 91: 253, # '[' + 92: 253, # '\\' + 93: 253, # ']' + 94: 253, # '^' + 95: 253, # '_' + 96: 253, # '`' + 97: 71, # 'a' + 98: 172, # 'b' + 99: 66, # 'c' + 100: 173, # 'd' + 101: 65, # 'e' + 102: 174, # 'f' + 103: 76, # 'g' + 104: 175, # 'h' + 105: 64, # 'i' + 106: 176, # 'j' + 107: 177, # 'k' + 108: 77, # 'l' + 109: 72, # 'm' + 110: 178, # 'n' + 111: 69, # 'o' + 112: 67, # 'p' + 113: 179, # 'q' + 114: 78, # 'r' + 115: 73, # 's' + 116: 180, # 't' + 117: 181, # 'u' + 118: 79, # 'v' + 119: 182, # 'w' + 120: 183, # 'x' + 121: 184, # 'y' + 122: 185, # 'z' + 123: 253, # '{' + 124: 253, # '|' + 125: 253, # '}' + 126: 253, # '~' + 127: 253, # '\x7f' + 128: 37, # 'А' + 129: 44, # 'Б' + 130: 33, # 'В' + 131: 46, # 'Г' + 132: 41, # 'Д' + 133: 48, # 'Е' + 134: 56, # 'Ж' + 135: 51, # 'З' + 136: 42, # 'И' + 137: 60, # 'Й' + 138: 36, # 'К' + 139: 49, # 'Л' + 140: 38, # 'М' + 141: 31, # 'Н' + 142: 34, # 'О' + 143: 35, # 'П' + 144: 45, # 'Р' + 145: 32, # 'С' + 146: 40, # 'Т' + 147: 52, # 'У' + 148: 53, # 'Ф' + 149: 55, # 'Х' + 150: 58, # 'Ц' + 151: 50, # 'Ч' + 152: 57, # 'Ш' + 153: 63, # 'Щ' + 154: 70, # 'Ъ' + 155: 62, # 'Ы' + 156: 61, # 'Ь' + 157: 47, # 'Э' + 158: 59, # 'Ю' + 159: 43, # 'Я' + 160: 191, # '†' + 161: 192, # '°' + 162: 193, # 'Ґ' + 163: 194, # '£' + 164: 195, # '§' + 165: 196, # '•' + 166: 197, # '¶' + 167: 198, # 'І' + 168: 199, # '®' + 169: 200, # '©' + 170: 201, # '™' + 171: 202, # 'Ђ' + 172: 203, # 'ђ' + 173: 204, # '≠' + 174: 205, # 'Ѓ' + 175: 206, # 'ѓ' + 176: 207, # '∞' + 177: 208, # '±' + 178: 209, # '≤' + 179: 210, # '≥' + 180: 211, # 'і' + 181: 212, # 'µ' + 182: 213, # 'ґ' + 183: 214, # 'Ј' + 184: 215, # 'Є' + 185: 216, # 'є' + 186: 217, # 'Ї' + 187: 218, # 'ї' + 188: 219, # 'Љ' + 189: 220, # 'љ' + 190: 221, # 'Њ' + 191: 222, # 'њ' + 192: 223, # 'ј' + 193: 224, # 'Ѕ' + 194: 225, # '¬' + 195: 226, # '√' + 196: 227, # 'ƒ' + 197: 228, # '≈' + 198: 229, # '∆' + 199: 230, # '«' + 200: 231, # '»' + 201: 232, # '…' + 202: 233, # '\xa0' + 203: 234, # 'Ћ' + 204: 235, # 'ћ' + 205: 236, # 'Ќ' + 206: 237, # 'ќ' + 207: 238, # 'ѕ' + 208: 239, # '–' + 209: 240, # '—' + 210: 241, # '“' + 211: 242, # '”' + 212: 243, # '‘' + 213: 244, # '’' + 214: 245, # '÷' + 215: 246, # '„' + 216: 247, # 'Ў' + 217: 248, # 'ў' + 218: 249, # 'Џ' + 219: 250, # 'џ' + 220: 251, # '№' + 221: 252, # 'Ё' + 222: 68, # 'ё' + 223: 16, # 'я' + 224: 3, # 'а' + 225: 21, # 'б' + 226: 10, # 'в' + 227: 19, # 'г' + 228: 13, # 'д' + 229: 2, # 'е' + 230: 24, # 'ж' + 231: 20, # 'з' + 232: 4, # 'и' + 233: 23, # 'й' + 234: 11, # 'к' + 235: 8, # 'л' + 236: 12, # 'м' + 237: 5, # 'н' + 238: 1, # 'о' + 239: 15, # 'п' + 240: 9, # 'р' + 241: 7, # 'с' + 242: 6, # 'т' + 243: 14, # 'у' + 244: 39, # 'ф' + 245: 26, # 'х' + 246: 28, # 'ц' + 247: 22, # 'ч' + 248: 25, # 'ш' + 249: 29, # 'щ' + 250: 54, # 'ъ' + 251: 18, # 'ы' + 252: 17, # 'ь' + 253: 30, # 'э' + 254: 27, # 'ю' + 255: 255, # '€' } -MACCYRILLIC_RUSSIAN_MODEL = SingleByteCharSetModel(charset_name='MacCyrillic', - language='Russian', - char_to_order_map=MACCYRILLIC_RUSSIAN_CHAR_TO_ORDER, - language_model=RUSSIAN_LANG_MODEL, - typical_positive_ratio=0.976601, - keep_ascii_letters=False, - alphabet='ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё') +MACCYRILLIC_RUSSIAN_MODEL = SingleByteCharSetModel( + charset_name="MacCyrillic", + language="Russian", + char_to_order_map=MACCYRILLIC_RUSSIAN_CHAR_TO_ORDER, + language_model=RUSSIAN_LANG_MODEL, + typical_positive_ratio=0.976601, + keep_ascii_letters=False, + alphabet="ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё", +) ISO_8859_5_RUSSIAN_CHAR_TO_ORDER = { - 0: 255, # '\x00' - 1: 255, # '\x01' - 2: 255, # '\x02' - 3: 255, # '\x03' - 4: 255, # '\x04' - 5: 255, # '\x05' - 6: 255, # '\x06' - 7: 255, # '\x07' - 8: 255, # '\x08' - 9: 255, # '\t' - 10: 254, # '\n' - 11: 255, # '\x0b' - 12: 255, # '\x0c' - 13: 254, # '\r' - 14: 255, # '\x0e' - 15: 255, # '\x0f' - 16: 255, # '\x10' - 17: 255, # '\x11' - 18: 255, # '\x12' - 19: 255, # '\x13' - 20: 255, # '\x14' - 21: 255, # '\x15' - 22: 255, # '\x16' - 23: 255, # '\x17' - 24: 255, # '\x18' - 25: 255, # '\x19' - 26: 255, # '\x1a' - 27: 255, # '\x1b' - 28: 255, # '\x1c' - 29: 255, # '\x1d' - 30: 255, # '\x1e' - 31: 255, # '\x1f' - 32: 253, # ' ' - 33: 253, # '!' - 34: 253, # '"' - 35: 253, # '#' - 36: 253, # '$' - 37: 253, # '%' - 38: 253, # '&' - 39: 253, # "'" - 40: 253, # '(' - 41: 253, # ')' - 42: 253, # '*' - 43: 253, # '+' - 44: 253, # ',' - 45: 253, # '-' - 46: 253, # '.' - 47: 253, # '/' - 48: 252, # '0' - 49: 252, # '1' - 50: 252, # '2' - 51: 252, # '3' - 52: 252, # '4' - 53: 252, # '5' - 54: 252, # '6' - 55: 252, # '7' - 56: 252, # '8' - 57: 252, # '9' - 58: 253, # ':' - 59: 253, # ';' - 60: 253, # '<' - 61: 253, # '=' - 62: 253, # '>' - 63: 253, # '?' - 64: 253, # '@' - 65: 142, # 'A' - 66: 143, # 'B' - 67: 144, # 'C' - 68: 145, # 'D' - 69: 146, # 'E' - 70: 147, # 'F' - 71: 148, # 'G' - 72: 149, # 'H' - 73: 150, # 'I' - 74: 151, # 'J' - 75: 152, # 'K' - 76: 74, # 'L' - 77: 153, # 'M' - 78: 75, # 'N' - 79: 154, # 'O' - 80: 155, # 'P' - 81: 156, # 'Q' - 82: 157, # 'R' - 83: 158, # 'S' - 84: 159, # 'T' - 85: 160, # 'U' - 86: 161, # 'V' - 87: 162, # 'W' - 88: 163, # 'X' - 89: 164, # 'Y' - 90: 165, # 'Z' - 91: 253, # '[' - 92: 253, # '\\' - 93: 253, # ']' - 94: 253, # '^' - 95: 253, # '_' - 96: 253, # '`' - 97: 71, # 'a' - 98: 172, # 'b' - 99: 66, # 'c' - 100: 173, # 'd' - 101: 65, # 'e' - 102: 174, # 'f' - 103: 76, # 'g' - 104: 175, # 'h' - 105: 64, # 'i' - 106: 176, # 'j' - 107: 177, # 'k' - 108: 77, # 'l' - 109: 72, # 'm' - 110: 178, # 'n' - 111: 69, # 'o' - 112: 67, # 'p' - 113: 179, # 'q' - 114: 78, # 'r' - 115: 73, # 's' - 116: 180, # 't' - 117: 181, # 'u' - 118: 79, # 'v' - 119: 182, # 'w' - 120: 183, # 'x' - 121: 184, # 'y' - 122: 185, # 'z' - 123: 253, # '{' - 124: 253, # '|' - 125: 253, # '}' - 126: 253, # '~' - 127: 253, # '\x7f' - 128: 191, # '\x80' - 129: 192, # '\x81' - 130: 193, # '\x82' - 131: 194, # '\x83' - 132: 195, # '\x84' - 133: 196, # '\x85' - 134: 197, # '\x86' - 135: 198, # '\x87' - 136: 199, # '\x88' - 137: 200, # '\x89' - 138: 201, # '\x8a' - 139: 202, # '\x8b' - 140: 203, # '\x8c' - 141: 204, # '\x8d' - 142: 205, # '\x8e' - 143: 206, # '\x8f' - 144: 207, # '\x90' - 145: 208, # '\x91' - 146: 209, # '\x92' - 147: 210, # '\x93' - 148: 211, # '\x94' - 149: 212, # '\x95' - 150: 213, # '\x96' - 151: 214, # '\x97' - 152: 215, # '\x98' - 153: 216, # '\x99' - 154: 217, # '\x9a' - 155: 218, # '\x9b' - 156: 219, # '\x9c' - 157: 220, # '\x9d' - 158: 221, # '\x9e' - 159: 222, # '\x9f' - 160: 223, # '\xa0' - 161: 224, # 'Ё' - 162: 225, # 'Ђ' - 163: 226, # 'Ѓ' - 164: 227, # 'Є' - 165: 228, # 'Ѕ' - 166: 229, # 'І' - 167: 230, # 'Ї' - 168: 231, # 'Ј' - 169: 232, # 'Љ' - 170: 233, # 'Њ' - 171: 234, # 'Ћ' - 172: 235, # 'Ќ' - 173: 236, # '\xad' - 174: 237, # 'Ў' - 175: 238, # 'Џ' - 176: 37, # 'А' - 177: 44, # 'Б' - 178: 33, # 'В' - 179: 46, # 'Г' - 180: 41, # 'Д' - 181: 48, # 'Е' - 182: 56, # 'Ж' - 183: 51, # 'З' - 184: 42, # 'И' - 185: 60, # 'Й' - 186: 36, # 'К' - 187: 49, # 'Л' - 188: 38, # 'М' - 189: 31, # 'Н' - 190: 34, # 'О' - 191: 35, # 'П' - 192: 45, # 'Р' - 193: 32, # 'С' - 194: 40, # 'Т' - 195: 52, # 'У' - 196: 53, # 'Ф' - 197: 55, # 'Х' - 198: 58, # 'Ц' - 199: 50, # 'Ч' - 200: 57, # 'Ш' - 201: 63, # 'Щ' - 202: 70, # 'Ъ' - 203: 62, # 'Ы' - 204: 61, # 'Ь' - 205: 47, # 'Э' - 206: 59, # 'Ю' - 207: 43, # 'Я' - 208: 3, # 'а' - 209: 21, # 'б' - 210: 10, # 'в' - 211: 19, # 'г' - 212: 13, # 'д' - 213: 2, # 'е' - 214: 24, # 'ж' - 215: 20, # 'з' - 216: 4, # 'и' - 217: 23, # 'й' - 218: 11, # 'к' - 219: 8, # 'л' - 220: 12, # 'м' - 221: 5, # 'н' - 222: 1, # 'о' - 223: 15, # 'п' - 224: 9, # 'р' - 225: 7, # 'с' - 226: 6, # 'т' - 227: 14, # 'у' - 228: 39, # 'ф' - 229: 26, # 'х' - 230: 28, # 'ц' - 231: 22, # 'ч' - 232: 25, # 'ш' - 233: 29, # 'щ' - 234: 54, # 'ъ' - 235: 18, # 'ы' - 236: 17, # 'ь' - 237: 30, # 'э' - 238: 27, # 'ю' - 239: 16, # 'я' - 240: 239, # '№' - 241: 68, # 'ё' - 242: 240, # 'ђ' - 243: 241, # 'ѓ' - 244: 242, # 'є' - 245: 243, # 'ѕ' - 246: 244, # 'і' - 247: 245, # 'ї' - 248: 246, # 'ј' - 249: 247, # 'љ' - 250: 248, # 'њ' - 251: 249, # 'ћ' - 252: 250, # 'ќ' - 253: 251, # '§' - 254: 252, # 'ў' - 255: 255, # 'џ' + 0: 255, # '\x00' + 1: 255, # '\x01' + 2: 255, # '\x02' + 3: 255, # '\x03' + 4: 255, # '\x04' + 5: 255, # '\x05' + 6: 255, # '\x06' + 7: 255, # '\x07' + 8: 255, # '\x08' + 9: 255, # '\t' + 10: 254, # '\n' + 11: 255, # '\x0b' + 12: 255, # '\x0c' + 13: 254, # '\r' + 14: 255, # '\x0e' + 15: 255, # '\x0f' + 16: 255, # '\x10' + 17: 255, # '\x11' + 18: 255, # '\x12' + 19: 255, # '\x13' + 20: 255, # '\x14' + 21: 255, # '\x15' + 22: 255, # '\x16' + 23: 255, # '\x17' + 24: 255, # '\x18' + 25: 255, # '\x19' + 26: 255, # '\x1a' + 27: 255, # '\x1b' + 28: 255, # '\x1c' + 29: 255, # '\x1d' + 30: 255, # '\x1e' + 31: 255, # '\x1f' + 32: 253, # ' ' + 33: 253, # '!' + 34: 253, # '"' + 35: 253, # '#' + 36: 253, # '$' + 37: 253, # '%' + 38: 253, # '&' + 39: 253, # "'" + 40: 253, # '(' + 41: 253, # ')' + 42: 253, # '*' + 43: 253, # '+' + 44: 253, # ',' + 45: 253, # '-' + 46: 253, # '.' + 47: 253, # '/' + 48: 252, # '0' + 49: 252, # '1' + 50: 252, # '2' + 51: 252, # '3' + 52: 252, # '4' + 53: 252, # '5' + 54: 252, # '6' + 55: 252, # '7' + 56: 252, # '8' + 57: 252, # '9' + 58: 253, # ':' + 59: 253, # ';' + 60: 253, # '<' + 61: 253, # '=' + 62: 253, # '>' + 63: 253, # '?' + 64: 253, # '@' + 65: 142, # 'A' + 66: 143, # 'B' + 67: 144, # 'C' + 68: 145, # 'D' + 69: 146, # 'E' + 70: 147, # 'F' + 71: 148, # 'G' + 72: 149, # 'H' + 73: 150, # 'I' + 74: 151, # 'J' + 75: 152, # 'K' + 76: 74, # 'L' + 77: 153, # 'M' + 78: 75, # 'N' + 79: 154, # 'O' + 80: 155, # 'P' + 81: 156, # 'Q' + 82: 157, # 'R' + 83: 158, # 'S' + 84: 159, # 'T' + 85: 160, # 'U' + 86: 161, # 'V' + 87: 162, # 'W' + 88: 163, # 'X' + 89: 164, # 'Y' + 90: 165, # 'Z' + 91: 253, # '[' + 92: 253, # '\\' + 93: 253, # ']' + 94: 253, # '^' + 95: 253, # '_' + 96: 253, # '`' + 97: 71, # 'a' + 98: 172, # 'b' + 99: 66, # 'c' + 100: 173, # 'd' + 101: 65, # 'e' + 102: 174, # 'f' + 103: 76, # 'g' + 104: 175, # 'h' + 105: 64, # 'i' + 106: 176, # 'j' + 107: 177, # 'k' + 108: 77, # 'l' + 109: 72, # 'm' + 110: 178, # 'n' + 111: 69, # 'o' + 112: 67, # 'p' + 113: 179, # 'q' + 114: 78, # 'r' + 115: 73, # 's' + 116: 180, # 't' + 117: 181, # 'u' + 118: 79, # 'v' + 119: 182, # 'w' + 120: 183, # 'x' + 121: 184, # 'y' + 122: 185, # 'z' + 123: 253, # '{' + 124: 253, # '|' + 125: 253, # '}' + 126: 253, # '~' + 127: 253, # '\x7f' + 128: 191, # '\x80' + 129: 192, # '\x81' + 130: 193, # '\x82' + 131: 194, # '\x83' + 132: 195, # '\x84' + 133: 196, # '\x85' + 134: 197, # '\x86' + 135: 198, # '\x87' + 136: 199, # '\x88' + 137: 200, # '\x89' + 138: 201, # '\x8a' + 139: 202, # '\x8b' + 140: 203, # '\x8c' + 141: 204, # '\x8d' + 142: 205, # '\x8e' + 143: 206, # '\x8f' + 144: 207, # '\x90' + 145: 208, # '\x91' + 146: 209, # '\x92' + 147: 210, # '\x93' + 148: 211, # '\x94' + 149: 212, # '\x95' + 150: 213, # '\x96' + 151: 214, # '\x97' + 152: 215, # '\x98' + 153: 216, # '\x99' + 154: 217, # '\x9a' + 155: 218, # '\x9b' + 156: 219, # '\x9c' + 157: 220, # '\x9d' + 158: 221, # '\x9e' + 159: 222, # '\x9f' + 160: 223, # '\xa0' + 161: 224, # 'Ё' + 162: 225, # 'Ђ' + 163: 226, # 'Ѓ' + 164: 227, # 'Є' + 165: 228, # 'Ѕ' + 166: 229, # 'І' + 167: 230, # 'Ї' + 168: 231, # 'Ј' + 169: 232, # 'Љ' + 170: 233, # 'Њ' + 171: 234, # 'Ћ' + 172: 235, # 'Ќ' + 173: 236, # '\xad' + 174: 237, # 'Ў' + 175: 238, # 'Џ' + 176: 37, # 'А' + 177: 44, # 'Б' + 178: 33, # 'В' + 179: 46, # 'Г' + 180: 41, # 'Д' + 181: 48, # 'Е' + 182: 56, # 'Ж' + 183: 51, # 'З' + 184: 42, # 'И' + 185: 60, # 'Й' + 186: 36, # 'К' + 187: 49, # 'Л' + 188: 38, # 'М' + 189: 31, # 'Н' + 190: 34, # 'О' + 191: 35, # 'П' + 192: 45, # 'Р' + 193: 32, # 'С' + 194: 40, # 'Т' + 195: 52, # 'У' + 196: 53, # 'Ф' + 197: 55, # 'Х' + 198: 58, # 'Ц' + 199: 50, # 'Ч' + 200: 57, # 'Ш' + 201: 63, # 'Щ' + 202: 70, # 'Ъ' + 203: 62, # 'Ы' + 204: 61, # 'Ь' + 205: 47, # 'Э' + 206: 59, # 'Ю' + 207: 43, # 'Я' + 208: 3, # 'а' + 209: 21, # 'б' + 210: 10, # 'в' + 211: 19, # 'г' + 212: 13, # 'д' + 213: 2, # 'е' + 214: 24, # 'ж' + 215: 20, # 'з' + 216: 4, # 'и' + 217: 23, # 'й' + 218: 11, # 'к' + 219: 8, # 'л' + 220: 12, # 'м' + 221: 5, # 'н' + 222: 1, # 'о' + 223: 15, # 'п' + 224: 9, # 'р' + 225: 7, # 'с' + 226: 6, # 'т' + 227: 14, # 'у' + 228: 39, # 'ф' + 229: 26, # 'х' + 230: 28, # 'ц' + 231: 22, # 'ч' + 232: 25, # 'ш' + 233: 29, # 'щ' + 234: 54, # 'ъ' + 235: 18, # 'ы' + 236: 17, # 'ь' + 237: 30, # 'э' + 238: 27, # 'ю' + 239: 16, # 'я' + 240: 239, # '№' + 241: 68, # 'ё' + 242: 240, # 'ђ' + 243: 241, # 'ѓ' + 244: 242, # 'є' + 245: 243, # 'ѕ' + 246: 244, # 'і' + 247: 245, # 'ї' + 248: 246, # 'ј' + 249: 247, # 'љ' + 250: 248, # 'њ' + 251: 249, # 'ћ' + 252: 250, # 'ќ' + 253: 251, # '§' + 254: 252, # 'ў' + 255: 255, # 'џ' } -ISO_8859_5_RUSSIAN_MODEL = SingleByteCharSetModel(charset_name='ISO-8859-5', - language='Russian', - char_to_order_map=ISO_8859_5_RUSSIAN_CHAR_TO_ORDER, - language_model=RUSSIAN_LANG_MODEL, - typical_positive_ratio=0.976601, - keep_ascii_letters=False, - alphabet='ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё') - +ISO_8859_5_RUSSIAN_MODEL = SingleByteCharSetModel( + charset_name="ISO-8859-5", + language="Russian", + char_to_order_map=ISO_8859_5_RUSSIAN_CHAR_TO_ORDER, + language_model=RUSSIAN_LANG_MODEL, + typical_positive_ratio=0.976601, + keep_ascii_letters=False, + alphabet="ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё", +) diff --git a/libs/common/chardet/langthaimodel.py b/libs/common/chardet/langthaimodel.py index d0191f24..883fdb1e 100644 --- a/libs/common/chardet/langthaimodel.py +++ b/libs/common/chardet/langthaimodel.py @@ -1,9 +1,5 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - from chardet.sbcharsetprober import SingleByteCharSetModel - # 3: Positive # 2: Likely # 1: Unlikely @@ -4115,269 +4111,270 @@ THAI_LANG_MODEL = { # Character Mapping Table(s): TIS_620_THAI_CHAR_TO_ORDER = { - 0: 255, # '\x00' - 1: 255, # '\x01' - 2: 255, # '\x02' - 3: 255, # '\x03' - 4: 255, # '\x04' - 5: 255, # '\x05' - 6: 255, # '\x06' - 7: 255, # '\x07' - 8: 255, # '\x08' - 9: 255, # '\t' - 10: 254, # '\n' - 11: 255, # '\x0b' - 12: 255, # '\x0c' - 13: 254, # '\r' - 14: 255, # '\x0e' - 15: 255, # '\x0f' - 16: 255, # '\x10' - 17: 255, # '\x11' - 18: 255, # '\x12' - 19: 255, # '\x13' - 20: 255, # '\x14' - 21: 255, # '\x15' - 22: 255, # '\x16' - 23: 255, # '\x17' - 24: 255, # '\x18' - 25: 255, # '\x19' - 26: 255, # '\x1a' - 27: 255, # '\x1b' - 28: 255, # '\x1c' - 29: 255, # '\x1d' - 30: 255, # '\x1e' - 31: 255, # '\x1f' - 32: 253, # ' ' - 33: 253, # '!' - 34: 253, # '"' - 35: 253, # '#' - 36: 253, # '$' - 37: 253, # '%' - 38: 253, # '&' - 39: 253, # "'" - 40: 253, # '(' - 41: 253, # ')' - 42: 253, # '*' - 43: 253, # '+' - 44: 253, # ',' - 45: 253, # '-' - 46: 253, # '.' - 47: 253, # '/' - 48: 252, # '0' - 49: 252, # '1' - 50: 252, # '2' - 51: 252, # '3' - 52: 252, # '4' - 53: 252, # '5' - 54: 252, # '6' - 55: 252, # '7' - 56: 252, # '8' - 57: 252, # '9' - 58: 253, # ':' - 59: 253, # ';' - 60: 253, # '<' - 61: 253, # '=' - 62: 253, # '>' - 63: 253, # '?' - 64: 253, # '@' - 65: 182, # 'A' - 66: 106, # 'B' - 67: 107, # 'C' - 68: 100, # 'D' - 69: 183, # 'E' - 70: 184, # 'F' - 71: 185, # 'G' - 72: 101, # 'H' - 73: 94, # 'I' - 74: 186, # 'J' - 75: 187, # 'K' - 76: 108, # 'L' - 77: 109, # 'M' - 78: 110, # 'N' - 79: 111, # 'O' - 80: 188, # 'P' - 81: 189, # 'Q' - 82: 190, # 'R' - 83: 89, # 'S' - 84: 95, # 'T' - 85: 112, # 'U' - 86: 113, # 'V' - 87: 191, # 'W' - 88: 192, # 'X' - 89: 193, # 'Y' - 90: 194, # 'Z' - 91: 253, # '[' - 92: 253, # '\\' - 93: 253, # ']' - 94: 253, # '^' - 95: 253, # '_' - 96: 253, # '`' - 97: 64, # 'a' - 98: 72, # 'b' - 99: 73, # 'c' - 100: 114, # 'd' - 101: 74, # 'e' - 102: 115, # 'f' - 103: 116, # 'g' - 104: 102, # 'h' - 105: 81, # 'i' - 106: 201, # 'j' - 107: 117, # 'k' - 108: 90, # 'l' - 109: 103, # 'm' - 110: 78, # 'n' - 111: 82, # 'o' - 112: 96, # 'p' - 113: 202, # 'q' - 114: 91, # 'r' - 115: 79, # 's' - 116: 84, # 't' - 117: 104, # 'u' - 118: 105, # 'v' - 119: 97, # 'w' - 120: 98, # 'x' - 121: 92, # 'y' - 122: 203, # 'z' - 123: 253, # '{' - 124: 253, # '|' - 125: 253, # '}' - 126: 253, # '~' - 127: 253, # '\x7f' - 128: 209, # '\x80' - 129: 210, # '\x81' - 130: 211, # '\x82' - 131: 212, # '\x83' - 132: 213, # '\x84' - 133: 88, # '\x85' - 134: 214, # '\x86' - 135: 215, # '\x87' - 136: 216, # '\x88' - 137: 217, # '\x89' - 138: 218, # '\x8a' - 139: 219, # '\x8b' - 140: 220, # '\x8c' - 141: 118, # '\x8d' - 142: 221, # '\x8e' - 143: 222, # '\x8f' - 144: 223, # '\x90' - 145: 224, # '\x91' - 146: 99, # '\x92' - 147: 85, # '\x93' - 148: 83, # '\x94' - 149: 225, # '\x95' - 150: 226, # '\x96' - 151: 227, # '\x97' - 152: 228, # '\x98' - 153: 229, # '\x99' - 154: 230, # '\x9a' - 155: 231, # '\x9b' - 156: 232, # '\x9c' - 157: 233, # '\x9d' - 158: 234, # '\x9e' - 159: 235, # '\x9f' - 160: 236, # None - 161: 5, # 'ก' - 162: 30, # 'ข' - 163: 237, # 'ฃ' - 164: 24, # 'ค' - 165: 238, # 'ฅ' - 166: 75, # 'ฆ' - 167: 8, # 'ง' - 168: 26, # 'จ' - 169: 52, # 'ฉ' - 170: 34, # 'ช' - 171: 51, # 'ซ' - 172: 119, # 'ฌ' - 173: 47, # 'ญ' - 174: 58, # 'ฎ' - 175: 57, # 'ฏ' - 176: 49, # 'ฐ' - 177: 53, # 'ฑ' - 178: 55, # 'ฒ' - 179: 43, # 'ณ' - 180: 20, # 'ด' - 181: 19, # 'ต' - 182: 44, # 'ถ' - 183: 14, # 'ท' - 184: 48, # 'ธ' - 185: 3, # 'น' - 186: 17, # 'บ' - 187: 25, # 'ป' - 188: 39, # 'ผ' - 189: 62, # 'ฝ' - 190: 31, # 'พ' - 191: 54, # 'ฟ' - 192: 45, # 'ภ' - 193: 9, # 'ม' - 194: 16, # 'ย' - 195: 2, # 'ร' - 196: 61, # 'ฤ' - 197: 15, # 'ล' - 198: 239, # 'ฦ' - 199: 12, # 'ว' - 200: 42, # 'ศ' - 201: 46, # 'ษ' - 202: 18, # 'ส' - 203: 21, # 'ห' - 204: 76, # 'ฬ' - 205: 4, # 'อ' - 206: 66, # 'ฮ' - 207: 63, # 'ฯ' - 208: 22, # 'ะ' - 209: 10, # 'ั' - 210: 1, # 'า' - 211: 36, # 'ำ' - 212: 23, # 'ิ' - 213: 13, # 'ี' - 214: 40, # 'ึ' - 215: 27, # 'ื' - 216: 32, # 'ุ' - 217: 35, # 'ู' - 218: 86, # 'ฺ' - 219: 240, # None - 220: 241, # None - 221: 242, # None - 222: 243, # None - 223: 244, # '฿' - 224: 11, # 'เ' - 225: 28, # 'แ' - 226: 41, # 'โ' - 227: 29, # 'ใ' - 228: 33, # 'ไ' - 229: 245, # 'ๅ' - 230: 50, # 'ๆ' - 231: 37, # '็' - 232: 6, # '่' - 233: 7, # '้' - 234: 67, # '๊' - 235: 77, # '๋' - 236: 38, # '์' - 237: 93, # 'ํ' - 238: 246, # '๎' - 239: 247, # '๏' - 240: 68, # '๐' - 241: 56, # '๑' - 242: 59, # '๒' - 243: 65, # '๓' - 244: 69, # '๔' - 245: 60, # '๕' - 246: 70, # '๖' - 247: 80, # '๗' - 248: 71, # '๘' - 249: 87, # '๙' - 250: 248, # '๚' - 251: 249, # '๛' - 252: 250, # None - 253: 251, # None - 254: 252, # None - 255: 253, # None + 0: 255, # '\x00' + 1: 255, # '\x01' + 2: 255, # '\x02' + 3: 255, # '\x03' + 4: 255, # '\x04' + 5: 255, # '\x05' + 6: 255, # '\x06' + 7: 255, # '\x07' + 8: 255, # '\x08' + 9: 255, # '\t' + 10: 254, # '\n' + 11: 255, # '\x0b' + 12: 255, # '\x0c' + 13: 254, # '\r' + 14: 255, # '\x0e' + 15: 255, # '\x0f' + 16: 255, # '\x10' + 17: 255, # '\x11' + 18: 255, # '\x12' + 19: 255, # '\x13' + 20: 255, # '\x14' + 21: 255, # '\x15' + 22: 255, # '\x16' + 23: 255, # '\x17' + 24: 255, # '\x18' + 25: 255, # '\x19' + 26: 255, # '\x1a' + 27: 255, # '\x1b' + 28: 255, # '\x1c' + 29: 255, # '\x1d' + 30: 255, # '\x1e' + 31: 255, # '\x1f' + 32: 253, # ' ' + 33: 253, # '!' + 34: 253, # '"' + 35: 253, # '#' + 36: 253, # '$' + 37: 253, # '%' + 38: 253, # '&' + 39: 253, # "'" + 40: 253, # '(' + 41: 253, # ')' + 42: 253, # '*' + 43: 253, # '+' + 44: 253, # ',' + 45: 253, # '-' + 46: 253, # '.' + 47: 253, # '/' + 48: 252, # '0' + 49: 252, # '1' + 50: 252, # '2' + 51: 252, # '3' + 52: 252, # '4' + 53: 252, # '5' + 54: 252, # '6' + 55: 252, # '7' + 56: 252, # '8' + 57: 252, # '9' + 58: 253, # ':' + 59: 253, # ';' + 60: 253, # '<' + 61: 253, # '=' + 62: 253, # '>' + 63: 253, # '?' + 64: 253, # '@' + 65: 182, # 'A' + 66: 106, # 'B' + 67: 107, # 'C' + 68: 100, # 'D' + 69: 183, # 'E' + 70: 184, # 'F' + 71: 185, # 'G' + 72: 101, # 'H' + 73: 94, # 'I' + 74: 186, # 'J' + 75: 187, # 'K' + 76: 108, # 'L' + 77: 109, # 'M' + 78: 110, # 'N' + 79: 111, # 'O' + 80: 188, # 'P' + 81: 189, # 'Q' + 82: 190, # 'R' + 83: 89, # 'S' + 84: 95, # 'T' + 85: 112, # 'U' + 86: 113, # 'V' + 87: 191, # 'W' + 88: 192, # 'X' + 89: 193, # 'Y' + 90: 194, # 'Z' + 91: 253, # '[' + 92: 253, # '\\' + 93: 253, # ']' + 94: 253, # '^' + 95: 253, # '_' + 96: 253, # '`' + 97: 64, # 'a' + 98: 72, # 'b' + 99: 73, # 'c' + 100: 114, # 'd' + 101: 74, # 'e' + 102: 115, # 'f' + 103: 116, # 'g' + 104: 102, # 'h' + 105: 81, # 'i' + 106: 201, # 'j' + 107: 117, # 'k' + 108: 90, # 'l' + 109: 103, # 'm' + 110: 78, # 'n' + 111: 82, # 'o' + 112: 96, # 'p' + 113: 202, # 'q' + 114: 91, # 'r' + 115: 79, # 's' + 116: 84, # 't' + 117: 104, # 'u' + 118: 105, # 'v' + 119: 97, # 'w' + 120: 98, # 'x' + 121: 92, # 'y' + 122: 203, # 'z' + 123: 253, # '{' + 124: 253, # '|' + 125: 253, # '}' + 126: 253, # '~' + 127: 253, # '\x7f' + 128: 209, # '\x80' + 129: 210, # '\x81' + 130: 211, # '\x82' + 131: 212, # '\x83' + 132: 213, # '\x84' + 133: 88, # '\x85' + 134: 214, # '\x86' + 135: 215, # '\x87' + 136: 216, # '\x88' + 137: 217, # '\x89' + 138: 218, # '\x8a' + 139: 219, # '\x8b' + 140: 220, # '\x8c' + 141: 118, # '\x8d' + 142: 221, # '\x8e' + 143: 222, # '\x8f' + 144: 223, # '\x90' + 145: 224, # '\x91' + 146: 99, # '\x92' + 147: 85, # '\x93' + 148: 83, # '\x94' + 149: 225, # '\x95' + 150: 226, # '\x96' + 151: 227, # '\x97' + 152: 228, # '\x98' + 153: 229, # '\x99' + 154: 230, # '\x9a' + 155: 231, # '\x9b' + 156: 232, # '\x9c' + 157: 233, # '\x9d' + 158: 234, # '\x9e' + 159: 235, # '\x9f' + 160: 236, # None + 161: 5, # 'ก' + 162: 30, # 'ข' + 163: 237, # 'ฃ' + 164: 24, # 'ค' + 165: 238, # 'ฅ' + 166: 75, # 'ฆ' + 167: 8, # 'ง' + 168: 26, # 'จ' + 169: 52, # 'ฉ' + 170: 34, # 'ช' + 171: 51, # 'ซ' + 172: 119, # 'ฌ' + 173: 47, # 'ญ' + 174: 58, # 'ฎ' + 175: 57, # 'ฏ' + 176: 49, # 'ฐ' + 177: 53, # 'ฑ' + 178: 55, # 'ฒ' + 179: 43, # 'ณ' + 180: 20, # 'ด' + 181: 19, # 'ต' + 182: 44, # 'ถ' + 183: 14, # 'ท' + 184: 48, # 'ธ' + 185: 3, # 'น' + 186: 17, # 'บ' + 187: 25, # 'ป' + 188: 39, # 'ผ' + 189: 62, # 'ฝ' + 190: 31, # 'พ' + 191: 54, # 'ฟ' + 192: 45, # 'ภ' + 193: 9, # 'ม' + 194: 16, # 'ย' + 195: 2, # 'ร' + 196: 61, # 'ฤ' + 197: 15, # 'ล' + 198: 239, # 'ฦ' + 199: 12, # 'ว' + 200: 42, # 'ศ' + 201: 46, # 'ษ' + 202: 18, # 'ส' + 203: 21, # 'ห' + 204: 76, # 'ฬ' + 205: 4, # 'อ' + 206: 66, # 'ฮ' + 207: 63, # 'ฯ' + 208: 22, # 'ะ' + 209: 10, # 'ั' + 210: 1, # 'า' + 211: 36, # 'ำ' + 212: 23, # 'ิ' + 213: 13, # 'ี' + 214: 40, # 'ึ' + 215: 27, # 'ื' + 216: 32, # 'ุ' + 217: 35, # 'ู' + 218: 86, # 'ฺ' + 219: 240, # None + 220: 241, # None + 221: 242, # None + 222: 243, # None + 223: 244, # '฿' + 224: 11, # 'เ' + 225: 28, # 'แ' + 226: 41, # 'โ' + 227: 29, # 'ใ' + 228: 33, # 'ไ' + 229: 245, # 'ๅ' + 230: 50, # 'ๆ' + 231: 37, # '็' + 232: 6, # '่' + 233: 7, # '้' + 234: 67, # '๊' + 235: 77, # '๋' + 236: 38, # '์' + 237: 93, # 'ํ' + 238: 246, # '๎' + 239: 247, # '๏' + 240: 68, # '๐' + 241: 56, # '๑' + 242: 59, # '๒' + 243: 65, # '๓' + 244: 69, # '๔' + 245: 60, # '๕' + 246: 70, # '๖' + 247: 80, # '๗' + 248: 71, # '๘' + 249: 87, # '๙' + 250: 248, # '๚' + 251: 249, # '๛' + 252: 250, # None + 253: 251, # None + 254: 252, # None + 255: 253, # None } -TIS_620_THAI_MODEL = SingleByteCharSetModel(charset_name='TIS-620', - language='Thai', - char_to_order_map=TIS_620_THAI_CHAR_TO_ORDER, - language_model=THAI_LANG_MODEL, - typical_positive_ratio=0.926386, - keep_ascii_letters=False, - alphabet='กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรฤลฦวศษสหฬอฮฯะัาำิีึืฺุู฿เแโใไๅๆ็่้๊๋์ํ๎๏๐๑๒๓๔๕๖๗๘๙๚๛') - +TIS_620_THAI_MODEL = SingleByteCharSetModel( + charset_name="TIS-620", + language="Thai", + char_to_order_map=TIS_620_THAI_CHAR_TO_ORDER, + language_model=THAI_LANG_MODEL, + typical_positive_ratio=0.926386, + keep_ascii_letters=False, + alphabet="กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรฤลฦวศษสหฬอฮฯะัาำิีึืฺุู฿เแโใไๅๆ็่้๊๋์ํ๎๏๐๑๒๓๔๕๖๗๘๙๚๛", +) diff --git a/libs/common/chardet/langturkishmodel.py b/libs/common/chardet/langturkishmodel.py index 8ba93224..64c94336 100644 --- a/libs/common/chardet/langturkishmodel.py +++ b/libs/common/chardet/langturkishmodel.py @@ -1,9 +1,5 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - from chardet.sbcharsetprober import SingleByteCharSetModel - # 3: Positive # 2: Likely # 1: Unlikely @@ -4115,269 +4111,270 @@ TURKISH_LANG_MODEL = { # Character Mapping Table(s): ISO_8859_9_TURKISH_CHAR_TO_ORDER = { - 0: 255, # '\x00' - 1: 255, # '\x01' - 2: 255, # '\x02' - 3: 255, # '\x03' - 4: 255, # '\x04' - 5: 255, # '\x05' - 6: 255, # '\x06' - 7: 255, # '\x07' - 8: 255, # '\x08' - 9: 255, # '\t' - 10: 255, # '\n' - 11: 255, # '\x0b' - 12: 255, # '\x0c' - 13: 255, # '\r' - 14: 255, # '\x0e' - 15: 255, # '\x0f' - 16: 255, # '\x10' - 17: 255, # '\x11' - 18: 255, # '\x12' - 19: 255, # '\x13' - 20: 255, # '\x14' - 21: 255, # '\x15' - 22: 255, # '\x16' - 23: 255, # '\x17' - 24: 255, # '\x18' - 25: 255, # '\x19' - 26: 255, # '\x1a' - 27: 255, # '\x1b' - 28: 255, # '\x1c' - 29: 255, # '\x1d' - 30: 255, # '\x1e' - 31: 255, # '\x1f' - 32: 255, # ' ' - 33: 255, # '!' - 34: 255, # '"' - 35: 255, # '#' - 36: 255, # '$' - 37: 255, # '%' - 38: 255, # '&' - 39: 255, # "'" - 40: 255, # '(' - 41: 255, # ')' - 42: 255, # '*' - 43: 255, # '+' - 44: 255, # ',' - 45: 255, # '-' - 46: 255, # '.' - 47: 255, # '/' - 48: 255, # '0' - 49: 255, # '1' - 50: 255, # '2' - 51: 255, # '3' - 52: 255, # '4' - 53: 255, # '5' - 54: 255, # '6' - 55: 255, # '7' - 56: 255, # '8' - 57: 255, # '9' - 58: 255, # ':' - 59: 255, # ';' - 60: 255, # '<' - 61: 255, # '=' - 62: 255, # '>' - 63: 255, # '?' - 64: 255, # '@' - 65: 23, # 'A' - 66: 37, # 'B' - 67: 47, # 'C' - 68: 39, # 'D' - 69: 29, # 'E' - 70: 52, # 'F' - 71: 36, # 'G' - 72: 45, # 'H' - 73: 53, # 'I' - 74: 60, # 'J' - 75: 16, # 'K' - 76: 49, # 'L' - 77: 20, # 'M' - 78: 46, # 'N' - 79: 42, # 'O' - 80: 48, # 'P' - 81: 69, # 'Q' - 82: 44, # 'R' - 83: 35, # 'S' - 84: 31, # 'T' - 85: 51, # 'U' - 86: 38, # 'V' - 87: 62, # 'W' - 88: 65, # 'X' - 89: 43, # 'Y' - 90: 56, # 'Z' - 91: 255, # '[' - 92: 255, # '\\' - 93: 255, # ']' - 94: 255, # '^' - 95: 255, # '_' - 96: 255, # '`' - 97: 1, # 'a' - 98: 21, # 'b' - 99: 28, # 'c' - 100: 12, # 'd' - 101: 2, # 'e' - 102: 18, # 'f' - 103: 27, # 'g' - 104: 25, # 'h' - 105: 3, # 'i' - 106: 24, # 'j' - 107: 10, # 'k' - 108: 5, # 'l' - 109: 13, # 'm' - 110: 4, # 'n' - 111: 15, # 'o' - 112: 26, # 'p' - 113: 64, # 'q' - 114: 7, # 'r' - 115: 8, # 's' - 116: 9, # 't' - 117: 14, # 'u' - 118: 32, # 'v' - 119: 57, # 'w' - 120: 58, # 'x' - 121: 11, # 'y' - 122: 22, # 'z' - 123: 255, # '{' - 124: 255, # '|' - 125: 255, # '}' - 126: 255, # '~' - 127: 255, # '\x7f' - 128: 180, # '\x80' - 129: 179, # '\x81' - 130: 178, # '\x82' - 131: 177, # '\x83' - 132: 176, # '\x84' - 133: 175, # '\x85' - 134: 174, # '\x86' - 135: 173, # '\x87' - 136: 172, # '\x88' - 137: 171, # '\x89' - 138: 170, # '\x8a' - 139: 169, # '\x8b' - 140: 168, # '\x8c' - 141: 167, # '\x8d' - 142: 166, # '\x8e' - 143: 165, # '\x8f' - 144: 164, # '\x90' - 145: 163, # '\x91' - 146: 162, # '\x92' - 147: 161, # '\x93' - 148: 160, # '\x94' - 149: 159, # '\x95' - 150: 101, # '\x96' - 151: 158, # '\x97' - 152: 157, # '\x98' - 153: 156, # '\x99' - 154: 155, # '\x9a' - 155: 154, # '\x9b' - 156: 153, # '\x9c' - 157: 152, # '\x9d' - 158: 151, # '\x9e' - 159: 106, # '\x9f' - 160: 150, # '\xa0' - 161: 149, # '¡' - 162: 148, # '¢' - 163: 147, # '£' - 164: 146, # '¤' - 165: 145, # '¥' - 166: 144, # '¦' - 167: 100, # '§' - 168: 143, # '¨' - 169: 142, # '©' - 170: 141, # 'ª' - 171: 140, # '«' - 172: 139, # '¬' - 173: 138, # '\xad' - 174: 137, # '®' - 175: 136, # '¯' - 176: 94, # '°' - 177: 80, # '±' - 178: 93, # '²' - 179: 135, # '³' - 180: 105, # '´' - 181: 134, # 'µ' - 182: 133, # '¶' - 183: 63, # '·' - 184: 132, # '¸' - 185: 131, # '¹' - 186: 130, # 'º' - 187: 129, # '»' - 188: 128, # '¼' - 189: 127, # '½' - 190: 126, # '¾' - 191: 125, # '¿' - 192: 124, # 'À' - 193: 104, # 'Á' - 194: 73, # 'Â' - 195: 99, # 'Ã' - 196: 79, # 'Ä' - 197: 85, # 'Å' - 198: 123, # 'Æ' - 199: 54, # 'Ç' - 200: 122, # 'È' - 201: 98, # 'É' - 202: 92, # 'Ê' - 203: 121, # 'Ë' - 204: 120, # 'Ì' - 205: 91, # 'Í' - 206: 103, # 'Î' - 207: 119, # 'Ï' - 208: 68, # 'Ğ' - 209: 118, # 'Ñ' - 210: 117, # 'Ò' - 211: 97, # 'Ó' - 212: 116, # 'Ô' - 213: 115, # 'Õ' - 214: 50, # 'Ö' - 215: 90, # '×' - 216: 114, # 'Ø' - 217: 113, # 'Ù' - 218: 112, # 'Ú' - 219: 111, # 'Û' - 220: 55, # 'Ü' - 221: 41, # 'İ' - 222: 40, # 'Ş' - 223: 86, # 'ß' - 224: 89, # 'à' - 225: 70, # 'á' - 226: 59, # 'â' - 227: 78, # 'ã' - 228: 71, # 'ä' - 229: 82, # 'å' - 230: 88, # 'æ' - 231: 33, # 'ç' - 232: 77, # 'è' - 233: 66, # 'é' - 234: 84, # 'ê' - 235: 83, # 'ë' - 236: 110, # 'ì' - 237: 75, # 'í' - 238: 61, # 'î' - 239: 96, # 'ï' - 240: 30, # 'ğ' - 241: 67, # 'ñ' - 242: 109, # 'ò' - 243: 74, # 'ó' - 244: 87, # 'ô' - 245: 102, # 'õ' - 246: 34, # 'ö' - 247: 95, # '÷' - 248: 81, # 'ø' - 249: 108, # 'ù' - 250: 76, # 'ú' - 251: 72, # 'û' - 252: 17, # 'ü' - 253: 6, # 'ı' - 254: 19, # 'ş' - 255: 107, # 'ÿ' + 0: 255, # '\x00' + 1: 255, # '\x01' + 2: 255, # '\x02' + 3: 255, # '\x03' + 4: 255, # '\x04' + 5: 255, # '\x05' + 6: 255, # '\x06' + 7: 255, # '\x07' + 8: 255, # '\x08' + 9: 255, # '\t' + 10: 255, # '\n' + 11: 255, # '\x0b' + 12: 255, # '\x0c' + 13: 255, # '\r' + 14: 255, # '\x0e' + 15: 255, # '\x0f' + 16: 255, # '\x10' + 17: 255, # '\x11' + 18: 255, # '\x12' + 19: 255, # '\x13' + 20: 255, # '\x14' + 21: 255, # '\x15' + 22: 255, # '\x16' + 23: 255, # '\x17' + 24: 255, # '\x18' + 25: 255, # '\x19' + 26: 255, # '\x1a' + 27: 255, # '\x1b' + 28: 255, # '\x1c' + 29: 255, # '\x1d' + 30: 255, # '\x1e' + 31: 255, # '\x1f' + 32: 255, # ' ' + 33: 255, # '!' + 34: 255, # '"' + 35: 255, # '#' + 36: 255, # '$' + 37: 255, # '%' + 38: 255, # '&' + 39: 255, # "'" + 40: 255, # '(' + 41: 255, # ')' + 42: 255, # '*' + 43: 255, # '+' + 44: 255, # ',' + 45: 255, # '-' + 46: 255, # '.' + 47: 255, # '/' + 48: 255, # '0' + 49: 255, # '1' + 50: 255, # '2' + 51: 255, # '3' + 52: 255, # '4' + 53: 255, # '5' + 54: 255, # '6' + 55: 255, # '7' + 56: 255, # '8' + 57: 255, # '9' + 58: 255, # ':' + 59: 255, # ';' + 60: 255, # '<' + 61: 255, # '=' + 62: 255, # '>' + 63: 255, # '?' + 64: 255, # '@' + 65: 23, # 'A' + 66: 37, # 'B' + 67: 47, # 'C' + 68: 39, # 'D' + 69: 29, # 'E' + 70: 52, # 'F' + 71: 36, # 'G' + 72: 45, # 'H' + 73: 53, # 'I' + 74: 60, # 'J' + 75: 16, # 'K' + 76: 49, # 'L' + 77: 20, # 'M' + 78: 46, # 'N' + 79: 42, # 'O' + 80: 48, # 'P' + 81: 69, # 'Q' + 82: 44, # 'R' + 83: 35, # 'S' + 84: 31, # 'T' + 85: 51, # 'U' + 86: 38, # 'V' + 87: 62, # 'W' + 88: 65, # 'X' + 89: 43, # 'Y' + 90: 56, # 'Z' + 91: 255, # '[' + 92: 255, # '\\' + 93: 255, # ']' + 94: 255, # '^' + 95: 255, # '_' + 96: 255, # '`' + 97: 1, # 'a' + 98: 21, # 'b' + 99: 28, # 'c' + 100: 12, # 'd' + 101: 2, # 'e' + 102: 18, # 'f' + 103: 27, # 'g' + 104: 25, # 'h' + 105: 3, # 'i' + 106: 24, # 'j' + 107: 10, # 'k' + 108: 5, # 'l' + 109: 13, # 'm' + 110: 4, # 'n' + 111: 15, # 'o' + 112: 26, # 'p' + 113: 64, # 'q' + 114: 7, # 'r' + 115: 8, # 's' + 116: 9, # 't' + 117: 14, # 'u' + 118: 32, # 'v' + 119: 57, # 'w' + 120: 58, # 'x' + 121: 11, # 'y' + 122: 22, # 'z' + 123: 255, # '{' + 124: 255, # '|' + 125: 255, # '}' + 126: 255, # '~' + 127: 255, # '\x7f' + 128: 180, # '\x80' + 129: 179, # '\x81' + 130: 178, # '\x82' + 131: 177, # '\x83' + 132: 176, # '\x84' + 133: 175, # '\x85' + 134: 174, # '\x86' + 135: 173, # '\x87' + 136: 172, # '\x88' + 137: 171, # '\x89' + 138: 170, # '\x8a' + 139: 169, # '\x8b' + 140: 168, # '\x8c' + 141: 167, # '\x8d' + 142: 166, # '\x8e' + 143: 165, # '\x8f' + 144: 164, # '\x90' + 145: 163, # '\x91' + 146: 162, # '\x92' + 147: 161, # '\x93' + 148: 160, # '\x94' + 149: 159, # '\x95' + 150: 101, # '\x96' + 151: 158, # '\x97' + 152: 157, # '\x98' + 153: 156, # '\x99' + 154: 155, # '\x9a' + 155: 154, # '\x9b' + 156: 153, # '\x9c' + 157: 152, # '\x9d' + 158: 151, # '\x9e' + 159: 106, # '\x9f' + 160: 150, # '\xa0' + 161: 149, # '¡' + 162: 148, # '¢' + 163: 147, # '£' + 164: 146, # '¤' + 165: 145, # '¥' + 166: 144, # '¦' + 167: 100, # '§' + 168: 143, # '¨' + 169: 142, # '©' + 170: 141, # 'ª' + 171: 140, # '«' + 172: 139, # '¬' + 173: 138, # '\xad' + 174: 137, # '®' + 175: 136, # '¯' + 176: 94, # '°' + 177: 80, # '±' + 178: 93, # '²' + 179: 135, # '³' + 180: 105, # '´' + 181: 134, # 'µ' + 182: 133, # '¶' + 183: 63, # '·' + 184: 132, # '¸' + 185: 131, # '¹' + 186: 130, # 'º' + 187: 129, # '»' + 188: 128, # '¼' + 189: 127, # '½' + 190: 126, # '¾' + 191: 125, # '¿' + 192: 124, # 'À' + 193: 104, # 'Á' + 194: 73, # 'Â' + 195: 99, # 'Ã' + 196: 79, # 'Ä' + 197: 85, # 'Å' + 198: 123, # 'Æ' + 199: 54, # 'Ç' + 200: 122, # 'È' + 201: 98, # 'É' + 202: 92, # 'Ê' + 203: 121, # 'Ë' + 204: 120, # 'Ì' + 205: 91, # 'Í' + 206: 103, # 'Î' + 207: 119, # 'Ï' + 208: 68, # 'Ğ' + 209: 118, # 'Ñ' + 210: 117, # 'Ò' + 211: 97, # 'Ó' + 212: 116, # 'Ô' + 213: 115, # 'Õ' + 214: 50, # 'Ö' + 215: 90, # '×' + 216: 114, # 'Ø' + 217: 113, # 'Ù' + 218: 112, # 'Ú' + 219: 111, # 'Û' + 220: 55, # 'Ü' + 221: 41, # 'İ' + 222: 40, # 'Ş' + 223: 86, # 'ß' + 224: 89, # 'à' + 225: 70, # 'á' + 226: 59, # 'â' + 227: 78, # 'ã' + 228: 71, # 'ä' + 229: 82, # 'å' + 230: 88, # 'æ' + 231: 33, # 'ç' + 232: 77, # 'è' + 233: 66, # 'é' + 234: 84, # 'ê' + 235: 83, # 'ë' + 236: 110, # 'ì' + 237: 75, # 'í' + 238: 61, # 'î' + 239: 96, # 'ï' + 240: 30, # 'ğ' + 241: 67, # 'ñ' + 242: 109, # 'ò' + 243: 74, # 'ó' + 244: 87, # 'ô' + 245: 102, # 'õ' + 246: 34, # 'ö' + 247: 95, # '÷' + 248: 81, # 'ø' + 249: 108, # 'ù' + 250: 76, # 'ú' + 251: 72, # 'û' + 252: 17, # 'ü' + 253: 6, # 'ı' + 254: 19, # 'ş' + 255: 107, # 'ÿ' } -ISO_8859_9_TURKISH_MODEL = SingleByteCharSetModel(charset_name='ISO-8859-9', - language='Turkish', - char_to_order_map=ISO_8859_9_TURKISH_CHAR_TO_ORDER, - language_model=TURKISH_LANG_MODEL, - typical_positive_ratio=0.97029, - keep_ascii_letters=True, - alphabet='ABCDEFGHIJKLMNOPRSTUVYZabcdefghijklmnoprstuvyzÂÇÎÖÛÜâçîöûüĞğİıŞş') - +ISO_8859_9_TURKISH_MODEL = SingleByteCharSetModel( + charset_name="ISO-8859-9", + language="Turkish", + char_to_order_map=ISO_8859_9_TURKISH_CHAR_TO_ORDER, + language_model=TURKISH_LANG_MODEL, + typical_positive_ratio=0.97029, + keep_ascii_letters=True, + alphabet="ABCDEFGHIJKLMNOPRSTUVYZabcdefghijklmnoprstuvyzÂÇÎÖÛÜâçîöûüĞğİıŞş", +) diff --git a/libs/common/chardet/latin1prober.py b/libs/common/chardet/latin1prober.py index 7d1e8c20..59a01d91 100644 --- a/libs/common/chardet/latin1prober.py +++ b/libs/common/chardet/latin1prober.py @@ -26,6 +26,8 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### +from typing import List, Union + from .charsetprober import CharSetProber from .enums import ProbingState @@ -41,6 +43,7 @@ ASV = 6 # accent small vowel ASO = 7 # accent small other CLASS_NUM = 8 # total classes +# fmt: off Latin1_CharToClass = ( OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 00 - 07 OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 08 - 0F @@ -91,34 +94,34 @@ Latin1ClassModel = ( 0, 3, 1, 3, 1, 1, 1, 3, # ASV 0, 3, 1, 3, 1, 1, 3, 3, # ASO ) +# fmt: on class Latin1Prober(CharSetProber): - def __init__(self): - super(Latin1Prober, self).__init__() - self._last_char_class = None - self._freq_counter = None + def __init__(self) -> None: + super().__init__() + self._last_char_class = OTH + self._freq_counter: List[int] = [] self.reset() - def reset(self): + def reset(self) -> None: self._last_char_class = OTH self._freq_counter = [0] * FREQ_CAT_NUM - CharSetProber.reset(self) + super().reset() @property - def charset_name(self): + def charset_name(self) -> str: return "ISO-8859-1" @property - def language(self): + def language(self) -> str: return "" - def feed(self, byte_str): - byte_str = self.filter_with_english_letters(byte_str) + def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState: + byte_str = self.remove_xml_tags(byte_str) for c in byte_str: char_class = Latin1_CharToClass[c] - freq = Latin1ClassModel[(self._last_char_class * CLASS_NUM) - + char_class] + freq = Latin1ClassModel[(self._last_char_class * CLASS_NUM) + char_class] if freq == 0: self._state = ProbingState.NOT_ME break @@ -127,19 +130,18 @@ class Latin1Prober(CharSetProber): return self.state - def get_confidence(self): + def get_confidence(self) -> float: if self.state == ProbingState.NOT_ME: return 0.01 total = sum(self._freq_counter) - if total < 0.01: - confidence = 0.0 - else: - confidence = ((self._freq_counter[3] - self._freq_counter[1] * 20.0) - / total) - if confidence < 0.0: - confidence = 0.0 + confidence = ( + 0.0 + if total < 0.01 + else (self._freq_counter[3] - self._freq_counter[1] * 20.0) / total + ) + confidence = max(confidence, 0.0) # lower the confidence of latin1 so that other more accurate # detector can take priority. - confidence = confidence * 0.73 + confidence *= 0.73 return confidence diff --git a/libs/common/chardet/macromanprober.py b/libs/common/chardet/macromanprober.py new file mode 100644 index 00000000..1425d10e --- /dev/null +++ b/libs/common/chardet/macromanprober.py @@ -0,0 +1,162 @@ +######################## BEGIN LICENSE BLOCK ######################## +# This code was modified from latin1prober.py by Rob Speer . +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Rob Speer - adapt to MacRoman encoding +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from typing import List, Union + +from .charsetprober import CharSetProber +from .enums import ProbingState + +FREQ_CAT_NUM = 4 + +UDF = 0 # undefined +OTH = 1 # other +ASC = 2 # ascii capital letter +ASS = 3 # ascii small letter +ACV = 4 # accent capital vowel +ACO = 5 # accent capital other +ASV = 6 # accent small vowel +ASO = 7 # accent small other +ODD = 8 # character that is unlikely to appear +CLASS_NUM = 9 # total classes + +# The change from Latin1 is that we explicitly look for extended characters +# that are infrequently-occurring symbols, and consider them to always be +# improbable. This should let MacRoman get out of the way of more likely +# encodings in most situations. + +# fmt: off +MacRoman_CharToClass = ( + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 00 - 07 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 08 - 0F + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 10 - 17 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 18 - 1F + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 20 - 27 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 28 - 2F + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 30 - 37 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 38 - 3F + OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 40 - 47 + ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 48 - 4F + ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 50 - 57 + ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, # 58 - 5F + OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 60 - 67 + ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 68 - 6F + ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 70 - 77 + ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, # 78 - 7F + ACV, ACV, ACO, ACV, ACO, ACV, ACV, ASV, # 80 - 87 + ASV, ASV, ASV, ASV, ASV, ASO, ASV, ASV, # 88 - 8F + ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASV, # 90 - 97 + ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, # 98 - 9F + OTH, OTH, OTH, OTH, OTH, OTH, OTH, ASO, # A0 - A7 + OTH, OTH, ODD, ODD, OTH, OTH, ACV, ACV, # A8 - AF + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B0 - B7 + OTH, OTH, OTH, OTH, OTH, OTH, ASV, ASV, # B8 - BF + OTH, OTH, ODD, OTH, ODD, OTH, OTH, OTH, # C0 - C7 + OTH, OTH, OTH, ACV, ACV, ACV, ACV, ASV, # C8 - CF + OTH, OTH, OTH, OTH, OTH, OTH, OTH, ODD, # D0 - D7 + ASV, ACV, ODD, OTH, OTH, OTH, OTH, OTH, # D8 - DF + OTH, OTH, OTH, OTH, OTH, ACV, ACV, ACV, # E0 - E7 + ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, # E8 - EF + ODD, ACV, ACV, ACV, ACV, ASV, ODD, ODD, # F0 - F7 + ODD, ODD, ODD, ODD, ODD, ODD, ODD, ODD, # F8 - FF +) + +# 0 : illegal +# 1 : very unlikely +# 2 : normal +# 3 : very likely +MacRomanClassModel = ( +# UDF OTH ASC ASS ACV ACO ASV ASO ODD + 0, 0, 0, 0, 0, 0, 0, 0, 0, # UDF + 0, 3, 3, 3, 3, 3, 3, 3, 1, # OTH + 0, 3, 3, 3, 3, 3, 3, 3, 1, # ASC + 0, 3, 3, 3, 1, 1, 3, 3, 1, # ASS + 0, 3, 3, 3, 1, 2, 1, 2, 1, # ACV + 0, 3, 3, 3, 3, 3, 3, 3, 1, # ACO + 0, 3, 1, 3, 1, 1, 1, 3, 1, # ASV + 0, 3, 1, 3, 1, 1, 3, 3, 1, # ASO + 0, 1, 1, 1, 1, 1, 1, 1, 1, # ODD +) +# fmt: on + + +class MacRomanProber(CharSetProber): + def __init__(self) -> None: + super().__init__() + self._last_char_class = OTH + self._freq_counter: List[int] = [] + self.reset() + + def reset(self) -> None: + self._last_char_class = OTH + self._freq_counter = [0] * FREQ_CAT_NUM + + # express the prior that MacRoman is a somewhat rare encoding; + # this can be done by starting out in a slightly improbable state + # that must be overcome + self._freq_counter[2] = 10 + + super().reset() + + @property + def charset_name(self) -> str: + return "MacRoman" + + @property + def language(self) -> str: + return "" + + def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState: + byte_str = self.remove_xml_tags(byte_str) + for c in byte_str: + char_class = MacRoman_CharToClass[c] + freq = MacRomanClassModel[(self._last_char_class * CLASS_NUM) + char_class] + if freq == 0: + self._state = ProbingState.NOT_ME + break + self._freq_counter[freq] += 1 + self._last_char_class = char_class + + return self.state + + def get_confidence(self) -> float: + if self.state == ProbingState.NOT_ME: + return 0.01 + + total = sum(self._freq_counter) + confidence = ( + 0.0 + if total < 0.01 + else (self._freq_counter[3] - self._freq_counter[1] * 20.0) / total + ) + confidence = max(confidence, 0.0) + # lower the confidence of MacRoman so that other more accurate + # detector can take priority. + confidence *= 0.73 + return confidence diff --git a/libs/common/chardet/mbcharsetprober.py b/libs/common/chardet/mbcharsetprober.py index 6256ecfd..666307e8 100644 --- a/libs/common/chardet/mbcharsetprober.py +++ b/libs/common/chardet/mbcharsetprober.py @@ -27,8 +27,12 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### +from typing import Optional, Union + +from .chardistribution import CharDistributionAnalysis from .charsetprober import CharSetProber -from .enums import ProbingState, MachineState +from .codingstatemachine import CodingStateMachine +from .enums import LanguageFilter, MachineState, ProbingState class MultiByteCharSetProber(CharSetProber): @@ -36,56 +40,56 @@ class MultiByteCharSetProber(CharSetProber): MultiByteCharSetProber """ - def __init__(self, lang_filter=None): - super(MultiByteCharSetProber, self).__init__(lang_filter=lang_filter) - self.distribution_analyzer = None - self.coding_sm = None - self._last_char = [0, 0] + def __init__(self, lang_filter: LanguageFilter = LanguageFilter.NONE) -> None: + super().__init__(lang_filter=lang_filter) + self.distribution_analyzer: Optional[CharDistributionAnalysis] = None + self.coding_sm: Optional[CodingStateMachine] = None + self._last_char = bytearray(b"\0\0") - def reset(self): - super(MultiByteCharSetProber, self).reset() + def reset(self) -> None: + super().reset() if self.coding_sm: self.coding_sm.reset() if self.distribution_analyzer: self.distribution_analyzer.reset() - self._last_char = [0, 0] + self._last_char = bytearray(b"\0\0") - @property - def charset_name(self): - raise NotImplementedError + def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState: + assert self.coding_sm is not None + assert self.distribution_analyzer is not None - @property - def language(self): - raise NotImplementedError - - def feed(self, byte_str): - for i in range(len(byte_str)): - coding_state = self.coding_sm.next_state(byte_str[i]) + for i, byte in enumerate(byte_str): + coding_state = self.coding_sm.next_state(byte) if coding_state == MachineState.ERROR: - self.logger.debug('%s %s prober hit error at byte %s', - self.charset_name, self.language, i) + self.logger.debug( + "%s %s prober hit error at byte %s", + self.charset_name, + self.language, + i, + ) self._state = ProbingState.NOT_ME break - elif coding_state == MachineState.ITS_ME: + if coding_state == MachineState.ITS_ME: self._state = ProbingState.FOUND_IT break - elif coding_state == MachineState.START: + if coding_state == MachineState.START: char_len = self.coding_sm.get_current_charlen() if i == 0: - self._last_char[1] = byte_str[0] + self._last_char[1] = byte self.distribution_analyzer.feed(self._last_char, char_len) else: - self.distribution_analyzer.feed(byte_str[i - 1:i + 1], - char_len) + self.distribution_analyzer.feed(byte_str[i - 1 : i + 1], char_len) self._last_char[0] = byte_str[-1] if self.state == ProbingState.DETECTING: - if (self.distribution_analyzer.got_enough_data() and - (self.get_confidence() > self.SHORTCUT_THRESHOLD)): + if self.distribution_analyzer.got_enough_data() and ( + self.get_confidence() > self.SHORTCUT_THRESHOLD + ): self._state = ProbingState.FOUND_IT return self.state - def get_confidence(self): + def get_confidence(self) -> float: + assert self.distribution_analyzer is not None return self.distribution_analyzer.get_confidence() diff --git a/libs/common/chardet/mbcsgroupprober.py b/libs/common/chardet/mbcsgroupprober.py index 530abe75..6cb9cc7b 100644 --- a/libs/common/chardet/mbcsgroupprober.py +++ b/libs/common/chardet/mbcsgroupprober.py @@ -27,20 +27,22 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -from .charsetgroupprober import CharSetGroupProber -from .utf8prober import UTF8Prober -from .sjisprober import SJISProber -from .eucjpprober import EUCJPProber -from .gb2312prober import GB2312Prober -from .euckrprober import EUCKRProber -from .cp949prober import CP949Prober from .big5prober import Big5Prober +from .charsetgroupprober import CharSetGroupProber +from .cp949prober import CP949Prober +from .enums import LanguageFilter +from .eucjpprober import EUCJPProber +from .euckrprober import EUCKRProber from .euctwprober import EUCTWProber +from .gb2312prober import GB2312Prober +from .johabprober import JOHABProber +from .sjisprober import SJISProber +from .utf8prober import UTF8Prober class MBCSGroupProber(CharSetGroupProber): - def __init__(self, lang_filter=None): - super(MBCSGroupProber, self).__init__(lang_filter=lang_filter) + def __init__(self, lang_filter: LanguageFilter = LanguageFilter.NONE) -> None: + super().__init__(lang_filter=lang_filter) self.probers = [ UTF8Prober(), SJISProber(), @@ -49,6 +51,7 @@ class MBCSGroupProber(CharSetGroupProber): EUCKRProber(), CP949Prober(), Big5Prober(), - EUCTWProber() + EUCTWProber(), + JOHABProber(), ] self.reset() diff --git a/libs/common/chardet/mbcssm.py b/libs/common/chardet/mbcssm.py index 8360d0f2..7bbe97e6 100644 --- a/libs/common/chardet/mbcssm.py +++ b/libs/common/chardet/mbcssm.py @@ -25,43 +25,45 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### +from .codingstatemachinedict import CodingStateMachineDict from .enums import MachineState # BIG5 +# fmt: off BIG5_CLS = ( - 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as legal value - 1,1,1,1,1,1,0,0, # 08 - 0f - 1,1,1,1,1,1,1,1, # 10 - 17 - 1,1,1,0,1,1,1,1, # 18 - 1f - 1,1,1,1,1,1,1,1, # 20 - 27 - 1,1,1,1,1,1,1,1, # 28 - 2f - 1,1,1,1,1,1,1,1, # 30 - 37 - 1,1,1,1,1,1,1,1, # 38 - 3f - 2,2,2,2,2,2,2,2, # 40 - 47 - 2,2,2,2,2,2,2,2, # 48 - 4f - 2,2,2,2,2,2,2,2, # 50 - 57 - 2,2,2,2,2,2,2,2, # 58 - 5f - 2,2,2,2,2,2,2,2, # 60 - 67 - 2,2,2,2,2,2,2,2, # 68 - 6f - 2,2,2,2,2,2,2,2, # 70 - 77 - 2,2,2,2,2,2,2,1, # 78 - 7f - 4,4,4,4,4,4,4,4, # 80 - 87 - 4,4,4,4,4,4,4,4, # 88 - 8f - 4,4,4,4,4,4,4,4, # 90 - 97 - 4,4,4,4,4,4,4,4, # 98 - 9f - 4,3,3,3,3,3,3,3, # a0 - a7 - 3,3,3,3,3,3,3,3, # a8 - af - 3,3,3,3,3,3,3,3, # b0 - b7 - 3,3,3,3,3,3,3,3, # b8 - bf - 3,3,3,3,3,3,3,3, # c0 - c7 - 3,3,3,3,3,3,3,3, # c8 - cf - 3,3,3,3,3,3,3,3, # d0 - d7 - 3,3,3,3,3,3,3,3, # d8 - df - 3,3,3,3,3,3,3,3, # e0 - e7 - 3,3,3,3,3,3,3,3, # e8 - ef - 3,3,3,3,3,3,3,3, # f0 - f7 - 3,3,3,3,3,3,3,0 # f8 - ff + 1, 1, 1, 1, 1, 1, 1, 1, # 00 - 07 #allow 0x00 as legal value + 1, 1, 1, 1, 1, 1, 0, 0, # 08 - 0f + 1, 1, 1, 1, 1, 1, 1, 1, # 10 - 17 + 1, 1, 1, 0, 1, 1, 1, 1, # 18 - 1f + 1, 1, 1, 1, 1, 1, 1, 1, # 20 - 27 + 1, 1, 1, 1, 1, 1, 1, 1, # 28 - 2f + 1, 1, 1, 1, 1, 1, 1, 1, # 30 - 37 + 1, 1, 1, 1, 1, 1, 1, 1, # 38 - 3f + 2, 2, 2, 2, 2, 2, 2, 2, # 40 - 47 + 2, 2, 2, 2, 2, 2, 2, 2, # 48 - 4f + 2, 2, 2, 2, 2, 2, 2, 2, # 50 - 57 + 2, 2, 2, 2, 2, 2, 2, 2, # 58 - 5f + 2, 2, 2, 2, 2, 2, 2, 2, # 60 - 67 + 2, 2, 2, 2, 2, 2, 2, 2, # 68 - 6f + 2, 2, 2, 2, 2, 2, 2, 2, # 70 - 77 + 2, 2, 2, 2, 2, 2, 2, 1, # 78 - 7f + 4, 4, 4, 4, 4, 4, 4, 4, # 80 - 87 + 4, 4, 4, 4, 4, 4, 4, 4, # 88 - 8f + 4, 4, 4, 4, 4, 4, 4, 4, # 90 - 97 + 4, 4, 4, 4, 4, 4, 4, 4, # 98 - 9f + 4, 3, 3, 3, 3, 3, 3, 3, # a0 - a7 + 3, 3, 3, 3, 3, 3, 3, 3, # a8 - af + 3, 3, 3, 3, 3, 3, 3, 3, # b0 - b7 + 3, 3, 3, 3, 3, 3, 3, 3, # b8 - bf + 3, 3, 3, 3, 3, 3, 3, 3, # c0 - c7 + 3, 3, 3, 3, 3, 3, 3, 3, # c8 - cf + 3, 3, 3, 3, 3, 3, 3, 3, # d0 - d7 + 3, 3, 3, 3, 3, 3, 3, 3, # d8 - df + 3, 3, 3, 3, 3, 3, 3, 3, # e0 - e7 + 3, 3, 3, 3, 3, 3, 3, 3, # e8 - ef + 3, 3, 3, 3, 3, 3, 3, 3, # f0 - f7 + 3, 3, 3, 3, 3, 3, 3, 0 # f8 - ff ) BIG5_ST = ( @@ -69,34 +71,37 @@ BIG5_ST = ( MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,#08-0f MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START#10-17 ) +# fmt: on BIG5_CHAR_LEN_TABLE = (0, 1, 1, 2, 0) -BIG5_SM_MODEL = {'class_table': BIG5_CLS, - 'class_factor': 5, - 'state_table': BIG5_ST, - 'char_len_table': BIG5_CHAR_LEN_TABLE, - 'name': 'Big5'} +BIG5_SM_MODEL: CodingStateMachineDict = { + "class_table": BIG5_CLS, + "class_factor": 5, + "state_table": BIG5_ST, + "char_len_table": BIG5_CHAR_LEN_TABLE, + "name": "Big5", +} # CP949 - +# fmt: off CP949_CLS = ( - 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0,0, # 00 - 0f - 1,1,1,1,1,1,1,1, 1,1,1,0,1,1,1,1, # 10 - 1f - 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 20 - 2f - 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 30 - 3f - 1,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4, # 40 - 4f - 4,4,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 50 - 5f - 1,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5, # 60 - 6f - 5,5,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 70 - 7f - 0,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 80 - 8f - 6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 90 - 9f - 6,7,7,7,7,7,7,7, 7,7,7,7,7,8,8,8, # a0 - af - 7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7, # b0 - bf - 7,7,7,7,7,7,9,2, 2,3,2,2,2,2,2,2, # c0 - cf - 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # d0 - df - 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # e0 - ef - 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,0, # f0 - ff + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, # 00 - 0f + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, # 10 - 1f + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, # 20 - 2f + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, # 30 - 3f + 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, # 40 - 4f + 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 1, 1, 1, # 50 - 5f + 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, # 60 - 6f + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 1, 1, 1, # 70 - 7f + 0, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, # 80 - 8f + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, # 90 - 9f + 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, # a0 - af + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, # b0 - bf + 7, 7, 7, 7, 7, 7, 9, 2, 2, 3, 2, 2, 2, 2, 2, 2, # c0 - cf + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, # d0 - df + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, # e0 - ef + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, # f0 - ff ) CP949_ST = ( @@ -109,50 +114,53 @@ CP949_ST = ( MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, # 5 MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START, # 6 ) +# fmt: on CP949_CHAR_LEN_TABLE = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2) -CP949_SM_MODEL = {'class_table': CP949_CLS, - 'class_factor': 10, - 'state_table': CP949_ST, - 'char_len_table': CP949_CHAR_LEN_TABLE, - 'name': 'CP949'} +CP949_SM_MODEL: CodingStateMachineDict = { + "class_table": CP949_CLS, + "class_factor": 10, + "state_table": CP949_ST, + "char_len_table": CP949_CHAR_LEN_TABLE, + "name": "CP949", +} # EUC-JP - +# fmt: off EUCJP_CLS = ( - 4,4,4,4,4,4,4,4, # 00 - 07 - 4,4,4,4,4,4,5,5, # 08 - 0f - 4,4,4,4,4,4,4,4, # 10 - 17 - 4,4,4,5,4,4,4,4, # 18 - 1f - 4,4,4,4,4,4,4,4, # 20 - 27 - 4,4,4,4,4,4,4,4, # 28 - 2f - 4,4,4,4,4,4,4,4, # 30 - 37 - 4,4,4,4,4,4,4,4, # 38 - 3f - 4,4,4,4,4,4,4,4, # 40 - 47 - 4,4,4,4,4,4,4,4, # 48 - 4f - 4,4,4,4,4,4,4,4, # 50 - 57 - 4,4,4,4,4,4,4,4, # 58 - 5f - 4,4,4,4,4,4,4,4, # 60 - 67 - 4,4,4,4,4,4,4,4, # 68 - 6f - 4,4,4,4,4,4,4,4, # 70 - 77 - 4,4,4,4,4,4,4,4, # 78 - 7f - 5,5,5,5,5,5,5,5, # 80 - 87 - 5,5,5,5,5,5,1,3, # 88 - 8f - 5,5,5,5,5,5,5,5, # 90 - 97 - 5,5,5,5,5,5,5,5, # 98 - 9f - 5,2,2,2,2,2,2,2, # a0 - a7 - 2,2,2,2,2,2,2,2, # a8 - af - 2,2,2,2,2,2,2,2, # b0 - b7 - 2,2,2,2,2,2,2,2, # b8 - bf - 2,2,2,2,2,2,2,2, # c0 - c7 - 2,2,2,2,2,2,2,2, # c8 - cf - 2,2,2,2,2,2,2,2, # d0 - d7 - 2,2,2,2,2,2,2,2, # d8 - df - 0,0,0,0,0,0,0,0, # e0 - e7 - 0,0,0,0,0,0,0,0, # e8 - ef - 0,0,0,0,0,0,0,0, # f0 - f7 - 0,0,0,0,0,0,0,5 # f8 - ff + 4, 4, 4, 4, 4, 4, 4, 4, # 00 - 07 + 4, 4, 4, 4, 4, 4, 5, 5, # 08 - 0f + 4, 4, 4, 4, 4, 4, 4, 4, # 10 - 17 + 4, 4, 4, 5, 4, 4, 4, 4, # 18 - 1f + 4, 4, 4, 4, 4, 4, 4, 4, # 20 - 27 + 4, 4, 4, 4, 4, 4, 4, 4, # 28 - 2f + 4, 4, 4, 4, 4, 4, 4, 4, # 30 - 37 + 4, 4, 4, 4, 4, 4, 4, 4, # 38 - 3f + 4, 4, 4, 4, 4, 4, 4, 4, # 40 - 47 + 4, 4, 4, 4, 4, 4, 4, 4, # 48 - 4f + 4, 4, 4, 4, 4, 4, 4, 4, # 50 - 57 + 4, 4, 4, 4, 4, 4, 4, 4, # 58 - 5f + 4, 4, 4, 4, 4, 4, 4, 4, # 60 - 67 + 4, 4, 4, 4, 4, 4, 4, 4, # 68 - 6f + 4, 4, 4, 4, 4, 4, 4, 4, # 70 - 77 + 4, 4, 4, 4, 4, 4, 4, 4, # 78 - 7f + 5, 5, 5, 5, 5, 5, 5, 5, # 80 - 87 + 5, 5, 5, 5, 5, 5, 1, 3, # 88 - 8f + 5, 5, 5, 5, 5, 5, 5, 5, # 90 - 97 + 5, 5, 5, 5, 5, 5, 5, 5, # 98 - 9f + 5, 2, 2, 2, 2, 2, 2, 2, # a0 - a7 + 2, 2, 2, 2, 2, 2, 2, 2, # a8 - af + 2, 2, 2, 2, 2, 2, 2, 2, # b0 - b7 + 2, 2, 2, 2, 2, 2, 2, 2, # b8 - bf + 2, 2, 2, 2, 2, 2, 2, 2, # c0 - c7 + 2, 2, 2, 2, 2, 2, 2, 2, # c8 - cf + 2, 2, 2, 2, 2, 2, 2, 2, # d0 - d7 + 2, 2, 2, 2, 2, 2, 2, 2, # d8 - df + 0, 0, 0, 0, 0, 0, 0, 0, # e0 - e7 + 0, 0, 0, 0, 0, 0, 0, 0, # e8 - ef + 0, 0, 0, 0, 0, 0, 0, 0, # f0 - f7 + 0, 0, 0, 0, 0, 0, 0, 5 # f8 - ff ) EUCJP_ST = ( @@ -162,100 +170,163 @@ EUCJP_ST = ( MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 3,MachineState.ERROR,#18-1f 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START#20-27 ) +# fmt: on EUCJP_CHAR_LEN_TABLE = (2, 2, 2, 3, 1, 0) -EUCJP_SM_MODEL = {'class_table': EUCJP_CLS, - 'class_factor': 6, - 'state_table': EUCJP_ST, - 'char_len_table': EUCJP_CHAR_LEN_TABLE, - 'name': 'EUC-JP'} +EUCJP_SM_MODEL: CodingStateMachineDict = { + "class_table": EUCJP_CLS, + "class_factor": 6, + "state_table": EUCJP_ST, + "char_len_table": EUCJP_CHAR_LEN_TABLE, + "name": "EUC-JP", +} # EUC-KR - +# fmt: off EUCKR_CLS = ( - 1,1,1,1,1,1,1,1, # 00 - 07 - 1,1,1,1,1,1,0,0, # 08 - 0f - 1,1,1,1,1,1,1,1, # 10 - 17 - 1,1,1,0,1,1,1,1, # 18 - 1f - 1,1,1,1,1,1,1,1, # 20 - 27 - 1,1,1,1,1,1,1,1, # 28 - 2f - 1,1,1,1,1,1,1,1, # 30 - 37 - 1,1,1,1,1,1,1,1, # 38 - 3f - 1,1,1,1,1,1,1,1, # 40 - 47 - 1,1,1,1,1,1,1,1, # 48 - 4f - 1,1,1,1,1,1,1,1, # 50 - 57 - 1,1,1,1,1,1,1,1, # 58 - 5f - 1,1,1,1,1,1,1,1, # 60 - 67 - 1,1,1,1,1,1,1,1, # 68 - 6f - 1,1,1,1,1,1,1,1, # 70 - 77 - 1,1,1,1,1,1,1,1, # 78 - 7f - 0,0,0,0,0,0,0,0, # 80 - 87 - 0,0,0,0,0,0,0,0, # 88 - 8f - 0,0,0,0,0,0,0,0, # 90 - 97 - 0,0,0,0,0,0,0,0, # 98 - 9f - 0,2,2,2,2,2,2,2, # a0 - a7 - 2,2,2,2,2,3,3,3, # a8 - af - 2,2,2,2,2,2,2,2, # b0 - b7 - 2,2,2,2,2,2,2,2, # b8 - bf - 2,2,2,2,2,2,2,2, # c0 - c7 - 2,3,2,2,2,2,2,2, # c8 - cf - 2,2,2,2,2,2,2,2, # d0 - d7 - 2,2,2,2,2,2,2,2, # d8 - df - 2,2,2,2,2,2,2,2, # e0 - e7 - 2,2,2,2,2,2,2,2, # e8 - ef - 2,2,2,2,2,2,2,2, # f0 - f7 - 2,2,2,2,2,2,2,0 # f8 - ff + 1, 1, 1, 1, 1, 1, 1, 1, # 00 - 07 + 1, 1, 1, 1, 1, 1, 0, 0, # 08 - 0f + 1, 1, 1, 1, 1, 1, 1, 1, # 10 - 17 + 1, 1, 1, 0, 1, 1, 1, 1, # 18 - 1f + 1, 1, 1, 1, 1, 1, 1, 1, # 20 - 27 + 1, 1, 1, 1, 1, 1, 1, 1, # 28 - 2f + 1, 1, 1, 1, 1, 1, 1, 1, # 30 - 37 + 1, 1, 1, 1, 1, 1, 1, 1, # 38 - 3f + 1, 1, 1, 1, 1, 1, 1, 1, # 40 - 47 + 1, 1, 1, 1, 1, 1, 1, 1, # 48 - 4f + 1, 1, 1, 1, 1, 1, 1, 1, # 50 - 57 + 1, 1, 1, 1, 1, 1, 1, 1, # 58 - 5f + 1, 1, 1, 1, 1, 1, 1, 1, # 60 - 67 + 1, 1, 1, 1, 1, 1, 1, 1, # 68 - 6f + 1, 1, 1, 1, 1, 1, 1, 1, # 70 - 77 + 1, 1, 1, 1, 1, 1, 1, 1, # 78 - 7f + 0, 0, 0, 0, 0, 0, 0, 0, # 80 - 87 + 0, 0, 0, 0, 0, 0, 0, 0, # 88 - 8f + 0, 0, 0, 0, 0, 0, 0, 0, # 90 - 97 + 0, 0, 0, 0, 0, 0, 0, 0, # 98 - 9f + 0, 2, 2, 2, 2, 2, 2, 2, # a0 - a7 + 2, 2, 2, 2, 2, 3, 3, 3, # a8 - af + 2, 2, 2, 2, 2, 2, 2, 2, # b0 - b7 + 2, 2, 2, 2, 2, 2, 2, 2, # b8 - bf + 2, 2, 2, 2, 2, 2, 2, 2, # c0 - c7 + 2, 3, 2, 2, 2, 2, 2, 2, # c8 - cf + 2, 2, 2, 2, 2, 2, 2, 2, # d0 - d7 + 2, 2, 2, 2, 2, 2, 2, 2, # d8 - df + 2, 2, 2, 2, 2, 2, 2, 2, # e0 - e7 + 2, 2, 2, 2, 2, 2, 2, 2, # e8 - ef + 2, 2, 2, 2, 2, 2, 2, 2, # f0 - f7 + 2, 2, 2, 2, 2, 2, 2, 0 # f8 - ff ) EUCKR_ST = ( MachineState.ERROR,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07 MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #08-0f ) +# fmt: on EUCKR_CHAR_LEN_TABLE = (0, 1, 2, 0) -EUCKR_SM_MODEL = {'class_table': EUCKR_CLS, - 'class_factor': 4, - 'state_table': EUCKR_ST, - 'char_len_table': EUCKR_CHAR_LEN_TABLE, - 'name': 'EUC-KR'} +EUCKR_SM_MODEL: CodingStateMachineDict = { + "class_table": EUCKR_CLS, + "class_factor": 4, + "state_table": EUCKR_ST, + "char_len_table": EUCKR_CHAR_LEN_TABLE, + "name": "EUC-KR", +} + +# JOHAB +# fmt: off +JOHAB_CLS = ( + 4,4,4,4,4,4,4,4, # 00 - 07 + 4,4,4,4,4,4,0,0, # 08 - 0f + 4,4,4,4,4,4,4,4, # 10 - 17 + 4,4,4,0,4,4,4,4, # 18 - 1f + 4,4,4,4,4,4,4,4, # 20 - 27 + 4,4,4,4,4,4,4,4, # 28 - 2f + 4,3,3,3,3,3,3,3, # 30 - 37 + 3,3,3,3,3,3,3,3, # 38 - 3f + 3,1,1,1,1,1,1,1, # 40 - 47 + 1,1,1,1,1,1,1,1, # 48 - 4f + 1,1,1,1,1,1,1,1, # 50 - 57 + 1,1,1,1,1,1,1,1, # 58 - 5f + 1,1,1,1,1,1,1,1, # 60 - 67 + 1,1,1,1,1,1,1,1, # 68 - 6f + 1,1,1,1,1,1,1,1, # 70 - 77 + 1,1,1,1,1,1,1,2, # 78 - 7f + 6,6,6,6,8,8,8,8, # 80 - 87 + 8,8,8,8,8,8,8,8, # 88 - 8f + 8,7,7,7,7,7,7,7, # 90 - 97 + 7,7,7,7,7,7,7,7, # 98 - 9f + 7,7,7,7,7,7,7,7, # a0 - a7 + 7,7,7,7,7,7,7,7, # a8 - af + 7,7,7,7,7,7,7,7, # b0 - b7 + 7,7,7,7,7,7,7,7, # b8 - bf + 7,7,7,7,7,7,7,7, # c0 - c7 + 7,7,7,7,7,7,7,7, # c8 - cf + 7,7,7,7,5,5,5,5, # d0 - d7 + 5,9,9,9,9,9,9,5, # d8 - df + 9,9,9,9,9,9,9,9, # e0 - e7 + 9,9,9,9,9,9,9,9, # e8 - ef + 9,9,9,9,9,9,9,9, # f0 - f7 + 9,9,5,5,5,5,5,0 # f8 - ff +) + +JOHAB_ST = ( +# cls = 0 1 2 3 4 5 6 7 8 9 + MachineState.ERROR ,MachineState.START ,MachineState.START ,MachineState.START ,MachineState.START ,MachineState.ERROR ,MachineState.ERROR ,3 ,3 ,4 , # MachineState.START + MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME, # MachineState.ITS_ME + MachineState.ERROR ,MachineState.ERROR ,MachineState.ERROR ,MachineState.ERROR ,MachineState.ERROR ,MachineState.ERROR ,MachineState.ERROR ,MachineState.ERROR ,MachineState.ERROR ,MachineState.ERROR , # MachineState.ERROR + MachineState.ERROR ,MachineState.START ,MachineState.START ,MachineState.ERROR ,MachineState.ERROR ,MachineState.START ,MachineState.START ,MachineState.START ,MachineState.START ,MachineState.START , # 3 + MachineState.ERROR ,MachineState.START ,MachineState.ERROR ,MachineState.START ,MachineState.ERROR ,MachineState.START ,MachineState.ERROR ,MachineState.START ,MachineState.ERROR ,MachineState.START , # 4 +) +# fmt: on + +JOHAB_CHAR_LEN_TABLE = (0, 1, 1, 1, 1, 0, 0, 2, 2, 2) + +JOHAB_SM_MODEL: CodingStateMachineDict = { + "class_table": JOHAB_CLS, + "class_factor": 10, + "state_table": JOHAB_ST, + "char_len_table": JOHAB_CHAR_LEN_TABLE, + "name": "Johab", +} # EUC-TW - +# fmt: off EUCTW_CLS = ( - 2,2,2,2,2,2,2,2, # 00 - 07 - 2,2,2,2,2,2,0,0, # 08 - 0f - 2,2,2,2,2,2,2,2, # 10 - 17 - 2,2,2,0,2,2,2,2, # 18 - 1f - 2,2,2,2,2,2,2,2, # 20 - 27 - 2,2,2,2,2,2,2,2, # 28 - 2f - 2,2,2,2,2,2,2,2, # 30 - 37 - 2,2,2,2,2,2,2,2, # 38 - 3f - 2,2,2,2,2,2,2,2, # 40 - 47 - 2,2,2,2,2,2,2,2, # 48 - 4f - 2,2,2,2,2,2,2,2, # 50 - 57 - 2,2,2,2,2,2,2,2, # 58 - 5f - 2,2,2,2,2,2,2,2, # 60 - 67 - 2,2,2,2,2,2,2,2, # 68 - 6f - 2,2,2,2,2,2,2,2, # 70 - 77 - 2,2,2,2,2,2,2,2, # 78 - 7f - 0,0,0,0,0,0,0,0, # 80 - 87 - 0,0,0,0,0,0,6,0, # 88 - 8f - 0,0,0,0,0,0,0,0, # 90 - 97 - 0,0,0,0,0,0,0,0, # 98 - 9f - 0,3,4,4,4,4,4,4, # a0 - a7 - 5,5,1,1,1,1,1,1, # a8 - af - 1,1,1,1,1,1,1,1, # b0 - b7 - 1,1,1,1,1,1,1,1, # b8 - bf - 1,1,3,1,3,3,3,3, # c0 - c7 - 3,3,3,3,3,3,3,3, # c8 - cf - 3,3,3,3,3,3,3,3, # d0 - d7 - 3,3,3,3,3,3,3,3, # d8 - df - 3,3,3,3,3,3,3,3, # e0 - e7 - 3,3,3,3,3,3,3,3, # e8 - ef - 3,3,3,3,3,3,3,3, # f0 - f7 - 3,3,3,3,3,3,3,0 # f8 - ff + 2, 2, 2, 2, 2, 2, 2, 2, # 00 - 07 + 2, 2, 2, 2, 2, 2, 0, 0, # 08 - 0f + 2, 2, 2, 2, 2, 2, 2, 2, # 10 - 17 + 2, 2, 2, 0, 2, 2, 2, 2, # 18 - 1f + 2, 2, 2, 2, 2, 2, 2, 2, # 20 - 27 + 2, 2, 2, 2, 2, 2, 2, 2, # 28 - 2f + 2, 2, 2, 2, 2, 2, 2, 2, # 30 - 37 + 2, 2, 2, 2, 2, 2, 2, 2, # 38 - 3f + 2, 2, 2, 2, 2, 2, 2, 2, # 40 - 47 + 2, 2, 2, 2, 2, 2, 2, 2, # 48 - 4f + 2, 2, 2, 2, 2, 2, 2, 2, # 50 - 57 + 2, 2, 2, 2, 2, 2, 2, 2, # 58 - 5f + 2, 2, 2, 2, 2, 2, 2, 2, # 60 - 67 + 2, 2, 2, 2, 2, 2, 2, 2, # 68 - 6f + 2, 2, 2, 2, 2, 2, 2, 2, # 70 - 77 + 2, 2, 2, 2, 2, 2, 2, 2, # 78 - 7f + 0, 0, 0, 0, 0, 0, 0, 0, # 80 - 87 + 0, 0, 0, 0, 0, 0, 6, 0, # 88 - 8f + 0, 0, 0, 0, 0, 0, 0, 0, # 90 - 97 + 0, 0, 0, 0, 0, 0, 0, 0, # 98 - 9f + 0, 3, 4, 4, 4, 4, 4, 4, # a0 - a7 + 5, 5, 1, 1, 1, 1, 1, 1, # a8 - af + 1, 1, 1, 1, 1, 1, 1, 1, # b0 - b7 + 1, 1, 1, 1, 1, 1, 1, 1, # b8 - bf + 1, 1, 3, 1, 3, 3, 3, 3, # c0 - c7 + 3, 3, 3, 3, 3, 3, 3, 3, # c8 - cf + 3, 3, 3, 3, 3, 3, 3, 3, # d0 - d7 + 3, 3, 3, 3, 3, 3, 3, 3, # d8 - df + 3, 3, 3, 3, 3, 3, 3, 3, # e0 - e7 + 3, 3, 3, 3, 3, 3, 3, 3, # e8 - ef + 3, 3, 3, 3, 3, 3, 3, 3, # f0 - f7 + 3, 3, 3, 3, 3, 3, 3, 0 # f8 - ff ) EUCTW_ST = ( @@ -266,50 +337,53 @@ EUCTW_ST = ( 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,#20-27 MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START #28-2f ) +# fmt: on EUCTW_CHAR_LEN_TABLE = (0, 0, 1, 2, 2, 2, 3) -EUCTW_SM_MODEL = {'class_table': EUCTW_CLS, - 'class_factor': 7, - 'state_table': EUCTW_ST, - 'char_len_table': EUCTW_CHAR_LEN_TABLE, - 'name': 'x-euc-tw'} +EUCTW_SM_MODEL: CodingStateMachineDict = { + "class_table": EUCTW_CLS, + "class_factor": 7, + "state_table": EUCTW_ST, + "char_len_table": EUCTW_CHAR_LEN_TABLE, + "name": "x-euc-tw", +} # GB2312 - +# fmt: off GB2312_CLS = ( - 1,1,1,1,1,1,1,1, # 00 - 07 - 1,1,1,1,1,1,0,0, # 08 - 0f - 1,1,1,1,1,1,1,1, # 10 - 17 - 1,1,1,0,1,1,1,1, # 18 - 1f - 1,1,1,1,1,1,1,1, # 20 - 27 - 1,1,1,1,1,1,1,1, # 28 - 2f - 3,3,3,3,3,3,3,3, # 30 - 37 - 3,3,1,1,1,1,1,1, # 38 - 3f - 2,2,2,2,2,2,2,2, # 40 - 47 - 2,2,2,2,2,2,2,2, # 48 - 4f - 2,2,2,2,2,2,2,2, # 50 - 57 - 2,2,2,2,2,2,2,2, # 58 - 5f - 2,2,2,2,2,2,2,2, # 60 - 67 - 2,2,2,2,2,2,2,2, # 68 - 6f - 2,2,2,2,2,2,2,2, # 70 - 77 - 2,2,2,2,2,2,2,4, # 78 - 7f - 5,6,6,6,6,6,6,6, # 80 - 87 - 6,6,6,6,6,6,6,6, # 88 - 8f - 6,6,6,6,6,6,6,6, # 90 - 97 - 6,6,6,6,6,6,6,6, # 98 - 9f - 6,6,6,6,6,6,6,6, # a0 - a7 - 6,6,6,6,6,6,6,6, # a8 - af - 6,6,6,6,6,6,6,6, # b0 - b7 - 6,6,6,6,6,6,6,6, # b8 - bf - 6,6,6,6,6,6,6,6, # c0 - c7 - 6,6,6,6,6,6,6,6, # c8 - cf - 6,6,6,6,6,6,6,6, # d0 - d7 - 6,6,6,6,6,6,6,6, # d8 - df - 6,6,6,6,6,6,6,6, # e0 - e7 - 6,6,6,6,6,6,6,6, # e8 - ef - 6,6,6,6,6,6,6,6, # f0 - f7 - 6,6,6,6,6,6,6,0 # f8 - ff + 1, 1, 1, 1, 1, 1, 1, 1, # 00 - 07 + 1, 1, 1, 1, 1, 1, 0, 0, # 08 - 0f + 1, 1, 1, 1, 1, 1, 1, 1, # 10 - 17 + 1, 1, 1, 0, 1, 1, 1, 1, # 18 - 1f + 1, 1, 1, 1, 1, 1, 1, 1, # 20 - 27 + 1, 1, 1, 1, 1, 1, 1, 1, # 28 - 2f + 3, 3, 3, 3, 3, 3, 3, 3, # 30 - 37 + 3, 3, 1, 1, 1, 1, 1, 1, # 38 - 3f + 2, 2, 2, 2, 2, 2, 2, 2, # 40 - 47 + 2, 2, 2, 2, 2, 2, 2, 2, # 48 - 4f + 2, 2, 2, 2, 2, 2, 2, 2, # 50 - 57 + 2, 2, 2, 2, 2, 2, 2, 2, # 58 - 5f + 2, 2, 2, 2, 2, 2, 2, 2, # 60 - 67 + 2, 2, 2, 2, 2, 2, 2, 2, # 68 - 6f + 2, 2, 2, 2, 2, 2, 2, 2, # 70 - 77 + 2, 2, 2, 2, 2, 2, 2, 4, # 78 - 7f + 5, 6, 6, 6, 6, 6, 6, 6, # 80 - 87 + 6, 6, 6, 6, 6, 6, 6, 6, # 88 - 8f + 6, 6, 6, 6, 6, 6, 6, 6, # 90 - 97 + 6, 6, 6, 6, 6, 6, 6, 6, # 98 - 9f + 6, 6, 6, 6, 6, 6, 6, 6, # a0 - a7 + 6, 6, 6, 6, 6, 6, 6, 6, # a8 - af + 6, 6, 6, 6, 6, 6, 6, 6, # b0 - b7 + 6, 6, 6, 6, 6, 6, 6, 6, # b8 - bf + 6, 6, 6, 6, 6, 6, 6, 6, # c0 - c7 + 6, 6, 6, 6, 6, 6, 6, 6, # c8 - cf + 6, 6, 6, 6, 6, 6, 6, 6, # d0 - d7 + 6, 6, 6, 6, 6, 6, 6, 6, # d8 - df + 6, 6, 6, 6, 6, 6, 6, 6, # e0 - e7 + 6, 6, 6, 6, 6, 6, 6, 6, # e8 - ef + 6, 6, 6, 6, 6, 6, 6, 6, # f0 - f7 + 6, 6, 6, 6, 6, 6, 6, 0 # f8 - ff ) GB2312_ST = ( @@ -320,6 +394,7 @@ GB2312_ST = ( MachineState.ERROR,MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,#20-27 MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START #28-2f ) +# fmt: on # To be accurate, the length of class 6 can be either 2 or 4. # But it is not necessary to discriminate between the two since @@ -328,100 +403,105 @@ GB2312_ST = ( # 2 here. GB2312_CHAR_LEN_TABLE = (0, 1, 1, 1, 1, 1, 2) -GB2312_SM_MODEL = {'class_table': GB2312_CLS, - 'class_factor': 7, - 'state_table': GB2312_ST, - 'char_len_table': GB2312_CHAR_LEN_TABLE, - 'name': 'GB2312'} +GB2312_SM_MODEL: CodingStateMachineDict = { + "class_table": GB2312_CLS, + "class_factor": 7, + "state_table": GB2312_ST, + "char_len_table": GB2312_CHAR_LEN_TABLE, + "name": "GB2312", +} # Shift_JIS - +# fmt: off SJIS_CLS = ( - 1,1,1,1,1,1,1,1, # 00 - 07 - 1,1,1,1,1,1,0,0, # 08 - 0f - 1,1,1,1,1,1,1,1, # 10 - 17 - 1,1,1,0,1,1,1,1, # 18 - 1f - 1,1,1,1,1,1,1,1, # 20 - 27 - 1,1,1,1,1,1,1,1, # 28 - 2f - 1,1,1,1,1,1,1,1, # 30 - 37 - 1,1,1,1,1,1,1,1, # 38 - 3f - 2,2,2,2,2,2,2,2, # 40 - 47 - 2,2,2,2,2,2,2,2, # 48 - 4f - 2,2,2,2,2,2,2,2, # 50 - 57 - 2,2,2,2,2,2,2,2, # 58 - 5f - 2,2,2,2,2,2,2,2, # 60 - 67 - 2,2,2,2,2,2,2,2, # 68 - 6f - 2,2,2,2,2,2,2,2, # 70 - 77 - 2,2,2,2,2,2,2,1, # 78 - 7f - 3,3,3,3,3,2,2,3, # 80 - 87 - 3,3,3,3,3,3,3,3, # 88 - 8f - 3,3,3,3,3,3,3,3, # 90 - 97 - 3,3,3,3,3,3,3,3, # 98 - 9f + 1, 1, 1, 1, 1, 1, 1, 1, # 00 - 07 + 1, 1, 1, 1, 1, 1, 0, 0, # 08 - 0f + 1, 1, 1, 1, 1, 1, 1, 1, # 10 - 17 + 1, 1, 1, 0, 1, 1, 1, 1, # 18 - 1f + 1, 1, 1, 1, 1, 1, 1, 1, # 20 - 27 + 1, 1, 1, 1, 1, 1, 1, 1, # 28 - 2f + 1, 1, 1, 1, 1, 1, 1, 1, # 30 - 37 + 1, 1, 1, 1, 1, 1, 1, 1, # 38 - 3f + 2, 2, 2, 2, 2, 2, 2, 2, # 40 - 47 + 2, 2, 2, 2, 2, 2, 2, 2, # 48 - 4f + 2, 2, 2, 2, 2, 2, 2, 2, # 50 - 57 + 2, 2, 2, 2, 2, 2, 2, 2, # 58 - 5f + 2, 2, 2, 2, 2, 2, 2, 2, # 60 - 67 + 2, 2, 2, 2, 2, 2, 2, 2, # 68 - 6f + 2, 2, 2, 2, 2, 2, 2, 2, # 70 - 77 + 2, 2, 2, 2, 2, 2, 2, 1, # 78 - 7f + 3, 3, 3, 3, 3, 2, 2, 3, # 80 - 87 + 3, 3, 3, 3, 3, 3, 3, 3, # 88 - 8f + 3, 3, 3, 3, 3, 3, 3, 3, # 90 - 97 + 3, 3, 3, 3, 3, 3, 3, 3, # 98 - 9f #0xa0 is illegal in sjis encoding, but some pages does #contain such byte. We need to be more error forgiven. - 2,2,2,2,2,2,2,2, # a0 - a7 - 2,2,2,2,2,2,2,2, # a8 - af - 2,2,2,2,2,2,2,2, # b0 - b7 - 2,2,2,2,2,2,2,2, # b8 - bf - 2,2,2,2,2,2,2,2, # c0 - c7 - 2,2,2,2,2,2,2,2, # c8 - cf - 2,2,2,2,2,2,2,2, # d0 - d7 - 2,2,2,2,2,2,2,2, # d8 - df - 3,3,3,3,3,3,3,3, # e0 - e7 - 3,3,3,3,3,4,4,4, # e8 - ef - 3,3,3,3,3,3,3,3, # f0 - f7 - 3,3,3,3,3,0,0,0) # f8 - ff - + 2, 2, 2, 2, 2, 2, 2, 2, # a0 - a7 + 2, 2, 2, 2, 2, 2, 2, 2, # a8 - af + 2, 2, 2, 2, 2, 2, 2, 2, # b0 - b7 + 2, 2, 2, 2, 2, 2, 2, 2, # b8 - bf + 2, 2, 2, 2, 2, 2, 2, 2, # c0 - c7 + 2, 2, 2, 2, 2, 2, 2, 2, # c8 - cf + 2, 2, 2, 2, 2, 2, 2, 2, # d0 - d7 + 2, 2, 2, 2, 2, 2, 2, 2, # d8 - df + 3, 3, 3, 3, 3, 3, 3, 3, # e0 - e7 + 3, 3, 3, 3, 3, 4, 4, 4, # e8 - ef + 3, 3, 3, 3, 3, 3, 3, 3, # f0 - f7 + 3, 3, 3, 3, 3, 0, 0, 0, # f8 - ff +) SJIS_ST = ( MachineState.ERROR,MachineState.START,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START #10-17 ) +# fmt: on SJIS_CHAR_LEN_TABLE = (0, 1, 1, 2, 0, 0) -SJIS_SM_MODEL = {'class_table': SJIS_CLS, - 'class_factor': 6, - 'state_table': SJIS_ST, - 'char_len_table': SJIS_CHAR_LEN_TABLE, - 'name': 'Shift_JIS'} +SJIS_SM_MODEL: CodingStateMachineDict = { + "class_table": SJIS_CLS, + "class_factor": 6, + "state_table": SJIS_ST, + "char_len_table": SJIS_CHAR_LEN_TABLE, + "name": "Shift_JIS", +} # UCS2-BE - +# fmt: off UCS2BE_CLS = ( - 0,0,0,0,0,0,0,0, # 00 - 07 - 0,0,1,0,0,2,0,0, # 08 - 0f - 0,0,0,0,0,0,0,0, # 10 - 17 - 0,0,0,3,0,0,0,0, # 18 - 1f - 0,0,0,0,0,0,0,0, # 20 - 27 - 0,3,3,3,3,3,0,0, # 28 - 2f - 0,0,0,0,0,0,0,0, # 30 - 37 - 0,0,0,0,0,0,0,0, # 38 - 3f - 0,0,0,0,0,0,0,0, # 40 - 47 - 0,0,0,0,0,0,0,0, # 48 - 4f - 0,0,0,0,0,0,0,0, # 50 - 57 - 0,0,0,0,0,0,0,0, # 58 - 5f - 0,0,0,0,0,0,0,0, # 60 - 67 - 0,0,0,0,0,0,0,0, # 68 - 6f - 0,0,0,0,0,0,0,0, # 70 - 77 - 0,0,0,0,0,0,0,0, # 78 - 7f - 0,0,0,0,0,0,0,0, # 80 - 87 - 0,0,0,0,0,0,0,0, # 88 - 8f - 0,0,0,0,0,0,0,0, # 90 - 97 - 0,0,0,0,0,0,0,0, # 98 - 9f - 0,0,0,0,0,0,0,0, # a0 - a7 - 0,0,0,0,0,0,0,0, # a8 - af - 0,0,0,0,0,0,0,0, # b0 - b7 - 0,0,0,0,0,0,0,0, # b8 - bf - 0,0,0,0,0,0,0,0, # c0 - c7 - 0,0,0,0,0,0,0,0, # c8 - cf - 0,0,0,0,0,0,0,0, # d0 - d7 - 0,0,0,0,0,0,0,0, # d8 - df - 0,0,0,0,0,0,0,0, # e0 - e7 - 0,0,0,0,0,0,0,0, # e8 - ef - 0,0,0,0,0,0,0,0, # f0 - f7 - 0,0,0,0,0,0,4,5 # f8 - ff + 0, 0, 0, 0, 0, 0, 0, 0, # 00 - 07 + 0, 0, 1, 0, 0, 2, 0, 0, # 08 - 0f + 0, 0, 0, 0, 0, 0, 0, 0, # 10 - 17 + 0, 0, 0, 3, 0, 0, 0, 0, # 18 - 1f + 0, 0, 0, 0, 0, 0, 0, 0, # 20 - 27 + 0, 3, 3, 3, 3, 3, 0, 0, # 28 - 2f + 0, 0, 0, 0, 0, 0, 0, 0, # 30 - 37 + 0, 0, 0, 0, 0, 0, 0, 0, # 38 - 3f + 0, 0, 0, 0, 0, 0, 0, 0, # 40 - 47 + 0, 0, 0, 0, 0, 0, 0, 0, # 48 - 4f + 0, 0, 0, 0, 0, 0, 0, 0, # 50 - 57 + 0, 0, 0, 0, 0, 0, 0, 0, # 58 - 5f + 0, 0, 0, 0, 0, 0, 0, 0, # 60 - 67 + 0, 0, 0, 0, 0, 0, 0, 0, # 68 - 6f + 0, 0, 0, 0, 0, 0, 0, 0, # 70 - 77 + 0, 0, 0, 0, 0, 0, 0, 0, # 78 - 7f + 0, 0, 0, 0, 0, 0, 0, 0, # 80 - 87 + 0, 0, 0, 0, 0, 0, 0, 0, # 88 - 8f + 0, 0, 0, 0, 0, 0, 0, 0, # 90 - 97 + 0, 0, 0, 0, 0, 0, 0, 0, # 98 - 9f + 0, 0, 0, 0, 0, 0, 0, 0, # a0 - a7 + 0, 0, 0, 0, 0, 0, 0, 0, # a8 - af + 0, 0, 0, 0, 0, 0, 0, 0, # b0 - b7 + 0, 0, 0, 0, 0, 0, 0, 0, # b8 - bf + 0, 0, 0, 0, 0, 0, 0, 0, # c0 - c7 + 0, 0, 0, 0, 0, 0, 0, 0, # c8 - cf + 0, 0, 0, 0, 0, 0, 0, 0, # d0 - d7 + 0, 0, 0, 0, 0, 0, 0, 0, # d8 - df + 0, 0, 0, 0, 0, 0, 0, 0, # e0 - e7 + 0, 0, 0, 0, 0, 0, 0, 0, # e8 - ef + 0, 0, 0, 0, 0, 0, 0, 0, # f0 - f7 + 0, 0, 0, 0, 0, 0, 4, 5 # f8 - ff ) UCS2BE_ST = ( @@ -433,50 +513,53 @@ UCS2BE_ST = ( 5, 8, 6, 6,MachineState.ERROR, 6, 6, 6,#28-2f 6, 6, 6, 6,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #30-37 ) +# fmt: on UCS2BE_CHAR_LEN_TABLE = (2, 2, 2, 0, 2, 2) -UCS2BE_SM_MODEL = {'class_table': UCS2BE_CLS, - 'class_factor': 6, - 'state_table': UCS2BE_ST, - 'char_len_table': UCS2BE_CHAR_LEN_TABLE, - 'name': 'UTF-16BE'} +UCS2BE_SM_MODEL: CodingStateMachineDict = { + "class_table": UCS2BE_CLS, + "class_factor": 6, + "state_table": UCS2BE_ST, + "char_len_table": UCS2BE_CHAR_LEN_TABLE, + "name": "UTF-16BE", +} # UCS2-LE - +# fmt: off UCS2LE_CLS = ( - 0,0,0,0,0,0,0,0, # 00 - 07 - 0,0,1,0,0,2,0,0, # 08 - 0f - 0,0,0,0,0,0,0,0, # 10 - 17 - 0,0,0,3,0,0,0,0, # 18 - 1f - 0,0,0,0,0,0,0,0, # 20 - 27 - 0,3,3,3,3,3,0,0, # 28 - 2f - 0,0,0,0,0,0,0,0, # 30 - 37 - 0,0,0,0,0,0,0,0, # 38 - 3f - 0,0,0,0,0,0,0,0, # 40 - 47 - 0,0,0,0,0,0,0,0, # 48 - 4f - 0,0,0,0,0,0,0,0, # 50 - 57 - 0,0,0,0,0,0,0,0, # 58 - 5f - 0,0,0,0,0,0,0,0, # 60 - 67 - 0,0,0,0,0,0,0,0, # 68 - 6f - 0,0,0,0,0,0,0,0, # 70 - 77 - 0,0,0,0,0,0,0,0, # 78 - 7f - 0,0,0,0,0,0,0,0, # 80 - 87 - 0,0,0,0,0,0,0,0, # 88 - 8f - 0,0,0,0,0,0,0,0, # 90 - 97 - 0,0,0,0,0,0,0,0, # 98 - 9f - 0,0,0,0,0,0,0,0, # a0 - a7 - 0,0,0,0,0,0,0,0, # a8 - af - 0,0,0,0,0,0,0,0, # b0 - b7 - 0,0,0,0,0,0,0,0, # b8 - bf - 0,0,0,0,0,0,0,0, # c0 - c7 - 0,0,0,0,0,0,0,0, # c8 - cf - 0,0,0,0,0,0,0,0, # d0 - d7 - 0,0,0,0,0,0,0,0, # d8 - df - 0,0,0,0,0,0,0,0, # e0 - e7 - 0,0,0,0,0,0,0,0, # e8 - ef - 0,0,0,0,0,0,0,0, # f0 - f7 - 0,0,0,0,0,0,4,5 # f8 - ff + 0, 0, 0, 0, 0, 0, 0, 0, # 00 - 07 + 0, 0, 1, 0, 0, 2, 0, 0, # 08 - 0f + 0, 0, 0, 0, 0, 0, 0, 0, # 10 - 17 + 0, 0, 0, 3, 0, 0, 0, 0, # 18 - 1f + 0, 0, 0, 0, 0, 0, 0, 0, # 20 - 27 + 0, 3, 3, 3, 3, 3, 0, 0, # 28 - 2f + 0, 0, 0, 0, 0, 0, 0, 0, # 30 - 37 + 0, 0, 0, 0, 0, 0, 0, 0, # 38 - 3f + 0, 0, 0, 0, 0, 0, 0, 0, # 40 - 47 + 0, 0, 0, 0, 0, 0, 0, 0, # 48 - 4f + 0, 0, 0, 0, 0, 0, 0, 0, # 50 - 57 + 0, 0, 0, 0, 0, 0, 0, 0, # 58 - 5f + 0, 0, 0, 0, 0, 0, 0, 0, # 60 - 67 + 0, 0, 0, 0, 0, 0, 0, 0, # 68 - 6f + 0, 0, 0, 0, 0, 0, 0, 0, # 70 - 77 + 0, 0, 0, 0, 0, 0, 0, 0, # 78 - 7f + 0, 0, 0, 0, 0, 0, 0, 0, # 80 - 87 + 0, 0, 0, 0, 0, 0, 0, 0, # 88 - 8f + 0, 0, 0, 0, 0, 0, 0, 0, # 90 - 97 + 0, 0, 0, 0, 0, 0, 0, 0, # 98 - 9f + 0, 0, 0, 0, 0, 0, 0, 0, # a0 - a7 + 0, 0, 0, 0, 0, 0, 0, 0, # a8 - af + 0, 0, 0, 0, 0, 0, 0, 0, # b0 - b7 + 0, 0, 0, 0, 0, 0, 0, 0, # b8 - bf + 0, 0, 0, 0, 0, 0, 0, 0, # c0 - c7 + 0, 0, 0, 0, 0, 0, 0, 0, # c8 - cf + 0, 0, 0, 0, 0, 0, 0, 0, # d0 - d7 + 0, 0, 0, 0, 0, 0, 0, 0, # d8 - df + 0, 0, 0, 0, 0, 0, 0, 0, # e0 - e7 + 0, 0, 0, 0, 0, 0, 0, 0, # e8 - ef + 0, 0, 0, 0, 0, 0, 0, 0, # f0 - f7 + 0, 0, 0, 0, 0, 0, 4, 5 # f8 - ff ) UCS2LE_ST = ( @@ -488,50 +571,53 @@ UCS2LE_ST = ( 5, 5, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5, 5,#28-2f 5, 5, 5,MachineState.ERROR, 5,MachineState.ERROR,MachineState.START,MachineState.START #30-37 ) +# fmt: on UCS2LE_CHAR_LEN_TABLE = (2, 2, 2, 2, 2, 2) -UCS2LE_SM_MODEL = {'class_table': UCS2LE_CLS, - 'class_factor': 6, - 'state_table': UCS2LE_ST, - 'char_len_table': UCS2LE_CHAR_LEN_TABLE, - 'name': 'UTF-16LE'} +UCS2LE_SM_MODEL: CodingStateMachineDict = { + "class_table": UCS2LE_CLS, + "class_factor": 6, + "state_table": UCS2LE_ST, + "char_len_table": UCS2LE_CHAR_LEN_TABLE, + "name": "UTF-16LE", +} # UTF-8 - +# fmt: off UTF8_CLS = ( - 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as a legal value - 1,1,1,1,1,1,0,0, # 08 - 0f - 1,1,1,1,1,1,1,1, # 10 - 17 - 1,1,1,0,1,1,1,1, # 18 - 1f - 1,1,1,1,1,1,1,1, # 20 - 27 - 1,1,1,1,1,1,1,1, # 28 - 2f - 1,1,1,1,1,1,1,1, # 30 - 37 - 1,1,1,1,1,1,1,1, # 38 - 3f - 1,1,1,1,1,1,1,1, # 40 - 47 - 1,1,1,1,1,1,1,1, # 48 - 4f - 1,1,1,1,1,1,1,1, # 50 - 57 - 1,1,1,1,1,1,1,1, # 58 - 5f - 1,1,1,1,1,1,1,1, # 60 - 67 - 1,1,1,1,1,1,1,1, # 68 - 6f - 1,1,1,1,1,1,1,1, # 70 - 77 - 1,1,1,1,1,1,1,1, # 78 - 7f - 2,2,2,2,3,3,3,3, # 80 - 87 - 4,4,4,4,4,4,4,4, # 88 - 8f - 4,4,4,4,4,4,4,4, # 90 - 97 - 4,4,4,4,4,4,4,4, # 98 - 9f - 5,5,5,5,5,5,5,5, # a0 - a7 - 5,5,5,5,5,5,5,5, # a8 - af - 5,5,5,5,5,5,5,5, # b0 - b7 - 5,5,5,5,5,5,5,5, # b8 - bf - 0,0,6,6,6,6,6,6, # c0 - c7 - 6,6,6,6,6,6,6,6, # c8 - cf - 6,6,6,6,6,6,6,6, # d0 - d7 - 6,6,6,6,6,6,6,6, # d8 - df - 7,8,8,8,8,8,8,8, # e0 - e7 - 8,8,8,8,8,9,8,8, # e8 - ef - 10,11,11,11,11,11,11,11, # f0 - f7 - 12,13,13,13,14,15,0,0 # f8 - ff + 1, 1, 1, 1, 1, 1, 1, 1, # 00 - 07 #allow 0x00 as a legal value + 1, 1, 1, 1, 1, 1, 0, 0, # 08 - 0f + 1, 1, 1, 1, 1, 1, 1, 1, # 10 - 17 + 1, 1, 1, 0, 1, 1, 1, 1, # 18 - 1f + 1, 1, 1, 1, 1, 1, 1, 1, # 20 - 27 + 1, 1, 1, 1, 1, 1, 1, 1, # 28 - 2f + 1, 1, 1, 1, 1, 1, 1, 1, # 30 - 37 + 1, 1, 1, 1, 1, 1, 1, 1, # 38 - 3f + 1, 1, 1, 1, 1, 1, 1, 1, # 40 - 47 + 1, 1, 1, 1, 1, 1, 1, 1, # 48 - 4f + 1, 1, 1, 1, 1, 1, 1, 1, # 50 - 57 + 1, 1, 1, 1, 1, 1, 1, 1, # 58 - 5f + 1, 1, 1, 1, 1, 1, 1, 1, # 60 - 67 + 1, 1, 1, 1, 1, 1, 1, 1, # 68 - 6f + 1, 1, 1, 1, 1, 1, 1, 1, # 70 - 77 + 1, 1, 1, 1, 1, 1, 1, 1, # 78 - 7f + 2, 2, 2, 2, 3, 3, 3, 3, # 80 - 87 + 4, 4, 4, 4, 4, 4, 4, 4, # 88 - 8f + 4, 4, 4, 4, 4, 4, 4, 4, # 90 - 97 + 4, 4, 4, 4, 4, 4, 4, 4, # 98 - 9f + 5, 5, 5, 5, 5, 5, 5, 5, # a0 - a7 + 5, 5, 5, 5, 5, 5, 5, 5, # a8 - af + 5, 5, 5, 5, 5, 5, 5, 5, # b0 - b7 + 5, 5, 5, 5, 5, 5, 5, 5, # b8 - bf + 0, 0, 6, 6, 6, 6, 6, 6, # c0 - c7 + 6, 6, 6, 6, 6, 6, 6, 6, # c8 - cf + 6, 6, 6, 6, 6, 6, 6, 6, # d0 - d7 + 6, 6, 6, 6, 6, 6, 6, 6, # d8 - df + 7, 8, 8, 8, 8, 8, 8, 8, # e0 - e7 + 8, 8, 8, 8, 8, 9, 8, 8, # e8 - ef + 10, 11, 11, 11, 11, 11, 11, 11, # f0 - f7 + 12, 13, 13, 13, 14, 15, 0, 0 # f8 - ff ) UTF8_ST = ( @@ -562,11 +648,14 @@ UTF8_ST = ( MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,#c0-c7 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR #c8-cf ) +# fmt: on UTF8_CHAR_LEN_TABLE = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6) -UTF8_SM_MODEL = {'class_table': UTF8_CLS, - 'class_factor': 16, - 'state_table': UTF8_ST, - 'char_len_table': UTF8_CHAR_LEN_TABLE, - 'name': 'UTF-8'} +UTF8_SM_MODEL: CodingStateMachineDict = { + "class_table": UTF8_CLS, + "class_factor": 16, + "state_table": UTF8_ST, + "char_len_table": UTF8_CHAR_LEN_TABLE, + "name": "UTF-8", +} diff --git a/libs/common/chardet/metadata/languages.py b/libs/common/chardet/metadata/languages.py index 3237d5ab..eb40c5f0 100644 --- a/libs/common/chardet/metadata/languages.py +++ b/libs/common/chardet/metadata/languages.py @@ -1,19 +1,17 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- """ Metadata about languages used by our model training code for our SingleByteCharSetProbers. Could be used for other things in the future. This code is based on the language metadata from the uchardet project. """ -from __future__ import absolute_import, print_function from string import ascii_letters +from typing import List, Optional + +# TODO: Add Ukrainian (KOI8-U) -# TODO: Add Ukranian (KOI8-U) - -class Language(object): +class Language: """Metadata about a language useful for training models :ivar name: The human name for the language, in English. @@ -33,9 +31,17 @@ class Language(object): Wikipedia for training data. :type wiki_start_pages: list of str """ - def __init__(self, name=None, iso_code=None, use_ascii=True, charsets=None, - alphabet=None, wiki_start_pages=None): - super(Language, self).__init__() + + def __init__( + self, + name: Optional[str] = None, + iso_code: Optional[str] = None, + use_ascii: bool = True, + charsets: Optional[List[str]] = None, + alphabet: Optional[str] = None, + wiki_start_pages: Optional[List[str]] = None, + ) -> None: + super().__init__() self.name = name self.iso_code = iso_code self.use_ascii = use_ascii @@ -46,265 +52,301 @@ class Language(object): else: alphabet = ascii_letters elif not alphabet: - raise ValueError('Must supply alphabet if use_ascii is False') - self.alphabet = ''.join(sorted(set(alphabet))) if alphabet else None + raise ValueError("Must supply alphabet if use_ascii is False") + self.alphabet = "".join(sorted(set(alphabet))) if alphabet else None self.wiki_start_pages = wiki_start_pages - def __repr__(self): - return '{}({})'.format(self.__class__.__name__, - ', '.join('{}={!r}'.format(k, v) - for k, v in self.__dict__.items() - if not k.startswith('_'))) + def __repr__(self) -> str: + param_str = ", ".join( + f"{k}={v!r}" for k, v in self.__dict__.items() if not k.startswith("_") + ) + return f"{self.__class__.__name__}({param_str})" -LANGUAGES = {'Arabic': Language(name='Arabic', - iso_code='ar', - use_ascii=False, - # We only support encodings that use isolated - # forms, because the current recommendation is - # that the rendering system handles presentation - # forms. This means we purposefully skip IBM864. - charsets=['ISO-8859-6', 'WINDOWS-1256', - 'CP720', 'CP864'], - alphabet=u'ءآأؤإئابةتثجحخدذرزسشصضطظعغػؼؽؾؿـفقكلمنهوىيًٌٍَُِّ', - wiki_start_pages=[u'الصفحة_الرئيسية']), - 'Belarusian': Language(name='Belarusian', - iso_code='be', - use_ascii=False, - charsets=['ISO-8859-5', 'WINDOWS-1251', - 'IBM866', 'MacCyrillic'], - alphabet=(u'АБВГДЕЁЖЗІЙКЛМНОПРСТУЎФХЦЧШЫЬЭЮЯ' - u'абвгдеёжзійклмнопрстуўфхцчшыьэюяʼ'), - wiki_start_pages=[u'Галоўная_старонка']), - 'Bulgarian': Language(name='Bulgarian', - iso_code='bg', - use_ascii=False, - charsets=['ISO-8859-5', 'WINDOWS-1251', - 'IBM855'], - alphabet=(u'АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЬЮЯ' - u'абвгдежзийклмнопрстуфхцчшщъьюя'), - wiki_start_pages=[u'Начална_страница']), - 'Czech': Language(name='Czech', - iso_code='cz', - use_ascii=True, - charsets=['ISO-8859-2', 'WINDOWS-1250'], - alphabet=u'áčďéěíňóřšťúůýžÁČĎÉĚÍŇÓŘŠŤÚŮÝŽ', - wiki_start_pages=[u'Hlavní_strana']), - 'Danish': Language(name='Danish', - iso_code='da', - use_ascii=True, - charsets=['ISO-8859-1', 'ISO-8859-15', - 'WINDOWS-1252'], - alphabet=u'æøåÆØÅ', - wiki_start_pages=[u'Forside']), - 'German': Language(name='German', - iso_code='de', - use_ascii=True, - charsets=['ISO-8859-1', 'WINDOWS-1252'], - alphabet=u'äöüßÄÖÜ', - wiki_start_pages=[u'Wikipedia:Hauptseite']), - 'Greek': Language(name='Greek', - iso_code='el', - use_ascii=False, - charsets=['ISO-8859-7', 'WINDOWS-1253'], - alphabet=(u'αβγδεζηθικλμνξοπρσςτυφχψωάέήίόύώ' - u'ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΣΤΥΦΧΨΩΆΈΉΊΌΎΏ'), - wiki_start_pages=[u'Πύλη:Κύρια']), - 'English': Language(name='English', - iso_code='en', - use_ascii=True, - charsets=['ISO-8859-1', 'WINDOWS-1252'], - wiki_start_pages=[u'Main_Page']), - 'Esperanto': Language(name='Esperanto', - iso_code='eo', - # Q, W, X, and Y not used at all - use_ascii=False, - charsets=['ISO-8859-3'], - alphabet=(u'abcĉdefgĝhĥijĵklmnoprsŝtuŭvz' - u'ABCĈDEFGĜHĤIJĴKLMNOPRSŜTUŬVZ'), - wiki_start_pages=[u'Vikipedio:Ĉefpaĝo']), - 'Spanish': Language(name='Spanish', - iso_code='es', - use_ascii=True, - charsets=['ISO-8859-1', 'ISO-8859-15', - 'WINDOWS-1252'], - alphabet=u'ñáéíóúüÑÁÉÍÓÚÜ', - wiki_start_pages=[u'Wikipedia:Portada']), - 'Estonian': Language(name='Estonian', - iso_code='et', - use_ascii=False, - charsets=['ISO-8859-4', 'ISO-8859-13', - 'WINDOWS-1257'], - # C, F, Š, Q, W, X, Y, Z, Ž are only for - # loanwords - alphabet=(u'ABDEGHIJKLMNOPRSTUVÕÄÖÜ' - u'abdeghijklmnoprstuvõäöü'), - wiki_start_pages=[u'Esileht']), - 'Finnish': Language(name='Finnish', - iso_code='fi', - use_ascii=True, - charsets=['ISO-8859-1', 'ISO-8859-15', - 'WINDOWS-1252'], - alphabet=u'ÅÄÖŠŽåäöšž', - wiki_start_pages=[u'Wikipedia:Etusivu']), - 'French': Language(name='French', - iso_code='fr', - use_ascii=True, - charsets=['ISO-8859-1', 'ISO-8859-15', - 'WINDOWS-1252'], - alphabet=u'œàâçèéîïùûêŒÀÂÇÈÉÎÏÙÛÊ', - wiki_start_pages=[u'Wikipédia:Accueil_principal', - u'Bœuf (animal)']), - 'Hebrew': Language(name='Hebrew', - iso_code='he', - use_ascii=False, - charsets=['ISO-8859-8', 'WINDOWS-1255'], - alphabet=u'אבגדהוזחטיךכלםמןנסעףפץצקרשתװױײ', - wiki_start_pages=[u'עמוד_ראשי']), - 'Croatian': Language(name='Croatian', - iso_code='hr', - # Q, W, X, Y are only used for foreign words. - use_ascii=False, - charsets=['ISO-8859-2', 'WINDOWS-1250'], - alphabet=(u'abcčćdđefghijklmnoprsštuvzž' - u'ABCČĆDĐEFGHIJKLMNOPRSŠTUVZŽ'), - wiki_start_pages=[u'Glavna_stranica']), - 'Hungarian': Language(name='Hungarian', - iso_code='hu', - # Q, W, X, Y are only used for foreign words. - use_ascii=False, - charsets=['ISO-8859-2', 'WINDOWS-1250'], - alphabet=(u'abcdefghijklmnoprstuvzáéíóöőúüű' - u'ABCDEFGHIJKLMNOPRSTUVZÁÉÍÓÖŐÚÜŰ'), - wiki_start_pages=[u'Kezdőlap']), - 'Italian': Language(name='Italian', - iso_code='it', - use_ascii=True, - charsets=['ISO-8859-1', 'ISO-8859-15', - 'WINDOWS-1252'], - alphabet=u'ÀÈÉÌÒÓÙàèéìòóù', - wiki_start_pages=[u'Pagina_principale']), - 'Lithuanian': Language(name='Lithuanian', - iso_code='lt', - use_ascii=False, - charsets=['ISO-8859-13', 'WINDOWS-1257', - 'ISO-8859-4'], - # Q, W, and X not used at all - alphabet=(u'AĄBCČDEĘĖFGHIĮYJKLMNOPRSŠTUŲŪVZŽ' - u'aąbcčdeęėfghiįyjklmnoprsštuųūvzž'), - wiki_start_pages=[u'Pagrindinis_puslapis']), - 'Latvian': Language(name='Latvian', - iso_code='lv', - use_ascii=False, - charsets=['ISO-8859-13', 'WINDOWS-1257', - 'ISO-8859-4'], - # Q, W, X, Y are only for loanwords - alphabet=(u'AĀBCČDEĒFGĢHIĪJKĶLĻMNŅOPRSŠTUŪVZŽ' - u'aābcčdeēfgģhiījkķlļmnņoprsštuūvzž'), - wiki_start_pages=[u'Sākumlapa']), - 'Macedonian': Language(name='Macedonian', - iso_code='mk', - use_ascii=False, - charsets=['ISO-8859-5', 'WINDOWS-1251', - 'MacCyrillic', 'IBM855'], - alphabet=(u'АБВГДЃЕЖЗЅИЈКЛЉМНЊОПРСТЌУФХЦЧЏШ' - u'абвгдѓежзѕијклљмнњопрстќуфхцчџш'), - wiki_start_pages=[u'Главна_страница']), - 'Dutch': Language(name='Dutch', - iso_code='nl', - use_ascii=True, - charsets=['ISO-8859-1', 'WINDOWS-1252'], - wiki_start_pages=[u'Hoofdpagina']), - 'Polish': Language(name='Polish', - iso_code='pl', - # Q and X are only used for foreign words. - use_ascii=False, - charsets=['ISO-8859-2', 'WINDOWS-1250'], - alphabet=(u'AĄBCĆDEĘFGHIJKLŁMNŃOÓPRSŚTUWYZŹŻ' - u'aąbcćdeęfghijklłmnńoóprsśtuwyzźż'), - wiki_start_pages=[u'Wikipedia:Strona_główna']), - 'Portuguese': Language(name='Portuguese', - iso_code='pt', - use_ascii=True, - charsets=['ISO-8859-1', 'ISO-8859-15', - 'WINDOWS-1252'], - alphabet=u'ÁÂÃÀÇÉÊÍÓÔÕÚáâãàçéêíóôõú', - wiki_start_pages=[u'Wikipédia:Página_principal']), - 'Romanian': Language(name='Romanian', - iso_code='ro', - use_ascii=True, - charsets=['ISO-8859-2', 'WINDOWS-1250'], - alphabet=u'ăâîșțĂÂÎȘȚ', - wiki_start_pages=[u'Pagina_principală']), - 'Russian': Language(name='Russian', - iso_code='ru', - use_ascii=False, - charsets=['ISO-8859-5', 'WINDOWS-1251', - 'KOI8-R', 'MacCyrillic', 'IBM866', - 'IBM855'], - alphabet=(u'абвгдеёжзийклмнопрстуфхцчшщъыьэюя' - u'АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ'), - wiki_start_pages=[u'Заглавная_страница']), - 'Slovak': Language(name='Slovak', - iso_code='sk', - use_ascii=True, - charsets=['ISO-8859-2', 'WINDOWS-1250'], - alphabet=u'áäčďéíĺľňóôŕšťúýžÁÄČĎÉÍĹĽŇÓÔŔŠŤÚÝŽ', - wiki_start_pages=[u'Hlavná_stránka']), - 'Slovene': Language(name='Slovene', - iso_code='sl', - # Q, W, X, Y are only used for foreign words. - use_ascii=False, - charsets=['ISO-8859-2', 'WINDOWS-1250'], - alphabet=(u'abcčdefghijklmnoprsštuvzž' - u'ABCČDEFGHIJKLMNOPRSŠTUVZŽ'), - wiki_start_pages=[u'Glavna_stran']), - # Serbian can be written in both Latin and Cyrillic, but there's no - # simple way to get the Latin alphabet pages from Wikipedia through - # the API, so for now we just support Cyrillic. - 'Serbian': Language(name='Serbian', - iso_code='sr', - alphabet=(u'АБВГДЂЕЖЗИЈКЛЉМНЊОПРСТЋУФХЦЧЏШ' - u'абвгдђежзијклљмнњопрстћуфхцчџш'), - charsets=['ISO-8859-5', 'WINDOWS-1251', - 'MacCyrillic', 'IBM855'], - wiki_start_pages=[u'Главна_страна']), - 'Thai': Language(name='Thai', - iso_code='th', - use_ascii=False, - charsets=['ISO-8859-11', 'TIS-620', 'CP874'], - alphabet=u'กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรฤลฦวศษสหฬอฮฯะัาำิีึืฺุู฿เแโใไๅๆ็่้๊๋์ํ๎๏๐๑๒๓๔๕๖๗๘๙๚๛', - wiki_start_pages=[u'หน้าหลัก']), - 'Turkish': Language(name='Turkish', - iso_code='tr', - # Q, W, and X are not used by Turkish - use_ascii=False, - charsets=['ISO-8859-3', 'ISO-8859-9', - 'WINDOWS-1254'], - alphabet=(u'abcçdefgğhıijklmnoöprsştuüvyzâîû' - u'ABCÇDEFGĞHIİJKLMNOÖPRSŞTUÜVYZÂÎÛ'), - wiki_start_pages=[u'Ana_Sayfa']), - 'Vietnamese': Language(name='Vietnamese', - iso_code='vi', - use_ascii=False, - # Windows-1258 is the only common 8-bit - # Vietnamese encoding supported by Python. - # From Wikipedia: - # For systems that lack support for Unicode, - # dozens of 8-bit Vietnamese code pages are - # available.[1] The most common are VISCII - # (TCVN 5712:1993), VPS, and Windows-1258.[3] - # Where ASCII is required, such as when - # ensuring readability in plain text e-mail, - # Vietnamese letters are often encoded - # according to Vietnamese Quoted-Readable - # (VIQR) or VSCII Mnemonic (VSCII-MNEM),[4] - # though usage of either variable-width - # scheme has declined dramatically following - # the adoption of Unicode on the World Wide - # Web. - charsets=['WINDOWS-1258'], - alphabet=(u'aăâbcdđeêghiklmnoôơpqrstuưvxy' - u'AĂÂBCDĐEÊGHIKLMNOÔƠPQRSTUƯVXY'), - wiki_start_pages=[u'Chữ_Quốc_ngữ']), - } +LANGUAGES = { + "Arabic": Language( + name="Arabic", + iso_code="ar", + use_ascii=False, + # We only support encodings that use isolated + # forms, because the current recommendation is + # that the rendering system handles presentation + # forms. This means we purposefully skip IBM864. + charsets=["ISO-8859-6", "WINDOWS-1256", "CP720", "CP864"], + alphabet="ءآأؤإئابةتثجحخدذرزسشصضطظعغػؼؽؾؿـفقكلمنهوىيًٌٍَُِّ", + wiki_start_pages=["الصفحة_الرئيسية"], + ), + "Belarusian": Language( + name="Belarusian", + iso_code="be", + use_ascii=False, + charsets=["ISO-8859-5", "WINDOWS-1251", "IBM866", "MacCyrillic"], + alphabet="АБВГДЕЁЖЗІЙКЛМНОПРСТУЎФХЦЧШЫЬЭЮЯабвгдеёжзійклмнопрстуўфхцчшыьэюяʼ", + wiki_start_pages=["Галоўная_старонка"], + ), + "Bulgarian": Language( + name="Bulgarian", + iso_code="bg", + use_ascii=False, + charsets=["ISO-8859-5", "WINDOWS-1251", "IBM855"], + alphabet="АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЬЮЯабвгдежзийклмнопрстуфхцчшщъьюя", + wiki_start_pages=["Начална_страница"], + ), + "Czech": Language( + name="Czech", + iso_code="cz", + use_ascii=True, + charsets=["ISO-8859-2", "WINDOWS-1250"], + alphabet="áčďéěíňóřšťúůýžÁČĎÉĚÍŇÓŘŠŤÚŮÝŽ", + wiki_start_pages=["Hlavní_strana"], + ), + "Danish": Language( + name="Danish", + iso_code="da", + use_ascii=True, + charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"], + alphabet="æøåÆØÅ", + wiki_start_pages=["Forside"], + ), + "German": Language( + name="German", + iso_code="de", + use_ascii=True, + charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"], + alphabet="äöüßẞÄÖÜ", + wiki_start_pages=["Wikipedia:Hauptseite"], + ), + "Greek": Language( + name="Greek", + iso_code="el", + use_ascii=False, + charsets=["ISO-8859-7", "WINDOWS-1253"], + alphabet="αβγδεζηθικλμνξοπρσςτυφχψωάέήίόύώΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΣΤΥΦΧΨΩΆΈΉΊΌΎΏ", + wiki_start_pages=["Πύλη:Κύρια"], + ), + "English": Language( + name="English", + iso_code="en", + use_ascii=True, + charsets=["ISO-8859-1", "WINDOWS-1252", "MacRoman"], + wiki_start_pages=["Main_Page"], + ), + "Esperanto": Language( + name="Esperanto", + iso_code="eo", + # Q, W, X, and Y not used at all + use_ascii=False, + charsets=["ISO-8859-3"], + alphabet="abcĉdefgĝhĥijĵklmnoprsŝtuŭvzABCĈDEFGĜHĤIJĴKLMNOPRSŜTUŬVZ", + wiki_start_pages=["Vikipedio:Ĉefpaĝo"], + ), + "Spanish": Language( + name="Spanish", + iso_code="es", + use_ascii=True, + charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"], + alphabet="ñáéíóúüÑÁÉÍÓÚÜ", + wiki_start_pages=["Wikipedia:Portada"], + ), + "Estonian": Language( + name="Estonian", + iso_code="et", + use_ascii=False, + charsets=["ISO-8859-4", "ISO-8859-13", "WINDOWS-1257"], + # C, F, Š, Q, W, X, Y, Z, Ž are only for + # loanwords + alphabet="ABDEGHIJKLMNOPRSTUVÕÄÖÜabdeghijklmnoprstuvõäöü", + wiki_start_pages=["Esileht"], + ), + "Finnish": Language( + name="Finnish", + iso_code="fi", + use_ascii=True, + charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"], + alphabet="ÅÄÖŠŽåäöšž", + wiki_start_pages=["Wikipedia:Etusivu"], + ), + "French": Language( + name="French", + iso_code="fr", + use_ascii=True, + charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"], + alphabet="œàâçèéîïùûêŒÀÂÇÈÉÎÏÙÛÊ", + wiki_start_pages=["Wikipédia:Accueil_principal", "Bœuf (animal)"], + ), + "Hebrew": Language( + name="Hebrew", + iso_code="he", + use_ascii=False, + charsets=["ISO-8859-8", "WINDOWS-1255"], + alphabet="אבגדהוזחטיךכלםמןנסעףפץצקרשתװױײ", + wiki_start_pages=["עמוד_ראשי"], + ), + "Croatian": Language( + name="Croatian", + iso_code="hr", + # Q, W, X, Y are only used for foreign words. + use_ascii=False, + charsets=["ISO-8859-2", "WINDOWS-1250"], + alphabet="abcčćdđefghijklmnoprsštuvzžABCČĆDĐEFGHIJKLMNOPRSŠTUVZŽ", + wiki_start_pages=["Glavna_stranica"], + ), + "Hungarian": Language( + name="Hungarian", + iso_code="hu", + # Q, W, X, Y are only used for foreign words. + use_ascii=False, + charsets=["ISO-8859-2", "WINDOWS-1250"], + alphabet="abcdefghijklmnoprstuvzáéíóöőúüűABCDEFGHIJKLMNOPRSTUVZÁÉÍÓÖŐÚÜŰ", + wiki_start_pages=["Kezdőlap"], + ), + "Italian": Language( + name="Italian", + iso_code="it", + use_ascii=True, + charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"], + alphabet="ÀÈÉÌÒÓÙàèéìòóù", + wiki_start_pages=["Pagina_principale"], + ), + "Lithuanian": Language( + name="Lithuanian", + iso_code="lt", + use_ascii=False, + charsets=["ISO-8859-13", "WINDOWS-1257", "ISO-8859-4"], + # Q, W, and X not used at all + alphabet="AĄBCČDEĘĖFGHIĮYJKLMNOPRSŠTUŲŪVZŽaąbcčdeęėfghiįyjklmnoprsštuųūvzž", + wiki_start_pages=["Pagrindinis_puslapis"], + ), + "Latvian": Language( + name="Latvian", + iso_code="lv", + use_ascii=False, + charsets=["ISO-8859-13", "WINDOWS-1257", "ISO-8859-4"], + # Q, W, X, Y are only for loanwords + alphabet="AĀBCČDEĒFGĢHIĪJKĶLĻMNŅOPRSŠTUŪVZŽaābcčdeēfgģhiījkķlļmnņoprsštuūvzž", + wiki_start_pages=["Sākumlapa"], + ), + "Macedonian": Language( + name="Macedonian", + iso_code="mk", + use_ascii=False, + charsets=["ISO-8859-5", "WINDOWS-1251", "MacCyrillic", "IBM855"], + alphabet="АБВГДЃЕЖЗЅИЈКЛЉМНЊОПРСТЌУФХЦЧЏШабвгдѓежзѕијклљмнњопрстќуфхцчџш", + wiki_start_pages=["Главна_страница"], + ), + "Dutch": Language( + name="Dutch", + iso_code="nl", + use_ascii=True, + charsets=["ISO-8859-1", "WINDOWS-1252", "MacRoman"], + wiki_start_pages=["Hoofdpagina"], + ), + "Polish": Language( + name="Polish", + iso_code="pl", + # Q and X are only used for foreign words. + use_ascii=False, + charsets=["ISO-8859-2", "WINDOWS-1250"], + alphabet="AĄBCĆDEĘFGHIJKLŁMNŃOÓPRSŚTUWYZŹŻaąbcćdeęfghijklłmnńoóprsśtuwyzźż", + wiki_start_pages=["Wikipedia:Strona_główna"], + ), + "Portuguese": Language( + name="Portuguese", + iso_code="pt", + use_ascii=True, + charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"], + alphabet="ÁÂÃÀÇÉÊÍÓÔÕÚáâãàçéêíóôõú", + wiki_start_pages=["Wikipédia:Página_principal"], + ), + "Romanian": Language( + name="Romanian", + iso_code="ro", + use_ascii=True, + charsets=["ISO-8859-2", "WINDOWS-1250"], + alphabet="ăâîșțĂÂÎȘȚ", + wiki_start_pages=["Pagina_principală"], + ), + "Russian": Language( + name="Russian", + iso_code="ru", + use_ascii=False, + charsets=[ + "ISO-8859-5", + "WINDOWS-1251", + "KOI8-R", + "MacCyrillic", + "IBM866", + "IBM855", + ], + alphabet="абвгдеёжзийклмнопрстуфхцчшщъыьэюяАБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ", + wiki_start_pages=["Заглавная_страница"], + ), + "Slovak": Language( + name="Slovak", + iso_code="sk", + use_ascii=True, + charsets=["ISO-8859-2", "WINDOWS-1250"], + alphabet="áäčďéíĺľňóôŕšťúýžÁÄČĎÉÍĹĽŇÓÔŔŠŤÚÝŽ", + wiki_start_pages=["Hlavná_stránka"], + ), + "Slovene": Language( + name="Slovene", + iso_code="sl", + # Q, W, X, Y are only used for foreign words. + use_ascii=False, + charsets=["ISO-8859-2", "WINDOWS-1250"], + alphabet="abcčdefghijklmnoprsštuvzžABCČDEFGHIJKLMNOPRSŠTUVZŽ", + wiki_start_pages=["Glavna_stran"], + ), + # Serbian can be written in both Latin and Cyrillic, but there's no + # simple way to get the Latin alphabet pages from Wikipedia through + # the API, so for now we just support Cyrillic. + "Serbian": Language( + name="Serbian", + iso_code="sr", + alphabet="АБВГДЂЕЖЗИЈКЛЉМНЊОПРСТЋУФХЦЧЏШабвгдђежзијклљмнњопрстћуфхцчџш", + charsets=["ISO-8859-5", "WINDOWS-1251", "MacCyrillic", "IBM855"], + wiki_start_pages=["Главна_страна"], + ), + "Thai": Language( + name="Thai", + iso_code="th", + use_ascii=False, + charsets=["ISO-8859-11", "TIS-620", "CP874"], + alphabet="กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรฤลฦวศษสหฬอฮฯะัาำิีึืฺุู฿เแโใไๅๆ็่้๊๋์ํ๎๏๐๑๒๓๔๕๖๗๘๙๚๛", + wiki_start_pages=["หน้าหลัก"], + ), + "Turkish": Language( + name="Turkish", + iso_code="tr", + # Q, W, and X are not used by Turkish + use_ascii=False, + charsets=["ISO-8859-3", "ISO-8859-9", "WINDOWS-1254"], + alphabet="abcçdefgğhıijklmnoöprsştuüvyzâîûABCÇDEFGĞHIİJKLMNOÖPRSŞTUÜVYZÂÎÛ", + wiki_start_pages=["Ana_Sayfa"], + ), + "Vietnamese": Language( + name="Vietnamese", + iso_code="vi", + use_ascii=False, + # Windows-1258 is the only common 8-bit + # Vietnamese encoding supported by Python. + # From Wikipedia: + # For systems that lack support for Unicode, + # dozens of 8-bit Vietnamese code pages are + # available.[1] The most common are VISCII + # (TCVN 5712:1993), VPS, and Windows-1258.[3] + # Where ASCII is required, such as when + # ensuring readability in plain text e-mail, + # Vietnamese letters are often encoded + # according to Vietnamese Quoted-Readable + # (VIQR) or VSCII Mnemonic (VSCII-MNEM),[4] + # though usage of either variable-width + # scheme has declined dramatically following + # the adoption of Unicode on the World Wide + # Web. + charsets=["WINDOWS-1258"], + alphabet="aăâbcdđeêghiklmnoôơpqrstuưvxyAĂÂBCDĐEÊGHIKLMNOÔƠPQRSTUƯVXY", + wiki_start_pages=["Chữ_Quốc_ngữ"], + ), +} diff --git a/libs/common/chardet/py.typed b/libs/common/chardet/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/libs/common/chardet/resultdict.py b/libs/common/chardet/resultdict.py new file mode 100644 index 00000000..7d36e64c --- /dev/null +++ b/libs/common/chardet/resultdict.py @@ -0,0 +1,16 @@ +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + # TypedDict was introduced in Python 3.8. + # + # TODO: Remove the else block and TYPE_CHECKING check when dropping support + # for Python 3.7. + from typing import TypedDict + + class ResultDict(TypedDict): + encoding: Optional[str] + confidence: float + language: Optional[str] + +else: + ResultDict = dict diff --git a/libs/common/chardet/sbcharsetprober.py b/libs/common/chardet/sbcharsetprober.py index 46ba835c..0ffbcdd2 100644 --- a/libs/common/chardet/sbcharsetprober.py +++ b/libs/common/chardet/sbcharsetprober.py @@ -26,70 +26,77 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -from collections import namedtuple +from typing import Dict, List, NamedTuple, Optional, Union from .charsetprober import CharSetProber from .enums import CharacterCategory, ProbingState, SequenceLikelihood -SingleByteCharSetModel = namedtuple('SingleByteCharSetModel', - ['charset_name', - 'language', - 'char_to_order_map', - 'language_model', - 'typical_positive_ratio', - 'keep_ascii_letters', - 'alphabet']) +class SingleByteCharSetModel(NamedTuple): + charset_name: str + language: str + char_to_order_map: Dict[int, int] + language_model: Dict[int, Dict[int, int]] + typical_positive_ratio: float + keep_ascii_letters: bool + alphabet: str class SingleByteCharSetProber(CharSetProber): SAMPLE_SIZE = 64 - SB_ENOUGH_REL_THRESHOLD = 1024 # 0.25 * SAMPLE_SIZE^2 + SB_ENOUGH_REL_THRESHOLD = 1024 # 0.25 * SAMPLE_SIZE^2 POSITIVE_SHORTCUT_THRESHOLD = 0.95 NEGATIVE_SHORTCUT_THRESHOLD = 0.05 - def __init__(self, model, reversed=False, name_prober=None): - super(SingleByteCharSetProber, self).__init__() + def __init__( + self, + model: SingleByteCharSetModel, + is_reversed: bool = False, + name_prober: Optional[CharSetProber] = None, + ) -> None: + super().__init__() self._model = model # TRUE if we need to reverse every pair in the model lookup - self._reversed = reversed + self._reversed = is_reversed # Optional auxiliary prober for name decision self._name_prober = name_prober - self._last_order = None - self._seq_counters = None - self._total_seqs = None - self._total_char = None - self._freq_char = None + self._last_order = 255 + self._seq_counters: List[int] = [] + self._total_seqs = 0 + self._total_char = 0 + self._control_char = 0 + self._freq_char = 0 self.reset() - def reset(self): - super(SingleByteCharSetProber, self).reset() + def reset(self) -> None: + super().reset() # char order of last character self._last_order = 255 self._seq_counters = [0] * SequenceLikelihood.get_num_categories() self._total_seqs = 0 self._total_char = 0 + self._control_char = 0 # characters that fall in our sampling range self._freq_char = 0 @property - def charset_name(self): + def charset_name(self) -> Optional[str]: if self._name_prober: return self._name_prober.charset_name - else: - return self._model.charset_name + return self._model.charset_name @property - def language(self): + def language(self) -> Optional[str]: if self._name_prober: return self._name_prober.language - else: - return self._model.language + return self._model.language - def feed(self, byte_str): + def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState: # TODO: Make filter_international_words keep things in self.alphabet if not self._model.keep_ascii_letters: byte_str = self.filter_international_words(byte_str) + else: + byte_str = self.remove_xml_tags(byte_str) if not byte_str: return self.state char_to_order_map = self._model.char_to_order_map @@ -103,9 +110,6 @@ class SingleByteCharSetProber(CharSetProber): # _total_char purposes. if order < CharacterCategory.CONTROL: self._total_char += 1 - # TODO: Follow uchardet's lead and discount confidence for frequent - # control characters. - # See https://github.com/BYVoid/uchardet/commit/55b4f23971db61 if order < self.SAMPLE_SIZE: self._freq_char += 1 if self._last_order < self.SAMPLE_SIZE: @@ -122,23 +126,36 @@ class SingleByteCharSetProber(CharSetProber): if self._total_seqs > self.SB_ENOUGH_REL_THRESHOLD: confidence = self.get_confidence() if confidence > self.POSITIVE_SHORTCUT_THRESHOLD: - self.logger.debug('%s confidence = %s, we have a winner', - charset_name, confidence) + self.logger.debug( + "%s confidence = %s, we have a winner", charset_name, confidence + ) self._state = ProbingState.FOUND_IT elif confidence < self.NEGATIVE_SHORTCUT_THRESHOLD: - self.logger.debug('%s confidence = %s, below negative ' - 'shortcut threshhold %s', charset_name, - confidence, - self.NEGATIVE_SHORTCUT_THRESHOLD) + self.logger.debug( + "%s confidence = %s, below negative shortcut threshold %s", + charset_name, + confidence, + self.NEGATIVE_SHORTCUT_THRESHOLD, + ) self._state = ProbingState.NOT_ME return self.state - def get_confidence(self): + def get_confidence(self) -> float: r = 0.01 if self._total_seqs > 0: - r = ((1.0 * self._seq_counters[SequenceLikelihood.POSITIVE]) / - self._total_seqs / self._model.typical_positive_ratio) + r = ( + ( + self._seq_counters[SequenceLikelihood.POSITIVE] + + 0.25 * self._seq_counters[SequenceLikelihood.LIKELY] + ) + / self._total_seqs + / self._model.typical_positive_ratio + ) + # The more control characters (proportionnaly to the size + # of the text), the less confident we become in the current + # charset. + r = r * (self._total_char - self._control_char) / self._total_char r = r * self._freq_char / self._total_char if r >= 1.0: r = 0.99 diff --git a/libs/common/chardet/sbcsgroupprober.py b/libs/common/chardet/sbcsgroupprober.py index bdeef4e1..890ae846 100644 --- a/libs/common/chardet/sbcsgroupprober.py +++ b/libs/common/chardet/sbcsgroupprober.py @@ -28,33 +28,38 @@ from .charsetgroupprober import CharSetGroupProber from .hebrewprober import HebrewProber -from .langbulgarianmodel import (ISO_8859_5_BULGARIAN_MODEL, - WINDOWS_1251_BULGARIAN_MODEL) +from .langbulgarianmodel import ISO_8859_5_BULGARIAN_MODEL, WINDOWS_1251_BULGARIAN_MODEL from .langgreekmodel import ISO_8859_7_GREEK_MODEL, WINDOWS_1253_GREEK_MODEL from .langhebrewmodel import WINDOWS_1255_HEBREW_MODEL + # from .langhungarianmodel import (ISO_8859_2_HUNGARIAN_MODEL, # WINDOWS_1250_HUNGARIAN_MODEL) -from .langrussianmodel import (IBM855_RUSSIAN_MODEL, IBM866_RUSSIAN_MODEL, - ISO_8859_5_RUSSIAN_MODEL, KOI8_R_RUSSIAN_MODEL, - MACCYRILLIC_RUSSIAN_MODEL, - WINDOWS_1251_RUSSIAN_MODEL) +from .langrussianmodel import ( + IBM855_RUSSIAN_MODEL, + IBM866_RUSSIAN_MODEL, + ISO_8859_5_RUSSIAN_MODEL, + KOI8_R_RUSSIAN_MODEL, + MACCYRILLIC_RUSSIAN_MODEL, + WINDOWS_1251_RUSSIAN_MODEL, +) from .langthaimodel import TIS_620_THAI_MODEL from .langturkishmodel import ISO_8859_9_TURKISH_MODEL from .sbcharsetprober import SingleByteCharSetProber class SBCSGroupProber(CharSetGroupProber): - def __init__(self): - super(SBCSGroupProber, self).__init__() + def __init__(self) -> None: + super().__init__() hebrew_prober = HebrewProber() - logical_hebrew_prober = SingleByteCharSetProber(WINDOWS_1255_HEBREW_MODEL, - False, hebrew_prober) + logical_hebrew_prober = SingleByteCharSetProber( + WINDOWS_1255_HEBREW_MODEL, is_reversed=False, name_prober=hebrew_prober + ) # TODO: See if using ISO-8859-8 Hebrew model works better here, since # it's actually the visual one - visual_hebrew_prober = SingleByteCharSetProber(WINDOWS_1255_HEBREW_MODEL, - True, hebrew_prober) - hebrew_prober.set_model_probers(logical_hebrew_prober, - visual_hebrew_prober) + visual_hebrew_prober = SingleByteCharSetProber( + WINDOWS_1255_HEBREW_MODEL, is_reversed=True, name_prober=hebrew_prober + ) + hebrew_prober.set_model_probers(logical_hebrew_prober, visual_hebrew_prober) # TODO: ORDER MATTERS HERE. I changed the order vs what was in master # and several tests failed that did not before. Some thought # should be put into the ordering, and we should consider making diff --git a/libs/common/chardet/sjisprober.py b/libs/common/chardet/sjisprober.py index 9e29623b..91df0779 100644 --- a/libs/common/chardet/sjisprober.py +++ b/libs/common/chardet/sjisprober.py @@ -25,68 +25,81 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -from .mbcharsetprober import MultiByteCharSetProber -from .codingstatemachine import CodingStateMachine +from typing import Union + from .chardistribution import SJISDistributionAnalysis +from .codingstatemachine import CodingStateMachine +from .enums import MachineState, ProbingState from .jpcntx import SJISContextAnalysis +from .mbcharsetprober import MultiByteCharSetProber from .mbcssm import SJIS_SM_MODEL -from .enums import ProbingState, MachineState class SJISProber(MultiByteCharSetProber): - def __init__(self): - super(SJISProber, self).__init__() + def __init__(self) -> None: + super().__init__() self.coding_sm = CodingStateMachine(SJIS_SM_MODEL) self.distribution_analyzer = SJISDistributionAnalysis() self.context_analyzer = SJISContextAnalysis() self.reset() - def reset(self): - super(SJISProber, self).reset() + def reset(self) -> None: + super().reset() self.context_analyzer.reset() @property - def charset_name(self): + def charset_name(self) -> str: return self.context_analyzer.charset_name @property - def language(self): + def language(self) -> str: return "Japanese" - def feed(self, byte_str): - for i in range(len(byte_str)): - coding_state = self.coding_sm.next_state(byte_str[i]) + def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState: + assert self.coding_sm is not None + assert self.distribution_analyzer is not None + + for i, byte in enumerate(byte_str): + coding_state = self.coding_sm.next_state(byte) if coding_state == MachineState.ERROR: - self.logger.debug('%s %s prober hit error at byte %s', - self.charset_name, self.language, i) + self.logger.debug( + "%s %s prober hit error at byte %s", + self.charset_name, + self.language, + i, + ) self._state = ProbingState.NOT_ME break - elif coding_state == MachineState.ITS_ME: + if coding_state == MachineState.ITS_ME: self._state = ProbingState.FOUND_IT break - elif coding_state == MachineState.START: + if coding_state == MachineState.START: char_len = self.coding_sm.get_current_charlen() if i == 0: - self._last_char[1] = byte_str[0] - self.context_analyzer.feed(self._last_char[2 - char_len:], - char_len) + self._last_char[1] = byte + self.context_analyzer.feed( + self._last_char[2 - char_len :], char_len + ) self.distribution_analyzer.feed(self._last_char, char_len) else: - self.context_analyzer.feed(byte_str[i + 1 - char_len:i + 3 - - char_len], char_len) - self.distribution_analyzer.feed(byte_str[i - 1:i + 1], - char_len) + self.context_analyzer.feed( + byte_str[i + 1 - char_len : i + 3 - char_len], char_len + ) + self.distribution_analyzer.feed(byte_str[i - 1 : i + 1], char_len) self._last_char[0] = byte_str[-1] if self.state == ProbingState.DETECTING: - if (self.context_analyzer.got_enough_data() and - (self.get_confidence() > self.SHORTCUT_THRESHOLD)): + if self.context_analyzer.got_enough_data() and ( + self.get_confidence() > self.SHORTCUT_THRESHOLD + ): self._state = ProbingState.FOUND_IT return self.state - def get_confidence(self): + def get_confidence(self) -> float: + assert self.distribution_analyzer is not None + context_conf = self.context_analyzer.get_confidence() distrib_conf = self.distribution_analyzer.get_confidence() return max(context_conf, distrib_conf) diff --git a/libs/common/chardet/universaldetector.py b/libs/common/chardet/universaldetector.py index 055a8ac1..30c441dc 100644 --- a/libs/common/chardet/universaldetector.py +++ b/libs/common/chardet/universaldetector.py @@ -39,16 +39,21 @@ class a user of ``chardet`` should use. import codecs import logging import re +from typing import List, Optional, Union from .charsetgroupprober import CharSetGroupProber +from .charsetprober import CharSetProber from .enums import InputState, LanguageFilter, ProbingState from .escprober import EscCharSetProber from .latin1prober import Latin1Prober +from .macromanprober import MacRomanProber from .mbcsgroupprober import MBCSGroupProber +from .resultdict import ResultDict from .sbcsgroupprober import SBCSGroupProber +from .utf1632prober import UTF1632Prober -class UniversalDetector(object): +class UniversalDetector: """ The ``UniversalDetector`` class underlies the ``chardet.detect`` function and coordinates all of the different charset probers. @@ -66,49 +71,87 @@ class UniversalDetector(object): """ MINIMUM_THRESHOLD = 0.20 - HIGH_BYTE_DETECTOR = re.compile(b'[\x80-\xFF]') - ESC_DETECTOR = re.compile(b'(\033|~{)') - WIN_BYTE_DETECTOR = re.compile(b'[\x80-\x9F]') - ISO_WIN_MAP = {'iso-8859-1': 'Windows-1252', - 'iso-8859-2': 'Windows-1250', - 'iso-8859-5': 'Windows-1251', - 'iso-8859-6': 'Windows-1256', - 'iso-8859-7': 'Windows-1253', - 'iso-8859-8': 'Windows-1255', - 'iso-8859-9': 'Windows-1254', - 'iso-8859-13': 'Windows-1257'} + HIGH_BYTE_DETECTOR = re.compile(b"[\x80-\xFF]") + ESC_DETECTOR = re.compile(b"(\033|~{)") + WIN_BYTE_DETECTOR = re.compile(b"[\x80-\x9F]") + ISO_WIN_MAP = { + "iso-8859-1": "Windows-1252", + "iso-8859-2": "Windows-1250", + "iso-8859-5": "Windows-1251", + "iso-8859-6": "Windows-1256", + "iso-8859-7": "Windows-1253", + "iso-8859-8": "Windows-1255", + "iso-8859-9": "Windows-1254", + "iso-8859-13": "Windows-1257", + } + # Based on https://encoding.spec.whatwg.org/#names-and-labels + # but altered to match Python names for encodings and remove mappings + # that break tests. + LEGACY_MAP = { + "ascii": "Windows-1252", + "iso-8859-1": "Windows-1252", + "tis-620": "ISO-8859-11", + "iso-8859-9": "Windows-1254", + "gb2312": "GB18030", + "euc-kr": "CP949", + "utf-16le": "UTF-16", + } - def __init__(self, lang_filter=LanguageFilter.ALL): - self._esc_charset_prober = None - self._charset_probers = [] - self.result = None - self.done = None - self._got_data = None - self._input_state = None - self._last_char = None + def __init__( + self, + lang_filter: LanguageFilter = LanguageFilter.ALL, + should_rename_legacy: bool = False, + ) -> None: + self._esc_charset_prober: Optional[EscCharSetProber] = None + self._utf1632_prober: Optional[UTF1632Prober] = None + self._charset_probers: List[CharSetProber] = [] + self.result: ResultDict = { + "encoding": None, + "confidence": 0.0, + "language": None, + } + self.done = False + self._got_data = False + self._input_state = InputState.PURE_ASCII + self._last_char = b"" self.lang_filter = lang_filter self.logger = logging.getLogger(__name__) - self._has_win_bytes = None + self._has_win_bytes = False + self.should_rename_legacy = should_rename_legacy self.reset() - def reset(self): + @property + def input_state(self) -> int: + return self._input_state + + @property + def has_win_bytes(self) -> bool: + return self._has_win_bytes + + @property + def charset_probers(self) -> List[CharSetProber]: + return self._charset_probers + + def reset(self) -> None: """ Reset the UniversalDetector and all of its probers back to their initial states. This is called by ``__init__``, so you only need to call this directly in between analyses of different documents. """ - self.result = {'encoding': None, 'confidence': 0.0, 'language': None} + self.result = {"encoding": None, "confidence": 0.0, "language": None} self.done = False self._got_data = False self._has_win_bytes = False self._input_state = InputState.PURE_ASCII - self._last_char = b'' + self._last_char = b"" if self._esc_charset_prober: self._esc_charset_prober.reset() + if self._utf1632_prober: + self._utf1632_prober.reset() for prober in self._charset_probers: prober.reset() - def feed(self, byte_str): + def feed(self, byte_str: Union[bytes, bytearray]) -> None: """ Takes a chunk of a document and feeds it through all of the relevant charset probers. @@ -125,7 +168,7 @@ class UniversalDetector(object): if self.done: return - if not len(byte_str): + if not byte_str: return if not isinstance(byte_str, bytearray): @@ -136,35 +179,38 @@ class UniversalDetector(object): # If the data starts with BOM, we know it is UTF if byte_str.startswith(codecs.BOM_UTF8): # EF BB BF UTF-8 with BOM - self.result = {'encoding': "UTF-8-SIG", - 'confidence': 1.0, - 'language': ''} - elif byte_str.startswith((codecs.BOM_UTF32_LE, - codecs.BOM_UTF32_BE)): + self.result = { + "encoding": "UTF-8-SIG", + "confidence": 1.0, + "language": "", + } + elif byte_str.startswith((codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE)): # FF FE 00 00 UTF-32, little-endian BOM # 00 00 FE FF UTF-32, big-endian BOM - self.result = {'encoding': "UTF-32", - 'confidence': 1.0, - 'language': ''} - elif byte_str.startswith(b'\xFE\xFF\x00\x00'): + self.result = {"encoding": "UTF-32", "confidence": 1.0, "language": ""} + elif byte_str.startswith(b"\xFE\xFF\x00\x00"): # FE FF 00 00 UCS-4, unusual octet order BOM (3412) - self.result = {'encoding': "X-ISO-10646-UCS-4-3412", - 'confidence': 1.0, - 'language': ''} - elif byte_str.startswith(b'\x00\x00\xFF\xFE'): + self.result = { + # TODO: This encoding is not supported by Python. Should remove? + "encoding": "X-ISO-10646-UCS-4-3412", + "confidence": 1.0, + "language": "", + } + elif byte_str.startswith(b"\x00\x00\xFF\xFE"): # 00 00 FF FE UCS-4, unusual octet order BOM (2143) - self.result = {'encoding': "X-ISO-10646-UCS-4-2143", - 'confidence': 1.0, - 'language': ''} + self.result = { + # TODO: This encoding is not supported by Python. Should remove? + "encoding": "X-ISO-10646-UCS-4-2143", + "confidence": 1.0, + "language": "", + } elif byte_str.startswith((codecs.BOM_LE, codecs.BOM_BE)): # FF FE UTF-16, little endian BOM # FE FF UTF-16, big endian BOM - self.result = {'encoding': "UTF-16", - 'confidence': 1.0, - 'language': ''} + self.result = {"encoding": "UTF-16", "confidence": 1.0, "language": ""} self._got_data = True - if self.result['encoding'] is not None: + if self.result["encoding"] is not None: self.done = True return @@ -173,12 +219,29 @@ class UniversalDetector(object): if self._input_state == InputState.PURE_ASCII: if self.HIGH_BYTE_DETECTOR.search(byte_str): self._input_state = InputState.HIGH_BYTE - elif self._input_state == InputState.PURE_ASCII and \ - self.ESC_DETECTOR.search(self._last_char + byte_str): + elif ( + self._input_state == InputState.PURE_ASCII + and self.ESC_DETECTOR.search(self._last_char + byte_str) + ): self._input_state = InputState.ESC_ASCII self._last_char = byte_str[-1:] + # next we will look to see if it is appears to be either a UTF-16 or + # UTF-32 encoding + if not self._utf1632_prober: + self._utf1632_prober = UTF1632Prober() + + if self._utf1632_prober.state == ProbingState.DETECTING: + if self._utf1632_prober.feed(byte_str) == ProbingState.FOUND_IT: + self.result = { + "encoding": self._utf1632_prober.charset_name, + "confidence": self._utf1632_prober.get_confidence(), + "language": "", + } + self.done = True + return + # If we've seen escape sequences, use the EscCharSetProber, which # uses a simple state machine to check for known escape sequences in # HZ and ISO-2022 encodings, since those are the only encodings that @@ -187,12 +250,11 @@ class UniversalDetector(object): if not self._esc_charset_prober: self._esc_charset_prober = EscCharSetProber(self.lang_filter) if self._esc_charset_prober.feed(byte_str) == ProbingState.FOUND_IT: - self.result = {'encoding': - self._esc_charset_prober.charset_name, - 'confidence': - self._esc_charset_prober.get_confidence(), - 'language': - self._esc_charset_prober.language} + self.result = { + "encoding": self._esc_charset_prober.charset_name, + "confidence": self._esc_charset_prober.get_confidence(), + "language": self._esc_charset_prober.language, + } self.done = True # If we've seen high bytes (i.e., those with values greater than 127), # we need to do more complicated checks using all our multi-byte and @@ -207,17 +269,20 @@ class UniversalDetector(object): if self.lang_filter & LanguageFilter.NON_CJK: self._charset_probers.append(SBCSGroupProber()) self._charset_probers.append(Latin1Prober()) + self._charset_probers.append(MacRomanProber()) for prober in self._charset_probers: if prober.feed(byte_str) == ProbingState.FOUND_IT: - self.result = {'encoding': prober.charset_name, - 'confidence': prober.get_confidence(), - 'language': prober.language} + self.result = { + "encoding": prober.charset_name, + "confidence": prober.get_confidence(), + "language": prober.language, + } self.done = True break if self.WIN_BYTE_DETECTOR.search(byte_str): self._has_win_bytes = True - def close(self): + def close(self) -> ResultDict: """ Stop analyzing the current document and come up with a final prediction. @@ -231,13 +296,11 @@ class UniversalDetector(object): self.done = True if not self._got_data: - self.logger.debug('no data received!') + self.logger.debug("no data received!") # Default to ASCII if it is all we've seen so far elif self._input_state == InputState.PURE_ASCII: - self.result = {'encoding': 'ascii', - 'confidence': 1.0, - 'language': ''} + self.result = {"encoding": "ascii", "confidence": 1.0, "language": ""} # If we have seen non-ASCII, return the best that met MINIMUM_THRESHOLD elif self._input_state == InputState.HIGH_BYTE: @@ -253,34 +316,47 @@ class UniversalDetector(object): max_prober = prober if max_prober and (max_prober_confidence > self.MINIMUM_THRESHOLD): charset_name = max_prober.charset_name - lower_charset_name = max_prober.charset_name.lower() + assert charset_name is not None + lower_charset_name = charset_name.lower() confidence = max_prober.get_confidence() # Use Windows encoding name instead of ISO-8859 if we saw any # extra Windows-specific bytes - if lower_charset_name.startswith('iso-8859'): + if lower_charset_name.startswith("iso-8859"): if self._has_win_bytes: - charset_name = self.ISO_WIN_MAP.get(lower_charset_name, - charset_name) - self.result = {'encoding': charset_name, - 'confidence': confidence, - 'language': max_prober.language} + charset_name = self.ISO_WIN_MAP.get( + lower_charset_name, charset_name + ) + # Rename legacy encodings with superset encodings if asked + if self.should_rename_legacy: + charset_name = self.LEGACY_MAP.get( + (charset_name or "").lower(), charset_name + ) + self.result = { + "encoding": charset_name, + "confidence": confidence, + "language": max_prober.language, + } # Log all prober confidences if none met MINIMUM_THRESHOLD if self.logger.getEffectiveLevel() <= logging.DEBUG: - if self.result['encoding'] is None: - self.logger.debug('no probers hit minimum threshold') + if self.result["encoding"] is None: + self.logger.debug("no probers hit minimum threshold") for group_prober in self._charset_probers: if not group_prober: continue if isinstance(group_prober, CharSetGroupProber): for prober in group_prober.probers: - self.logger.debug('%s %s confidence = %s', - prober.charset_name, - prober.language, - prober.get_confidence()) + self.logger.debug( + "%s %s confidence = %s", + prober.charset_name, + prober.language, + prober.get_confidence(), + ) else: - self.logger.debug('%s %s confidence = %s', - group_prober.charset_name, - group_prober.language, - group_prober.get_confidence()) + self.logger.debug( + "%s %s confidence = %s", + group_prober.charset_name, + group_prober.language, + group_prober.get_confidence(), + ) return self.result diff --git a/libs/common/chardet/utf1632prober.py b/libs/common/chardet/utf1632prober.py new file mode 100644 index 00000000..6bdec63d --- /dev/null +++ b/libs/common/chardet/utf1632prober.py @@ -0,0 +1,225 @@ +######################## BEGIN LICENSE BLOCK ######################## +# +# Contributor(s): +# Jason Zavaglia +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### +from typing import List, Union + +from .charsetprober import CharSetProber +from .enums import ProbingState + + +class UTF1632Prober(CharSetProber): + """ + This class simply looks for occurrences of zero bytes, and infers + whether the file is UTF16 or UTF32 (low-endian or big-endian) + For instance, files looking like ( \0 \0 \0 [nonzero] )+ + have a good probability to be UTF32BE. Files looking like ( \0 [nonzero] )+ + may be guessed to be UTF16BE, and inversely for little-endian varieties. + """ + + # how many logical characters to scan before feeling confident of prediction + MIN_CHARS_FOR_DETECTION = 20 + # a fixed constant ratio of expected zeros or non-zeros in modulo-position. + EXPECTED_RATIO = 0.94 + + def __init__(self) -> None: + super().__init__() + self.position = 0 + self.zeros_at_mod = [0] * 4 + self.nonzeros_at_mod = [0] * 4 + self._state = ProbingState.DETECTING + self.quad = [0, 0, 0, 0] + self.invalid_utf16be = False + self.invalid_utf16le = False + self.invalid_utf32be = False + self.invalid_utf32le = False + self.first_half_surrogate_pair_detected_16be = False + self.first_half_surrogate_pair_detected_16le = False + self.reset() + + def reset(self) -> None: + super().reset() + self.position = 0 + self.zeros_at_mod = [0] * 4 + self.nonzeros_at_mod = [0] * 4 + self._state = ProbingState.DETECTING + self.invalid_utf16be = False + self.invalid_utf16le = False + self.invalid_utf32be = False + self.invalid_utf32le = False + self.first_half_surrogate_pair_detected_16be = False + self.first_half_surrogate_pair_detected_16le = False + self.quad = [0, 0, 0, 0] + + @property + def charset_name(self) -> str: + if self.is_likely_utf32be(): + return "utf-32be" + if self.is_likely_utf32le(): + return "utf-32le" + if self.is_likely_utf16be(): + return "utf-16be" + if self.is_likely_utf16le(): + return "utf-16le" + # default to something valid + return "utf-16" + + @property + def language(self) -> str: + return "" + + def approx_32bit_chars(self) -> float: + return max(1.0, self.position / 4.0) + + def approx_16bit_chars(self) -> float: + return max(1.0, self.position / 2.0) + + def is_likely_utf32be(self) -> bool: + approx_chars = self.approx_32bit_chars() + return approx_chars >= self.MIN_CHARS_FOR_DETECTION and ( + self.zeros_at_mod[0] / approx_chars > self.EXPECTED_RATIO + and self.zeros_at_mod[1] / approx_chars > self.EXPECTED_RATIO + and self.zeros_at_mod[2] / approx_chars > self.EXPECTED_RATIO + and self.nonzeros_at_mod[3] / approx_chars > self.EXPECTED_RATIO + and not self.invalid_utf32be + ) + + def is_likely_utf32le(self) -> bool: + approx_chars = self.approx_32bit_chars() + return approx_chars >= self.MIN_CHARS_FOR_DETECTION and ( + self.nonzeros_at_mod[0] / approx_chars > self.EXPECTED_RATIO + and self.zeros_at_mod[1] / approx_chars > self.EXPECTED_RATIO + and self.zeros_at_mod[2] / approx_chars > self.EXPECTED_RATIO + and self.zeros_at_mod[3] / approx_chars > self.EXPECTED_RATIO + and not self.invalid_utf32le + ) + + def is_likely_utf16be(self) -> bool: + approx_chars = self.approx_16bit_chars() + return approx_chars >= self.MIN_CHARS_FOR_DETECTION and ( + (self.nonzeros_at_mod[1] + self.nonzeros_at_mod[3]) / approx_chars + > self.EXPECTED_RATIO + and (self.zeros_at_mod[0] + self.zeros_at_mod[2]) / approx_chars + > self.EXPECTED_RATIO + and not self.invalid_utf16be + ) + + def is_likely_utf16le(self) -> bool: + approx_chars = self.approx_16bit_chars() + return approx_chars >= self.MIN_CHARS_FOR_DETECTION and ( + (self.nonzeros_at_mod[0] + self.nonzeros_at_mod[2]) / approx_chars + > self.EXPECTED_RATIO + and (self.zeros_at_mod[1] + self.zeros_at_mod[3]) / approx_chars + > self.EXPECTED_RATIO + and not self.invalid_utf16le + ) + + def validate_utf32_characters(self, quad: List[int]) -> None: + """ + Validate if the quad of bytes is valid UTF-32. + + UTF-32 is valid in the range 0x00000000 - 0x0010FFFF + excluding 0x0000D800 - 0x0000DFFF + + https://en.wikipedia.org/wiki/UTF-32 + """ + if ( + quad[0] != 0 + or quad[1] > 0x10 + or (quad[0] == 0 and quad[1] == 0 and 0xD8 <= quad[2] <= 0xDF) + ): + self.invalid_utf32be = True + if ( + quad[3] != 0 + or quad[2] > 0x10 + or (quad[3] == 0 and quad[2] == 0 and 0xD8 <= quad[1] <= 0xDF) + ): + self.invalid_utf32le = True + + def validate_utf16_characters(self, pair: List[int]) -> None: + """ + Validate if the pair of bytes is valid UTF-16. + + UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF + with an exception for surrogate pairs, which must be in the range + 0xD800-0xDBFF followed by 0xDC00-0xDFFF + + https://en.wikipedia.org/wiki/UTF-16 + """ + if not self.first_half_surrogate_pair_detected_16be: + if 0xD8 <= pair[0] <= 0xDB: + self.first_half_surrogate_pair_detected_16be = True + elif 0xDC <= pair[0] <= 0xDF: + self.invalid_utf16be = True + else: + if 0xDC <= pair[0] <= 0xDF: + self.first_half_surrogate_pair_detected_16be = False + else: + self.invalid_utf16be = True + + if not self.first_half_surrogate_pair_detected_16le: + if 0xD8 <= pair[1] <= 0xDB: + self.first_half_surrogate_pair_detected_16le = True + elif 0xDC <= pair[1] <= 0xDF: + self.invalid_utf16le = True + else: + if 0xDC <= pair[1] <= 0xDF: + self.first_half_surrogate_pair_detected_16le = False + else: + self.invalid_utf16le = True + + def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState: + for c in byte_str: + mod4 = self.position % 4 + self.quad[mod4] = c + if mod4 == 3: + self.validate_utf32_characters(self.quad) + self.validate_utf16_characters(self.quad[0:2]) + self.validate_utf16_characters(self.quad[2:4]) + if c == 0: + self.zeros_at_mod[mod4] += 1 + else: + self.nonzeros_at_mod[mod4] += 1 + self.position += 1 + return self.state + + @property + def state(self) -> ProbingState: + if self._state in {ProbingState.NOT_ME, ProbingState.FOUND_IT}: + # terminal, decided states + return self._state + if self.get_confidence() > 0.80: + self._state = ProbingState.FOUND_IT + elif self.position > 4 * 1024: + # if we get to 4kb into the file, and we can't conclude it's UTF, + # let's give up + self._state = ProbingState.NOT_ME + return self._state + + def get_confidence(self) -> float: + return ( + 0.85 + if ( + self.is_likely_utf16le() + or self.is_likely_utf16be() + or self.is_likely_utf32le() + or self.is_likely_utf32be() + ) + else 0.00 + ) diff --git a/libs/common/chardet/utf8prober.py b/libs/common/chardet/utf8prober.py index 6c3196cc..d96354d9 100644 --- a/libs/common/chardet/utf8prober.py +++ b/libs/common/chardet/utf8prober.py @@ -25,45 +25,46 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -from .charsetprober import CharSetProber -from .enums import ProbingState, MachineState -from .codingstatemachine import CodingStateMachine -from .mbcssm import UTF8_SM_MODEL +from typing import Union +from .charsetprober import CharSetProber +from .codingstatemachine import CodingStateMachine +from .enums import MachineState, ProbingState +from .mbcssm import UTF8_SM_MODEL class UTF8Prober(CharSetProber): ONE_CHAR_PROB = 0.5 - def __init__(self): - super(UTF8Prober, self).__init__() + def __init__(self) -> None: + super().__init__() self.coding_sm = CodingStateMachine(UTF8_SM_MODEL) - self._num_mb_chars = None + self._num_mb_chars = 0 self.reset() - def reset(self): - super(UTF8Prober, self).reset() + def reset(self) -> None: + super().reset() self.coding_sm.reset() self._num_mb_chars = 0 @property - def charset_name(self): + def charset_name(self) -> str: return "utf-8" @property - def language(self): + def language(self) -> str: return "" - def feed(self, byte_str): + def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState: for c in byte_str: coding_state = self.coding_sm.next_state(c) if coding_state == MachineState.ERROR: self._state = ProbingState.NOT_ME break - elif coding_state == MachineState.ITS_ME: + if coding_state == MachineState.ITS_ME: self._state = ProbingState.FOUND_IT break - elif coding_state == MachineState.START: + if coding_state == MachineState.START: if self.coding_sm.get_current_charlen() >= 2: self._num_mb_chars += 1 @@ -73,10 +74,9 @@ class UTF8Prober(CharSetProber): return self.state - def get_confidence(self): + def get_confidence(self) -> float: unlike = 0.99 if self._num_mb_chars < 6: - unlike *= self.ONE_CHAR_PROB ** self._num_mb_chars + unlike *= self.ONE_CHAR_PROB**self._num_mb_chars return 1.0 - unlike - else: - return unlike + return unlike diff --git a/libs/common/chardet/version.py b/libs/common/chardet/version.py index 70369b9d..c5e9d85c 100644 --- a/libs/common/chardet/version.py +++ b/libs/common/chardet/version.py @@ -1,9 +1,9 @@ """ This module exists only to simplify retrieving the version number of chardet -from within setup.py and from chardet subpackages. +from within setuptools and from chardet subpackages. :author: Dan Blanchard (dan.blanchard@gmail.com) """ -__version__ = "4.0.0" -VERSION = __version__.split('.') +__version__ = "5.1.0" +VERSION = __version__.split(".") diff --git a/libs/common/charset_normalizer/__init__.py b/libs/common/charset_normalizer/__init__.py new file mode 100644 index 00000000..2dcaf56f --- /dev/null +++ b/libs/common/charset_normalizer/__init__.py @@ -0,0 +1,56 @@ +# -*- coding: utf-8 -*- +""" +Charset-Normalizer +~~~~~~~~~~~~~~ +The Real First Universal Charset Detector. +A library that helps you read text from an unknown charset encoding. +Motivated by chardet, This package is trying to resolve the issue by taking a new approach. +All IANA character set names for which the Python core library provides codecs are supported. + +Basic usage: + >>> from charset_normalizer import from_bytes + >>> results = from_bytes('Bсеки човек има право на образование. Oбразованието!'.encode('utf_8')) + >>> best_guess = results.best() + >>> str(best_guess) + 'Bсеки човек има право на образование. Oбразованието!' + +Others methods and usages are available - see the full documentation +at . +:copyright: (c) 2021 by Ahmed TAHRI +:license: MIT, see LICENSE for more details. +""" +import logging + +from .api import from_bytes, from_fp, from_path, normalize +from .legacy import ( + CharsetDetector, + CharsetDoctor, + CharsetNormalizerMatch, + CharsetNormalizerMatches, + detect, +) +from .models import CharsetMatch, CharsetMatches +from .utils import set_logging_handler +from .version import VERSION, __version__ + +__all__ = ( + "from_fp", + "from_path", + "from_bytes", + "normalize", + "detect", + "CharsetMatch", + "CharsetMatches", + "CharsetNormalizerMatch", + "CharsetNormalizerMatches", + "CharsetDetector", + "CharsetDoctor", + "__version__", + "VERSION", + "set_logging_handler", +) + +# Attach a NullHandler to the top level logger by default +# https://docs.python.org/3.3/howto/logging.html#configuring-logging-for-a-library + +logging.getLogger("charset_normalizer").addHandler(logging.NullHandler()) diff --git a/libs/common/charset_normalizer/api.py b/libs/common/charset_normalizer/api.py new file mode 100644 index 00000000..72907f94 --- /dev/null +++ b/libs/common/charset_normalizer/api.py @@ -0,0 +1,584 @@ +import logging +import warnings +from os import PathLike +from os.path import basename, splitext +from typing import Any, BinaryIO, List, Optional, Set + +from .cd import ( + coherence_ratio, + encoding_languages, + mb_encoding_languages, + merge_coherence_ratios, +) +from .constant import IANA_SUPPORTED, TOO_BIG_SEQUENCE, TOO_SMALL_SEQUENCE, TRACE +from .md import mess_ratio +from .models import CharsetMatch, CharsetMatches +from .utils import ( + any_specified_encoding, + cut_sequence_chunks, + iana_name, + identify_sig_or_bom, + is_cp_similar, + is_multi_byte_encoding, + should_strip_sig_or_bom, +) + +# Will most likely be controversial +# logging.addLevelName(TRACE, "TRACE") +logger = logging.getLogger("charset_normalizer") +explain_handler = logging.StreamHandler() +explain_handler.setFormatter( + logging.Formatter("%(asctime)s | %(levelname)s | %(message)s") +) + + +def from_bytes( + sequences: bytes, + steps: int = 5, + chunk_size: int = 512, + threshold: float = 0.2, + cp_isolation: Optional[List[str]] = None, + cp_exclusion: Optional[List[str]] = None, + preemptive_behaviour: bool = True, + explain: bool = False, +) -> CharsetMatches: + """ + Given a raw bytes sequence, return the best possibles charset usable to render str objects. + If there is no results, it is a strong indicator that the source is binary/not text. + By default, the process will extract 5 blocs of 512o each to assess the mess and coherence of a given sequence. + And will give up a particular code page after 20% of measured mess. Those criteria are customizable at will. + + The preemptive behavior DOES NOT replace the traditional detection workflow, it prioritize a particular code page + but never take it for granted. Can improve the performance. + + You may want to focus your attention to some code page or/and not others, use cp_isolation and cp_exclusion for that + purpose. + + This function will strip the SIG in the payload/sequence every time except on UTF-16, UTF-32. + By default the library does not setup any handler other than the NullHandler, if you choose to set the 'explain' + toggle to True it will alter the logger configuration to add a StreamHandler that is suitable for debugging. + Custom logging format and handler can be set manually. + """ + + if not isinstance(sequences, (bytearray, bytes)): + raise TypeError( + "Expected object of type bytes or bytearray, got: {0}".format( + type(sequences) + ) + ) + + if explain: + previous_logger_level: int = logger.level + logger.addHandler(explain_handler) + logger.setLevel(TRACE) + + length: int = len(sequences) + + if length == 0: + logger.debug("Encoding detection on empty bytes, assuming utf_8 intention.") + if explain: + logger.removeHandler(explain_handler) + logger.setLevel(previous_logger_level or logging.WARNING) + return CharsetMatches([CharsetMatch(sequences, "utf_8", 0.0, False, [], "")]) + + if cp_isolation is not None: + logger.log( + TRACE, + "cp_isolation is set. use this flag for debugging purpose. " + "limited list of encoding allowed : %s.", + ", ".join(cp_isolation), + ) + cp_isolation = [iana_name(cp, False) for cp in cp_isolation] + else: + cp_isolation = [] + + if cp_exclusion is not None: + logger.log( + TRACE, + "cp_exclusion is set. use this flag for debugging purpose. " + "limited list of encoding excluded : %s.", + ", ".join(cp_exclusion), + ) + cp_exclusion = [iana_name(cp, False) for cp in cp_exclusion] + else: + cp_exclusion = [] + + if length <= (chunk_size * steps): + logger.log( + TRACE, + "override steps (%i) and chunk_size (%i) as content does not fit (%i byte(s) given) parameters.", + steps, + chunk_size, + length, + ) + steps = 1 + chunk_size = length + + if steps > 1 and length / steps < chunk_size: + chunk_size = int(length / steps) + + is_too_small_sequence: bool = len(sequences) < TOO_SMALL_SEQUENCE + is_too_large_sequence: bool = len(sequences) >= TOO_BIG_SEQUENCE + + if is_too_small_sequence: + logger.log( + TRACE, + "Trying to detect encoding from a tiny portion of ({}) byte(s).".format( + length + ), + ) + elif is_too_large_sequence: + logger.log( + TRACE, + "Using lazy str decoding because the payload is quite large, ({}) byte(s).".format( + length + ), + ) + + prioritized_encodings: List[str] = [] + + specified_encoding: Optional[str] = ( + any_specified_encoding(sequences) if preemptive_behaviour else None + ) + + if specified_encoding is not None: + prioritized_encodings.append(specified_encoding) + logger.log( + TRACE, + "Detected declarative mark in sequence. Priority +1 given for %s.", + specified_encoding, + ) + + tested: Set[str] = set() + tested_but_hard_failure: List[str] = [] + tested_but_soft_failure: List[str] = [] + + fallback_ascii: Optional[CharsetMatch] = None + fallback_u8: Optional[CharsetMatch] = None + fallback_specified: Optional[CharsetMatch] = None + + results: CharsetMatches = CharsetMatches() + + sig_encoding, sig_payload = identify_sig_or_bom(sequences) + + if sig_encoding is not None: + prioritized_encodings.append(sig_encoding) + logger.log( + TRACE, + "Detected a SIG or BOM mark on first %i byte(s). Priority +1 given for %s.", + len(sig_payload), + sig_encoding, + ) + + prioritized_encodings.append("ascii") + + if "utf_8" not in prioritized_encodings: + prioritized_encodings.append("utf_8") + + for encoding_iana in prioritized_encodings + IANA_SUPPORTED: + + if cp_isolation and encoding_iana not in cp_isolation: + continue + + if cp_exclusion and encoding_iana in cp_exclusion: + continue + + if encoding_iana in tested: + continue + + tested.add(encoding_iana) + + decoded_payload: Optional[str] = None + bom_or_sig_available: bool = sig_encoding == encoding_iana + strip_sig_or_bom: bool = bom_or_sig_available and should_strip_sig_or_bom( + encoding_iana + ) + + if encoding_iana in {"utf_16", "utf_32"} and not bom_or_sig_available: + logger.log( + TRACE, + "Encoding %s wont be tested as-is because it require a BOM. Will try some sub-encoder LE/BE.", + encoding_iana, + ) + continue + + try: + is_multi_byte_decoder: bool = is_multi_byte_encoding(encoding_iana) + except (ModuleNotFoundError, ImportError): + logger.log( + TRACE, + "Encoding %s does not provide an IncrementalDecoder", + encoding_iana, + ) + continue + + try: + if is_too_large_sequence and is_multi_byte_decoder is False: + str( + sequences[: int(50e4)] + if strip_sig_or_bom is False + else sequences[len(sig_payload) : int(50e4)], + encoding=encoding_iana, + ) + else: + decoded_payload = str( + sequences + if strip_sig_or_bom is False + else sequences[len(sig_payload) :], + encoding=encoding_iana, + ) + except (UnicodeDecodeError, LookupError) as e: + if not isinstance(e, LookupError): + logger.log( + TRACE, + "Code page %s does not fit given bytes sequence at ALL. %s", + encoding_iana, + str(e), + ) + tested_but_hard_failure.append(encoding_iana) + continue + + similar_soft_failure_test: bool = False + + for encoding_soft_failed in tested_but_soft_failure: + if is_cp_similar(encoding_iana, encoding_soft_failed): + similar_soft_failure_test = True + break + + if similar_soft_failure_test: + logger.log( + TRACE, + "%s is deemed too similar to code page %s and was consider unsuited already. Continuing!", + encoding_iana, + encoding_soft_failed, + ) + continue + + r_ = range( + 0 if not bom_or_sig_available else len(sig_payload), + length, + int(length / steps), + ) + + multi_byte_bonus: bool = ( + is_multi_byte_decoder + and decoded_payload is not None + and len(decoded_payload) < length + ) + + if multi_byte_bonus: + logger.log( + TRACE, + "Code page %s is a multi byte encoding table and it appear that at least one character " + "was encoded using n-bytes.", + encoding_iana, + ) + + max_chunk_gave_up: int = int(len(r_) / 4) + + max_chunk_gave_up = max(max_chunk_gave_up, 2) + early_stop_count: int = 0 + lazy_str_hard_failure = False + + md_chunks: List[str] = [] + md_ratios = [] + + try: + for chunk in cut_sequence_chunks( + sequences, + encoding_iana, + r_, + chunk_size, + bom_or_sig_available, + strip_sig_or_bom, + sig_payload, + is_multi_byte_decoder, + decoded_payload, + ): + md_chunks.append(chunk) + + md_ratios.append(mess_ratio(chunk, threshold)) + + if md_ratios[-1] >= threshold: + early_stop_count += 1 + + if (early_stop_count >= max_chunk_gave_up) or ( + bom_or_sig_available and strip_sig_or_bom is False + ): + break + except UnicodeDecodeError as e: # Lazy str loading may have missed something there + logger.log( + TRACE, + "LazyStr Loading: After MD chunk decode, code page %s does not fit given bytes sequence at ALL. %s", + encoding_iana, + str(e), + ) + early_stop_count = max_chunk_gave_up + lazy_str_hard_failure = True + + # We might want to check the sequence again with the whole content + # Only if initial MD tests passes + if ( + not lazy_str_hard_failure + and is_too_large_sequence + and not is_multi_byte_decoder + ): + try: + sequences[int(50e3) :].decode(encoding_iana, errors="strict") + except UnicodeDecodeError as e: + logger.log( + TRACE, + "LazyStr Loading: After final lookup, code page %s does not fit given bytes sequence at ALL. %s", + encoding_iana, + str(e), + ) + tested_but_hard_failure.append(encoding_iana) + continue + + mean_mess_ratio: float = sum(md_ratios) / len(md_ratios) if md_ratios else 0.0 + if mean_mess_ratio >= threshold or early_stop_count >= max_chunk_gave_up: + tested_but_soft_failure.append(encoding_iana) + logger.log( + TRACE, + "%s was excluded because of initial chaos probing. Gave up %i time(s). " + "Computed mean chaos is %f %%.", + encoding_iana, + early_stop_count, + round(mean_mess_ratio * 100, ndigits=3), + ) + # Preparing those fallbacks in case we got nothing. + if ( + encoding_iana in ["ascii", "utf_8", specified_encoding] + and not lazy_str_hard_failure + ): + fallback_entry = CharsetMatch( + sequences, encoding_iana, threshold, False, [], decoded_payload + ) + if encoding_iana == specified_encoding: + fallback_specified = fallback_entry + elif encoding_iana == "ascii": + fallback_ascii = fallback_entry + else: + fallback_u8 = fallback_entry + continue + + logger.log( + TRACE, + "%s passed initial chaos probing. Mean measured chaos is %f %%", + encoding_iana, + round(mean_mess_ratio * 100, ndigits=3), + ) + + if not is_multi_byte_decoder: + target_languages: List[str] = encoding_languages(encoding_iana) + else: + target_languages = mb_encoding_languages(encoding_iana) + + if target_languages: + logger.log( + TRACE, + "{} should target any language(s) of {}".format( + encoding_iana, str(target_languages) + ), + ) + + cd_ratios = [] + + # We shall skip the CD when its about ASCII + # Most of the time its not relevant to run "language-detection" on it. + if encoding_iana != "ascii": + for chunk in md_chunks: + chunk_languages = coherence_ratio( + chunk, 0.1, ",".join(target_languages) if target_languages else None + ) + + cd_ratios.append(chunk_languages) + + cd_ratios_merged = merge_coherence_ratios(cd_ratios) + + if cd_ratios_merged: + logger.log( + TRACE, + "We detected language {} using {}".format( + cd_ratios_merged, encoding_iana + ), + ) + + results.append( + CharsetMatch( + sequences, + encoding_iana, + mean_mess_ratio, + bom_or_sig_available, + cd_ratios_merged, + decoded_payload, + ) + ) + + if ( + encoding_iana in [specified_encoding, "ascii", "utf_8"] + and mean_mess_ratio < 0.1 + ): + logger.debug( + "Encoding detection: %s is most likely the one.", encoding_iana + ) + if explain: + logger.removeHandler(explain_handler) + logger.setLevel(previous_logger_level) + return CharsetMatches([results[encoding_iana]]) + + if encoding_iana == sig_encoding: + logger.debug( + "Encoding detection: %s is most likely the one as we detected a BOM or SIG within " + "the beginning of the sequence.", + encoding_iana, + ) + if explain: + logger.removeHandler(explain_handler) + logger.setLevel(previous_logger_level) + return CharsetMatches([results[encoding_iana]]) + + if len(results) == 0: + if fallback_u8 or fallback_ascii or fallback_specified: + logger.log( + TRACE, + "Nothing got out of the detection process. Using ASCII/UTF-8/Specified fallback.", + ) + + if fallback_specified: + logger.debug( + "Encoding detection: %s will be used as a fallback match", + fallback_specified.encoding, + ) + results.append(fallback_specified) + elif ( + (fallback_u8 and fallback_ascii is None) + or ( + fallback_u8 + and fallback_ascii + and fallback_u8.fingerprint != fallback_ascii.fingerprint + ) + or (fallback_u8 is not None) + ): + logger.debug("Encoding detection: utf_8 will be used as a fallback match") + results.append(fallback_u8) + elif fallback_ascii: + logger.debug("Encoding detection: ascii will be used as a fallback match") + results.append(fallback_ascii) + + if results: + logger.debug( + "Encoding detection: Found %s as plausible (best-candidate) for content. With %i alternatives.", + results.best().encoding, # type: ignore + len(results) - 1, + ) + else: + logger.debug("Encoding detection: Unable to determine any suitable charset.") + + if explain: + logger.removeHandler(explain_handler) + logger.setLevel(previous_logger_level) + + return results + + +def from_fp( + fp: BinaryIO, + steps: int = 5, + chunk_size: int = 512, + threshold: float = 0.20, + cp_isolation: Optional[List[str]] = None, + cp_exclusion: Optional[List[str]] = None, + preemptive_behaviour: bool = True, + explain: bool = False, +) -> CharsetMatches: + """ + Same thing than the function from_bytes but using a file pointer that is already ready. + Will not close the file pointer. + """ + return from_bytes( + fp.read(), + steps, + chunk_size, + threshold, + cp_isolation, + cp_exclusion, + preemptive_behaviour, + explain, + ) + + +def from_path( + path: "PathLike[Any]", + steps: int = 5, + chunk_size: int = 512, + threshold: float = 0.20, + cp_isolation: Optional[List[str]] = None, + cp_exclusion: Optional[List[str]] = None, + preemptive_behaviour: bool = True, + explain: bool = False, +) -> CharsetMatches: + """ + Same thing than the function from_bytes but with one extra step. Opening and reading given file path in binary mode. + Can raise IOError. + """ + with open(path, "rb") as fp: + return from_fp( + fp, + steps, + chunk_size, + threshold, + cp_isolation, + cp_exclusion, + preemptive_behaviour, + explain, + ) + + +def normalize( + path: "PathLike[Any]", + steps: int = 5, + chunk_size: int = 512, + threshold: float = 0.20, + cp_isolation: Optional[List[str]] = None, + cp_exclusion: Optional[List[str]] = None, + preemptive_behaviour: bool = True, +) -> CharsetMatch: + """ + Take a (text-based) file path and try to create another file next to it, this time using UTF-8. + """ + warnings.warn( + "normalize is deprecated and will be removed in 3.0", + DeprecationWarning, + ) + + results = from_path( + path, + steps, + chunk_size, + threshold, + cp_isolation, + cp_exclusion, + preemptive_behaviour, + ) + + filename = basename(path) + target_extensions = list(splitext(filename)) + + if len(results) == 0: + raise IOError( + 'Unable to normalize "{}", no encoding charset seems to fit.'.format( + filename + ) + ) + + result = results.best() + + target_extensions[0] += "-" + result.encoding # type: ignore + + with open( + "{}".format(str(path).replace(filename, "".join(target_extensions))), "wb" + ) as fp: + fp.write(result.output()) # type: ignore + + return result # type: ignore diff --git a/libs/common/charset_normalizer/assets/__init__.py b/libs/common/charset_normalizer/assets/__init__.py new file mode 100644 index 00000000..3c33ba30 --- /dev/null +++ b/libs/common/charset_normalizer/assets/__init__.py @@ -0,0 +1,1122 @@ +# -*- coding: utf-8 -*- +from typing import Dict, List + +FREQUENCIES: Dict[str, List[str]] = { + "English": [ + "e", + "a", + "t", + "i", + "o", + "n", + "s", + "r", + "h", + "l", + "d", + "c", + "u", + "m", + "f", + "p", + "g", + "w", + "y", + "b", + "v", + "k", + "x", + "j", + "z", + "q", + ], + "German": [ + "e", + "n", + "i", + "r", + "s", + "t", + "a", + "d", + "h", + "u", + "l", + "g", + "o", + "c", + "m", + "b", + "f", + "k", + "w", + "z", + "p", + "v", + "ü", + "ä", + "ö", + "j", + ], + "French": [ + "e", + "a", + "s", + "n", + "i", + "t", + "r", + "l", + "u", + "o", + "d", + "c", + "p", + "m", + "é", + "v", + "g", + "f", + "b", + "h", + "q", + "à", + "x", + "è", + "y", + "j", + ], + "Dutch": [ + "e", + "n", + "a", + "i", + "r", + "t", + "o", + "d", + "s", + "l", + "g", + "h", + "v", + "m", + "u", + "k", + "c", + "p", + "b", + "w", + "j", + "z", + "f", + "y", + "x", + "ë", + ], + "Italian": [ + "e", + "i", + "a", + "o", + "n", + "l", + "t", + "r", + "s", + "c", + "d", + "u", + "p", + "m", + "g", + "v", + "f", + "b", + "z", + "h", + "q", + "è", + "à", + "k", + "y", + "ò", + ], + "Polish": [ + "a", + "i", + "o", + "e", + "n", + "r", + "z", + "w", + "s", + "c", + "t", + "k", + "y", + "d", + "p", + "m", + "u", + "l", + "j", + "ł", + "g", + "b", + "h", + "ą", + "ę", + "ó", + ], + "Spanish": [ + "e", + "a", + "o", + "n", + "s", + "r", + "i", + "l", + "d", + "t", + "c", + "u", + "m", + "p", + "b", + "g", + "v", + "f", + "y", + "ó", + "h", + "q", + "í", + "j", + "z", + "á", + ], + "Russian": [ + "о", + "а", + "е", + "и", + "н", + "с", + "т", + "р", + "в", + "л", + "к", + "м", + "д", + "п", + "у", + "г", + "я", + "ы", + "з", + "б", + "й", + "ь", + "ч", + "х", + "ж", + "ц", + ], + "Japanese": [ + "の", + "に", + "る", + "た", + "は", + "ー", + "と", + "し", + "を", + "で", + "て", + "が", + "い", + "ン", + "れ", + "な", + "年", + "ス", + "っ", + "ル", + "か", + "ら", + "あ", + "さ", + "も", + "り", + ], + "Portuguese": [ + "a", + "e", + "o", + "s", + "i", + "r", + "d", + "n", + "t", + "m", + "u", + "c", + "l", + "p", + "g", + "v", + "b", + "f", + "h", + "ã", + "q", + "é", + "ç", + "á", + "z", + "í", + ], + "Swedish": [ + "e", + "a", + "n", + "r", + "t", + "s", + "i", + "l", + "d", + "o", + "m", + "k", + "g", + "v", + "h", + "f", + "u", + "p", + "ä", + "c", + "b", + "ö", + "å", + "y", + "j", + "x", + ], + "Chinese": [ + "的", + "一", + "是", + "不", + "了", + "在", + "人", + "有", + "我", + "他", + "这", + "个", + "们", + "中", + "来", + "上", + "大", + "为", + "和", + "国", + "地", + "到", + "以", + "说", + "时", + "要", + "就", + "出", + "会", + ], + "Ukrainian": [ + "о", + "а", + "н", + "і", + "и", + "р", + "в", + "т", + "е", + "с", + "к", + "л", + "у", + "д", + "м", + "п", + "з", + "я", + "ь", + "б", + "г", + "й", + "ч", + "х", + "ц", + "ї", + ], + "Norwegian": [ + "e", + "r", + "n", + "t", + "a", + "s", + "i", + "o", + "l", + "d", + "g", + "k", + "m", + "v", + "f", + "p", + "u", + "b", + "h", + "å", + "y", + "j", + "ø", + "c", + "æ", + "w", + ], + "Finnish": [ + "a", + "i", + "n", + "t", + "e", + "s", + "l", + "o", + "u", + "k", + "ä", + "m", + "r", + "v", + "j", + "h", + "p", + "y", + "d", + "ö", + "g", + "c", + "b", + "f", + "w", + "z", + ], + "Vietnamese": [ + "n", + "h", + "t", + "i", + "c", + "g", + "a", + "o", + "u", + "m", + "l", + "r", + "à", + "đ", + "s", + "e", + "v", + "p", + "b", + "y", + "ư", + "d", + "á", + "k", + "ộ", + "ế", + ], + "Czech": [ + "o", + "e", + "a", + "n", + "t", + "s", + "i", + "l", + "v", + "r", + "k", + "d", + "u", + "m", + "p", + "í", + "c", + "h", + "z", + "á", + "y", + "j", + "b", + "ě", + "é", + "ř", + ], + "Hungarian": [ + "e", + "a", + "t", + "l", + "s", + "n", + "k", + "r", + "i", + "o", + "z", + "á", + "é", + "g", + "m", + "b", + "y", + "v", + "d", + "h", + "u", + "p", + "j", + "ö", + "f", + "c", + ], + "Korean": [ + "이", + "다", + "에", + "의", + "는", + "로", + "하", + "을", + "가", + "고", + "지", + "서", + "한", + "은", + "기", + "으", + "년", + "대", + "사", + "시", + "를", + "리", + "도", + "인", + "스", + "일", + ], + "Indonesian": [ + "a", + "n", + "e", + "i", + "r", + "t", + "u", + "s", + "d", + "k", + "m", + "l", + "g", + "p", + "b", + "o", + "h", + "y", + "j", + "c", + "w", + "f", + "v", + "z", + "x", + "q", + ], + "Turkish": [ + "a", + "e", + "i", + "n", + "r", + "l", + "ı", + "k", + "d", + "t", + "s", + "m", + "y", + "u", + "o", + "b", + "ü", + "ş", + "v", + "g", + "z", + "h", + "c", + "p", + "ç", + "ğ", + ], + "Romanian": [ + "e", + "i", + "a", + "r", + "n", + "t", + "u", + "l", + "o", + "c", + "s", + "d", + "p", + "m", + "ă", + "f", + "v", + "î", + "g", + "b", + "ș", + "ț", + "z", + "h", + "â", + "j", + ], + "Farsi": [ + "ا", + "ی", + "ر", + "د", + "ن", + "ه", + "و", + "م", + "ت", + "ب", + "س", + "ل", + "ک", + "ش", + "ز", + "ف", + "گ", + "ع", + "خ", + "ق", + "ج", + "آ", + "پ", + "ح", + "ط", + "ص", + ], + "Arabic": [ + "ا", + "ل", + "ي", + "م", + "و", + "ن", + "ر", + "ت", + "ب", + "ة", + "ع", + "د", + "س", + "ف", + "ه", + "ك", + "ق", + "أ", + "ح", + "ج", + "ش", + "ط", + "ص", + "ى", + "خ", + "إ", + ], + "Danish": [ + "e", + "r", + "n", + "t", + "a", + "i", + "s", + "d", + "l", + "o", + "g", + "m", + "k", + "f", + "v", + "u", + "b", + "h", + "p", + "å", + "y", + "ø", + "æ", + "c", + "j", + "w", + ], + "Serbian": [ + "а", + "и", + "о", + "е", + "н", + "р", + "с", + "у", + "т", + "к", + "ј", + "в", + "д", + "м", + "п", + "л", + "г", + "з", + "б", + "a", + "i", + "e", + "o", + "n", + "ц", + "ш", + ], + "Lithuanian": [ + "i", + "a", + "s", + "o", + "r", + "e", + "t", + "n", + "u", + "k", + "m", + "l", + "p", + "v", + "d", + "j", + "g", + "ė", + "b", + "y", + "ų", + "š", + "ž", + "c", + "ą", + "į", + ], + "Slovene": [ + "e", + "a", + "i", + "o", + "n", + "r", + "s", + "l", + "t", + "j", + "v", + "k", + "d", + "p", + "m", + "u", + "z", + "b", + "g", + "h", + "č", + "c", + "š", + "ž", + "f", + "y", + ], + "Slovak": [ + "o", + "a", + "e", + "n", + "i", + "r", + "v", + "t", + "s", + "l", + "k", + "d", + "m", + "p", + "u", + "c", + "h", + "j", + "b", + "z", + "á", + "y", + "ý", + "í", + "č", + "é", + ], + "Hebrew": [ + "י", + "ו", + "ה", + "ל", + "ר", + "ב", + "ת", + "מ", + "א", + "ש", + "נ", + "ע", + "ם", + "ד", + "ק", + "ח", + "פ", + "ס", + "כ", + "ג", + "ט", + "צ", + "ן", + "ז", + "ך", + ], + "Bulgarian": [ + "а", + "и", + "о", + "е", + "н", + "т", + "р", + "с", + "в", + "л", + "к", + "д", + "п", + "м", + "з", + "г", + "я", + "ъ", + "у", + "б", + "ч", + "ц", + "й", + "ж", + "щ", + "х", + ], + "Croatian": [ + "a", + "i", + "o", + "e", + "n", + "r", + "j", + "s", + "t", + "u", + "k", + "l", + "v", + "d", + "m", + "p", + "g", + "z", + "b", + "c", + "č", + "h", + "š", + "ž", + "ć", + "f", + ], + "Hindi": [ + "क", + "र", + "स", + "न", + "त", + "म", + "ह", + "प", + "य", + "ल", + "व", + "ज", + "द", + "ग", + "ब", + "श", + "ट", + "अ", + "ए", + "थ", + "भ", + "ड", + "च", + "ध", + "ष", + "इ", + ], + "Estonian": [ + "a", + "i", + "e", + "s", + "t", + "l", + "u", + "n", + "o", + "k", + "r", + "d", + "m", + "v", + "g", + "p", + "j", + "h", + "ä", + "b", + "õ", + "ü", + "f", + "c", + "ö", + "y", + ], + "Simple English": [ + "e", + "a", + "t", + "i", + "o", + "n", + "s", + "r", + "h", + "l", + "d", + "c", + "m", + "u", + "f", + "p", + "g", + "w", + "b", + "y", + "v", + "k", + "j", + "x", + "z", + "q", + ], + "Thai": [ + "า", + "น", + "ร", + "อ", + "ก", + "เ", + "ง", + "ม", + "ย", + "ล", + "ว", + "ด", + "ท", + "ส", + "ต", + "ะ", + "ป", + "บ", + "ค", + "ห", + "แ", + "จ", + "พ", + "ช", + "ข", + "ใ", + ], + "Greek": [ + "α", + "τ", + "ο", + "ι", + "ε", + "ν", + "ρ", + "σ", + "κ", + "η", + "π", + "ς", + "υ", + "μ", + "λ", + "ί", + "ό", + "ά", + "γ", + "έ", + "δ", + "ή", + "ω", + "χ", + "θ", + "ύ", + ], + "Tamil": [ + "க", + "த", + "ப", + "ட", + "ர", + "ம", + "ல", + "ன", + "வ", + "ற", + "ய", + "ள", + "ச", + "ந", + "இ", + "ண", + "அ", + "ஆ", + "ழ", + "ங", + "எ", + "உ", + "ஒ", + "ஸ", + ], + "Classical Chinese": [ + "之", + "年", + "為", + "也", + "以", + "一", + "人", + "其", + "者", + "國", + "有", + "二", + "十", + "於", + "曰", + "三", + "不", + "大", + "而", + "子", + "中", + "五", + "四", + ], + "Kazakh": [ + "а", + "ы", + "е", + "н", + "т", + "р", + "л", + "і", + "д", + "с", + "м", + "қ", + "к", + "о", + "б", + "и", + "у", + "ғ", + "ж", + "ң", + "з", + "ш", + "й", + "п", + "г", + "ө", + ], +} diff --git a/libs/common/charset_normalizer/cd.py b/libs/common/charset_normalizer/cd.py new file mode 100644 index 00000000..ee4b7424 --- /dev/null +++ b/libs/common/charset_normalizer/cd.py @@ -0,0 +1,339 @@ +import importlib +from codecs import IncrementalDecoder +from collections import Counter +from functools import lru_cache +from typing import Counter as TypeCounter, Dict, List, Optional, Tuple + +from .assets import FREQUENCIES +from .constant import KO_NAMES, LANGUAGE_SUPPORTED_COUNT, TOO_SMALL_SEQUENCE, ZH_NAMES +from .md import is_suspiciously_successive_range +from .models import CoherenceMatches +from .utils import ( + is_accentuated, + is_latin, + is_multi_byte_encoding, + is_unicode_range_secondary, + unicode_range, +) + + +def encoding_unicode_range(iana_name: str) -> List[str]: + """ + Return associated unicode ranges in a single byte code page. + """ + if is_multi_byte_encoding(iana_name): + raise IOError("Function not supported on multi-byte code page") + + decoder = importlib.import_module( + "encodings.{}".format(iana_name) + ).IncrementalDecoder + + p: IncrementalDecoder = decoder(errors="ignore") + seen_ranges: Dict[str, int] = {} + character_count: int = 0 + + for i in range(0x40, 0xFF): + chunk: str = p.decode(bytes([i])) + + if chunk: + character_range: Optional[str] = unicode_range(chunk) + + if character_range is None: + continue + + if is_unicode_range_secondary(character_range) is False: + if character_range not in seen_ranges: + seen_ranges[character_range] = 0 + seen_ranges[character_range] += 1 + character_count += 1 + + return sorted( + [ + character_range + for character_range in seen_ranges + if seen_ranges[character_range] / character_count >= 0.15 + ] + ) + + +def unicode_range_languages(primary_range: str) -> List[str]: + """ + Return inferred languages used with a unicode range. + """ + languages: List[str] = [] + + for language, characters in FREQUENCIES.items(): + for character in characters: + if unicode_range(character) == primary_range: + languages.append(language) + break + + return languages + + +@lru_cache() +def encoding_languages(iana_name: str) -> List[str]: + """ + Single-byte encoding language association. Some code page are heavily linked to particular language(s). + This function does the correspondence. + """ + unicode_ranges: List[str] = encoding_unicode_range(iana_name) + primary_range: Optional[str] = None + + for specified_range in unicode_ranges: + if "Latin" not in specified_range: + primary_range = specified_range + break + + if primary_range is None: + return ["Latin Based"] + + return unicode_range_languages(primary_range) + + +@lru_cache() +def mb_encoding_languages(iana_name: str) -> List[str]: + """ + Multi-byte encoding language association. Some code page are heavily linked to particular language(s). + This function does the correspondence. + """ + if ( + iana_name.startswith("shift_") + or iana_name.startswith("iso2022_jp") + or iana_name.startswith("euc_j") + or iana_name == "cp932" + ): + return ["Japanese"] + if iana_name.startswith("gb") or iana_name in ZH_NAMES: + return ["Chinese", "Classical Chinese"] + if iana_name.startswith("iso2022_kr") or iana_name in KO_NAMES: + return ["Korean"] + + return [] + + +@lru_cache(maxsize=LANGUAGE_SUPPORTED_COUNT) +def get_target_features(language: str) -> Tuple[bool, bool]: + """ + Determine main aspects from a supported language if it contains accents and if is pure Latin. + """ + target_have_accents: bool = False + target_pure_latin: bool = True + + for character in FREQUENCIES[language]: + if not target_have_accents and is_accentuated(character): + target_have_accents = True + if target_pure_latin and is_latin(character) is False: + target_pure_latin = False + + return target_have_accents, target_pure_latin + + +def alphabet_languages( + characters: List[str], ignore_non_latin: bool = False +) -> List[str]: + """ + Return associated languages associated to given characters. + """ + languages: List[Tuple[str, float]] = [] + + source_have_accents = any(is_accentuated(character) for character in characters) + + for language, language_characters in FREQUENCIES.items(): + + target_have_accents, target_pure_latin = get_target_features(language) + + if ignore_non_latin and target_pure_latin is False: + continue + + if target_have_accents is False and source_have_accents: + continue + + character_count: int = len(language_characters) + + character_match_count: int = len( + [c for c in language_characters if c in characters] + ) + + ratio: float = character_match_count / character_count + + if ratio >= 0.2: + languages.append((language, ratio)) + + languages = sorted(languages, key=lambda x: x[1], reverse=True) + + return [compatible_language[0] for compatible_language in languages] + + +def characters_popularity_compare( + language: str, ordered_characters: List[str] +) -> float: + """ + Determine if a ordered characters list (by occurrence from most appearance to rarest) match a particular language. + The result is a ratio between 0. (absolutely no correspondence) and 1. (near perfect fit). + Beware that is function is not strict on the match in order to ease the detection. (Meaning close match is 1.) + """ + if language not in FREQUENCIES: + raise ValueError("{} not available".format(language)) + + character_approved_count: int = 0 + FREQUENCIES_language_set = set(FREQUENCIES[language]) + + for character in ordered_characters: + if character not in FREQUENCIES_language_set: + continue + + characters_before_source: List[str] = FREQUENCIES[language][ + 0 : FREQUENCIES[language].index(character) + ] + characters_after_source: List[str] = FREQUENCIES[language][ + FREQUENCIES[language].index(character) : + ] + characters_before: List[str] = ordered_characters[ + 0 : ordered_characters.index(character) + ] + characters_after: List[str] = ordered_characters[ + ordered_characters.index(character) : + ] + + before_match_count: int = len( + set(characters_before) & set(characters_before_source) + ) + + after_match_count: int = len( + set(characters_after) & set(characters_after_source) + ) + + if len(characters_before_source) == 0 and before_match_count <= 4: + character_approved_count += 1 + continue + + if len(characters_after_source) == 0 and after_match_count <= 4: + character_approved_count += 1 + continue + + if ( + before_match_count / len(characters_before_source) >= 0.4 + or after_match_count / len(characters_after_source) >= 0.4 + ): + character_approved_count += 1 + continue + + return character_approved_count / len(ordered_characters) + + +def alpha_unicode_split(decoded_sequence: str) -> List[str]: + """ + Given a decoded text sequence, return a list of str. Unicode range / alphabet separation. + Ex. a text containing English/Latin with a bit a Hebrew will return two items in the resulting list; + One containing the latin letters and the other hebrew. + """ + layers: Dict[str, str] = {} + + for character in decoded_sequence: + if character.isalpha() is False: + continue + + character_range: Optional[str] = unicode_range(character) + + if character_range is None: + continue + + layer_target_range: Optional[str] = None + + for discovered_range in layers: + if ( + is_suspiciously_successive_range(discovered_range, character_range) + is False + ): + layer_target_range = discovered_range + break + + if layer_target_range is None: + layer_target_range = character_range + + if layer_target_range not in layers: + layers[layer_target_range] = character.lower() + continue + + layers[layer_target_range] += character.lower() + + return list(layers.values()) + + +def merge_coherence_ratios(results: List[CoherenceMatches]) -> CoherenceMatches: + """ + This function merge results previously given by the function coherence_ratio. + The return type is the same as coherence_ratio. + """ + per_language_ratios: Dict[str, List[float]] = {} + for result in results: + for sub_result in result: + language, ratio = sub_result + if language not in per_language_ratios: + per_language_ratios[language] = [ratio] + continue + per_language_ratios[language].append(ratio) + + merge = [ + ( + language, + round( + sum(per_language_ratios[language]) / len(per_language_ratios[language]), + 4, + ), + ) + for language in per_language_ratios + ] + + return sorted(merge, key=lambda x: x[1], reverse=True) + + +@lru_cache(maxsize=2048) +def coherence_ratio( + decoded_sequence: str, threshold: float = 0.1, lg_inclusion: Optional[str] = None +) -> CoherenceMatches: + """ + Detect ANY language that can be identified in given sequence. The sequence will be analysed by layers. + A layer = Character extraction by alphabets/ranges. + """ + + results: List[Tuple[str, float]] = [] + ignore_non_latin: bool = False + + sufficient_match_count: int = 0 + + lg_inclusion_list = lg_inclusion.split(",") if lg_inclusion is not None else [] + if "Latin Based" in lg_inclusion_list: + ignore_non_latin = True + lg_inclusion_list.remove("Latin Based") + + for layer in alpha_unicode_split(decoded_sequence): + sequence_frequencies: TypeCounter[str] = Counter(layer) + most_common = sequence_frequencies.most_common() + + character_count: int = sum(o for c, o in most_common) + + if character_count <= TOO_SMALL_SEQUENCE: + continue + + popular_character_ordered: List[str] = [c for c, o in most_common] + + for language in lg_inclusion_list or alphabet_languages( + popular_character_ordered, ignore_non_latin + ): + ratio: float = characters_popularity_compare( + language, popular_character_ordered + ) + + if ratio < threshold: + continue + elif ratio >= 0.8: + sufficient_match_count += 1 + + results.append((language, round(ratio, 4))) + + if sufficient_match_count >= 3: + break + + return sorted(results, key=lambda x: x[1], reverse=True) diff --git a/libs/common/charset_normalizer/cli/__init__.py b/libs/common/charset_normalizer/cli/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/libs/common/charset_normalizer/cli/normalizer.py b/libs/common/charset_normalizer/cli/normalizer.py new file mode 100644 index 00000000..b8b652a5 --- /dev/null +++ b/libs/common/charset_normalizer/cli/normalizer.py @@ -0,0 +1,295 @@ +import argparse +import sys +from json import dumps +from os.path import abspath +from platform import python_version +from typing import List, Optional + +try: + from unicodedata2 import unidata_version +except ImportError: + from unicodedata import unidata_version + +from charset_normalizer import from_fp +from charset_normalizer.models import CliDetectionResult +from charset_normalizer.version import __version__ + + +def query_yes_no(question: str, default: str = "yes") -> bool: + """Ask a yes/no question via input() and return their answer. + + "question" is a string that is presented to the user. + "default" is the presumed answer if the user just hits . + It must be "yes" (the default), "no" or None (meaning + an answer is required of the user). + + The "answer" return value is True for "yes" or False for "no". + + Credit goes to (c) https://stackoverflow.com/questions/3041986/apt-command-line-interface-like-yes-no-input + """ + valid = {"yes": True, "y": True, "ye": True, "no": False, "n": False} + if default is None: + prompt = " [y/n] " + elif default == "yes": + prompt = " [Y/n] " + elif default == "no": + prompt = " [y/N] " + else: + raise ValueError("invalid default answer: '%s'" % default) + + while True: + sys.stdout.write(question + prompt) + choice = input().lower() + if default is not None and choice == "": + return valid[default] + elif choice in valid: + return valid[choice] + else: + sys.stdout.write("Please respond with 'yes' or 'no' " "(or 'y' or 'n').\n") + + +def cli_detect(argv: Optional[List[str]] = None) -> int: + """ + CLI assistant using ARGV and ArgumentParser + :param argv: + :return: 0 if everything is fine, anything else equal trouble + """ + parser = argparse.ArgumentParser( + description="The Real First Universal Charset Detector. " + "Discover originating encoding used on text file. " + "Normalize text to unicode." + ) + + parser.add_argument( + "files", type=argparse.FileType("rb"), nargs="+", help="File(s) to be analysed" + ) + parser.add_argument( + "-v", + "--verbose", + action="store_true", + default=False, + dest="verbose", + help="Display complementary information about file if any. " + "Stdout will contain logs about the detection process.", + ) + parser.add_argument( + "-a", + "--with-alternative", + action="store_true", + default=False, + dest="alternatives", + help="Output complementary possibilities if any. Top-level JSON WILL be a list.", + ) + parser.add_argument( + "-n", + "--normalize", + action="store_true", + default=False, + dest="normalize", + help="Permit to normalize input file. If not set, program does not write anything.", + ) + parser.add_argument( + "-m", + "--minimal", + action="store_true", + default=False, + dest="minimal", + help="Only output the charset detected to STDOUT. Disabling JSON output.", + ) + parser.add_argument( + "-r", + "--replace", + action="store_true", + default=False, + dest="replace", + help="Replace file when trying to normalize it instead of creating a new one.", + ) + parser.add_argument( + "-f", + "--force", + action="store_true", + default=False, + dest="force", + help="Replace file without asking if you are sure, use this flag with caution.", + ) + parser.add_argument( + "-t", + "--threshold", + action="store", + default=0.2, + type=float, + dest="threshold", + help="Define a custom maximum amount of chaos allowed in decoded content. 0. <= chaos <= 1.", + ) + parser.add_argument( + "--version", + action="version", + version="Charset-Normalizer {} - Python {} - Unicode {}".format( + __version__, python_version(), unidata_version + ), + help="Show version information and exit.", + ) + + args = parser.parse_args(argv) + + if args.replace is True and args.normalize is False: + print("Use --replace in addition of --normalize only.", file=sys.stderr) + return 1 + + if args.force is True and args.replace is False: + print("Use --force in addition of --replace only.", file=sys.stderr) + return 1 + + if args.threshold < 0.0 or args.threshold > 1.0: + print("--threshold VALUE should be between 0. AND 1.", file=sys.stderr) + return 1 + + x_ = [] + + for my_file in args.files: + + matches = from_fp(my_file, threshold=args.threshold, explain=args.verbose) + + best_guess = matches.best() + + if best_guess is None: + print( + 'Unable to identify originating encoding for "{}". {}'.format( + my_file.name, + "Maybe try increasing maximum amount of chaos." + if args.threshold < 1.0 + else "", + ), + file=sys.stderr, + ) + x_.append( + CliDetectionResult( + abspath(my_file.name), + None, + [], + [], + "Unknown", + [], + False, + 1.0, + 0.0, + None, + True, + ) + ) + else: + x_.append( + CliDetectionResult( + abspath(my_file.name), + best_guess.encoding, + best_guess.encoding_aliases, + [ + cp + for cp in best_guess.could_be_from_charset + if cp != best_guess.encoding + ], + best_guess.language, + best_guess.alphabets, + best_guess.bom, + best_guess.percent_chaos, + best_guess.percent_coherence, + None, + True, + ) + ) + + if len(matches) > 1 and args.alternatives: + for el in matches: + if el != best_guess: + x_.append( + CliDetectionResult( + abspath(my_file.name), + el.encoding, + el.encoding_aliases, + [ + cp + for cp in el.could_be_from_charset + if cp != el.encoding + ], + el.language, + el.alphabets, + el.bom, + el.percent_chaos, + el.percent_coherence, + None, + False, + ) + ) + + if args.normalize is True: + + if best_guess.encoding.startswith("utf") is True: + print( + '"{}" file does not need to be normalized, as it already came from unicode.'.format( + my_file.name + ), + file=sys.stderr, + ) + if my_file.closed is False: + my_file.close() + continue + + o_: List[str] = my_file.name.split(".") + + if args.replace is False: + o_.insert(-1, best_guess.encoding) + if my_file.closed is False: + my_file.close() + elif ( + args.force is False + and query_yes_no( + 'Are you sure to normalize "{}" by replacing it ?'.format( + my_file.name + ), + "no", + ) + is False + ): + if my_file.closed is False: + my_file.close() + continue + + try: + x_[0].unicode_path = abspath("./{}".format(".".join(o_))) + + with open(x_[0].unicode_path, "w", encoding="utf-8") as fp: + fp.write(str(best_guess)) + except IOError as e: + print(str(e), file=sys.stderr) + if my_file.closed is False: + my_file.close() + return 2 + + if my_file.closed is False: + my_file.close() + + if args.minimal is False: + print( + dumps( + [el.__dict__ for el in x_] if len(x_) > 1 else x_[0].__dict__, + ensure_ascii=True, + indent=4, + ) + ) + else: + for my_file in args.files: + print( + ", ".join( + [ + el.encoding or "undefined" + for el in x_ + if el.path == abspath(my_file.name) + ] + ) + ) + + return 0 + + +if __name__ == "__main__": + cli_detect() diff --git a/libs/common/charset_normalizer/constant.py b/libs/common/charset_normalizer/constant.py new file mode 100644 index 00000000..ac840c46 --- /dev/null +++ b/libs/common/charset_normalizer/constant.py @@ -0,0 +1,497 @@ +from codecs import BOM_UTF8, BOM_UTF16_BE, BOM_UTF16_LE, BOM_UTF32_BE, BOM_UTF32_LE +from encodings.aliases import aliases +from re import IGNORECASE, compile as re_compile +from typing import Dict, List, Set, Union + +from .assets import FREQUENCIES + +# Contain for each eligible encoding a list of/item bytes SIG/BOM +ENCODING_MARKS: Dict[str, Union[bytes, List[bytes]]] = { + "utf_8": BOM_UTF8, + "utf_7": [ + b"\x2b\x2f\x76\x38", + b"\x2b\x2f\x76\x39", + b"\x2b\x2f\x76\x2b", + b"\x2b\x2f\x76\x2f", + b"\x2b\x2f\x76\x38\x2d", + ], + "gb18030": b"\x84\x31\x95\x33", + "utf_32": [BOM_UTF32_BE, BOM_UTF32_LE], + "utf_16": [BOM_UTF16_BE, BOM_UTF16_LE], +} + +TOO_SMALL_SEQUENCE: int = 32 +TOO_BIG_SEQUENCE: int = int(10e6) + +UTF8_MAXIMAL_ALLOCATION: int = 1112064 + +UNICODE_RANGES_COMBINED: Dict[str, range] = { + "Control character": range(31 + 1), + "Basic Latin": range(32, 127 + 1), + "Latin-1 Supplement": range(128, 255 + 1), + "Latin Extended-A": range(256, 383 + 1), + "Latin Extended-B": range(384, 591 + 1), + "IPA Extensions": range(592, 687 + 1), + "Spacing Modifier Letters": range(688, 767 + 1), + "Combining Diacritical Marks": range(768, 879 + 1), + "Greek and Coptic": range(880, 1023 + 1), + "Cyrillic": range(1024, 1279 + 1), + "Cyrillic Supplement": range(1280, 1327 + 1), + "Armenian": range(1328, 1423 + 1), + "Hebrew": range(1424, 1535 + 1), + "Arabic": range(1536, 1791 + 1), + "Syriac": range(1792, 1871 + 1), + "Arabic Supplement": range(1872, 1919 + 1), + "Thaana": range(1920, 1983 + 1), + "NKo": range(1984, 2047 + 1), + "Samaritan": range(2048, 2111 + 1), + "Mandaic": range(2112, 2143 + 1), + "Syriac Supplement": range(2144, 2159 + 1), + "Arabic Extended-A": range(2208, 2303 + 1), + "Devanagari": range(2304, 2431 + 1), + "Bengali": range(2432, 2559 + 1), + "Gurmukhi": range(2560, 2687 + 1), + "Gujarati": range(2688, 2815 + 1), + "Oriya": range(2816, 2943 + 1), + "Tamil": range(2944, 3071 + 1), + "Telugu": range(3072, 3199 + 1), + "Kannada": range(3200, 3327 + 1), + "Malayalam": range(3328, 3455 + 1), + "Sinhala": range(3456, 3583 + 1), + "Thai": range(3584, 3711 + 1), + "Lao": range(3712, 3839 + 1), + "Tibetan": range(3840, 4095 + 1), + "Myanmar": range(4096, 4255 + 1), + "Georgian": range(4256, 4351 + 1), + "Hangul Jamo": range(4352, 4607 + 1), + "Ethiopic": range(4608, 4991 + 1), + "Ethiopic Supplement": range(4992, 5023 + 1), + "Cherokee": range(5024, 5119 + 1), + "Unified Canadian Aboriginal Syllabics": range(5120, 5759 + 1), + "Ogham": range(5760, 5791 + 1), + "Runic": range(5792, 5887 + 1), + "Tagalog": range(5888, 5919 + 1), + "Hanunoo": range(5920, 5951 + 1), + "Buhid": range(5952, 5983 + 1), + "Tagbanwa": range(5984, 6015 + 1), + "Khmer": range(6016, 6143 + 1), + "Mongolian": range(6144, 6319 + 1), + "Unified Canadian Aboriginal Syllabics Extended": range(6320, 6399 + 1), + "Limbu": range(6400, 6479 + 1), + "Tai Le": range(6480, 6527 + 1), + "New Tai Lue": range(6528, 6623 + 1), + "Khmer Symbols": range(6624, 6655 + 1), + "Buginese": range(6656, 6687 + 1), + "Tai Tham": range(6688, 6831 + 1), + "Combining Diacritical Marks Extended": range(6832, 6911 + 1), + "Balinese": range(6912, 7039 + 1), + "Sundanese": range(7040, 7103 + 1), + "Batak": range(7104, 7167 + 1), + "Lepcha": range(7168, 7247 + 1), + "Ol Chiki": range(7248, 7295 + 1), + "Cyrillic Extended C": range(7296, 7311 + 1), + "Sundanese Supplement": range(7360, 7375 + 1), + "Vedic Extensions": range(7376, 7423 + 1), + "Phonetic Extensions": range(7424, 7551 + 1), + "Phonetic Extensions Supplement": range(7552, 7615 + 1), + "Combining Diacritical Marks Supplement": range(7616, 7679 + 1), + "Latin Extended Additional": range(7680, 7935 + 1), + "Greek Extended": range(7936, 8191 + 1), + "General Punctuation": range(8192, 8303 + 1), + "Superscripts and Subscripts": range(8304, 8351 + 1), + "Currency Symbols": range(8352, 8399 + 1), + "Combining Diacritical Marks for Symbols": range(8400, 8447 + 1), + "Letterlike Symbols": range(8448, 8527 + 1), + "Number Forms": range(8528, 8591 + 1), + "Arrows": range(8592, 8703 + 1), + "Mathematical Operators": range(8704, 8959 + 1), + "Miscellaneous Technical": range(8960, 9215 + 1), + "Control Pictures": range(9216, 9279 + 1), + "Optical Character Recognition": range(9280, 9311 + 1), + "Enclosed Alphanumerics": range(9312, 9471 + 1), + "Box Drawing": range(9472, 9599 + 1), + "Block Elements": range(9600, 9631 + 1), + "Geometric Shapes": range(9632, 9727 + 1), + "Miscellaneous Symbols": range(9728, 9983 + 1), + "Dingbats": range(9984, 10175 + 1), + "Miscellaneous Mathematical Symbols-A": range(10176, 10223 + 1), + "Supplemental Arrows-A": range(10224, 10239 + 1), + "Braille Patterns": range(10240, 10495 + 1), + "Supplemental Arrows-B": range(10496, 10623 + 1), + "Miscellaneous Mathematical Symbols-B": range(10624, 10751 + 1), + "Supplemental Mathematical Operators": range(10752, 11007 + 1), + "Miscellaneous Symbols and Arrows": range(11008, 11263 + 1), + "Glagolitic": range(11264, 11359 + 1), + "Latin Extended-C": range(11360, 11391 + 1), + "Coptic": range(11392, 11519 + 1), + "Georgian Supplement": range(11520, 11567 + 1), + "Tifinagh": range(11568, 11647 + 1), + "Ethiopic Extended": range(11648, 11743 + 1), + "Cyrillic Extended-A": range(11744, 11775 + 1), + "Supplemental Punctuation": range(11776, 11903 + 1), + "CJK Radicals Supplement": range(11904, 12031 + 1), + "Kangxi Radicals": range(12032, 12255 + 1), + "Ideographic Description Characters": range(12272, 12287 + 1), + "CJK Symbols and Punctuation": range(12288, 12351 + 1), + "Hiragana": range(12352, 12447 + 1), + "Katakana": range(12448, 12543 + 1), + "Bopomofo": range(12544, 12591 + 1), + "Hangul Compatibility Jamo": range(12592, 12687 + 1), + "Kanbun": range(12688, 12703 + 1), + "Bopomofo Extended": range(12704, 12735 + 1), + "CJK Strokes": range(12736, 12783 + 1), + "Katakana Phonetic Extensions": range(12784, 12799 + 1), + "Enclosed CJK Letters and Months": range(12800, 13055 + 1), + "CJK Compatibility": range(13056, 13311 + 1), + "CJK Unified Ideographs Extension A": range(13312, 19903 + 1), + "Yijing Hexagram Symbols": range(19904, 19967 + 1), + "CJK Unified Ideographs": range(19968, 40959 + 1), + "Yi Syllables": range(40960, 42127 + 1), + "Yi Radicals": range(42128, 42191 + 1), + "Lisu": range(42192, 42239 + 1), + "Vai": range(42240, 42559 + 1), + "Cyrillic Extended-B": range(42560, 42655 + 1), + "Bamum": range(42656, 42751 + 1), + "Modifier Tone Letters": range(42752, 42783 + 1), + "Latin Extended-D": range(42784, 43007 + 1), + "Syloti Nagri": range(43008, 43055 + 1), + "Common Indic Number Forms": range(43056, 43071 + 1), + "Phags-pa": range(43072, 43135 + 1), + "Saurashtra": range(43136, 43231 + 1), + "Devanagari Extended": range(43232, 43263 + 1), + "Kayah Li": range(43264, 43311 + 1), + "Rejang": range(43312, 43359 + 1), + "Hangul Jamo Extended-A": range(43360, 43391 + 1), + "Javanese": range(43392, 43487 + 1), + "Myanmar Extended-B": range(43488, 43519 + 1), + "Cham": range(43520, 43615 + 1), + "Myanmar Extended-A": range(43616, 43647 + 1), + "Tai Viet": range(43648, 43743 + 1), + "Meetei Mayek Extensions": range(43744, 43775 + 1), + "Ethiopic Extended-A": range(43776, 43823 + 1), + "Latin Extended-E": range(43824, 43887 + 1), + "Cherokee Supplement": range(43888, 43967 + 1), + "Meetei Mayek": range(43968, 44031 + 1), + "Hangul Syllables": range(44032, 55215 + 1), + "Hangul Jamo Extended-B": range(55216, 55295 + 1), + "High Surrogates": range(55296, 56191 + 1), + "High Private Use Surrogates": range(56192, 56319 + 1), + "Low Surrogates": range(56320, 57343 + 1), + "Private Use Area": range(57344, 63743 + 1), + "CJK Compatibility Ideographs": range(63744, 64255 + 1), + "Alphabetic Presentation Forms": range(64256, 64335 + 1), + "Arabic Presentation Forms-A": range(64336, 65023 + 1), + "Variation Selectors": range(65024, 65039 + 1), + "Vertical Forms": range(65040, 65055 + 1), + "Combining Half Marks": range(65056, 65071 + 1), + "CJK Compatibility Forms": range(65072, 65103 + 1), + "Small Form Variants": range(65104, 65135 + 1), + "Arabic Presentation Forms-B": range(65136, 65279 + 1), + "Halfwidth and Fullwidth Forms": range(65280, 65519 + 1), + "Specials": range(65520, 65535 + 1), + "Linear B Syllabary": range(65536, 65663 + 1), + "Linear B Ideograms": range(65664, 65791 + 1), + "Aegean Numbers": range(65792, 65855 + 1), + "Ancient Greek Numbers": range(65856, 65935 + 1), + "Ancient Symbols": range(65936, 65999 + 1), + "Phaistos Disc": range(66000, 66047 + 1), + "Lycian": range(66176, 66207 + 1), + "Carian": range(66208, 66271 + 1), + "Coptic Epact Numbers": range(66272, 66303 + 1), + "Old Italic": range(66304, 66351 + 1), + "Gothic": range(66352, 66383 + 1), + "Old Permic": range(66384, 66431 + 1), + "Ugaritic": range(66432, 66463 + 1), + "Old Persian": range(66464, 66527 + 1), + "Deseret": range(66560, 66639 + 1), + "Shavian": range(66640, 66687 + 1), + "Osmanya": range(66688, 66735 + 1), + "Osage": range(66736, 66815 + 1), + "Elbasan": range(66816, 66863 + 1), + "Caucasian Albanian": range(66864, 66927 + 1), + "Linear A": range(67072, 67455 + 1), + "Cypriot Syllabary": range(67584, 67647 + 1), + "Imperial Aramaic": range(67648, 67679 + 1), + "Palmyrene": range(67680, 67711 + 1), + "Nabataean": range(67712, 67759 + 1), + "Hatran": range(67808, 67839 + 1), + "Phoenician": range(67840, 67871 + 1), + "Lydian": range(67872, 67903 + 1), + "Meroitic Hieroglyphs": range(67968, 67999 + 1), + "Meroitic Cursive": range(68000, 68095 + 1), + "Kharoshthi": range(68096, 68191 + 1), + "Old South Arabian": range(68192, 68223 + 1), + "Old North Arabian": range(68224, 68255 + 1), + "Manichaean": range(68288, 68351 + 1), + "Avestan": range(68352, 68415 + 1), + "Inscriptional Parthian": range(68416, 68447 + 1), + "Inscriptional Pahlavi": range(68448, 68479 + 1), + "Psalter Pahlavi": range(68480, 68527 + 1), + "Old Turkic": range(68608, 68687 + 1), + "Old Hungarian": range(68736, 68863 + 1), + "Rumi Numeral Symbols": range(69216, 69247 + 1), + "Brahmi": range(69632, 69759 + 1), + "Kaithi": range(69760, 69839 + 1), + "Sora Sompeng": range(69840, 69887 + 1), + "Chakma": range(69888, 69967 + 1), + "Mahajani": range(69968, 70015 + 1), + "Sharada": range(70016, 70111 + 1), + "Sinhala Archaic Numbers": range(70112, 70143 + 1), + "Khojki": range(70144, 70223 + 1), + "Multani": range(70272, 70319 + 1), + "Khudawadi": range(70320, 70399 + 1), + "Grantha": range(70400, 70527 + 1), + "Newa": range(70656, 70783 + 1), + "Tirhuta": range(70784, 70879 + 1), + "Siddham": range(71040, 71167 + 1), + "Modi": range(71168, 71263 + 1), + "Mongolian Supplement": range(71264, 71295 + 1), + "Takri": range(71296, 71375 + 1), + "Ahom": range(71424, 71487 + 1), + "Warang Citi": range(71840, 71935 + 1), + "Zanabazar Square": range(72192, 72271 + 1), + "Soyombo": range(72272, 72367 + 1), + "Pau Cin Hau": range(72384, 72447 + 1), + "Bhaiksuki": range(72704, 72815 + 1), + "Marchen": range(72816, 72895 + 1), + "Masaram Gondi": range(72960, 73055 + 1), + "Cuneiform": range(73728, 74751 + 1), + "Cuneiform Numbers and Punctuation": range(74752, 74879 + 1), + "Early Dynastic Cuneiform": range(74880, 75087 + 1), + "Egyptian Hieroglyphs": range(77824, 78895 + 1), + "Anatolian Hieroglyphs": range(82944, 83583 + 1), + "Bamum Supplement": range(92160, 92735 + 1), + "Mro": range(92736, 92783 + 1), + "Bassa Vah": range(92880, 92927 + 1), + "Pahawh Hmong": range(92928, 93071 + 1), + "Miao": range(93952, 94111 + 1), + "Ideographic Symbols and Punctuation": range(94176, 94207 + 1), + "Tangut": range(94208, 100351 + 1), + "Tangut Components": range(100352, 101119 + 1), + "Kana Supplement": range(110592, 110847 + 1), + "Kana Extended-A": range(110848, 110895 + 1), + "Nushu": range(110960, 111359 + 1), + "Duployan": range(113664, 113823 + 1), + "Shorthand Format Controls": range(113824, 113839 + 1), + "Byzantine Musical Symbols": range(118784, 119039 + 1), + "Musical Symbols": range(119040, 119295 + 1), + "Ancient Greek Musical Notation": range(119296, 119375 + 1), + "Tai Xuan Jing Symbols": range(119552, 119647 + 1), + "Counting Rod Numerals": range(119648, 119679 + 1), + "Mathematical Alphanumeric Symbols": range(119808, 120831 + 1), + "Sutton SignWriting": range(120832, 121519 + 1), + "Glagolitic Supplement": range(122880, 122927 + 1), + "Mende Kikakui": range(124928, 125151 + 1), + "Adlam": range(125184, 125279 + 1), + "Arabic Mathematical Alphabetic Symbols": range(126464, 126719 + 1), + "Mahjong Tiles": range(126976, 127023 + 1), + "Domino Tiles": range(127024, 127135 + 1), + "Playing Cards": range(127136, 127231 + 1), + "Enclosed Alphanumeric Supplement": range(127232, 127487 + 1), + "Enclosed Ideographic Supplement": range(127488, 127743 + 1), + "Miscellaneous Symbols and Pictographs": range(127744, 128511 + 1), + "Emoticons range(Emoji)": range(128512, 128591 + 1), + "Ornamental Dingbats": range(128592, 128639 + 1), + "Transport and Map Symbols": range(128640, 128767 + 1), + "Alchemical Symbols": range(128768, 128895 + 1), + "Geometric Shapes Extended": range(128896, 129023 + 1), + "Supplemental Arrows-C": range(129024, 129279 + 1), + "Supplemental Symbols and Pictographs": range(129280, 129535 + 1), + "CJK Unified Ideographs Extension B": range(131072, 173791 + 1), + "CJK Unified Ideographs Extension C": range(173824, 177983 + 1), + "CJK Unified Ideographs Extension D": range(177984, 178207 + 1), + "CJK Unified Ideographs Extension E": range(178208, 183983 + 1), + "CJK Unified Ideographs Extension F": range(183984, 191471 + 1), + "CJK Compatibility Ideographs Supplement": range(194560, 195103 + 1), + "Tags": range(917504, 917631 + 1), + "Variation Selectors Supplement": range(917760, 917999 + 1), +} + + +UNICODE_SECONDARY_RANGE_KEYWORD: List[str] = [ + "Supplement", + "Extended", + "Extensions", + "Modifier", + "Marks", + "Punctuation", + "Symbols", + "Forms", + "Operators", + "Miscellaneous", + "Drawing", + "Block", + "Shapes", + "Supplemental", + "Tags", +] + +RE_POSSIBLE_ENCODING_INDICATION = re_compile( + r"(?:(?:encoding)|(?:charset)|(?:coding))(?:[\:= ]{1,10})(?:[\"\']?)([a-zA-Z0-9\-_]+)(?:[\"\']?)", + IGNORECASE, +) + +IANA_SUPPORTED: List[str] = sorted( + filter( + lambda x: x.endswith("_codec") is False + and x not in {"rot_13", "tactis", "mbcs"}, + list(set(aliases.values())), + ) +) + +IANA_SUPPORTED_COUNT: int = len(IANA_SUPPORTED) + +# pre-computed code page that are similar using the function cp_similarity. +IANA_SUPPORTED_SIMILAR: Dict[str, List[str]] = { + "cp037": ["cp1026", "cp1140", "cp273", "cp500"], + "cp1026": ["cp037", "cp1140", "cp273", "cp500"], + "cp1125": ["cp866"], + "cp1140": ["cp037", "cp1026", "cp273", "cp500"], + "cp1250": ["iso8859_2"], + "cp1251": ["kz1048", "ptcp154"], + "cp1252": ["iso8859_15", "iso8859_9", "latin_1"], + "cp1253": ["iso8859_7"], + "cp1254": ["iso8859_15", "iso8859_9", "latin_1"], + "cp1257": ["iso8859_13"], + "cp273": ["cp037", "cp1026", "cp1140", "cp500"], + "cp437": ["cp850", "cp858", "cp860", "cp861", "cp862", "cp863", "cp865"], + "cp500": ["cp037", "cp1026", "cp1140", "cp273"], + "cp850": ["cp437", "cp857", "cp858", "cp865"], + "cp857": ["cp850", "cp858", "cp865"], + "cp858": ["cp437", "cp850", "cp857", "cp865"], + "cp860": ["cp437", "cp861", "cp862", "cp863", "cp865"], + "cp861": ["cp437", "cp860", "cp862", "cp863", "cp865"], + "cp862": ["cp437", "cp860", "cp861", "cp863", "cp865"], + "cp863": ["cp437", "cp860", "cp861", "cp862", "cp865"], + "cp865": ["cp437", "cp850", "cp857", "cp858", "cp860", "cp861", "cp862", "cp863"], + "cp866": ["cp1125"], + "iso8859_10": ["iso8859_14", "iso8859_15", "iso8859_4", "iso8859_9", "latin_1"], + "iso8859_11": ["tis_620"], + "iso8859_13": ["cp1257"], + "iso8859_14": [ + "iso8859_10", + "iso8859_15", + "iso8859_16", + "iso8859_3", + "iso8859_9", + "latin_1", + ], + "iso8859_15": [ + "cp1252", + "cp1254", + "iso8859_10", + "iso8859_14", + "iso8859_16", + "iso8859_3", + "iso8859_9", + "latin_1", + ], + "iso8859_16": [ + "iso8859_14", + "iso8859_15", + "iso8859_2", + "iso8859_3", + "iso8859_9", + "latin_1", + ], + "iso8859_2": ["cp1250", "iso8859_16", "iso8859_4"], + "iso8859_3": ["iso8859_14", "iso8859_15", "iso8859_16", "iso8859_9", "latin_1"], + "iso8859_4": ["iso8859_10", "iso8859_2", "iso8859_9", "latin_1"], + "iso8859_7": ["cp1253"], + "iso8859_9": [ + "cp1252", + "cp1254", + "cp1258", + "iso8859_10", + "iso8859_14", + "iso8859_15", + "iso8859_16", + "iso8859_3", + "iso8859_4", + "latin_1", + ], + "kz1048": ["cp1251", "ptcp154"], + "latin_1": [ + "cp1252", + "cp1254", + "cp1258", + "iso8859_10", + "iso8859_14", + "iso8859_15", + "iso8859_16", + "iso8859_3", + "iso8859_4", + "iso8859_9", + ], + "mac_iceland": ["mac_roman", "mac_turkish"], + "mac_roman": ["mac_iceland", "mac_turkish"], + "mac_turkish": ["mac_iceland", "mac_roman"], + "ptcp154": ["cp1251", "kz1048"], + "tis_620": ["iso8859_11"], +} + + +CHARDET_CORRESPONDENCE: Dict[str, str] = { + "iso2022_kr": "ISO-2022-KR", + "iso2022_jp": "ISO-2022-JP", + "euc_kr": "EUC-KR", + "tis_620": "TIS-620", + "utf_32": "UTF-32", + "euc_jp": "EUC-JP", + "koi8_r": "KOI8-R", + "iso8859_1": "ISO-8859-1", + "iso8859_2": "ISO-8859-2", + "iso8859_5": "ISO-8859-5", + "iso8859_6": "ISO-8859-6", + "iso8859_7": "ISO-8859-7", + "iso8859_8": "ISO-8859-8", + "utf_16": "UTF-16", + "cp855": "IBM855", + "mac_cyrillic": "MacCyrillic", + "gb2312": "GB2312", + "gb18030": "GB18030", + "cp932": "CP932", + "cp866": "IBM866", + "utf_8": "utf-8", + "utf_8_sig": "UTF-8-SIG", + "shift_jis": "SHIFT_JIS", + "big5": "Big5", + "cp1250": "windows-1250", + "cp1251": "windows-1251", + "cp1252": "Windows-1252", + "cp1253": "windows-1253", + "cp1255": "windows-1255", + "cp1256": "windows-1256", + "cp1254": "Windows-1254", + "cp949": "CP949", +} + + +COMMON_SAFE_ASCII_CHARACTERS: Set[str] = { + "<", + ">", + "=", + ":", + "/", + "&", + ";", + "{", + "}", + "[", + "]", + ",", + "|", + '"', + "-", +} + + +KO_NAMES: Set[str] = {"johab", "cp949", "euc_kr"} +ZH_NAMES: Set[str] = {"big5", "cp950", "big5hkscs", "hz"} + +NOT_PRINTABLE_PATTERN = re_compile(r"[0-9\W\n\r\t]+") + +LANGUAGE_SUPPORTED_COUNT: int = len(FREQUENCIES) + +# Logging LEVEL bellow DEBUG +TRACE: int = 5 diff --git a/libs/common/charset_normalizer/legacy.py b/libs/common/charset_normalizer/legacy.py new file mode 100644 index 00000000..cdebe2b8 --- /dev/null +++ b/libs/common/charset_normalizer/legacy.py @@ -0,0 +1,95 @@ +import warnings +from typing import Dict, Optional, Union + +from .api import from_bytes, from_fp, from_path, normalize +from .constant import CHARDET_CORRESPONDENCE +from .models import CharsetMatch, CharsetMatches + + +def detect(byte_str: bytes) -> Dict[str, Optional[Union[str, float]]]: + """ + chardet legacy method + Detect the encoding of the given byte string. It should be mostly backward-compatible. + Encoding name will match Chardet own writing whenever possible. (Not on encoding name unsupported by it) + This function is deprecated and should be used to migrate your project easily, consult the documentation for + further information. Not planned for removal. + + :param byte_str: The byte sequence to examine. + """ + if not isinstance(byte_str, (bytearray, bytes)): + raise TypeError( # pragma: nocover + "Expected object of type bytes or bytearray, got: " + "{0}".format(type(byte_str)) + ) + + if isinstance(byte_str, bytearray): + byte_str = bytes(byte_str) + + r = from_bytes(byte_str).best() + + encoding = r.encoding if r is not None else None + language = r.language if r is not None and r.language != "Unknown" else "" + confidence = 1.0 - r.chaos if r is not None else None + + # Note: CharsetNormalizer does not return 'UTF-8-SIG' as the sig get stripped in the detection/normalization process + # but chardet does return 'utf-8-sig' and it is a valid codec name. + if r is not None and encoding == "utf_8" and r.bom: + encoding += "_sig" + + return { + "encoding": encoding + if encoding not in CHARDET_CORRESPONDENCE + else CHARDET_CORRESPONDENCE[encoding], + "language": language, + "confidence": confidence, + } + + +class CharsetNormalizerMatch(CharsetMatch): + pass + + +class CharsetNormalizerMatches(CharsetMatches): + @staticmethod + def from_fp(*args, **kwargs): # type: ignore + warnings.warn( # pragma: nocover + "staticmethod from_fp, from_bytes, from_path and normalize are deprecated " + "and scheduled to be removed in 3.0", + DeprecationWarning, + ) + return from_fp(*args, **kwargs) # pragma: nocover + + @staticmethod + def from_bytes(*args, **kwargs): # type: ignore + warnings.warn( # pragma: nocover + "staticmethod from_fp, from_bytes, from_path and normalize are deprecated " + "and scheduled to be removed in 3.0", + DeprecationWarning, + ) + return from_bytes(*args, **kwargs) # pragma: nocover + + @staticmethod + def from_path(*args, **kwargs): # type: ignore + warnings.warn( # pragma: nocover + "staticmethod from_fp, from_bytes, from_path and normalize are deprecated " + "and scheduled to be removed in 3.0", + DeprecationWarning, + ) + return from_path(*args, **kwargs) # pragma: nocover + + @staticmethod + def normalize(*args, **kwargs): # type: ignore + warnings.warn( # pragma: nocover + "staticmethod from_fp, from_bytes, from_path and normalize are deprecated " + "and scheduled to be removed in 3.0", + DeprecationWarning, + ) + return normalize(*args, **kwargs) # pragma: nocover + + +class CharsetDetector(CharsetNormalizerMatches): + pass + + +class CharsetDoctor(CharsetNormalizerMatches): + pass diff --git a/libs/common/charset_normalizer/md.py b/libs/common/charset_normalizer/md.py new file mode 100644 index 00000000..31808af8 --- /dev/null +++ b/libs/common/charset_normalizer/md.py @@ -0,0 +1,553 @@ +from functools import lru_cache +from typing import List, Optional + +from .constant import COMMON_SAFE_ASCII_CHARACTERS, UNICODE_SECONDARY_RANGE_KEYWORD +from .utils import ( + is_accentuated, + is_ascii, + is_case_variable, + is_cjk, + is_emoticon, + is_hangul, + is_hiragana, + is_katakana, + is_latin, + is_punctuation, + is_separator, + is_symbol, + is_thai, + is_unprintable, + remove_accent, + unicode_range, +) + + +class MessDetectorPlugin: + """ + Base abstract class used for mess detection plugins. + All detectors MUST extend and implement given methods. + """ + + def eligible(self, character: str) -> bool: + """ + Determine if given character should be fed in. + """ + raise NotImplementedError # pragma: nocover + + def feed(self, character: str) -> None: + """ + The main routine to be executed upon character. + Insert the logic in witch the text would be considered chaotic. + """ + raise NotImplementedError # pragma: nocover + + def reset(self) -> None: # pragma: no cover + """ + Permit to reset the plugin to the initial state. + """ + raise NotImplementedError + + @property + def ratio(self) -> float: + """ + Compute the chaos ratio based on what your feed() has seen. + Must NOT be lower than 0.; No restriction gt 0. + """ + raise NotImplementedError # pragma: nocover + + +class TooManySymbolOrPunctuationPlugin(MessDetectorPlugin): + def __init__(self) -> None: + self._punctuation_count: int = 0 + self._symbol_count: int = 0 + self._character_count: int = 0 + + self._last_printable_char: Optional[str] = None + self._frenzy_symbol_in_word: bool = False + + def eligible(self, character: str) -> bool: + return character.isprintable() + + def feed(self, character: str) -> None: + self._character_count += 1 + + if ( + character != self._last_printable_char + and character not in COMMON_SAFE_ASCII_CHARACTERS + ): + if is_punctuation(character): + self._punctuation_count += 1 + elif ( + character.isdigit() is False + and is_symbol(character) + and is_emoticon(character) is False + ): + self._symbol_count += 2 + + self._last_printable_char = character + + def reset(self) -> None: # pragma: no cover + self._punctuation_count = 0 + self._character_count = 0 + self._symbol_count = 0 + + @property + def ratio(self) -> float: + if self._character_count == 0: + return 0.0 + + ratio_of_punctuation: float = ( + self._punctuation_count + self._symbol_count + ) / self._character_count + + return ratio_of_punctuation if ratio_of_punctuation >= 0.3 else 0.0 + + +class TooManyAccentuatedPlugin(MessDetectorPlugin): + def __init__(self) -> None: + self._character_count: int = 0 + self._accentuated_count: int = 0 + + def eligible(self, character: str) -> bool: + return character.isalpha() + + def feed(self, character: str) -> None: + self._character_count += 1 + + if is_accentuated(character): + self._accentuated_count += 1 + + def reset(self) -> None: # pragma: no cover + self._character_count = 0 + self._accentuated_count = 0 + + @property + def ratio(self) -> float: + if self._character_count == 0: + return 0.0 + ratio_of_accentuation: float = self._accentuated_count / self._character_count + return ratio_of_accentuation if ratio_of_accentuation >= 0.35 else 0.0 + + +class UnprintablePlugin(MessDetectorPlugin): + def __init__(self) -> None: + self._unprintable_count: int = 0 + self._character_count: int = 0 + + def eligible(self, character: str) -> bool: + return True + + def feed(self, character: str) -> None: + if is_unprintable(character): + self._unprintable_count += 1 + self._character_count += 1 + + def reset(self) -> None: # pragma: no cover + self._unprintable_count = 0 + + @property + def ratio(self) -> float: + if self._character_count == 0: + return 0.0 + + return (self._unprintable_count * 8) / self._character_count + + +class SuspiciousDuplicateAccentPlugin(MessDetectorPlugin): + def __init__(self) -> None: + self._successive_count: int = 0 + self._character_count: int = 0 + + self._last_latin_character: Optional[str] = None + + def eligible(self, character: str) -> bool: + return character.isalpha() and is_latin(character) + + def feed(self, character: str) -> None: + self._character_count += 1 + if ( + self._last_latin_character is not None + and is_accentuated(character) + and is_accentuated(self._last_latin_character) + ): + if character.isupper() and self._last_latin_character.isupper(): + self._successive_count += 1 + # Worse if its the same char duplicated with different accent. + if remove_accent(character) == remove_accent(self._last_latin_character): + self._successive_count += 1 + self._last_latin_character = character + + def reset(self) -> None: # pragma: no cover + self._successive_count = 0 + self._character_count = 0 + self._last_latin_character = None + + @property + def ratio(self) -> float: + if self._character_count == 0: + return 0.0 + + return (self._successive_count * 2) / self._character_count + + +class SuspiciousRange(MessDetectorPlugin): + def __init__(self) -> None: + self._suspicious_successive_range_count: int = 0 + self._character_count: int = 0 + self._last_printable_seen: Optional[str] = None + + def eligible(self, character: str) -> bool: + return character.isprintable() + + def feed(self, character: str) -> None: + self._character_count += 1 + + if ( + character.isspace() + or is_punctuation(character) + or character in COMMON_SAFE_ASCII_CHARACTERS + ): + self._last_printable_seen = None + return + + if self._last_printable_seen is None: + self._last_printable_seen = character + return + + unicode_range_a: Optional[str] = unicode_range(self._last_printable_seen) + unicode_range_b: Optional[str] = unicode_range(character) + + if is_suspiciously_successive_range(unicode_range_a, unicode_range_b): + self._suspicious_successive_range_count += 1 + + self._last_printable_seen = character + + def reset(self) -> None: # pragma: no cover + self._character_count = 0 + self._suspicious_successive_range_count = 0 + self._last_printable_seen = None + + @property + def ratio(self) -> float: + if self._character_count == 0: + return 0.0 + + ratio_of_suspicious_range_usage: float = ( + self._suspicious_successive_range_count * 2 + ) / self._character_count + + if ratio_of_suspicious_range_usage < 0.1: + return 0.0 + + return ratio_of_suspicious_range_usage + + +class SuperWeirdWordPlugin(MessDetectorPlugin): + def __init__(self) -> None: + self._word_count: int = 0 + self._bad_word_count: int = 0 + self._foreign_long_count: int = 0 + + self._is_current_word_bad: bool = False + self._foreign_long_watch: bool = False + + self._character_count: int = 0 + self._bad_character_count: int = 0 + + self._buffer: str = "" + self._buffer_accent_count: int = 0 + + def eligible(self, character: str) -> bool: + return True + + def feed(self, character: str) -> None: + if character.isalpha(): + self._buffer += character + if is_accentuated(character): + self._buffer_accent_count += 1 + if ( + self._foreign_long_watch is False + and (is_latin(character) is False or is_accentuated(character)) + and is_cjk(character) is False + and is_hangul(character) is False + and is_katakana(character) is False + and is_hiragana(character) is False + and is_thai(character) is False + ): + self._foreign_long_watch = True + return + if not self._buffer: + return + if ( + character.isspace() or is_punctuation(character) or is_separator(character) + ) and self._buffer: + self._word_count += 1 + buffer_length: int = len(self._buffer) + + self._character_count += buffer_length + + if buffer_length >= 4: + if self._buffer_accent_count / buffer_length > 0.34: + self._is_current_word_bad = True + # Word/Buffer ending with a upper case accentuated letter are so rare, + # that we will consider them all as suspicious. Same weight as foreign_long suspicious. + if is_accentuated(self._buffer[-1]) and self._buffer[-1].isupper(): + self._foreign_long_count += 1 + self._is_current_word_bad = True + if buffer_length >= 24 and self._foreign_long_watch: + self._foreign_long_count += 1 + self._is_current_word_bad = True + + if self._is_current_word_bad: + self._bad_word_count += 1 + self._bad_character_count += len(self._buffer) + self._is_current_word_bad = False + + self._foreign_long_watch = False + self._buffer = "" + self._buffer_accent_count = 0 + elif ( + character not in {"<", ">", "-", "=", "~", "|", "_"} + and character.isdigit() is False + and is_symbol(character) + ): + self._is_current_word_bad = True + self._buffer += character + + def reset(self) -> None: # pragma: no cover + self._buffer = "" + self._is_current_word_bad = False + self._foreign_long_watch = False + self._bad_word_count = 0 + self._word_count = 0 + self._character_count = 0 + self._bad_character_count = 0 + self._foreign_long_count = 0 + + @property + def ratio(self) -> float: + if self._word_count <= 10 and self._foreign_long_count == 0: + return 0.0 + + return self._bad_character_count / self._character_count + + +class CjkInvalidStopPlugin(MessDetectorPlugin): + """ + GB(Chinese) based encoding often render the stop incorrectly when the content does not fit and + can be easily detected. Searching for the overuse of '丅' and '丄'. + """ + + def __init__(self) -> None: + self._wrong_stop_count: int = 0 + self._cjk_character_count: int = 0 + + def eligible(self, character: str) -> bool: + return True + + def feed(self, character: str) -> None: + if character in {"丅", "丄"}: + self._wrong_stop_count += 1 + return + if is_cjk(character): + self._cjk_character_count += 1 + + def reset(self) -> None: # pragma: no cover + self._wrong_stop_count = 0 + self._cjk_character_count = 0 + + @property + def ratio(self) -> float: + if self._cjk_character_count < 16: + return 0.0 + return self._wrong_stop_count / self._cjk_character_count + + +class ArchaicUpperLowerPlugin(MessDetectorPlugin): + def __init__(self) -> None: + self._buf: bool = False + + self._character_count_since_last_sep: int = 0 + + self._successive_upper_lower_count: int = 0 + self._successive_upper_lower_count_final: int = 0 + + self._character_count: int = 0 + + self._last_alpha_seen: Optional[str] = None + self._current_ascii_only: bool = True + + def eligible(self, character: str) -> bool: + return True + + def feed(self, character: str) -> None: + is_concerned = character.isalpha() and is_case_variable(character) + chunk_sep = is_concerned is False + + if chunk_sep and self._character_count_since_last_sep > 0: + if ( + self._character_count_since_last_sep <= 64 + and character.isdigit() is False + and self._current_ascii_only is False + ): + self._successive_upper_lower_count_final += ( + self._successive_upper_lower_count + ) + + self._successive_upper_lower_count = 0 + self._character_count_since_last_sep = 0 + self._last_alpha_seen = None + self._buf = False + self._character_count += 1 + self._current_ascii_only = True + + return + + if self._current_ascii_only is True and is_ascii(character) is False: + self._current_ascii_only = False + + if self._last_alpha_seen is not None: + if (character.isupper() and self._last_alpha_seen.islower()) or ( + character.islower() and self._last_alpha_seen.isupper() + ): + if self._buf is True: + self._successive_upper_lower_count += 2 + self._buf = False + else: + self._buf = True + else: + self._buf = False + + self._character_count += 1 + self._character_count_since_last_sep += 1 + self._last_alpha_seen = character + + def reset(self) -> None: # pragma: no cover + self._character_count = 0 + self._character_count_since_last_sep = 0 + self._successive_upper_lower_count = 0 + self._successive_upper_lower_count_final = 0 + self._last_alpha_seen = None + self._buf = False + self._current_ascii_only = True + + @property + def ratio(self) -> float: + if self._character_count == 0: + return 0.0 + + return self._successive_upper_lower_count_final / self._character_count + + +@lru_cache(maxsize=1024) +def is_suspiciously_successive_range( + unicode_range_a: Optional[str], unicode_range_b: Optional[str] +) -> bool: + """ + Determine if two Unicode range seen next to each other can be considered as suspicious. + """ + if unicode_range_a is None or unicode_range_b is None: + return True + + if unicode_range_a == unicode_range_b: + return False + + if "Latin" in unicode_range_a and "Latin" in unicode_range_b: + return False + + if "Emoticons" in unicode_range_a or "Emoticons" in unicode_range_b: + return False + + # Latin characters can be accompanied with a combining diacritical mark + # eg. Vietnamese. + if ("Latin" in unicode_range_a or "Latin" in unicode_range_b) and ( + "Combining" in unicode_range_a or "Combining" in unicode_range_b + ): + return False + + keywords_range_a, keywords_range_b = unicode_range_a.split( + " " + ), unicode_range_b.split(" ") + + for el in keywords_range_a: + if el in UNICODE_SECONDARY_RANGE_KEYWORD: + continue + if el in keywords_range_b: + return False + + # Japanese Exception + range_a_jp_chars, range_b_jp_chars = ( + unicode_range_a + in ( + "Hiragana", + "Katakana", + ), + unicode_range_b in ("Hiragana", "Katakana"), + ) + if (range_a_jp_chars or range_b_jp_chars) and ( + "CJK" in unicode_range_a or "CJK" in unicode_range_b + ): + return False + if range_a_jp_chars and range_b_jp_chars: + return False + + if "Hangul" in unicode_range_a or "Hangul" in unicode_range_b: + if "CJK" in unicode_range_a or "CJK" in unicode_range_b: + return False + if unicode_range_a == "Basic Latin" or unicode_range_b == "Basic Latin": + return False + + # Chinese/Japanese use dedicated range for punctuation and/or separators. + if ("CJK" in unicode_range_a or "CJK" in unicode_range_b) or ( + unicode_range_a in ["Katakana", "Hiragana"] + and unicode_range_b in ["Katakana", "Hiragana"] + ): + if "Punctuation" in unicode_range_a or "Punctuation" in unicode_range_b: + return False + if "Forms" in unicode_range_a or "Forms" in unicode_range_b: + return False + + return True + + +@lru_cache(maxsize=2048) +def mess_ratio( + decoded_sequence: str, maximum_threshold: float = 0.2, debug: bool = False +) -> float: + """ + Compute a mess ratio given a decoded bytes sequence. The maximum threshold does stop the computation earlier. + """ + + detectors: List[MessDetectorPlugin] = [ + md_class() for md_class in MessDetectorPlugin.__subclasses__() + ] + + length: int = len(decoded_sequence) + 1 + + mean_mess_ratio: float = 0.0 + + if length < 512: + intermediary_mean_mess_ratio_calc: int = 32 + elif length <= 1024: + intermediary_mean_mess_ratio_calc = 64 + else: + intermediary_mean_mess_ratio_calc = 128 + + for character, index in zip(decoded_sequence + "\n", range(length)): + for detector in detectors: + if detector.eligible(character): + detector.feed(character) + + if ( + index > 0 and index % intermediary_mean_mess_ratio_calc == 0 + ) or index == length - 1: + mean_mess_ratio = sum(dt.ratio for dt in detectors) + + if mean_mess_ratio >= maximum_threshold: + break + + if debug: + for dt in detectors: # pragma: nocover + print(dt.__class__, dt.ratio) + + return round(mean_mess_ratio, 3) diff --git a/libs/common/charset_normalizer/models.py b/libs/common/charset_normalizer/models.py new file mode 100644 index 00000000..ccb0d475 --- /dev/null +++ b/libs/common/charset_normalizer/models.py @@ -0,0 +1,401 @@ +import warnings +from collections import Counter +from encodings.aliases import aliases +from hashlib import sha256 +from json import dumps +from re import sub +from typing import ( + Any, + Counter as TypeCounter, + Dict, + Iterator, + List, + Optional, + Tuple, + Union, +) + +from .constant import NOT_PRINTABLE_PATTERN, TOO_BIG_SEQUENCE +from .md import mess_ratio +from .utils import iana_name, is_multi_byte_encoding, unicode_range + + +class CharsetMatch: + def __init__( + self, + payload: bytes, + guessed_encoding: str, + mean_mess_ratio: float, + has_sig_or_bom: bool, + languages: "CoherenceMatches", + decoded_payload: Optional[str] = None, + ): + self._payload: bytes = payload + + self._encoding: str = guessed_encoding + self._mean_mess_ratio: float = mean_mess_ratio + self._languages: CoherenceMatches = languages + self._has_sig_or_bom: bool = has_sig_or_bom + self._unicode_ranges: Optional[List[str]] = None + + self._leaves: List[CharsetMatch] = [] + self._mean_coherence_ratio: float = 0.0 + + self._output_payload: Optional[bytes] = None + self._output_encoding: Optional[str] = None + + self._string: Optional[str] = decoded_payload + + def __eq__(self, other: object) -> bool: + if not isinstance(other, CharsetMatch): + raise TypeError( + "__eq__ cannot be invoked on {} and {}.".format( + str(other.__class__), str(self.__class__) + ) + ) + return self.encoding == other.encoding and self.fingerprint == other.fingerprint + + def __lt__(self, other: object) -> bool: + """ + Implemented to make sorted available upon CharsetMatches items. + """ + if not isinstance(other, CharsetMatch): + raise ValueError + + chaos_difference: float = abs(self.chaos - other.chaos) + coherence_difference: float = abs(self.coherence - other.coherence) + + # Bellow 1% difference --> Use Coherence + if chaos_difference < 0.01 and coherence_difference > 0.02: + # When having a tough decision, use the result that decoded as many multi-byte as possible. + if chaos_difference == 0.0 and self.coherence == other.coherence: + return self.multi_byte_usage > other.multi_byte_usage + return self.coherence > other.coherence + + return self.chaos < other.chaos + + @property + def multi_byte_usage(self) -> float: + return 1.0 - len(str(self)) / len(self.raw) + + @property + def chaos_secondary_pass(self) -> float: + """ + Check once again chaos in decoded text, except this time, with full content. + Use with caution, this can be very slow. + Notice: Will be removed in 3.0 + """ + warnings.warn( + "chaos_secondary_pass is deprecated and will be removed in 3.0", + DeprecationWarning, + ) + return mess_ratio(str(self), 1.0) + + @property + def coherence_non_latin(self) -> float: + """ + Coherence ratio on the first non-latin language detected if ANY. + Notice: Will be removed in 3.0 + """ + warnings.warn( + "coherence_non_latin is deprecated and will be removed in 3.0", + DeprecationWarning, + ) + return 0.0 + + @property + def w_counter(self) -> TypeCounter[str]: + """ + Word counter instance on decoded text. + Notice: Will be removed in 3.0 + """ + warnings.warn( + "w_counter is deprecated and will be removed in 3.0", DeprecationWarning + ) + + string_printable_only = sub(NOT_PRINTABLE_PATTERN, " ", str(self).lower()) + + return Counter(string_printable_only.split()) + + def __str__(self) -> str: + # Lazy Str Loading + if self._string is None: + self._string = str(self._payload, self._encoding, "strict") + return self._string + + def __repr__(self) -> str: + return "".format(self.encoding, self.fingerprint) + + def add_submatch(self, other: "CharsetMatch") -> None: + if not isinstance(other, CharsetMatch) or other == self: + raise ValueError( + "Unable to add instance <{}> as a submatch of a CharsetMatch".format( + other.__class__ + ) + ) + + other._string = None # Unload RAM usage; dirty trick. + self._leaves.append(other) + + @property + def encoding(self) -> str: + return self._encoding + + @property + def encoding_aliases(self) -> List[str]: + """ + Encoding name are known by many name, using this could help when searching for IBM855 when it's listed as CP855. + """ + also_known_as: List[str] = [] + for u, p in aliases.items(): + if self.encoding == u: + also_known_as.append(p) + elif self.encoding == p: + also_known_as.append(u) + return also_known_as + + @property + def bom(self) -> bool: + return self._has_sig_or_bom + + @property + def byte_order_mark(self) -> bool: + return self._has_sig_or_bom + + @property + def languages(self) -> List[str]: + """ + Return the complete list of possible languages found in decoded sequence. + Usually not really useful. Returned list may be empty even if 'language' property return something != 'Unknown'. + """ + return [e[0] for e in self._languages] + + @property + def language(self) -> str: + """ + Most probable language found in decoded sequence. If none were detected or inferred, the property will return + "Unknown". + """ + if not self._languages: + # Trying to infer the language based on the given encoding + # Its either English or we should not pronounce ourselves in certain cases. + if "ascii" in self.could_be_from_charset: + return "English" + + # doing it there to avoid circular import + from charset_normalizer.cd import encoding_languages, mb_encoding_languages + + languages = ( + mb_encoding_languages(self.encoding) + if is_multi_byte_encoding(self.encoding) + else encoding_languages(self.encoding) + ) + + if len(languages) == 0 or "Latin Based" in languages: + return "Unknown" + + return languages[0] + + return self._languages[0][0] + + @property + def chaos(self) -> float: + return self._mean_mess_ratio + + @property + def coherence(self) -> float: + if not self._languages: + return 0.0 + return self._languages[0][1] + + @property + def percent_chaos(self) -> float: + return round(self.chaos * 100, ndigits=3) + + @property + def percent_coherence(self) -> float: + return round(self.coherence * 100, ndigits=3) + + @property + def raw(self) -> bytes: + """ + Original untouched bytes. + """ + return self._payload + + @property + def submatch(self) -> List["CharsetMatch"]: + return self._leaves + + @property + def has_submatch(self) -> bool: + return len(self._leaves) > 0 + + @property + def alphabets(self) -> List[str]: + if self._unicode_ranges is not None: + return self._unicode_ranges + # list detected ranges + detected_ranges: List[Optional[str]] = [ + unicode_range(char) for char in str(self) + ] + # filter and sort + self._unicode_ranges = sorted(list({r for r in detected_ranges if r})) + return self._unicode_ranges + + @property + def could_be_from_charset(self) -> List[str]: + """ + The complete list of encoding that output the exact SAME str result and therefore could be the originating + encoding. + This list does include the encoding available in property 'encoding'. + """ + return [self._encoding] + [m.encoding for m in self._leaves] + + def first(self) -> "CharsetMatch": + """ + Kept for BC reasons. Will be removed in 3.0. + """ + return self + + def best(self) -> "CharsetMatch": + """ + Kept for BC reasons. Will be removed in 3.0. + """ + return self + + def output(self, encoding: str = "utf_8") -> bytes: + """ + Method to get re-encoded bytes payload using given target encoding. Default to UTF-8. + Any errors will be simply ignored by the encoder NOT replaced. + """ + if self._output_encoding is None or self._output_encoding != encoding: + self._output_encoding = encoding + self._output_payload = str(self).encode(encoding, "replace") + + return self._output_payload # type: ignore + + @property + def fingerprint(self) -> str: + """ + Retrieve the unique SHA256 computed using the transformed (re-encoded) payload. Not the original one. + """ + return sha256(self.output()).hexdigest() + + +class CharsetMatches: + """ + Container with every CharsetMatch items ordered by default from most probable to the less one. + Act like a list(iterable) but does not implements all related methods. + """ + + def __init__(self, results: Optional[List[CharsetMatch]] = None): + self._results: List[CharsetMatch] = sorted(results) if results else [] + + def __iter__(self) -> Iterator[CharsetMatch]: + yield from self._results + + def __getitem__(self, item: Union[int, str]) -> CharsetMatch: + """ + Retrieve a single item either by its position or encoding name (alias may be used here). + Raise KeyError upon invalid index or encoding not present in results. + """ + if isinstance(item, int): + return self._results[item] + if isinstance(item, str): + item = iana_name(item, False) + for result in self._results: + if item in result.could_be_from_charset: + return result + raise KeyError + + def __len__(self) -> int: + return len(self._results) + + def __bool__(self) -> bool: + return len(self._results) > 0 + + def append(self, item: CharsetMatch) -> None: + """ + Insert a single match. Will be inserted accordingly to preserve sort. + Can be inserted as a submatch. + """ + if not isinstance(item, CharsetMatch): + raise ValueError( + "Cannot append instance '{}' to CharsetMatches".format( + str(item.__class__) + ) + ) + # We should disable the submatch factoring when the input file is too heavy (conserve RAM usage) + if len(item.raw) <= TOO_BIG_SEQUENCE: + for match in self._results: + if match.fingerprint == item.fingerprint and match.chaos == item.chaos: + match.add_submatch(item) + return + self._results.append(item) + self._results = sorted(self._results) + + def best(self) -> Optional["CharsetMatch"]: + """ + Simply return the first match. Strict equivalent to matches[0]. + """ + if not self._results: + return None + return self._results[0] + + def first(self) -> Optional["CharsetMatch"]: + """ + Redundant method, call the method best(). Kept for BC reasons. + """ + return self.best() + + +CoherenceMatch = Tuple[str, float] +CoherenceMatches = List[CoherenceMatch] + + +class CliDetectionResult: + def __init__( + self, + path: str, + encoding: Optional[str], + encoding_aliases: List[str], + alternative_encodings: List[str], + language: str, + alphabets: List[str], + has_sig_or_bom: bool, + chaos: float, + coherence: float, + unicode_path: Optional[str], + is_preferred: bool, + ): + self.path: str = path + self.unicode_path: Optional[str] = unicode_path + self.encoding: Optional[str] = encoding + self.encoding_aliases: List[str] = encoding_aliases + self.alternative_encodings: List[str] = alternative_encodings + self.language: str = language + self.alphabets: List[str] = alphabets + self.has_sig_or_bom: bool = has_sig_or_bom + self.chaos: float = chaos + self.coherence: float = coherence + self.is_preferred: bool = is_preferred + + @property + def __dict__(self) -> Dict[str, Any]: # type: ignore + return { + "path": self.path, + "encoding": self.encoding, + "encoding_aliases": self.encoding_aliases, + "alternative_encodings": self.alternative_encodings, + "language": self.language, + "alphabets": self.alphabets, + "has_sig_or_bom": self.has_sig_or_bom, + "chaos": self.chaos, + "coherence": self.coherence, + "unicode_path": self.unicode_path, + "is_preferred": self.is_preferred, + } + + def to_json(self) -> str: + return dumps(self.__dict__, ensure_ascii=True, indent=4) diff --git a/libs/common/charset_normalizer/py.typed b/libs/common/charset_normalizer/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/libs/common/charset_normalizer/utils.py b/libs/common/charset_normalizer/utils.py new file mode 100644 index 00000000..859f212b --- /dev/null +++ b/libs/common/charset_normalizer/utils.py @@ -0,0 +1,424 @@ +try: + # WARNING: unicodedata2 support is going to be removed in 3.0 + # Python is quickly catching up. + import unicodedata2 as unicodedata +except ImportError: + import unicodedata # type: ignore[no-redef] + +import importlib +import logging +from codecs import IncrementalDecoder +from encodings.aliases import aliases +from functools import lru_cache +from re import findall +from typing import Generator, List, Optional, Set, Tuple, Union + +from _multibytecodec import MultibyteIncrementalDecoder + +from .constant import ( + ENCODING_MARKS, + IANA_SUPPORTED_SIMILAR, + RE_POSSIBLE_ENCODING_INDICATION, + UNICODE_RANGES_COMBINED, + UNICODE_SECONDARY_RANGE_KEYWORD, + UTF8_MAXIMAL_ALLOCATION, +) + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_accentuated(character: str) -> bool: + try: + description: str = unicodedata.name(character) + except ValueError: + return False + return ( + "WITH GRAVE" in description + or "WITH ACUTE" in description + or "WITH CEDILLA" in description + or "WITH DIAERESIS" in description + or "WITH CIRCUMFLEX" in description + or "WITH TILDE" in description + ) + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def remove_accent(character: str) -> str: + decomposed: str = unicodedata.decomposition(character) + if not decomposed: + return character + + codes: List[str] = decomposed.split(" ") + + return chr(int(codes[0], 16)) + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def unicode_range(character: str) -> Optional[str]: + """ + Retrieve the Unicode range official name from a single character. + """ + character_ord: int = ord(character) + + for range_name, ord_range in UNICODE_RANGES_COMBINED.items(): + if character_ord in ord_range: + return range_name + + return None + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_latin(character: str) -> bool: + try: + description: str = unicodedata.name(character) + except ValueError: + return False + return "LATIN" in description + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_ascii(character: str) -> bool: + try: + character.encode("ascii") + except UnicodeEncodeError: + return False + return True + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_punctuation(character: str) -> bool: + character_category: str = unicodedata.category(character) + + if "P" in character_category: + return True + + character_range: Optional[str] = unicode_range(character) + + if character_range is None: + return False + + return "Punctuation" in character_range + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_symbol(character: str) -> bool: + character_category: str = unicodedata.category(character) + + if "S" in character_category or "N" in character_category: + return True + + character_range: Optional[str] = unicode_range(character) + + if character_range is None: + return False + + return "Forms" in character_range + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_emoticon(character: str) -> bool: + character_range: Optional[str] = unicode_range(character) + + if character_range is None: + return False + + return "Emoticons" in character_range + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_separator(character: str) -> bool: + if character.isspace() or character in {"|", "+", ",", ";", "<", ">"}: + return True + + character_category: str = unicodedata.category(character) + + return "Z" in character_category + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_case_variable(character: str) -> bool: + return character.islower() != character.isupper() + + +def is_private_use_only(character: str) -> bool: + character_category: str = unicodedata.category(character) + + return character_category == "Co" + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_cjk(character: str) -> bool: + try: + character_name = unicodedata.name(character) + except ValueError: + return False + + return "CJK" in character_name + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_hiragana(character: str) -> bool: + try: + character_name = unicodedata.name(character) + except ValueError: + return False + + return "HIRAGANA" in character_name + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_katakana(character: str) -> bool: + try: + character_name = unicodedata.name(character) + except ValueError: + return False + + return "KATAKANA" in character_name + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_hangul(character: str) -> bool: + try: + character_name = unicodedata.name(character) + except ValueError: + return False + + return "HANGUL" in character_name + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_thai(character: str) -> bool: + try: + character_name = unicodedata.name(character) + except ValueError: + return False + + return "THAI" in character_name + + +@lru_cache(maxsize=len(UNICODE_RANGES_COMBINED)) +def is_unicode_range_secondary(range_name: str) -> bool: + return any(keyword in range_name for keyword in UNICODE_SECONDARY_RANGE_KEYWORD) + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_unprintable(character: str) -> bool: + return ( + character.isspace() is False # includes \n \t \r \v + and character.isprintable() is False + and character != "\x1A" # Why? Its the ASCII substitute character. + and character != "\ufeff" # bug discovered in Python, + # Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space. + ) + + +def any_specified_encoding(sequence: bytes, search_zone: int = 4096) -> Optional[str]: + """ + Extract using ASCII-only decoder any specified encoding in the first n-bytes. + """ + if not isinstance(sequence, bytes): + raise TypeError + + seq_len: int = len(sequence) + + results: List[str] = findall( + RE_POSSIBLE_ENCODING_INDICATION, + sequence[: min(seq_len, search_zone)].decode("ascii", errors="ignore"), + ) + + if len(results) == 0: + return None + + for specified_encoding in results: + specified_encoding = specified_encoding.lower().replace("-", "_") + + encoding_alias: str + encoding_iana: str + + for encoding_alias, encoding_iana in aliases.items(): + if encoding_alias == specified_encoding: + return encoding_iana + if encoding_iana == specified_encoding: + return encoding_iana + + return None + + +@lru_cache(maxsize=128) +def is_multi_byte_encoding(name: str) -> bool: + """ + Verify is a specific encoding is a multi byte one based on it IANA name + """ + return name in { + "utf_8", + "utf_8_sig", + "utf_16", + "utf_16_be", + "utf_16_le", + "utf_32", + "utf_32_le", + "utf_32_be", + "utf_7", + } or issubclass( + importlib.import_module("encodings.{}".format(name)).IncrementalDecoder, + MultibyteIncrementalDecoder, + ) + + +def identify_sig_or_bom(sequence: bytes) -> Tuple[Optional[str], bytes]: + """ + Identify and extract SIG/BOM in given sequence. + """ + + for iana_encoding in ENCODING_MARKS: + marks: Union[bytes, List[bytes]] = ENCODING_MARKS[iana_encoding] + + if isinstance(marks, bytes): + marks = [marks] + + for mark in marks: + if sequence.startswith(mark): + return iana_encoding, mark + + return None, b"" + + +def should_strip_sig_or_bom(iana_encoding: str) -> bool: + return iana_encoding not in {"utf_16", "utf_32"} + + +def iana_name(cp_name: str, strict: bool = True) -> str: + cp_name = cp_name.lower().replace("-", "_") + + encoding_alias: str + encoding_iana: str + + for encoding_alias, encoding_iana in aliases.items(): + if cp_name in [encoding_alias, encoding_iana]: + return encoding_iana + + if strict: + raise ValueError("Unable to retrieve IANA for '{}'".format(cp_name)) + + return cp_name + + +def range_scan(decoded_sequence: str) -> List[str]: + ranges: Set[str] = set() + + for character in decoded_sequence: + character_range: Optional[str] = unicode_range(character) + + if character_range is None: + continue + + ranges.add(character_range) + + return list(ranges) + + +def cp_similarity(iana_name_a: str, iana_name_b: str) -> float: + + if is_multi_byte_encoding(iana_name_a) or is_multi_byte_encoding(iana_name_b): + return 0.0 + + decoder_a = importlib.import_module( + "encodings.{}".format(iana_name_a) + ).IncrementalDecoder + decoder_b = importlib.import_module( + "encodings.{}".format(iana_name_b) + ).IncrementalDecoder + + id_a: IncrementalDecoder = decoder_a(errors="ignore") + id_b: IncrementalDecoder = decoder_b(errors="ignore") + + character_match_count: int = 0 + + for i in range(255): + to_be_decoded: bytes = bytes([i]) + if id_a.decode(to_be_decoded) == id_b.decode(to_be_decoded): + character_match_count += 1 + + return character_match_count / 254 + + +def is_cp_similar(iana_name_a: str, iana_name_b: str) -> bool: + """ + Determine if two code page are at least 80% similar. IANA_SUPPORTED_SIMILAR dict was generated using + the function cp_similarity. + """ + return ( + iana_name_a in IANA_SUPPORTED_SIMILAR + and iana_name_b in IANA_SUPPORTED_SIMILAR[iana_name_a] + ) + + +def set_logging_handler( + name: str = "charset_normalizer", + level: int = logging.INFO, + format_string: str = "%(asctime)s | %(levelname)s | %(message)s", +) -> None: + + logger = logging.getLogger(name) + logger.setLevel(level) + + handler = logging.StreamHandler() + handler.setFormatter(logging.Formatter(format_string)) + logger.addHandler(handler) + + +def cut_sequence_chunks( + sequences: bytes, + encoding_iana: str, + offsets: range, + chunk_size: int, + bom_or_sig_available: bool, + strip_sig_or_bom: bool, + sig_payload: bytes, + is_multi_byte_decoder: bool, + decoded_payload: Optional[str] = None, +) -> Generator[str, None, None]: + + if decoded_payload and is_multi_byte_decoder is False: + for i in offsets: + chunk = decoded_payload[i : i + chunk_size] + if not chunk: + break + yield chunk + else: + for i in offsets: + chunk_end = i + chunk_size + if chunk_end > len(sequences) + 8: + continue + + cut_sequence = sequences[i : i + chunk_size] + + if bom_or_sig_available and strip_sig_or_bom is False: + cut_sequence = sig_payload + cut_sequence + + chunk = cut_sequence.decode( + encoding_iana, + errors="ignore" if is_multi_byte_decoder else "strict", + ) + + # multi-byte bad cutting detector and adjustment + # not the cleanest way to perform that fix but clever enough for now. + if is_multi_byte_decoder and i > 0 and sequences[i] >= 0x80: + + chunk_partial_size_chk: int = min(chunk_size, 16) + + if ( + decoded_payload + and chunk[:chunk_partial_size_chk] not in decoded_payload + ): + for j in range(i, i - 4, -1): + cut_sequence = sequences[j:chunk_end] + + if bom_or_sig_available and strip_sig_or_bom is False: + cut_sequence = sig_payload + cut_sequence + + chunk = cut_sequence.decode(encoding_iana, errors="ignore") + + if chunk[:chunk_partial_size_chk] in decoded_payload: + break + + yield chunk diff --git a/libs/common/charset_normalizer/version.py b/libs/common/charset_normalizer/version.py new file mode 100644 index 00000000..64c0dbde --- /dev/null +++ b/libs/common/charset_normalizer/version.py @@ -0,0 +1,6 @@ +""" +Expose version +""" + +__version__ = "2.1.1" +VERSION = __version__.split(".") diff --git a/libs/common/confuse/__init__.py b/libs/common/confuse/__init__.py index be5090c4..0e7bfd32 100644 --- a/libs/common/confuse/__init__.py +++ b/libs/common/confuse/__init__.py @@ -1,9 +1,7 @@ """Painless YAML configuration. """ -from __future__ import division, absolute_import, print_function - -__version__ = '1.7.0' +__version__ = '2.0.0' from .exceptions import * # NOQA from .util import * # NOQA diff --git a/libs/common/confuse/core.py b/libs/common/confuse/core.py index 6c6c4b09..7d00d9c1 100644 --- a/libs/common/confuse/core.py +++ b/libs/common/confuse/core.py @@ -15,8 +15,6 @@ """Worry-free YAML configuration files. """ -from __future__ import division, absolute_import, print_function - import errno import os import yaml @@ -165,23 +163,10 @@ class ConfigView(object): def __str__(self): """Get the value for this view as a bytestring. """ - if util.PY3: - return self.__unicode__() - else: - return bytes(self.get()) - - def __unicode__(self): - """Get the value for this view as a Unicode string. - """ - return util.STRING(self.get()) - - def __nonzero__(self): - """Gets the value for this view as a boolean. (Python 2 only.) - """ - return self.__bool__() + return str(self.get()) def __bool__(self): - """Gets the value for this view as a boolean. (Python 3 only.) + """Gets the value for this view as a bool. """ return bool(self.get()) @@ -441,7 +426,7 @@ class Subview(ConfigView): self.name += u'#{0}'.format(self.key) elif isinstance(self.key, bytes): self.name += self.key.decode('utf-8') - elif isinstance(self.key, util.STRING): + elif isinstance(self.key, str): self.name += self.key else: self.name += repr(self.key) diff --git a/libs/common/confuse/exceptions.py b/libs/common/confuse/exceptions.py index 782260ea..c277dd0b 100644 --- a/libs/common/confuse/exceptions.py +++ b/libs/common/confuse/exceptions.py @@ -1,5 +1,3 @@ -from __future__ import division, absolute_import, print_function - import yaml __all__ = [ diff --git a/libs/common/confuse/sources.py b/libs/common/confuse/sources.py index 2b0f53ba..ed0c5f49 100644 --- a/libs/common/confuse/sources.py +++ b/libs/common/confuse/sources.py @@ -1,6 +1,4 @@ -from __future__ import division, absolute_import, print_function - -from .util import BASESTRING, build_dict +from .util import build_dict from . import yaml_util import os @@ -27,8 +25,7 @@ class ConfigSource(dict): behavior. """ super(ConfigSource, self).__init__(value) - if (filename is not None - and not isinstance(filename, BASESTRING)): + if filename is not None and not isinstance(filename, str): raise TypeError(u'filename must be a string or None') self.filename = filename self.default = default diff --git a/libs/common/confuse/templates.py b/libs/common/confuse/templates.py index 1b56b28a..a46b57a7 100644 --- a/libs/common/confuse/templates.py +++ b/libs/common/confuse/templates.py @@ -1,29 +1,12 @@ -from __future__ import division, absolute_import, print_function - import os import re -import sys +import enum +import pathlib +from collections import abc from . import util from . import exceptions -try: - import enum - SUPPORTS_ENUM = True -except ImportError: - SUPPORTS_ENUM = False - -try: - import pathlib - SUPPORTS_PATHLIB = True -except ImportError: - SUPPORTS_PATHLIB = False - -if sys.version_info >= (3, 3): - from collections import abc -else: - import collections as abc - REQUIRED = object() """A sentinel indicating that there is no default value and an exception @@ -130,7 +113,7 @@ class Number(Template): def convert(self, value, view): """Check that the value is an int or a float. """ - if isinstance(value, util.NUMERIC_TYPES): + if isinstance(value, (int, float)): return value else: self.fail( @@ -243,7 +226,7 @@ class String(Template): def convert(self, value, view): """Check that the value is a string and matches the pattern. """ - if not isinstance(value, util.BASESTRING): + if not isinstance(value, str): self.fail(u'must be a string', view, True) if self.pattern and not self.regex.match(value): @@ -278,7 +261,7 @@ class Choice(Template): """Ensure that the value is among the choices (and remap if the choices are a mapping). """ - if (SUPPORTS_ENUM and isinstance(self.choices, type) + if (isinstance(self.choices, type) and issubclass(self.choices, enum.Enum)): try: return self.choices(value) @@ -385,7 +368,7 @@ class StrSeq(Template): self.split = split def _convert_value(self, x, view): - if isinstance(x, util.STRING): + if isinstance(x, str): return x elif isinstance(x, bytes): return x.decode('utf-8', 'ignore') @@ -396,7 +379,7 @@ class StrSeq(Template): if isinstance(value, bytes): value = value.decode('utf-8', 'ignore') - if isinstance(value, util.STRING): + if isinstance(value, str): if self.split: value = value.split() else: @@ -566,13 +549,13 @@ class Filename(Template): except exceptions.NotFoundError: return self.get_default_value(view.name) - if not isinstance(path, util.BASESTRING): + if not isinstance(path, str): self.fail( u'must be a filename, not {0}'.format(type(path).__name__), view, True ) - path = os.path.expanduser(util.STRING(path)) + path = os.path.expanduser(str(path)) if not os.path.isabs(path): if self.cwd is not None: @@ -602,9 +585,6 @@ class Path(Filename): Filenames are parsed equivalent to the `Filename` template and then converted to `pathlib.Path` objects. - - For Python 2 it returns the original path as returned by the `Filename` - template. """ def value(self, view, template=None): value = super(Path, self).value(view, template) @@ -709,15 +689,14 @@ def as_template(value): return Integer() elif isinstance(value, int): return Integer(value) - elif isinstance(value, type) and issubclass(value, util.BASESTRING): + elif isinstance(value, type) and issubclass(value, str): return String() - elif isinstance(value, util.BASESTRING): + elif isinstance(value, str): return String(value) elif isinstance(value, set): # convert to list to avoid hash related problems return Choice(list(value)) - elif (SUPPORTS_ENUM and isinstance(value, type) - and issubclass(value, enum.Enum)): + elif isinstance(value, type) and issubclass(value, enum.Enum): return Choice(value) elif isinstance(value, list): return OneOf(value) @@ -725,7 +704,7 @@ def as_template(value): return Number() elif isinstance(value, float): return Number(value) - elif SUPPORTS_PATHLIB and isinstance(value, pathlib.PurePath): + elif isinstance(value, pathlib.PurePath): return Path(value) elif value is None: return Template(None) diff --git a/libs/common/confuse/util.py b/libs/common/confuse/util.py index 70bd4569..c27e1618 100644 --- a/libs/common/confuse/util.py +++ b/libs/common/confuse/util.py @@ -1,5 +1,3 @@ -from __future__ import division, absolute_import, print_function - import os import sys import argparse @@ -8,12 +6,6 @@ import platform import pkgutil -PY3 = sys.version_info[0] == 3 -STRING = str if PY3 else unicode # noqa: F821 -BASESTRING = str if PY3 else basestring # noqa: F821 -NUMERIC_TYPES = (int, float) if PY3 else (int, float, long) # noqa: F821 - - UNIX_DIR_FALLBACK = '~/.config' WINDOWS_DIR_VAR = 'APPDATA' WINDOWS_DIR_FALLBACK = '~\\AppData\\Roaming' @@ -75,7 +67,7 @@ def build_dict(obj, sep='', keep_none=False): return obj # Get keys iterator - keys = obj.keys() if PY3 else obj.iterkeys() + keys = obj.keys() if sep: # Splitting keys by `sep` needs sorted keys to prevent parents # from clobbering children diff --git a/libs/common/confuse/yaml_util.py b/libs/common/confuse/yaml_util.py index 2cb4b529..de12d459 100644 --- a/libs/common/confuse/yaml_util.py +++ b/libs/common/confuse/yaml_util.py @@ -1,9 +1,6 @@ -from __future__ import division, absolute_import, print_function - from collections import OrderedDict import yaml from .exceptions import ConfigReadError -from .util import BASESTRING # YAML loading. @@ -119,7 +116,7 @@ def parse_as_scalar(value, loader=Loader): - The empty string '' will return None """ # We only deal with strings - if not isinstance(value, BASESTRING): + if not isinstance(value, str): return value try: loader = loader('') diff --git a/libs/common/decorator.py b/libs/common/decorator.py index b1f8b567..2479b6f7 100644 --- a/libs/common/decorator.py +++ b/libs/common/decorator.py @@ -1,6 +1,6 @@ # ######################### LICENSE ############################ # -# Copyright (c) 2005-2018, Michele Simionato +# Copyright (c) 2005-2021, Michele Simionato # All rights reserved. # Redistribution and use in source and binary forms, with or without @@ -28,55 +28,26 @@ # DAMAGE. """ -Decorator module, see http://pypi.python.org/pypi/decorator +Decorator module, see +https://github.com/micheles/decorator/blob/master/docs/documentation.md for the documentation. """ -from __future__ import print_function - import re import sys import inspect import operator import itertools -import collections - -__version__ = '4.4.2' - -if sys.version_info >= (3,): - from inspect import getfullargspec - - def get_init(cls): - return cls.__init__ -else: - FullArgSpec = collections.namedtuple( - 'FullArgSpec', 'args varargs varkw defaults ' - 'kwonlyargs kwonlydefaults annotations') - - def getfullargspec(f): - "A quick and dirty replacement for getfullargspec for Python 2.X" - return FullArgSpec._make(inspect.getargspec(f) + ([], None, {})) - - def get_init(cls): - return cls.__init__.__func__ - -try: - iscoroutinefunction = inspect.iscoroutinefunction -except AttributeError: - # let's assume there are no coroutine functions in old Python - def iscoroutinefunction(f): - return False -try: - from inspect import isgeneratorfunction -except ImportError: - # assume no generator function in old Python versions - def isgeneratorfunction(caller): - return False +from contextlib import _GeneratorContextManager +from inspect import getfullargspec, iscoroutinefunction, isgeneratorfunction +__version__ = '5.1.1' DEF = re.compile(r'\s*def\s*([_\w][_\w\d]*)\s*\(') +POS = inspect.Parameter.POSITIONAL_OR_KEYWORD +EMPTY = inspect.Parameter.empty -# basic functionality +# this is not used anymore in the core, but kept for backward compatibility class FunctionMaker(object): """ An object with the ability to create functions with a given signature. @@ -100,7 +71,7 @@ class FunctionMaker(object): self.name = '_lambda_' self.doc = func.__doc__ self.module = func.__module__ - if inspect.isfunction(func): + if inspect.isroutine(func): argspec = getfullargspec(func) self.annotations = getattr(func, '__annotations__', {}) for a in ('args', 'varargs', 'varkw', 'defaults', 'kwonlyargs', @@ -143,7 +114,9 @@ class FunctionMaker(object): raise TypeError('You are decorating a non function: %s' % func) def update(self, func, **kw): - "Update the signature of func with the data in self" + """ + Update the signature of func with the data in self + """ func.__name__ = self.name func.__doc__ = getattr(self, 'doc', None) func.__dict__ = getattr(self, 'dict', {}) @@ -160,7 +133,9 @@ class FunctionMaker(object): func.__dict__.update(kw) def make(self, src_templ, evaldict=None, addsource=False, **attrs): - "Make a new function from a given template and update the signature" + """ + Make a new function from a given template and update the signature + """ src = src_templ % vars(self) # expand name and signature evaldict = evaldict or {} mo = DEF.search(src) @@ -221,106 +196,128 @@ class FunctionMaker(object): return self.make(body, evaldict, addsource, **attrs) -def decorate(func, caller, extras=()): +def fix(args, kwargs, sig): """ - decorate(func, caller) decorates a function using a caller. - If the caller is a generator function, the resulting function - will be a generator function. + Fix args and kwargs to be consistent with the signature """ - evaldict = dict(_call_=caller, _func_=func) - es = '' - for i, extra in enumerate(extras): - ex = '_e%d_' % i - evaldict[ex] = extra - es += ex + ', ' + ba = sig.bind(*args, **kwargs) + ba.apply_defaults() # needed for test_dan_schult + return ba.args, ba.kwargs - if '3.5' <= sys.version < '3.6': - # with Python 3.5 isgeneratorfunction returns True for all coroutines - # however we know that it is NOT possible to have a generator - # coroutine in python 3.5: PEP525 was not there yet - generatorcaller = isgeneratorfunction( - caller) and not iscoroutinefunction(caller) + +def decorate(func, caller, extras=(), kwsyntax=False): + """ + Decorates a function/generator/coroutine using a caller. + If kwsyntax is True calling the decorated functions with keyword + syntax will pass the named arguments inside the ``kw`` dictionary, + even if such argument are positional, similarly to what functools.wraps + does. By default kwsyntax is False and the the arguments are untouched. + """ + sig = inspect.signature(func) + if iscoroutinefunction(caller): + async def fun(*args, **kw): + if not kwsyntax: + args, kw = fix(args, kw, sig) + return await caller(func, *(extras + args), **kw) + elif isgeneratorfunction(caller): + def fun(*args, **kw): + if not kwsyntax: + args, kw = fix(args, kw, sig) + for res in caller(func, *(extras + args), **kw): + yield res else: - generatorcaller = isgeneratorfunction(caller) - if generatorcaller: - fun = FunctionMaker.create( - func, "for res in _call_(_func_, %s%%(shortsignature)s):\n" - " yield res" % es, evaldict, __wrapped__=func) - else: - fun = FunctionMaker.create( - func, "return _call_(_func_, %s%%(shortsignature)s)" % es, - evaldict, __wrapped__=func) - if hasattr(func, '__qualname__'): - fun.__qualname__ = func.__qualname__ + def fun(*args, **kw): + if not kwsyntax: + args, kw = fix(args, kw, sig) + return caller(func, *(extras + args), **kw) + fun.__name__ = func.__name__ + fun.__doc__ = func.__doc__ + fun.__wrapped__ = func + fun.__signature__ = sig + fun.__qualname__ = func.__qualname__ + # builtin functions like defaultdict.__setitem__ lack many attributes + try: + fun.__defaults__ = func.__defaults__ + except AttributeError: + pass + try: + fun.__kwdefaults__ = func.__kwdefaults__ + except AttributeError: + pass + try: + fun.__annotations__ = func.__annotations__ + except AttributeError: + pass + try: + fun.__module__ = func.__module__ + except AttributeError: + pass + try: + fun.__dict__.update(func.__dict__) + except AttributeError: + pass return fun -def decorator(caller, _func=None): - """decorator(caller) converts a caller function into a decorator""" +def decoratorx(caller): + """ + A version of "decorator" implemented via "exec" and not via the + Signature object. Use this if you are want to preserve the `.__code__` + object properties (https://github.com/micheles/decorator/issues/129). + """ + def dec(func): + return FunctionMaker.create( + func, + "return _call_(_func_, %(shortsignature)s)", + dict(_call_=caller, _func_=func), + __wrapped__=func, __qualname__=func.__qualname__) + return dec + + +def decorator(caller, _func=None, kwsyntax=False): + """ + decorator(caller) converts a caller function into a decorator + """ if _func is not None: # return a decorated function # this is obsolete behavior; you should use decorate instead - return decorate(_func, caller) + return decorate(_func, caller, (), kwsyntax) # else return a decorator function - defaultargs, defaults = '', () - if inspect.isclass(caller): - name = caller.__name__.lower() - doc = 'decorator(%s) converts functions/generators into ' \ - 'factories of %s objects' % (caller.__name__, caller.__name__) - elif inspect.isfunction(caller): - if caller.__name__ == '': - name = '_lambda_' + sig = inspect.signature(caller) + dec_params = [p for p in sig.parameters.values() if p.kind is POS] + + def dec(func=None, *args, **kw): + na = len(args) + 1 + extras = args + tuple(kw.get(p.name, p.default) + for p in dec_params[na:] + if p.default is not EMPTY) + if func is None: + return lambda func: decorate(func, caller, extras, kwsyntax) else: - name = caller.__name__ - doc = caller.__doc__ - nargs = caller.__code__.co_argcount - ndefs = len(caller.__defaults__ or ()) - defaultargs = ', '.join(caller.__code__.co_varnames[nargs-ndefs:nargs]) - if defaultargs: - defaultargs += ',' - defaults = caller.__defaults__ - else: # assume caller is an object with a __call__ method - name = caller.__class__.__name__.lower() - doc = caller.__call__.__doc__ - evaldict = dict(_call=caller, _decorate_=decorate) - dec = FunctionMaker.create( - '%s(func, %s)' % (name, defaultargs), - 'if func is None: return lambda func: _decorate_(func, _call, (%s))\n' - 'return _decorate_(func, _call, (%s))' % (defaultargs, defaultargs), - evaldict, doc=doc, module=caller.__module__, __wrapped__=caller) - if defaults: - dec.__defaults__ = (None,) + defaults + return decorate(func, caller, extras, kwsyntax) + dec.__signature__ = sig.replace(parameters=dec_params) + dec.__name__ = caller.__name__ + dec.__doc__ = caller.__doc__ + dec.__wrapped__ = caller + dec.__qualname__ = caller.__qualname__ + dec.__kwdefaults__ = getattr(caller, '__kwdefaults__', None) + dec.__dict__.update(caller.__dict__) return dec # ####################### contextmanager ####################### # -try: # Python >= 3.2 - from contextlib import _GeneratorContextManager -except ImportError: # Python >= 2.5 - from contextlib import GeneratorContextManager as _GeneratorContextManager - class ContextManager(_GeneratorContextManager): + def __init__(self, g, *a, **k): + _GeneratorContextManager.__init__(self, g, a, k) + def __call__(self, func): - """Context manager decorator""" - return FunctionMaker.create( - func, "with _self_: return _func_(%(shortsignature)s)", - dict(_self_=self, _func_=func), __wrapped__=func) + def caller(f, *a, **k): + with self.__class__(self.func, *self.args, **self.kwds): + return f(*a, **k) + return decorate(func, caller) -init = getfullargspec(_GeneratorContextManager.__init__) -n_args = len(init.args) -if n_args == 2 and not init.varargs: # (self, genobj) Python 2.7 - def __init__(self, g, *a, **k): - return _GeneratorContextManager.__init__(self, g(*a, **k)) - ContextManager.__init__ = __init__ -elif n_args == 2 and init.varargs: # (self, gen, *a, **k) Python 3.4 - pass -elif n_args == 4: # (self, gen, args, kwds) Python 3.5 - def __init__(self, g, *a, **k): - return _GeneratorContextManager.__init__(self, g, a, k) - ContextManager.__init__ = __init__ - _contextmanager = decorator(ContextManager) diff --git a/libs/common/distutils-precedence.pth b/libs/common/distutils-precedence.pth new file mode 100644 index 00000000..7f009fe9 --- /dev/null +++ b/libs/common/distutils-precedence.pth @@ -0,0 +1 @@ +import os; var = 'SETUPTOOLS_USE_DISTUTILS'; enabled = os.environ.get(var, 'local') == 'local'; enabled and __import__('_distutils_hack').add_shim(); diff --git a/libs/common/dogpile/__init__.py b/libs/common/dogpile/__init__.py index c04e31db..2367ae03 100644 --- a/libs/common/dogpile/__init__.py +++ b/libs/common/dogpile/__init__.py @@ -1,4 +1,4 @@ -__version__ = '0.9.2' +__version__ = "1.1.8" from .lock import Lock # noqa from .lock import NeedRegenerationException # noqa diff --git a/libs/common/dogpile/cache/api.py b/libs/common/dogpile/cache/api.py index 4aef3519..0717d439 100644 --- a/libs/common/dogpile/cache/api.py +++ b/libs/common/dogpile/cache/api.py @@ -1,13 +1,19 @@ -import operator - -from ..util.compat import py3k +import abc +import pickle +from typing import Any +from typing import Callable +from typing import cast +from typing import Mapping +from typing import NamedTuple +from typing import Optional +from typing import Sequence +from typing import Union -class NoValue(object): +class NoValue: """Describe a missing cache value. - The :attr:`.NO_VALUE` module global - should be used. + The :data:`.NO_VALUE` constant should be used. """ @@ -22,51 +28,123 @@ class NoValue(object): """ return "" - if py3k: - - def __bool__(self): # pragma NO COVERAGE - return False - - else: - - def __nonzero__(self): # pragma NO COVERAGE - return False + def __bool__(self): # pragma NO COVERAGE + return False NO_VALUE = NoValue() """Value returned from ``get()`` that describes a key not present.""" +MetaDataType = Mapping[str, Any] -class CachedValue(tuple): + +KeyType = str +"""A cache key.""" + +ValuePayload = Any +"""An object to be placed in the cache against a key.""" + + +KeyManglerType = Callable[[KeyType], KeyType] +Serializer = Callable[[ValuePayload], bytes] +Deserializer = Callable[[bytes], ValuePayload] + + +class CacheMutex(abc.ABC): + """Describes a mutexing object with acquire and release methods. + + This is an abstract base class; any object that has acquire/release + methods may be used. + + .. versionadded:: 1.1 + + + .. seealso:: + + :meth:`.CacheBackend.get_mutex` - the backend method that optionally + returns this locking object. + + """ + + @abc.abstractmethod + def acquire(self, wait: bool = True) -> bool: + """Acquire the mutex. + + :param wait: if True, block until available, else return True/False + immediately. + + :return: True if the lock succeeded. + + """ + raise NotImplementedError() + + @abc.abstractmethod + def release(self) -> None: + """Release the mutex.""" + + raise NotImplementedError() + + @abc.abstractmethod + def locked(self) -> bool: + """Check if the mutex was acquired. + + :return: true if the lock is acquired. + + .. versionadded:: 1.1.2 + + """ + raise NotImplementedError() + + @classmethod + def __subclasshook__(cls, C): + return hasattr(C, "acquire") and hasattr(C, "release") + + +class CachedValue(NamedTuple): """Represent a value stored in the cache. :class:`.CachedValue` is a two-tuple of ``(payload, metadata)``, where ``metadata`` is dogpile.cache's tracking information ( - currently the creation time). The metadata - and tuple structure is pickleable, if - the backend requires serialization. + currently the creation time). """ - payload = property(operator.itemgetter(0)) - """Named accessor for the payload.""" + payload: ValuePayload - metadata = property(operator.itemgetter(1)) - """Named accessor for the dogpile.cache metadata dictionary.""" - - def __new__(cls, payload, metadata): - return tuple.__new__(cls, (payload, metadata)) - - def __reduce__(self): - return CachedValue, (self.payload, self.metadata) + metadata: MetaDataType -class CacheBackend(object): - """Base class for backend implementations.""" +CacheReturnType = Union[CachedValue, NoValue] +"""The non-serialized form of what may be returned from a backend +get method. - key_mangler = None +""" + +SerializedReturnType = Union[bytes, NoValue] +"""the serialized form of what may be returned from a backend get method.""" + +BackendFormatted = Union[CacheReturnType, SerializedReturnType] +"""Describes the type returned from the :meth:`.CacheBackend.get` method.""" + +BackendSetType = Union[CachedValue, bytes] +"""Describes the value argument passed to the :meth:`.CacheBackend.set` +method.""" + +BackendArguments = Mapping[str, Any] + + +class CacheBackend: + """Base class for backend implementations. + + Backends which set and get Python object values should subclass this + backend. For backends in which the value that's stored is ultimately + a stream of bytes, the :class:`.BytesBackend` should be used. + + """ + + key_mangler: Optional[Callable[[KeyType], KeyType]] = None """Key mangling function. May be None, or otherwise declared @@ -74,7 +152,23 @@ class CacheBackend(object): """ - def __init__(self, arguments): # pragma NO COVERAGE + serializer: Union[None, Serializer] = None + """Serializer function that will be used by default if not overridden + by the region. + + .. versionadded:: 1.1 + + """ + + deserializer: Union[None, Deserializer] = None + """deserializer function that will be used by default if not overridden + by the region. + + .. versionadded:: 1.1 + + """ + + def __init__(self, arguments: BackendArguments): # pragma NO COVERAGE """Construct a new :class:`.CacheBackend`. Subclasses should override this to @@ -97,10 +191,10 @@ class CacheBackend(object): ) ) - def has_lock_timeout(self): + def has_lock_timeout(self) -> bool: return False - def get_mutex(self, key): + def get_mutex(self, key: KeyType) -> Optional[CacheMutex]: """Return an optional mutexing object for the given key. This object need only provide an ``acquire()`` @@ -133,48 +227,141 @@ class CacheBackend(object): """ return None - def get(self, key): # pragma NO COVERAGE - """Retrieve a value from the cache. + def get(self, key: KeyType) -> BackendFormatted: # pragma NO COVERAGE + """Retrieve an optionally serialized value from the cache. - The returned value should be an instance of - :class:`.CachedValue`, or ``NO_VALUE`` if - not present. + :param key: String key that was passed to the :meth:`.CacheRegion.get` + method, which will also be processed by the "key mangling" function + if one was present. + + :return: the Python object that corresponds to + what was established via the :meth:`.CacheBackend.set` method, + or the :data:`.NO_VALUE` constant if not present. + + If a serializer is in use, this method will only be called if the + :meth:`.CacheBackend.get_serialized` method is not overridden. """ raise NotImplementedError() - def get_multi(self, keys): # pragma NO COVERAGE - """Retrieve multiple values from the cache. + def get_multi( + self, keys: Sequence[KeyType] + ) -> Sequence[BackendFormatted]: # pragma NO COVERAGE + """Retrieve multiple optionally serialized values from the cache. - The returned value should be a list, corresponding - to the list of keys given. + :param keys: sequence of string keys that was passed to the + :meth:`.CacheRegion.get_multi` method, which will also be processed + by the "key mangling" function if one was present. + + :return a list of values as would be returned + individually via the :meth:`.CacheBackend.get` method, corresponding + to the list of keys given. + + If a serializer is in use, this method will only be called if the + :meth:`.CacheBackend.get_serialized_multi` method is not overridden. .. versionadded:: 0.5.0 """ raise NotImplementedError() - def set(self, key, value): # pragma NO COVERAGE - """Set a value in the cache. + def get_serialized(self, key: KeyType) -> SerializedReturnType: + """Retrieve a serialized value from the cache. - The key will be whatever was passed - to the registry, processed by the - "key mangling" function, if any. - The value will always be an instance - of :class:`.CachedValue`. + :param key: String key that was passed to the :meth:`.CacheRegion.get` + method, which will also be processed by the "key mangling" function + if one was present. + + :return: a bytes object, or :data:`.NO_VALUE` + constant if not present. + + The default implementation of this method for :class:`.CacheBackend` + returns the value of the :meth:`.CacheBackend.get` method. + + .. versionadded:: 1.1 + + .. seealso:: + + :class:`.BytesBackend` + + """ + return cast(SerializedReturnType, self.get(key)) + + def get_serialized_multi( + self, keys: Sequence[KeyType] + ) -> Sequence[SerializedReturnType]: # pragma NO COVERAGE + """Retrieve multiple serialized values from the cache. + + :param keys: sequence of string keys that was passed to the + :meth:`.CacheRegion.get_multi` method, which will also be processed + by the "key mangling" function if one was present. + + :return: list of bytes objects + + The default implementation of this method for :class:`.CacheBackend` + returns the value of the :meth:`.CacheBackend.get_multi` method. + + .. versionadded:: 1.1 + + .. seealso:: + + :class:`.BytesBackend` + + """ + return cast(Sequence[SerializedReturnType], self.get_multi(keys)) + + def set( + self, key: KeyType, value: BackendSetType + ) -> None: # pragma NO COVERAGE + """Set an optionally serialized value in the cache. + + :param key: String key that was passed to the :meth:`.CacheRegion.set` + method, which will also be processed by the "key mangling" function + if one was present. + + :param value: The optionally serialized :class:`.CachedValue` object. + May be an instance of :class:`.CachedValue` or a bytes object + depending on if a serializer is in use with the region and if the + :meth:`.CacheBackend.set_serialized` method is not overridden. + + .. seealso:: + + :meth:`.CacheBackend.set_serialized` """ raise NotImplementedError() - def set_multi(self, mapping): # pragma NO COVERAGE + def set_serialized( + self, key: KeyType, value: bytes + ) -> None: # pragma NO COVERAGE + """Set a serialized value in the cache. + + :param key: String key that was passed to the :meth:`.CacheRegion.set` + method, which will also be processed by the "key mangling" function + if one was present. + + :param value: a bytes object to be stored. + + The default implementation of this method for :class:`.CacheBackend` + calls upon the :meth:`.CacheBackend.set` method. + + .. versionadded:: 1.1 + + .. seealso:: + + :class:`.BytesBackend` + + """ + self.set(key, value) + + def set_multi( + self, mapping: Mapping[KeyType, BackendSetType] + ) -> None: # pragma NO COVERAGE """Set multiple values in the cache. - ``mapping`` is a dict in which - the key will be whatever was passed - to the registry, processed by the - "key mangling" function, if any. - The value will always be an instance - of :class:`.CachedValue`. + :param mapping: a dict in which the key will be whatever was passed to + the :meth:`.CacheRegion.set_multi` method, processed by the "key + mangling" function, if any. When implementing a new :class:`.CacheBackend` or cutomizing via :class:`.ProxyBackend`, be aware that when this method is invoked by @@ -184,17 +371,52 @@ class CacheBackend(object): -- that will have the undesirable effect of modifying the returned values as well. + If a serializer is in use, this method will only be called if the + :meth:`.CacheBackend.set_serialized_multi` method is not overridden. + + .. versionadded:: 0.5.0 """ raise NotImplementedError() - def delete(self, key): # pragma NO COVERAGE + def set_serialized_multi( + self, mapping: Mapping[KeyType, bytes] + ) -> None: # pragma NO COVERAGE + """Set multiple serialized values in the cache. + + :param mapping: a dict in which the key will be whatever was passed to + the :meth:`.CacheRegion.set_multi` method, processed by the "key + mangling" function, if any. + + When implementing a new :class:`.CacheBackend` or cutomizing via + :class:`.ProxyBackend`, be aware that when this method is invoked by + :meth:`.Region.get_or_create_multi`, the ``mapping`` values are the + same ones returned to the upstream caller. If the subclass alters the + values in any way, it must not do so 'in-place' on the ``mapping`` dict + -- that will have the undesirable effect of modifying the returned + values as well. + + .. versionadded:: 1.1 + + The default implementation of this method for :class:`.CacheBackend` + calls upon the :meth:`.CacheBackend.set_multi` method. + + .. seealso:: + + :class:`.BytesBackend` + + + """ + self.set_multi(mapping) + + def delete(self, key: KeyType) -> None: # pragma NO COVERAGE """Delete a value from the cache. - The key will be whatever was passed - to the registry, processed by the - "key mangling" function, if any. + :param key: String key that was passed to the + :meth:`.CacheRegion.delete` + method, which will also be processed by the "key mangling" function + if one was present. The behavior here should be idempotent, that is, can be called any number of times @@ -203,12 +425,14 @@ class CacheBackend(object): """ raise NotImplementedError() - def delete_multi(self, keys): # pragma NO COVERAGE + def delete_multi( + self, keys: Sequence[KeyType] + ) -> None: # pragma NO COVERAGE """Delete multiple values from the cache. - The key will be whatever was passed - to the registry, processed by the - "key mangling" function, if any. + :param keys: sequence of string keys that was passed to the + :meth:`.CacheRegion.delete_multi` method, which will also be processed + by the "key mangling" function if one was present. The behavior here should be idempotent, that is, can be called any number of times @@ -219,3 +443,95 @@ class CacheBackend(object): """ raise NotImplementedError() + + +class DefaultSerialization: + serializer: Union[None, Serializer] = staticmethod( # type: ignore + pickle.dumps + ) + deserializer: Union[None, Deserializer] = staticmethod( # type: ignore + pickle.loads + ) + + +class BytesBackend(DefaultSerialization, CacheBackend): + """A cache backend that receives and returns series of bytes. + + This backend only supports the "serialized" form of values; subclasses + should implement :meth:`.BytesBackend.get_serialized`, + :meth:`.BytesBackend.get_serialized_multi`, + :meth:`.BytesBackend.set_serialized`, + :meth:`.BytesBackend.set_serialized_multi`. + + .. versionadded:: 1.1 + + """ + + def get_serialized(self, key: KeyType) -> SerializedReturnType: + """Retrieve a serialized value from the cache. + + :param key: String key that was passed to the :meth:`.CacheRegion.get` + method, which will also be processed by the "key mangling" function + if one was present. + + :return: a bytes object, or :data:`.NO_VALUE` + constant if not present. + + .. versionadded:: 1.1 + + """ + raise NotImplementedError() + + def get_serialized_multi( + self, keys: Sequence[KeyType] + ) -> Sequence[SerializedReturnType]: # pragma NO COVERAGE + """Retrieve multiple serialized values from the cache. + + :param keys: sequence of string keys that was passed to the + :meth:`.CacheRegion.get_multi` method, which will also be processed + by the "key mangling" function if one was present. + + :return: list of bytes objects + + .. versionadded:: 1.1 + + """ + raise NotImplementedError() + + def set_serialized( + self, key: KeyType, value: bytes + ) -> None: # pragma NO COVERAGE + """Set a serialized value in the cache. + + :param key: String key that was passed to the :meth:`.CacheRegion.set` + method, which will also be processed by the "key mangling" function + if one was present. + + :param value: a bytes object to be stored. + + .. versionadded:: 1.1 + + """ + raise NotImplementedError() + + def set_serialized_multi( + self, mapping: Mapping[KeyType, bytes] + ) -> None: # pragma NO COVERAGE + """Set multiple serialized values in the cache. + + :param mapping: a dict in which the key will be whatever was passed to + the :meth:`.CacheRegion.set_multi` method, processed by the "key + mangling" function, if any. + + When implementing a new :class:`.CacheBackend` or cutomizing via + :class:`.ProxyBackend`, be aware that when this method is invoked by + :meth:`.Region.get_or_create_multi`, the ``mapping`` values are the + same ones returned to the upstream caller. If the subclass alters the + values in any way, it must not do so 'in-place' on the ``mapping`` dict + -- that will have the undesirable effect of modifying the returned + values as well. + + .. versionadded:: 1.1 + + """ + raise NotImplementedError() diff --git a/libs/common/dogpile/cache/backends/__init__.py b/libs/common/dogpile/cache/backends/__init__.py index a2c1d6e4..e3d90400 100644 --- a/libs/common/dogpile/cache/backends/__init__.py +++ b/libs/common/dogpile/cache/backends/__init__.py @@ -24,6 +24,11 @@ register_backend( "dogpile.cache.backends.memcached", "MemcachedBackend", ) +register_backend( + "dogpile.cache.pymemcache", + "dogpile.cache.backends.memcached", + "PyMemcacheBackend", +) register_backend( "dogpile.cache.memory", "dogpile.cache.backends.memory", "MemoryBackend" ) @@ -35,3 +40,8 @@ register_backend( register_backend( "dogpile.cache.redis", "dogpile.cache.backends.redis", "RedisBackend" ) +register_backend( + "dogpile.cache.redis_sentinel", + "dogpile.cache.backends.redis", + "RedisSentinelBackend", +) diff --git a/libs/common/dogpile/cache/backends/file.py b/libs/common/dogpile/cache/backends/file.py index c78fc0cf..bc52d8bc 100644 --- a/libs/common/dogpile/cache/backends/file.py +++ b/libs/common/dogpile/cache/backends/file.py @@ -9,17 +9,18 @@ Provides backends that deal with local filesystem access. from __future__ import with_statement from contextlib import contextmanager +import dbm import os +import threading -from ..api import CacheBackend +from ..api import BytesBackend from ..api import NO_VALUE from ... import util -from ...util import compat __all__ = ["DBMBackend", "FileLock", "AbstractFileLock"] -class DBMBackend(CacheBackend): +class DBMBackend(BytesBackend): """A file-backend using a dbm file to store keys. Basic usage:: @@ -156,12 +157,6 @@ class DBMBackend(CacheBackend): util.KeyReentrantMutex.factory, ) - # TODO: make this configurable - if compat.py3k: - import dbm - else: - import anydbm as dbm - self.dbmmodule = dbm self._init_dbm_file() def _init_lock(self, argument, suffix, basedir, basefile, wrapper=None): @@ -185,7 +180,7 @@ class DBMBackend(CacheBackend): exists = True break if not exists: - fh = self.dbmmodule.open(self.filename, "c") + fh = dbm.open(self.filename, "c") fh.close() def get_mutex(self, key): @@ -215,57 +210,50 @@ class DBMBackend(CacheBackend): @contextmanager def _dbm_file(self, write): with self._use_rw_lock(write): - dbm = self.dbmmodule.open(self.filename, "w" if write else "r") - yield dbm - dbm.close() + with dbm.open(self.filename, "w" if write else "r") as dbm_obj: + yield dbm_obj - def get(self, key): - with self._dbm_file(False) as dbm: - if hasattr(dbm, "get"): - value = dbm.get(key, NO_VALUE) + def get_serialized(self, key): + with self._dbm_file(False) as dbm_obj: + if hasattr(dbm_obj, "get"): + value = dbm_obj.get(key, NO_VALUE) else: # gdbm objects lack a .get method try: - value = dbm[key] + value = dbm_obj[key] except KeyError: value = NO_VALUE - if value is not NO_VALUE: - value = compat.pickle.loads(value) return value - def get_multi(self, keys): - return [self.get(key) for key in keys] + def get_serialized_multi(self, keys): + return [self.get_serialized(key) for key in keys] - def set(self, key, value): - with self._dbm_file(True) as dbm: - dbm[key] = compat.pickle.dumps( - value, compat.pickle.HIGHEST_PROTOCOL - ) + def set_serialized(self, key, value): + with self._dbm_file(True) as dbm_obj: + dbm_obj[key] = value - def set_multi(self, mapping): - with self._dbm_file(True) as dbm: + def set_serialized_multi(self, mapping): + with self._dbm_file(True) as dbm_obj: for key, value in mapping.items(): - dbm[key] = compat.pickle.dumps( - value, compat.pickle.HIGHEST_PROTOCOL - ) + dbm_obj[key] = value def delete(self, key): - with self._dbm_file(True) as dbm: + with self._dbm_file(True) as dbm_obj: try: - del dbm[key] + del dbm_obj[key] except KeyError: pass def delete_multi(self, keys): - with self._dbm_file(True) as dbm: + with self._dbm_file(True) as dbm_obj: for key in keys: try: - del dbm[key] + del dbm_obj[key] except KeyError: pass -class AbstractFileLock(object): +class AbstractFileLock: """Coordinate read/write access to a file. typically is a file-based lock but doesn't necessarily have to be. @@ -397,7 +385,7 @@ class FileLock(AbstractFileLock): """ def __init__(self, filename): - self._filedescriptor = compat.threading.local() + self._filedescriptor = threading.local() self.filename = filename @util.memoized_property diff --git a/libs/common/dogpile/cache/backends/memcached.py b/libs/common/dogpile/cache/backends/memcached.py index a7c2329c..2fe30c7a 100644 --- a/libs/common/dogpile/cache/backends/memcached.py +++ b/libs/common/dogpile/cache/backends/memcached.py @@ -7,29 +7,42 @@ Provides backends for talking to `memcached `_. """ import random +import threading import time +import typing +from typing import Any +from typing import Mapping +import warnings from ..api import CacheBackend from ..api import NO_VALUE from ... import util -from ...util import compat + + +if typing.TYPE_CHECKING: + import bmemcached + import memcache + import pylibmc + import pymemcache +else: + # delayed import + bmemcached = None # noqa F811 + memcache = None # noqa F811 + pylibmc = None # noqa F811 + pymemcache = None # noqa F811 __all__ = ( "GenericMemcachedBackend", "MemcachedBackend", "PylibmcBackend", + "PyMemcacheBackend", "BMemcachedBackend", "MemcachedLock", ) class MemcachedLock(object): - """Simple distributed lock using memcached. - - This is an adaptation of the lock featured at - http://amix.dk/blog/post/19386 - - """ + """Simple distributed lock using memcached.""" def __init__(self, client_fn, key, timeout=0): self.client_fn = client_fn @@ -45,11 +58,15 @@ class MemcachedLock(object): elif not wait: return False else: - sleep_time = (((i + 1) * random.random()) + 2 ** i) / 2.5 + sleep_time = (((i + 1) * random.random()) + 2**i) / 2.5 time.sleep(sleep_time) if i < 15: i += 1 + def locked(self): + client = self.client_fn() + return client.get(self.key) is not None + def release(self): client = self.client_fn() client.delete(self.key) @@ -107,10 +124,17 @@ class GenericMemcachedBackend(CacheBackend): """ - set_arguments = {} + set_arguments: Mapping[str, Any] = {} """Additional arguments which will be passed to the :meth:`set` method.""" + # No need to override serializer, as all the memcached libraries + # handles that themselves. Still, we support customizing the + # serializer/deserializer to use better default pickle protocol + # or completely different serialization mechanism + serializer = None + deserializer = None + def __init__(self, arguments): self._imports() # using a plain threading.local here. threading.local @@ -138,7 +162,7 @@ class GenericMemcachedBackend(CacheBackend): def _clients(self): backend = self - class ClientPool(compat.threading.local): + class ClientPool(threading.local): def __init__(self): self.memcached = backend._create_client() @@ -173,12 +197,17 @@ class GenericMemcachedBackend(CacheBackend): def get_multi(self, keys): values = self.client.get_multi(keys) - return [NO_VALUE if key not in values else values[key] for key in keys] + + return [ + NO_VALUE if val is None else val + for val in [values.get(key, NO_VALUE) for key in keys] + ] def set(self, key, value): self.client.set(key, value, **self.set_arguments) def set_multi(self, mapping): + mapping = {key: value for key, value in mapping.items()} self.client.set_multi(mapping, **self.set_arguments) def delete(self, key): @@ -188,10 +217,9 @@ class GenericMemcachedBackend(CacheBackend): self.client.delete_multi(keys) -class MemcacheArgs(object): +class MemcacheArgs(GenericMemcachedBackend): """Mixin which provides support for the 'time' argument to set(), 'min_compress_len' to other methods. - """ def __init__(self, arguments): @@ -207,9 +235,6 @@ class MemcacheArgs(object): super(MemcacheArgs, self).__init__(arguments) -pylibmc = None - - class PylibmcBackend(MemcacheArgs, GenericMemcachedBackend): """A backend for the `pylibmc `_ @@ -259,9 +284,6 @@ class PylibmcBackend(MemcacheArgs, GenericMemcachedBackend): ) -memcache = None - - class MemcachedBackend(MemcacheArgs, GenericMemcachedBackend): """A backend using the standard `Python-memcached `_ memcached client. - This is a pure Python memcached client which - includes the ability to authenticate with a memcached - server using SASL. + This is a pure Python memcached client which includes + security features like SASL and SSL/TLS. + + SASL is a standard for adding authentication mechanisms + to protocols in a way that is protocol independent. A typical configuration using username/password:: @@ -317,6 +363,25 @@ class BMemcachedBackend(GenericMemcachedBackend): } ) + A typical configuration using tls_context:: + + import ssl + from dogpile.cache import make_region + + ctx = ssl.create_default_context(cafile="/path/to/my-ca.pem") + + region = make_region().configure( + 'dogpile.cache.bmemcached', + expiration_time = 3600, + arguments = { + 'url':["127.0.0.1"], + 'tls_context':ctx, + } + ) + + For advanced ways to configure TLS creating a more complex + tls_context visit https://docs.python.org/3/library/ssl.html + Arguments which can be passed to the ``arguments`` dictionary include: @@ -324,12 +389,17 @@ class BMemcachedBackend(GenericMemcachedBackend): SASL authentication. :param password: optional password, will be used for SASL authentication. + :param tls_context: optional TLS context, will be used for + TLS connections. + + .. versionadded:: 1.0.2 """ def __init__(self, arguments): self.username = arguments.get("username", None) self.password = arguments.get("password", None) + self.tls_context = arguments.get("tls_context", None) super(BMemcachedBackend, self).__init__(arguments) def _imports(self): @@ -355,10 +425,215 @@ class BMemcachedBackend(GenericMemcachedBackend): def _create_client(self): return self.Client( - self.url, username=self.username, password=self.password + self.url, + username=self.username, + password=self.password, + tls_context=self.tls_context, ) def delete_multi(self, keys): """python-binary-memcached api does not implements delete_multi""" for key in keys: self.delete(key) + + +class PyMemcacheBackend(GenericMemcachedBackend): + """A backend for the + `pymemcache `_ + memcached client. + + A comprehensive, fast, pure Python memcached client + + .. versionadded:: 1.1.2 + + pymemcache supports the following features: + + * Complete implementation of the memcached text protocol. + * Configurable timeouts for socket connect and send/recv calls. + * Access to the "noreply" flag, which can significantly increase + the speed of writes. + * Flexible, simple approach to serialization and deserialization. + * The (optional) ability to treat network and memcached errors as + cache misses. + + dogpile.cache uses the ``HashClient`` from pymemcache in order to reduce + API differences when compared to other memcached client drivers. + This allows the user to provide a single server or a list of memcached + servers. + + Arguments which can be passed to the ``arguments`` + dictionary include: + + :param tls_context: optional TLS context, will be used for + TLS connections. + + A typical configuration using tls_context:: + + import ssl + from dogpile.cache import make_region + + ctx = ssl.create_default_context(cafile="/path/to/my-ca.pem") + + region = make_region().configure( + 'dogpile.cache.pymemcache', + expiration_time = 3600, + arguments = { + 'url':["127.0.0.1"], + 'tls_context':ctx, + } + ) + + .. seealso:: + + ``_ - additional TLS + documentation. + + :param serde: optional "serde". Defaults to + ``pymemcache.serde.pickle_serde``. + + :param default_noreply: defaults to False. When set to True this flag + enables the pymemcache "noreply" feature. See the pymemcache + documentation for further details. + + :param socket_keepalive: optional socket keepalive, will be used for + TCP keepalive configuration. Use of this parameter requires pymemcache + 3.5.0 or greater. This parameter + accepts a + `pymemcache.client.base.KeepAliveOpts + `_ + object. + + A typical configuration using ``socket_keepalive``:: + + from pymemcache import KeepaliveOpts + from dogpile.cache import make_region + + # Using the default keepalive configuration + socket_keepalive = KeepaliveOpts() + + region = make_region().configure( + 'dogpile.cache.pymemcache', + expiration_time = 3600, + arguments = { + 'url':["127.0.0.1"], + 'socket_keepalive': socket_keepalive + } + ) + + .. versionadded:: 1.1.4 - added support for ``socket_keepalive``. + + :param enable_retry_client: optional flag to enable retry client + mechanisms to handle failure. Defaults to False. When set to ``True``, + the :paramref:`.PyMemcacheBackend.retry_attempts` parameter must also + be set, along with optional parameters + :paramref:`.PyMemcacheBackend.retry_delay`. + :paramref:`.PyMemcacheBackend.retry_for`, + :paramref:`.PyMemcacheBackend.do_not_retry_for`. + + .. seealso:: + + ``_ - + in the pymemcache documentation + + .. versionadded:: 1.1.4 + + :param retry_attempts: how many times to attempt an action with + pymemcache's retrying wrapper before failing. Must be 1 or above. + Defaults to None. + + .. versionadded:: 1.1.4 + + :param retry_delay: optional int|float, how many seconds to sleep between + each attempt. Used by the retry wrapper. Defaults to None. + + .. versionadded:: 1.1.4 + + :param retry_for: optional None|tuple|set|list, what exceptions to + allow retries for. Will allow retries for all exceptions if None. + Example: ``(MemcacheClientError, MemcacheUnexpectedCloseError)`` + Accepts any class that is a subclass of Exception. Defaults to None. + + .. versionadded:: 1.1.4 + + :param do_not_retry_for: optional None|tuple|set|list, what + exceptions should be retried. Will not block retries for any Exception if + None. Example: ``(IOError, MemcacheIllegalInputError)`` + Accepts any class that is a subclass of Exception. Defaults to None. + + .. versionadded:: 1.1.4 + + :param hashclient_retry_attempts: Amount of times a client should be tried + before it is marked dead and removed from the pool in the HashClient's + internal mechanisms. + + .. versionadded:: 1.1.5 + + :param hashclient_retry_timeout: Time in seconds that should pass between + retry attempts in the HashClient's internal mechanisms. + + .. versionadded:: 1.1.5 + + :param dead_timeout: Time in seconds before attempting to add a node + back in the pool in the HashClient's internal mechanisms. + + .. versionadded:: 1.1.5 + + """ # noqa E501 + + def __init__(self, arguments): + super().__init__(arguments) + + self.serde = arguments.get("serde", pymemcache.serde.pickle_serde) + self.default_noreply = arguments.get("default_noreply", False) + self.tls_context = arguments.get("tls_context", None) + self.socket_keepalive = arguments.get("socket_keepalive", None) + self.enable_retry_client = arguments.get("enable_retry_client", False) + self.retry_attempts = arguments.get("retry_attempts", None) + self.retry_delay = arguments.get("retry_delay", None) + self.retry_for = arguments.get("retry_for", None) + self.do_not_retry_for = arguments.get("do_not_retry_for", None) + self.hashclient_retry_attempts = arguments.get( + "hashclient_retry_attempts", 2 + ) + self.hashclient_retry_timeout = arguments.get( + "hashclient_retry_timeout", 1 + ) + self.dead_timeout = arguments.get("hashclient_dead_timeout", 60) + if ( + self.retry_delay is not None + or self.retry_attempts is not None + or self.retry_for is not None + or self.do_not_retry_for is not None + ) and not self.enable_retry_client: + warnings.warn( + "enable_retry_client is not set; retry options " + "will be ignored" + ) + + def _imports(self): + global pymemcache + import pymemcache + + def _create_client(self): + _kwargs = { + "serde": self.serde, + "default_noreply": self.default_noreply, + "tls_context": self.tls_context, + "retry_attempts": self.hashclient_retry_attempts, + "retry_timeout": self.hashclient_retry_timeout, + "dead_timeout": self.dead_timeout, + } + if self.socket_keepalive is not None: + _kwargs.update({"socket_keepalive": self.socket_keepalive}) + + client = pymemcache.client.hash.HashClient(self.url, **_kwargs) + if self.enable_retry_client: + return pymemcache.client.retrying.RetryingClient( + client, + attempts=self.retry_attempts, + retry_delay=self.retry_delay, + retry_for=self.retry_for, + do_not_retry_for=self.do_not_retry_for, + ) + + return client diff --git a/libs/common/dogpile/cache/backends/memory.py b/libs/common/dogpile/cache/backends/memory.py index 92dde6bc..f09b3029 100644 --- a/libs/common/dogpile/cache/backends/memory.py +++ b/libs/common/dogpile/cache/backends/memory.py @@ -10,9 +10,10 @@ places the value as given into the dictionary. """ + from ..api import CacheBackend +from ..api import DefaultSerialization from ..api import NO_VALUE -from ...util.compat import pickle class MemoryBackend(CacheBackend): @@ -49,36 +50,20 @@ class MemoryBackend(CacheBackend): """ - pickle_values = False - def __init__(self, arguments): self._cache = arguments.pop("cache_dict", {}) def get(self, key): - value = self._cache.get(key, NO_VALUE) - if value is not NO_VALUE and self.pickle_values: - value = pickle.loads(value) - return value + return self._cache.get(key, NO_VALUE) def get_multi(self, keys): - ret = [self._cache.get(key, NO_VALUE) for key in keys] - if self.pickle_values: - ret = [ - pickle.loads(value) if value is not NO_VALUE else value - for value in ret - ] - return ret + return [self._cache.get(key, NO_VALUE) for key in keys] def set(self, key, value): - if self.pickle_values: - value = pickle.dumps(value, pickle.HIGHEST_PROTOCOL) self._cache[key] = value def set_multi(self, mapping): - pickle_values = self.pickle_values for key, value in mapping.items(): - if pickle_values: - value = pickle.dumps(value, pickle.HIGHEST_PROTOCOL) self._cache[key] = value def delete(self, key): @@ -89,7 +74,7 @@ class MemoryBackend(CacheBackend): self._cache.pop(key, None) -class MemoryPickleBackend(MemoryBackend): +class MemoryPickleBackend(DefaultSerialization, MemoryBackend): """A backend that uses a plain dictionary, but serializes objects on :meth:`.MemoryBackend.set` and deserializes :meth:`.MemoryBackend.get`. @@ -120,5 +105,3 @@ class MemoryPickleBackend(MemoryBackend): .. versionadded:: 0.5.3 """ - - pickle_values = True diff --git a/libs/common/dogpile/cache/backends/null.py b/libs/common/dogpile/cache/backends/null.py index 6ab4cb4f..b4ad0fb3 100644 --- a/libs/common/dogpile/cache/backends/null.py +++ b/libs/common/dogpile/cache/backends/null.py @@ -24,6 +24,9 @@ class NullLock(object): def release(self): pass + def locked(self): + return False + class NullBackend(CacheBackend): """A "null" backend that effectively disables all cache operations. diff --git a/libs/common/dogpile/cache/backends/redis.py b/libs/common/dogpile/cache/backends/redis.py index 348d91a2..1a6ba09a 100644 --- a/libs/common/dogpile/cache/backends/redis.py +++ b/libs/common/dogpile/cache/backends/redis.py @@ -8,20 +8,23 @@ Provides backends for talking to `Redis `_. from __future__ import absolute_import +import typing import warnings -from ..api import CacheBackend +from ..api import BytesBackend from ..api import NO_VALUE -from ...util.compat import pickle -from ...util.compat import u -redis = None +if typing.TYPE_CHECKING: + import redis +else: + # delayed import + redis = None # noqa F811 -__all__ = ("RedisBackend",) +__all__ = ("RedisBackend", "RedisSentinelBackend") -class RedisBackend(CacheBackend): - """A `Redis `_ backend, using the +class RedisBackend(BytesBackend): + r"""A `Redis `_ backend, using the `redis-py `_ backend. Example configuration:: @@ -46,14 +49,10 @@ class RedisBackend(CacheBackend): :param url: string. If provided, will override separate host/port/db params. The format is that accepted by ``StrictRedis.from_url()``. - .. versionadded:: 0.4.1 - :param host: string, default is ``localhost``. :param password: string, default is no password. - .. versionadded:: 0.4.1 - :param port: integer, default is ``6379``. :param db: integer, default is ``0``. @@ -71,33 +70,31 @@ class RedisBackend(CacheBackend): Redis should expire it. This argument is only valid when ``distributed_lock`` is ``True``. - .. versionadded:: 0.5.0 - :param socket_timeout: float, seconds for socket timeout. Default is None (no timeout). - .. versionadded:: 0.5.4 - :param lock_sleep: integer, number of seconds to sleep when failed to acquire a lock. This argument is only valid when ``distributed_lock`` is ``True``. - .. versionadded:: 0.5.0 - :param connection_pool: ``redis.ConnectionPool`` object. If provided, this object supersedes other connection arguments passed to the ``redis.StrictRedis`` instance, including url and/or host as well as socket_timeout, and will be passed to ``redis.StrictRedis`` as the source of connectivity. - .. versionadded:: 0.5.4 - :param thread_local_lock: bool, whether a thread-local Redis lock object should be used. This is the default, but is not compatible with asynchronous runners, as they run in a different thread than the one used to create the lock. - .. versionadded:: 0.9.1 + :param connection_kwargs: dict, additional keyword arguments are passed + along to the + ``StrictRedis.from_url()`` method or ``StrictRedis()`` constructor + directly, including parameters like ``ssl``, ``ssl_certfile``, + ``charset``, etc. + + .. versionadded:: 1.1.6 Added ``connection_kwargs`` parameter. """ @@ -109,12 +106,12 @@ class RedisBackend(CacheBackend): self.password = arguments.pop("password", None) self.port = arguments.pop("port", 6379) self.db = arguments.pop("db", 0) - self.distributed_lock = arguments.get("distributed_lock", False) + self.distributed_lock = arguments.pop("distributed_lock", False) self.socket_timeout = arguments.pop("socket_timeout", None) - - self.lock_timeout = arguments.get("lock_timeout", None) - self.lock_sleep = arguments.get("lock_sleep", 0.1) - self.thread_local_lock = arguments.get("thread_local_lock", True) + self.lock_timeout = arguments.pop("lock_timeout", None) + self.lock_sleep = arguments.pop("lock_sleep", 0.1) + self.thread_local_lock = arguments.pop("thread_local_lock", True) + self.connection_kwargs = arguments.pop("connection_kwargs", {}) if self.distributed_lock and self.thread_local_lock: warnings.warn( @@ -123,8 +120,8 @@ class RedisBackend(CacheBackend): ) self.redis_expiration_time = arguments.pop("redis_expiration_time", 0) - self.connection_pool = arguments.get("connection_pool", None) - self.client = self._create_client() + self.connection_pool = arguments.pop("connection_pool", None) + self._create_client() def _imports(self): # defer imports until backend is used @@ -136,73 +133,207 @@ class RedisBackend(CacheBackend): # the connection pool already has all other connection # options present within, so here we disregard socket_timeout # and others. - return redis.StrictRedis(connection_pool=self.connection_pool) - - args = {} - if self.socket_timeout: - args["socket_timeout"] = self.socket_timeout - - if self.url is not None: - args.update(url=self.url) - return redis.StrictRedis.from_url(**args) - else: - args.update( - host=self.host, - password=self.password, - port=self.port, - db=self.db, + self.writer_client = redis.StrictRedis( + connection_pool=self.connection_pool ) - return redis.StrictRedis(**args) + self.reader_client = self.writer_client + else: + args = {} + args.update(self.connection_kwargs) + if self.socket_timeout: + args["socket_timeout"] = self.socket_timeout + + if self.url is not None: + args.update(url=self.url) + self.writer_client = redis.StrictRedis.from_url(**args) + self.reader_client = self.writer_client + else: + args.update( + host=self.host, + password=self.password, + port=self.port, + db=self.db, + ) + self.writer_client = redis.StrictRedis(**args) + self.reader_client = self.writer_client def get_mutex(self, key): if self.distributed_lock: - return self.client.lock( - u("_lock{0}").format(key), - timeout=self.lock_timeout, - sleep=self.lock_sleep, - thread_local=self.thread_local_lock, + return _RedisLockWrapper( + self.writer_client.lock( + "_lock{0}".format(key), + timeout=self.lock_timeout, + sleep=self.lock_sleep, + thread_local=self.thread_local_lock, + ) ) else: return None - def get(self, key): - value = self.client.get(key) + def get_serialized(self, key): + value = self.reader_client.get(key) if value is None: return NO_VALUE - return pickle.loads(value) + return value - def get_multi(self, keys): + def get_serialized_multi(self, keys): if not keys: return [] - values = self.client.mget(keys) - return [pickle.loads(v) if v is not None else NO_VALUE for v in values] + values = self.reader_client.mget(keys) + return [v if v is not None else NO_VALUE for v in values] - def set(self, key, value): + def set_serialized(self, key, value): if self.redis_expiration_time: - self.client.setex( - key, - self.redis_expiration_time, - pickle.dumps(value, pickle.HIGHEST_PROTOCOL), - ) + self.writer_client.setex(key, self.redis_expiration_time, value) else: - self.client.set(key, pickle.dumps(value, pickle.HIGHEST_PROTOCOL)) - - def set_multi(self, mapping): - mapping = dict( - (k, pickle.dumps(v, pickle.HIGHEST_PROTOCOL)) - for k, v in mapping.items() - ) + self.writer_client.set(key, value) + def set_serialized_multi(self, mapping): if not self.redis_expiration_time: - self.client.mset(mapping) + self.writer_client.mset(mapping) else: - pipe = self.client.pipeline() + pipe = self.writer_client.pipeline() for key, value in mapping.items(): pipe.setex(key, self.redis_expiration_time, value) pipe.execute() def delete(self, key): - self.client.delete(key) + self.writer_client.delete(key) def delete_multi(self, keys): - self.client.delete(*keys) + self.writer_client.delete(*keys) + + +class _RedisLockWrapper: + __slots__ = ("mutex", "__weakref__") + + def __init__(self, mutex: typing.Any): + self.mutex = mutex + + def acquire(self, wait: bool = True) -> typing.Any: + return self.mutex.acquire(blocking=wait) + + def release(self) -> typing.Any: + return self.mutex.release() + + def locked(self) -> bool: + return self.mutex.locked() # type: ignore + + +class RedisSentinelBackend(RedisBackend): + """A `Redis `_ backend, using the + `redis-py `_ backend. + It will use the Sentinel of a Redis cluster. + + .. versionadded:: 1.0.0 + + Example configuration:: + + from dogpile.cache import make_region + + region = make_region().configure( + 'dogpile.cache.redis_sentinel', + arguments = { + 'sentinels': [ + ['redis_sentinel_1', 26379], + ['redis_sentinel_2', 26379] + ], + 'db': 0, + 'redis_expiration_time': 60*60*2, # 2 hours + 'distributed_lock': True, + 'thread_local_lock': False + } + ) + + + Arguments accepted in the arguments dictionary: + + :param db: integer, default is ``0``. + + :param redis_expiration_time: integer, number of seconds after setting + a value that Redis should expire it. This should be larger than dogpile's + cache expiration. By default no expiration is set. + + :param distributed_lock: boolean, when True, will use a + redis-lock as the dogpile lock. Use this when multiple processes will be + talking to the same redis instance. When False, dogpile will + coordinate on a regular threading mutex, Default is True. + + :param lock_timeout: integer, number of seconds after acquiring a lock that + Redis should expire it. This argument is only valid when + ``distributed_lock`` is ``True``. + + :param socket_timeout: float, seconds for socket timeout. + Default is None (no timeout). + + :param sentinels: is a list of sentinel nodes. Each node is represented by + a pair (hostname, port). + Default is None (not in sentinel mode). + + :param service_name: str, the service name. + Default is 'mymaster'. + + :param sentinel_kwargs: is a dictionary of connection arguments used when + connecting to sentinel instances. Any argument that can be passed to + a normal Redis connection can be specified here. + Default is {}. + + :param connection_kwargs: dict, additional keyword arguments are passed + along to the + ``StrictRedis.from_url()`` method or ``StrictRedis()`` constructor + directly, including parameters like ``ssl``, ``ssl_certfile``, + ``charset``, etc. + + :param lock_sleep: integer, number of seconds to sleep when failed to + acquire a lock. This argument is only valid when + ``distributed_lock`` is ``True``. + + :param thread_local_lock: bool, whether a thread-local Redis lock object + should be used. This is the default, but is not compatible with + asynchronous runners, as they run in a different thread than the one + used to create the lock. + + """ + + def __init__(self, arguments): + arguments = arguments.copy() + + self.sentinels = arguments.pop("sentinels", None) + self.service_name = arguments.pop("service_name", "mymaster") + self.sentinel_kwargs = arguments.pop("sentinel_kwargs", {}) + + super().__init__( + arguments={ + "distributed_lock": True, + "thread_local_lock": False, + **arguments, + } + ) + + def _imports(self): + # defer imports until backend is used + global redis + import redis.sentinel # noqa + + def _create_client(self): + sentinel_kwargs = {} + sentinel_kwargs.update(self.sentinel_kwargs) + sentinel_kwargs.setdefault("password", self.password) + + connection_kwargs = {} + connection_kwargs.update(self.connection_kwargs) + connection_kwargs.setdefault("password", self.password) + + if self.db is not None: + connection_kwargs.setdefault("db", self.db) + sentinel_kwargs.setdefault("db", self.db) + if self.socket_timeout is not None: + connection_kwargs.setdefault("socket_timeout", self.socket_timeout) + + sentinel = redis.sentinel.Sentinel( + self.sentinels, + sentinel_kwargs=sentinel_kwargs, + **connection_kwargs, + ) + self.writer_client = sentinel.master_for(self.service_name) + self.reader_client = sentinel.slave_for(self.service_name) diff --git a/libs/common/dogpile/cache/proxy.py b/libs/common/dogpile/cache/proxy.py index 0eb8f8b3..bf6e296b 100644 --- a/libs/common/dogpile/cache/proxy.py +++ b/libs/common/dogpile/cache/proxy.py @@ -10,7 +10,16 @@ base backend. """ +from typing import Mapping +from typing import Optional +from typing import Sequence + +from .api import BackendFormatted +from .api import BackendSetType from .api import CacheBackend +from .api import CacheMutex +from .api import KeyType +from .api import SerializedReturnType class ProxyBackend(CacheBackend): @@ -55,11 +64,11 @@ class ProxyBackend(CacheBackend): """ - def __init__(self, *args, **kwargs): - self.proxied = None + def __init__(self, *arg, **kw): + pass - def wrap(self, backend): - """ Take a backend as an argument and setup the self.proxied property. + def wrap(self, backend: CacheBackend) -> "ProxyBackend": + """Take a backend as an argument and setup the self.proxied property. Return an object that be used as a backend by a :class:`.CacheRegion` object. """ @@ -73,23 +82,37 @@ class ProxyBackend(CacheBackend): # Delegate any functions that are not already overridden to # the proxies backend # - def get(self, key): + def get(self, key: KeyType) -> BackendFormatted: return self.proxied.get(key) - def set(self, key, value): + def set(self, key: KeyType, value: BackendSetType) -> None: self.proxied.set(key, value) - def delete(self, key): + def delete(self, key: KeyType) -> None: self.proxied.delete(key) - def get_multi(self, keys): + def get_multi(self, keys: Sequence[KeyType]) -> Sequence[BackendFormatted]: return self.proxied.get_multi(keys) - def set_multi(self, mapping): + def set_multi(self, mapping: Mapping[KeyType, BackendSetType]) -> None: self.proxied.set_multi(mapping) - def delete_multi(self, keys): + def delete_multi(self, keys: Sequence[KeyType]) -> None: self.proxied.delete_multi(keys) - def get_mutex(self, key): + def get_mutex(self, key: KeyType) -> Optional[CacheMutex]: return self.proxied.get_mutex(key) + + def get_serialized(self, key: KeyType) -> SerializedReturnType: + return self.proxied.get_serialized(key) + + def get_serialized_multi( + self, keys: Sequence[KeyType] + ) -> Sequence[SerializedReturnType]: + return self.proxied.get_serialized_multi(keys) + + def set_serialized(self, key: KeyType, value: bytes) -> None: + self.proxied.set_serialized(key, value) + + def set_serialized_multi(self, mapping: Mapping[KeyType, bytes]) -> None: + self.proxied.set_serialized_multi(mapping) diff --git a/libs/common/dogpile/cache/region.py b/libs/common/dogpile/cache/region.py index 7b4c103d..ef0dbc49 100644 --- a/libs/common/dogpile/cache/region.py +++ b/libs/common/dogpile/cache/region.py @@ -4,16 +4,35 @@ import contextlib import datetime from functools import partial from functools import wraps +import json import logging from numbers import Number import threading import time +from typing import Any +from typing import Callable +from typing import cast +from typing import Mapping +from typing import Optional +from typing import Sequence +from typing import Tuple +from typing import Type +from typing import Union from decorator import decorate from . import exception +from .api import BackendArguments +from .api import BackendFormatted from .api import CachedValue +from .api import CacheMutex +from .api import CacheReturnType +from .api import KeyType +from .api import MetaDataType from .api import NO_VALUE +from .api import SerializedReturnType +from .api import Serializer +from .api import ValuePayload from .backends import _backend_loader from .backends import register_backend # noqa from .proxy import ProxyBackend @@ -23,12 +42,11 @@ from .util import repr_obj from .. import Lock from .. import NeedRegenerationException from ..util import coerce_string_conf -from ..util import compat from ..util import memoized_property from ..util import NameRegistry from ..util import PluginLoader -value_version = 1 +value_version = 2 """An integer placed in the :class:`.CachedValue` so that new versions of dogpile.cache can detect cached values from a previous, backwards-incompatible version. @@ -38,7 +56,20 @@ values from a previous, backwards-incompatible version. log = logging.getLogger(__name__) -class RegionInvalidationStrategy(object): +AsyncCreator = Callable[ + ["CacheRegion", KeyType, Callable[[], ValuePayload], CacheMutex], None +] + +ExpirationTimeCallable = Callable[[], float] + +ToStr = Callable[[Any], str] + +FunctionKeyGenerator = Callable[..., Callable[..., KeyType]] + +FunctionMultiKeyGenerator = Callable[..., Callable[..., Sequence[KeyType]]] + + +class RegionInvalidationStrategy: """Region invalidation strategy interface Implement this interface and pass implementation instance @@ -110,7 +141,7 @@ class RegionInvalidationStrategy(object): """ - def invalidate(self, hard=True): + def invalidate(self, hard: bool = True) -> None: """Region invalidation. :class:`.CacheRegion` propagated call. @@ -122,7 +153,7 @@ class RegionInvalidationStrategy(object): raise NotImplementedError() - def is_hard_invalidated(self, timestamp): + def is_hard_invalidated(self, timestamp: float) -> bool: """Check timestamp to determine if it was hard invalidated. :return: Boolean. True if ``timestamp`` is older than @@ -133,7 +164,7 @@ class RegionInvalidationStrategy(object): raise NotImplementedError() - def is_soft_invalidated(self, timestamp): + def is_soft_invalidated(self, timestamp: float) -> bool: """Check timestamp to determine if it was soft invalidated. :return: Boolean. True if ``timestamp`` is older than @@ -144,7 +175,7 @@ class RegionInvalidationStrategy(object): raise NotImplementedError() - def is_invalidated(self, timestamp): + def is_invalidated(self, timestamp: float) -> bool: """Check timestamp to determine if it was invalidated. :return: Boolean. True if ``timestamp`` is older than @@ -154,7 +185,7 @@ class RegionInvalidationStrategy(object): raise NotImplementedError() - def was_soft_invalidated(self): + def was_soft_invalidated(self) -> bool: """Indicate the region was invalidated in soft mode. :return: Boolean. True if region was invalidated in soft mode. @@ -163,7 +194,7 @@ class RegionInvalidationStrategy(object): raise NotImplementedError() - def was_hard_invalidated(self): + def was_hard_invalidated(self) -> bool: """Indicate the region was invalidated in hard mode. :return: Boolean. True if region was invalidated in hard mode. @@ -178,27 +209,27 @@ class DefaultInvalidationStrategy(RegionInvalidationStrategy): self._is_hard_invalidated = None self._invalidated = None - def invalidate(self, hard=True): + def invalidate(self, hard: bool = True) -> None: self._is_hard_invalidated = bool(hard) self._invalidated = time.time() - def is_invalidated(self, timestamp): + def is_invalidated(self, timestamp: float) -> bool: return self._invalidated is not None and timestamp < self._invalidated - def was_hard_invalidated(self): + def was_hard_invalidated(self) -> bool: return self._is_hard_invalidated is True - def is_hard_invalidated(self, timestamp): + def is_hard_invalidated(self, timestamp: float) -> bool: return self.was_hard_invalidated() and self.is_invalidated(timestamp) - def was_soft_invalidated(self): + def was_soft_invalidated(self) -> bool: return self._is_hard_invalidated is False - def is_soft_invalidated(self, timestamp): + def is_soft_invalidated(self, timestamp: float) -> bool: return self.was_soft_invalidated() and self.is_invalidated(timestamp) -class CacheRegion(object): +class CacheRegion: r"""A front end to a particular cache backend. :param name: Optional, a string name for the region. @@ -284,6 +315,21 @@ class CacheRegion(object): to convert non-string or Unicode keys to bytestrings, which is needed when using a backend such as bsddb or dbm under Python 2.x in conjunction with Unicode keys. + + :param serializer: function which will be applied to all values before + passing to the backend. Defaults to ``None``, in which case the + serializer recommended by the backend will be used. Typical + serializers include ``pickle.dumps`` and ``json.dumps``. + + .. versionadded:: 1.1.0 + + :param deserializer: function which will be applied to all values returned + by the backend. Defaults to ``None``, in which case the + deserializer recommended by the backend will be used. Typical + deserializers include ``pickle.dumps`` and ``json.dumps``. + + .. versionadded:: 1.1.0 + :param async_creation_runner: A callable that, when specified, will be passed to and called by dogpile.lock when there is a stale value present in the cache. It will be passed the @@ -339,31 +385,37 @@ class CacheRegion(object): def __init__( self, - name=None, - function_key_generator=function_key_generator, - function_multi_key_generator=function_multi_key_generator, - key_mangler=None, - async_creation_runner=None, + name: Optional[str] = None, + function_key_generator: FunctionKeyGenerator = function_key_generator, + function_multi_key_generator: FunctionMultiKeyGenerator = function_multi_key_generator, # noqa E501 + key_mangler: Optional[Callable[[KeyType], KeyType]] = None, + serializer: Optional[Callable[[ValuePayload], bytes]] = None, + deserializer: Optional[Callable[[bytes], ValuePayload]] = None, + async_creation_runner: Optional[AsyncCreator] = None, ): """Construct a new :class:`.CacheRegion`.""" self.name = name self.function_key_generator = function_key_generator self.function_multi_key_generator = function_multi_key_generator self.key_mangler = self._user_defined_key_mangler = key_mangler + self.serializer = self._user_defined_serializer = serializer + self.deserializer = self._user_defined_deserializer = deserializer self.async_creation_runner = async_creation_runner - self.region_invalidator = DefaultInvalidationStrategy() + self.region_invalidator: RegionInvalidationStrategy = ( + DefaultInvalidationStrategy() + ) def configure( self, - backend, - expiration_time=None, - arguments=None, - _config_argument_dict=None, - _config_prefix=None, - wrap=None, - replace_existing_backend=False, - region_invalidator=None, - ): + backend: str, + expiration_time: Optional[Union[float, datetime.timedelta]] = None, + arguments: Optional[BackendArguments] = None, + _config_argument_dict: Optional[Mapping[str, Any]] = None, + _config_prefix: Optional[str] = None, + wrap: Sequence[Union[ProxyBackend, Type[ProxyBackend]]] = (), + replace_existing_backend: bool = False, + region_invalidator: Optional[RegionInvalidationStrategy] = None, + ) -> "CacheRegion": """Configure a :class:`.CacheRegion`. The :class:`.CacheRegion` itself @@ -414,7 +466,7 @@ class CacheRegion(object): .. versionadded:: 0.6.2 - """ + """ if "backend" in self.__dict__ and not replace_existing_backend: raise exception.RegionAlreadyConfigured( @@ -438,12 +490,12 @@ class CacheRegion(object): else: self.backend = backend_cls(arguments or {}) + self.expiration_time: Union[float, None] + if not expiration_time or isinstance(expiration_time, Number): - self.expiration_time = expiration_time + self.expiration_time = cast(Union[None, float], expiration_time) elif isinstance(expiration_time, datetime.timedelta): - self.expiration_time = int( - compat.timedelta_total_seconds(expiration_time) - ) + self.expiration_time = int(expiration_time.total_seconds()) else: raise exception.ValidationError( "expiration_time is not a number or timedelta." @@ -452,6 +504,12 @@ class CacheRegion(object): if not self._user_defined_key_mangler: self.key_mangler = self.backend.key_mangler + if not self._user_defined_serializer: + self.serializer = self.backend.serializer + + if not self._user_defined_deserializer: + self.deserializer = self.backend.deserializer + self._lock_registry = NameRegistry(self._create_mutex) if getattr(wrap, "__iter__", False): @@ -463,26 +521,28 @@ class CacheRegion(object): return self - def wrap(self, proxy): - """ Takes a ProxyBackend instance or class and wraps the - attached backend. """ + def wrap(self, proxy: Union[ProxyBackend, Type[ProxyBackend]]) -> None: + """Takes a ProxyBackend instance or class and wraps the + attached backend.""" # if we were passed a type rather than an instance then # initialize it. - if type(proxy) == type: - proxy = proxy() + if isinstance(proxy, type): + proxy_instance = proxy() + else: + proxy_instance = proxy - if not issubclass(type(proxy), ProxyBackend): + if not isinstance(proxy_instance, ProxyBackend): raise TypeError( - "Type %s is not a valid ProxyBackend" % type(proxy) + "%r is not a valid ProxyBackend" % (proxy_instance,) ) - self.backend = proxy.wrap(self.backend) + self.backend = proxy_instance.wrap(self.backend) def _mutex(self, key): return self._lock_registry.get(key) - class _LockWrapper(object): + class _LockWrapper(CacheMutex): """weakref-capable wrapper for threading.Lock""" def __init__(self): @@ -494,6 +554,9 @@ class CacheRegion(object): def release(self): self.lock.release() + def locked(self): + return self.lock.locked() + def _create_mutex(self, key): mutex = self.backend.get_mutex(key) if mutex is not None: @@ -699,7 +762,7 @@ class CacheRegion(object): if self.key_mangler: key = self.key_mangler(key) - value = self.backend.get(key) + value = self._get_from_backend(key) value = self._unexpired_value_fn(expiration_time, ignore_expiration)( value ) @@ -767,10 +830,10 @@ class CacheRegion(object): if not keys: return [] - if self.key_mangler: - keys = list(map(lambda key: self.key_mangler(key), keys)) + if self.key_mangler is not None: + keys = [self.key_mangler(key) for key in keys] - backend_values = self.backend.get_multi(keys) + backend_values = self._get_multi_from_backend(keys) _unexpired_value_fn = self._unexpired_value_fn( expiration_time, ignore_expiration @@ -805,14 +868,26 @@ class CacheRegion(object): return True + def key_is_locked(self, key: KeyType) -> bool: + """Return True if a particular cache key is currently being generated + within the dogpile lock. + + .. versionadded:: 1.1.2 + + """ + mutex = self._mutex(key) + locked: bool = mutex.locked() + return locked + def get_or_create( self, - key, - creator, - expiration_time=None, - should_cache_fn=None, - creator_args=None, - ): + key: KeyType, + creator: Callable[..., ValuePayload], + expiration_time: Optional[float] = None, + should_cache_fn: Optional[Callable[[ValuePayload], bool]] = None, + creator_args: Optional[Tuple[Any, Mapping[str, Any]]] = None, + ) -> ValuePayload: + """Return a cached value based on the given key. If the value does not exist or is considered to be expired @@ -899,12 +974,17 @@ class CacheRegion(object): key = self.key_mangler(key) def get_value(): - value = self.backend.get(key) + value = self._get_from_backend(key) if self._is_cache_miss(value, orig_key): raise NeedRegenerationException() - ct = value.metadata["ct"] + ct = cast(CachedValue, value).metadata["ct"] if self.region_invalidator.is_soft_invalidated(ct): + if expiration_time is None: + raise exception.DogpileCacheException( + "Non-None expiration time required " + "for soft invalidation" + ) ct = time.time() - expiration_time - 0.0001 return value.payload, ct @@ -919,37 +999,42 @@ class CacheRegion(object): created_value = creator() value = self._value(created_value) + if ( + expiration_time is None + and self.region_invalidator.was_soft_invalidated() + ): + raise exception.DogpileCacheException( + "Non-None expiration time required " + "for soft invalidation" + ) + if not should_cache_fn or should_cache_fn(created_value): - self.backend.set(key, value) + self._set_cached_value_to_backend(key, value) return value.payload, value.metadata["ct"] if expiration_time is None: expiration_time = self.expiration_time - if ( - expiration_time is None - and self.region_invalidator.was_soft_invalidated() - ): - raise exception.DogpileCacheException( - "Non-None expiration time required " "for soft invalidation" - ) - if expiration_time == -1: expiration_time = None + async_creator: Optional[Callable[[CacheMutex], AsyncCreator]] if self.async_creation_runner: + acr = self.async_creation_runner def async_creator(mutex): if creator_args: + ca = creator_args + @wraps(creator) def go(): - return creator(*creator_args[0], **creator_args[1]) + return creator(*ca[0], **ca[1]) else: go = creator - return self.async_creation_runner(self, orig_key, go, mutex) + return acr(self, orig_key, go, mutex) else: async_creator = None @@ -964,8 +1049,12 @@ class CacheRegion(object): return value def get_or_create_multi( - self, keys, creator, expiration_time=None, should_cache_fn=None - ): + self, + keys: Sequence[KeyType], + creator: Callable[[], ValuePayload], + expiration_time: Optional[float] = None, + should_cache_fn: Optional[Callable[[ValuePayload], bool]] = None, + ) -> Sequence[ValuePayload]: """Return a sequence of cached values based on a sequence of keys. The behavior for generation of values based on keys corresponds @@ -1020,34 +1109,29 @@ class CacheRegion(object): # _has_value() will return False. return value.payload, 0 else: - ct = value.metadata["ct"] + ct = cast(CachedValue, value).metadata["ct"] if self.region_invalidator.is_soft_invalidated(ct): + if expiration_time is None: + raise exception.DogpileCacheException( + "Non-None expiration time required " + "for soft invalidation" + ) ct = time.time() - expiration_time - 0.0001 return value.payload, ct - def gen_value(): + def gen_value() -> ValuePayload: raise NotImplementedError() - def async_creator(key, mutex): + def async_creator(mutexes, key, mutex): mutexes[key] = mutex if expiration_time is None: expiration_time = self.expiration_time - if ( - expiration_time is None - and self.region_invalidator.was_soft_invalidated() - ): - raise exception.DogpileCacheException( - "Non-None expiration time required " "for soft invalidation" - ) - if expiration_time == -1: expiration_time = None - mutexes = {} - sorted_unique_keys = sorted(set(keys)) if self.key_mangler: @@ -1057,7 +1141,11 @@ class CacheRegion(object): orig_to_mangled = dict(zip(sorted_unique_keys, mangled_keys)) - values = dict(zip(mangled_keys, self.backend.get_multi(mangled_keys))) + values = dict( + zip(mangled_keys, self._get_multi_from_backend(mangled_keys)) + ) + + mutexes: Mapping[KeyType, Any] = {} for orig_key, mangled_key in orig_to_mangled.items(): with Lock( @@ -1065,7 +1153,9 @@ class CacheRegion(object): gen_value, lambda: get_value(mangled_key), expiration_time, - async_creator=lambda mutex: async_creator(orig_key, mutex), + async_creator=lambda mutex: async_creator( + mutexes, orig_key, mutex + ), ): pass try: @@ -1077,22 +1167,31 @@ class CacheRegion(object): with self._log_time(keys_to_get): new_values = creator(*keys_to_get) - values_w_created = dict( - (orig_to_mangled[k], self._value(v)) + values_w_created = { + orig_to_mangled[k]: self._value(v) for k, v in zip(keys_to_get, new_values) - ) + } - if not should_cache_fn: - self.backend.set_multi(values_w_created) - else: - values_to_cache = dict( - (k, v) - for k, v in values_w_created.items() - if should_cache_fn(v[0]) + if ( + expiration_time is None + and self.region_invalidator.was_soft_invalidated() + ): + raise exception.DogpileCacheException( + "Non-None expiration time required " + "for soft invalidation" ) - if values_to_cache: - self.backend.set_multi(values_to_cache) + if not should_cache_fn: + self._set_multi_cached_value_to_backend(values_w_created) + + else: + self._set_multi_cached_value_to_backend( + { + k: v + for k, v in values_w_created.items() + if should_cache_fn(v.payload) + } + ) values.update(values_w_created) return [values[orig_to_mangled[k]].payload for k in keys] @@ -1100,36 +1199,162 @@ class CacheRegion(object): for mutex in mutexes.values(): mutex.release() - def _value(self, value): + def _value( + self, value: Any, metadata: Optional[MetaDataType] = None + ) -> CachedValue: """Return a :class:`.CachedValue` given a value.""" - return CachedValue(value, {"ct": time.time(), "v": value_version}) - def set(self, key, value): + if metadata is None: + metadata = self._gen_metadata() + return CachedValue(value, metadata) + + def _parse_serialized_from_backend( + self, value: SerializedReturnType + ) -> CacheReturnType: + if value in (None, NO_VALUE): + return NO_VALUE + + assert self.deserializer + byte_value = cast(bytes, value) + + bytes_metadata, _, bytes_payload = byte_value.partition(b"|") + metadata = json.loads(bytes_metadata) + payload = self.deserializer(bytes_payload) + return CachedValue(payload, metadata) + + def _serialize_cached_value_elements( + self, payload: ValuePayload, metadata: MetaDataType + ) -> bytes: + serializer = cast(Serializer, self.serializer) + + return b"%b|%b" % ( + json.dumps(metadata).encode("ascii"), + serializer(payload), + ) + + def _serialized_payload( + self, payload: ValuePayload, metadata: Optional[MetaDataType] = None + ) -> BackendFormatted: + """Return a backend formatted representation of a value. + + If a serializer is in use then this will return a string representation + with the value formatted by the serializer. + + """ + if metadata is None: + metadata = self._gen_metadata() + + return self._serialize_cached_value_elements(payload, metadata) + + def _serialized_cached_value(self, value: CachedValue) -> BackendFormatted: + """Return a backend formatted representation of a :class:`.CachedValue`. + + If a serializer is in use then this will return a string representation + with the value formatted by the serializer. + + """ + + assert self.serializer + return self._serialize_cached_value_elements( + value.payload, value.metadata + ) + + def _get_from_backend(self, key: KeyType) -> CacheReturnType: + if self.deserializer: + return self._parse_serialized_from_backend( + self.backend.get_serialized(key) + ) + else: + return cast(CacheReturnType, self.backend.get(key)) + + def _get_multi_from_backend( + self, keys: Sequence[KeyType] + ) -> Sequence[CacheReturnType]: + if self.deserializer: + return [ + self._parse_serialized_from_backend(v) + for v in self.backend.get_serialized_multi(keys) + ] + else: + return cast( + Sequence[CacheReturnType], self.backend.get_multi(keys) + ) + + def _set_cached_value_to_backend( + self, key: KeyType, value: CachedValue + ) -> None: + if self.serializer: + self.backend.set_serialized( + key, self._serialized_cached_value(value) + ) + else: + self.backend.set(key, value) + + def _set_multi_cached_value_to_backend( + self, mapping: Mapping[KeyType, CachedValue] + ) -> None: + if not mapping: + return + + if self.serializer: + self.backend.set_serialized_multi( + { + k: self._serialized_cached_value(v) + for k, v in mapping.items() + } + ) + else: + self.backend.set_multi(mapping) + + def _gen_metadata(self) -> MetaDataType: + return {"ct": time.time(), "v": value_version} + + def set(self, key: KeyType, value: ValuePayload) -> None: """Place a new value in the cache under the given key.""" if self.key_mangler: key = self.key_mangler(key) - self.backend.set(key, self._value(value)) - def set_multi(self, mapping): - """Place new values in the cache under the given keys. + if self.serializer: + self.backend.set_serialized(key, self._serialized_payload(value)) + else: + self.backend.set(key, self._value(value)) - .. versionadded:: 0.5.0 - - """ + def set_multi(self, mapping: Mapping[KeyType, ValuePayload]) -> None: + """Place new values in the cache under the given keys.""" if not mapping: return - if self.key_mangler: - mapping = dict( - (self.key_mangler(k), self._value(v)) - for k, v in mapping.items() - ) - else: - mapping = dict((k, self._value(v)) for k, v in mapping.items()) - self.backend.set_multi(mapping) + metadata = self._gen_metadata() - def delete(self, key): + if self.serializer: + if self.key_mangler: + mapping = { + self.key_mangler(k): self._serialized_payload( + v, metadata=metadata + ) + for k, v in mapping.items() + } + else: + mapping = { + k: self._serialized_payload(v, metadata=metadata) + for k, v in mapping.items() + } + self.backend.set_serialized_multi(mapping) + else: + if self.key_mangler: + mapping = { + self.key_mangler(k): self._value(v, metadata=metadata) + for k, v in mapping.items() + } + else: + mapping = { + k: self._value(v, metadata=metadata) + for k, v in mapping.items() + } + self.backend.set_multi(mapping) + + def delete(self, key: KeyType) -> None: """Remove a value from the cache. This operation is idempotent (can be called multiple times, or on a @@ -1141,7 +1366,7 @@ class CacheRegion(object): self.backend.delete(key) - def delete_multi(self, keys): + def delete_multi(self, keys: Sequence[KeyType]) -> None: """Remove multiple values from the cache. This operation is idempotent (can be called multiple times, or on a @@ -1152,18 +1377,19 @@ class CacheRegion(object): """ if self.key_mangler: - keys = list(map(lambda key: self.key_mangler(key), keys)) + km = self.key_mangler + keys = [km(key) for key in keys] self.backend.delete_multi(keys) def cache_on_arguments( self, - namespace=None, - expiration_time=None, - should_cache_fn=None, - to_str=compat.string_type, - function_key_generator=None, - ): + namespace: Optional[str] = None, + expiration_time: Union[float, ExpirationTimeCallable, None] = None, + should_cache_fn: Optional[Callable[[ValuePayload], bool]] = None, + to_str: Callable[[Any], str] = str, + function_key_generator: Optional[FunctionKeyGenerator] = None, + ) -> Callable[[Callable[..., ValuePayload]], Callable[..., ValuePayload]]: """A function decorator that will cache the return value of the function using a key derived from the function itself and its arguments. @@ -1256,7 +1482,7 @@ class CacheRegion(object): (with caveats) for use with instance or class methods. Given the example:: - class MyClass(object): + class MyClass: @region.cache_on_arguments(namespace="foo") def one(self, a, b): return a + b @@ -1270,12 +1496,12 @@ class CacheRegion(object): name within the same module, as can occur when decorating instance or class methods as below:: - class MyClass(object): + class MyClass: @region.cache_on_arguments(namespace='MC') def somemethod(self, x, y): "" - class MyOtherClass(object): + class MyOtherClass: @region.cache_on_arguments(namespace='MOC') def somemethod(self, x, y): "" @@ -1313,14 +1539,8 @@ class CacheRegion(object): end of the day, week or time period" and "cache until a certain date or time passes". - .. versionchanged:: 0.5.0 - ``expiration_time`` may be passed as a callable to - :meth:`.CacheRegion.cache_on_arguments`. - :param should_cache_fn: passed to :meth:`.CacheRegion.get_or_create`. - .. versionadded:: 0.4.3 - :param to_str: callable, will be called on each function argument in order to convert to a string. Defaults to ``str()``. If the function accepts non-ascii unicode arguments on Python 2.x, the @@ -1328,14 +1548,10 @@ class CacheRegion(object): produce unicode cache keys which may require key mangling before reaching the cache. - .. versionadded:: 0.5.0 - :param function_key_generator: a function that will produce a "cache key". This function will supersede the one configured on the :class:`.CacheRegion` itself. - .. versionadded:: 0.5.5 - .. seealso:: :meth:`.CacheRegion.cache_multi_on_arguments` @@ -1343,30 +1559,34 @@ class CacheRegion(object): :meth:`.CacheRegion.get_or_create` """ - expiration_time_is_callable = compat.callable(expiration_time) + expiration_time_is_callable = callable(expiration_time) if function_key_generator is None: - function_key_generator = self.function_key_generator + _function_key_generator = self.function_key_generator + else: + _function_key_generator = function_key_generator def get_or_create_for_user_func(key_generator, user_func, *arg, **kw): key = key_generator(*arg, **kw) - timeout = ( - expiration_time() + timeout: Optional[float] = ( + cast(ExpirationTimeCallable, expiration_time)() if expiration_time_is_callable - else expiration_time + else cast(Optional[float], expiration_time) ) return self.get_or_create( key, user_func, timeout, should_cache_fn, (arg, kw) ) def cache_decorator(user_func): - if to_str is compat.string_type: + if to_str is cast(Callable[[Any], str], str): # backwards compatible - key_generator = function_key_generator(namespace, user_func) + key_generator = _function_key_generator( + namespace, user_func + ) # type: ignore else: - key_generator = function_key_generator( - namespace, user_func, to_str=to_str + key_generator = _function_key_generator( + namespace, user_func, to_str ) def refresh(*arg, **kw): @@ -1406,13 +1626,20 @@ class CacheRegion(object): def cache_multi_on_arguments( self, - namespace=None, - expiration_time=None, - should_cache_fn=None, - asdict=False, - to_str=compat.string_type, - function_multi_key_generator=None, - ): + namespace: Optional[str] = None, + expiration_time: Union[float, ExpirationTimeCallable, None] = None, + should_cache_fn: Optional[Callable[[ValuePayload], bool]] = None, + asdict: bool = False, + to_str: ToStr = str, + function_multi_key_generator: Optional[ + FunctionMultiKeyGenerator + ] = None, + ) -> Callable[ + [Callable[..., Sequence[ValuePayload]]], + Callable[ + ..., Union[Sequence[ValuePayload], Mapping[KeyType, ValuePayload]] + ], + ]: """A function decorator that will cache multiple return values from the function using a sequence of keys derived from the function itself and the arguments passed to it. @@ -1529,12 +1756,19 @@ class CacheRegion(object): :meth:`.CacheRegion.get_or_create_multi` """ - expiration_time_is_callable = compat.callable(expiration_time) + expiration_time_is_callable = callable(expiration_time) if function_multi_key_generator is None: - function_multi_key_generator = self.function_multi_key_generator + _function_multi_key_generator = self.function_multi_key_generator + else: + _function_multi_key_generator = function_multi_key_generator - def get_or_create_for_user_func(key_generator, user_func, *arg, **kw): + def get_or_create_for_user_func( + key_generator: Callable[..., Sequence[KeyType]], + user_func: Callable[..., Sequence[ValuePayload]], + *arg: Any, + **kw: Any, + ) -> Union[Sequence[ValuePayload], Mapping[KeyType, ValuePayload]]: cache_keys = arg keys = key_generator(*arg, **kw) key_lookup = dict(zip(keys, cache_keys)) @@ -1543,12 +1777,16 @@ class CacheRegion(object): def creator(*keys_to_create): return user_func(*[key_lookup[k] for k in keys_to_create]) - timeout = ( - expiration_time() + timeout: Optional[float] = ( + cast(ExpirationTimeCallable, expiration_time)() if expiration_time_is_callable - else expiration_time + else cast(Optional[float], expiration_time) ) + result: Union[ + Sequence[ValuePayload], Mapping[KeyType, ValuePayload] + ] + if asdict: def dict_create(*keys): @@ -1581,7 +1819,7 @@ class CacheRegion(object): return result def cache_decorator(user_func): - key_generator = function_multi_key_generator( + key_generator = _function_multi_key_generator( namespace, user_func, to_str=to_str ) @@ -1627,7 +1865,7 @@ class CacheRegion(object): return cache_decorator -def make_region(*arg, **kw): +def make_region(*arg: Any, **kw: Any) -> CacheRegion: """Instantiate a new :class:`.CacheRegion`. Currently, :func:`.make_region` is a passthrough diff --git a/libs/common/dogpile/cache/util.py b/libs/common/dogpile/cache/util.py index 65ba5e65..6bf6cefe 100644 --- a/libs/common/dogpile/cache/util.py +++ b/libs/common/dogpile/cache/util.py @@ -4,7 +4,7 @@ from ..util import compat from ..util import langhelpers -def function_key_generator(namespace, fn, to_str=compat.string_type): +def function_key_generator(namespace, fn, to_str=str): """Return a function that generates a string key, based on a given function as well as arguments to the returned function itself. @@ -45,7 +45,7 @@ def function_key_generator(namespace, fn, to_str=compat.string_type): return generate_key -def function_multi_key_generator(namespace, fn, to_str=compat.string_type): +def function_multi_key_generator(namespace, fn, to_str=str): if namespace is None: namespace = "%s:%s" % (fn.__module__, fn.__name__) @@ -68,7 +68,7 @@ def function_multi_key_generator(namespace, fn, to_str=compat.string_type): return generate_keys -def kwarg_function_key_generator(namespace, fn, to_str=compat.string_type): +def kwarg_function_key_generator(namespace, fn, to_str=str): """Return a function that generates a string key, based on a given function as well as arguments to the returned function itself. @@ -131,7 +131,7 @@ def kwarg_function_key_generator(namespace, fn, to_str=compat.string_type): def sha1_mangle_key(key): """a SHA1 key mangler.""" - if isinstance(key, compat.text_type): + if isinstance(key, str): key = key.encode("utf-8") return sha1(key).hexdigest() @@ -162,7 +162,7 @@ PluginLoader = langhelpers.PluginLoader to_list = langhelpers.to_list -class repr_obj(object): +class repr_obj: __slots__ = ("value", "max_chars") diff --git a/libs/common/dogpile/lock.py b/libs/common/dogpile/lock.py index 68e97b06..465cd90d 100644 --- a/libs/common/dogpile/lock.py +++ b/libs/common/dogpile/lock.py @@ -15,7 +15,7 @@ class NeedRegenerationException(Exception): NOT_REGENERATED = object() -class Lock(object): +class Lock: """Dogpile lock class. Provides an interface around an arbitrary mutex diff --git a/libs/common/dogpile/util/compat.py b/libs/common/dogpile/util/compat.py index 464d97e1..85e4e85f 100644 --- a/libs/common/dogpile/util/compat.py +++ b/libs/common/dogpile/util/compat.py @@ -1,18 +1,6 @@ import collections import inspect -import sys -py2k = sys.version_info < (3, 0) -py3k = sys.version_info >= (3, 0) -py32 = sys.version_info >= (3, 2) -py27 = sys.version_info >= (2, 7) -jython = sys.platform.startswith("java") -win32 = sys.platform.startswith("win") - -try: - import threading -except ImportError: - import dummy_threading as threading # noqa FullArgSpec = collections.namedtuple( "FullArgSpec", @@ -33,7 +21,17 @@ ArgSpec = collections.namedtuple( def inspect_getfullargspec(func): - """Fully vendored version of getfullargspec from Python 3.3.""" + """Fully vendored version of getfullargspec from Python 3.3. + + This version is more performant than the one which appeared in + later Python 3 versions. + + """ + + # if a Signature is already present, as is the case with newer + # "decorator" package, defer back to built in + if hasattr(func, "__signature__"): + return inspect.getfullargspec(func) if inspect.ismethod(func): func = func.__func__ @@ -46,7 +44,7 @@ def inspect_getfullargspec(func): nargs = co.co_argcount names = co.co_varnames - nkwargs = co.co_kwonlyargcount if py3k else 0 + nkwargs = co.co_kwonlyargcount args = list(names[:nargs]) kwonlyargs = list(names[nargs : nargs + nkwargs]) @@ -65,77 +63,10 @@ def inspect_getfullargspec(func): varkw, func.__defaults__, kwonlyargs, - func.__kwdefaults__ if py3k else None, - func.__annotations__ if py3k else {}, + func.__kwdefaults__, + func.__annotations__, ) def inspect_getargspec(func): return ArgSpec(*inspect_getfullargspec(func)[0:4]) - - -if py3k: # pragma: no cover - string_types = (str,) - text_type = str - string_type = str - - if py32: - callable = callable # noqa - else: - - def callable(fn): # noqa - return hasattr(fn, "__call__") - - def u(s): - return s - - def ue(s): - return s - - import configparser - import io - import _thread as thread -else: - # Using noqa bellow due to tox -e pep8 who use - # python3.7 as the default interpreter - string_types = (basestring,) # noqa - text_type = unicode # noqa - string_type = str - - def u(s): - return unicode(s, "utf-8") # noqa - - def ue(s): - return unicode(s, "unicode_escape") # noqa - - import ConfigParser as configparser # noqa - import StringIO as io # noqa - - callable = callable # noqa - import thread # noqa - - -if py3k or jython: - import pickle -else: - import cPickle as pickle # noqa - -if py3k: - - def read_config_file(config, fileobj): - return config.read_file(fileobj) - - -else: - - def read_config_file(config, fileobj): - return config.readfp(fileobj) - - -def timedelta_total_seconds(td): - if py27: - return td.total_seconds() - else: - return ( - td.microseconds + (td.seconds + td.days * 24 * 3600) * 1e6 - ) / 1e6 diff --git a/libs/common/dogpile/util/langhelpers.py b/libs/common/dogpile/util/langhelpers.py index 8066e3ef..a59b24ef 100644 --- a/libs/common/dogpile/util/langhelpers.py +++ b/libs/common/dogpile/util/langhelpers.py @@ -1,13 +1,17 @@ +import abc import collections import re +import threading +from typing import MutableMapping +from typing import MutableSet -from . import compat +import stevedore def coerce_string_conf(d): result = {} for k, v in d.items(): - if not isinstance(v, compat.string_types): + if not isinstance(v, str): result[k] = v continue @@ -25,21 +29,26 @@ def coerce_string_conf(d): return result -class PluginLoader(object): +class PluginLoader: def __init__(self, group): self.group = group - self.impls = {} + self.impls = {} # loaded plugins + self._mgr = None # lazily defined stevedore manager + self._unloaded = {} # plugins registered but not loaded def load(self, name): + if name in self._unloaded: + self.impls[name] = self._unloaded[name]() + return self.impls[name] if name in self.impls: - return self.impls[name]() + return self.impls[name] else: # pragma NO COVERAGE - import pkg_resources - - for impl in pkg_resources.iter_entry_points(self.group, name): - self.impls[name] = impl.load - return impl.load() - else: + if self._mgr is None: + self._mgr = stevedore.ExtensionManager(self.group) + try: + self.impls[name] = self._mgr[name].plugin + return self.impls[name] + except KeyError: raise self.NotFound( "Can't load plugin %s %s" % (self.group, name) ) @@ -49,13 +58,13 @@ class PluginLoader(object): mod = __import__(modulepath, fromlist=[objname]) return getattr(mod, objname) - self.impls[name] = load + self._unloaded[name] = load class NotFound(Exception): """The specified plugin could not be found.""" -class memoized_property(object): +class memoized_property: """A read-only @property that is only evaluated once.""" def __init__(self, fget, doc=None): @@ -80,8 +89,23 @@ def to_list(x, default=None): return x -class KeyReentrantMutex(object): - def __init__(self, key, mutex, keys): +class Mutex(abc.ABC): + @abc.abstractmethod + def acquire(self, wait: bool = True) -> bool: + raise NotImplementedError() + + @abc.abstractmethod + def release(self) -> None: + raise NotImplementedError() + + +class KeyReentrantMutex: + def __init__( + self, + key: str, + mutex: Mutex, + keys: MutableMapping[int, MutableSet[str]], + ): self.key = key self.mutex = mutex self.keys = keys @@ -91,7 +115,9 @@ class KeyReentrantMutex(object): # this collection holds zero or one # thread idents as the key; a set of # keynames held as the value. - keystore = collections.defaultdict(set) + keystore: MutableMapping[ + int, MutableSet[str] + ] = collections.defaultdict(set) def fac(key): return KeyReentrantMutex(key, mutex, keystore) @@ -99,7 +125,7 @@ class KeyReentrantMutex(object): return fac def acquire(self, wait=True): - current_thread = compat.threading.current_thread().ident + current_thread = threading.get_ident() keys = self.keys.get(current_thread) if keys is not None and self.key not in keys: # current lockholder, new key. add it in @@ -113,7 +139,7 @@ class KeyReentrantMutex(object): return False def release(self): - current_thread = compat.threading.current_thread().ident + current_thread = threading.get_ident() keys = self.keys.get(current_thread) assert keys is not None, "this thread didn't do the acquire" assert self.key in keys, "No acquire held for key '%s'" % self.key @@ -123,3 +149,10 @@ class KeyReentrantMutex(object): # the thread ident and unlock. del self.keys[current_thread] self.mutex.release() + + def locked(self): + current_thread = threading.get_ident() + keys = self.keys.get(current_thread) + if keys is None: + return False + return self.key in keys diff --git a/libs/common/dogpile/util/nameregistry.py b/libs/common/dogpile/util/nameregistry.py index 467cfda9..3c4cc022 100644 --- a/libs/common/dogpile/util/nameregistry.py +++ b/libs/common/dogpile/util/nameregistry.py @@ -1,7 +1,9 @@ +import threading +from typing import Any +from typing import Callable +from typing import MutableMapping import weakref -from .compat import threading - class NameRegistry(object): """Generates and return an object, keeping it as a @@ -39,19 +41,15 @@ class NameRegistry(object): """ - _locks = weakref.WeakValueDictionary() _mutex = threading.RLock() - def __init__(self, creator): - """Create a new :class:`.NameRegistry`. - - - """ - self._values = weakref.WeakValueDictionary() + def __init__(self, creator: Callable[..., Any]): + """Create a new :class:`.NameRegistry`.""" + self._values: MutableMapping[str, Any] = weakref.WeakValueDictionary() self._mutex = threading.RLock() self.creator = creator - def get(self, identifier, *args, **kw): + def get(self, identifier: str, *args: Any, **kw: Any) -> Any: r"""Get and possibly create the value. :param identifier: Hash key for the value. @@ -70,7 +68,7 @@ class NameRegistry(object): except KeyError: return self._sync_get(identifier, *args, **kw) - def _sync_get(self, identifier, *args, **kw): + def _sync_get(self, identifier: str, *args: Any, **kw: Any) -> Any: self._mutex.acquire() try: try: diff --git a/libs/common/dogpile/util/readwrite_lock.py b/libs/common/dogpile/util/readwrite_lock.py index 2954858a..3f375103 100644 --- a/libs/common/dogpile/util/readwrite_lock.py +++ b/libs/common/dogpile/util/readwrite_lock.py @@ -1,6 +1,5 @@ import logging - -from .compat import threading +import threading log = logging.getLogger(__name__) @@ -63,10 +62,10 @@ class ReadWriteMutex(object): # check if we are the last asynchronous reader thread # out the door. if self.async_ == 0: - # yes. so if a sync operation is waiting, notifyAll to wake + # yes. so if a sync operation is waiting, notify_all to wake # it up if self.current_sync_operation is not None: - self.condition.notifyAll() + self.condition.notify_all() elif self.async_ < 0: raise LockError( "Synchronizer error - too many " @@ -96,7 +95,7 @@ class ReadWriteMutex(object): # establish ourselves as the current sync # this indicates to other read/write operations # that they should wait until this is None again - self.current_sync_operation = threading.currentThread() + self.current_sync_operation = threading.current_thread() # now wait again for asyncs to finish if self.async_ > 0: @@ -118,7 +117,7 @@ class ReadWriteMutex(object): """Release the 'write' lock.""" self.condition.acquire() try: - if self.current_sync_operation is not threading.currentThread(): + if self.current_sync_operation is not threading.current_thread(): raise LockError( "Synchronizer error - current thread doesn't " "have the write lock" @@ -129,7 +128,7 @@ class ReadWriteMutex(object): self.current_sync_operation = None # tell everyone to get ready - self.condition.notifyAll() + self.condition.notify_all() log.debug("%s released write lock", self) finally: diff --git a/libs/common/easy_install.py b/libs/common/easy_install.py deleted file mode 100644 index d87e9840..00000000 --- a/libs/common/easy_install.py +++ /dev/null @@ -1,5 +0,0 @@ -"""Run the EasyInstall command""" - -if __name__ == '__main__': - from setuptools.command.easy_install import main - main() diff --git a/libs/common/guessit/__main__.py b/libs/common/guessit/__main__.py index fad196d6..36d55f19 100644 --- a/libs/common/guessit/__main__.py +++ b/libs/common/guessit/__main__.py @@ -4,14 +4,12 @@ Entry point module """ # pragma: no cover -from __future__ import print_function - import json import logging -import os import sys -import six +from collections import OrderedDict + from rebulk.__version__ import __version__ as __rebulk_version__ from guessit import api @@ -20,12 +18,6 @@ from guessit.jsonutils import GuessitEncoder from guessit.options import argument_parser, parse_options, load_config, merge_options -try: - from collections import OrderedDict -except ImportError: # pragma: no-cover - from ordereddict import OrderedDict # pylint:disable=import-error - - def guess_filename(filename, options): """ Guess a single filename using given options @@ -48,6 +40,7 @@ def guess_filename(filename, options): if options.get('json'): print(json.dumps(guess, cls=GuessitEncoder, ensure_ascii=False)) elif options.get('yaml'): + # pylint:disable=import-outside-toplevel import yaml from guessit import yamlutils @@ -78,6 +71,7 @@ def display_properties(options): else: print(json.dumps(list(properties.keys()), cls=GuessitEncoder, ensure_ascii=False)) elif options.get('yaml'): + # pylint:disable=import-outside-toplevel import yaml from guessit import yamlutils if options.get('values'): @@ -91,30 +85,16 @@ def display_properties(options): properties_list = list(sorted(properties.keys())) for property_name in properties_list: property_values = properties.get(property_name) - print(2 * ' ' + '[+] %s' % (property_name,)) + print(2 * ' ' + f'[+] {property_name}') if property_values and options.get('values'): for property_value in property_values: - print(4 * ' ' + '[!] %s' % (property_value,)) - - -def fix_argv_encoding(): - """ - Fix encoding of sys.argv on windows Python 2 - """ - if six.PY2 and os.name == 'nt': # pragma: no cover - # see http://bugs.python.org/issue2128 - import locale - - for i, j in enumerate(sys.argv): - sys.argv[i] = j.decode(locale.getpreferredencoding()) + print(4 * ' ' + f'[!] {property_value}') def main(args=None): # pylint:disable=too-many-branches """ Main function for entry point """ - fix_argv_encoding() - if args is None: # pragma: no cover options = parse_options() else: @@ -142,7 +122,7 @@ def main(args=None): # pylint:disable=too-many-branches if options.get('yaml'): try: - import yaml # pylint:disable=unused-variable,unused-import + import yaml # pylint:disable=unused-variable,unused-import,import-outside-toplevel except ImportError: # pragma: no cover del options['yaml'] print('PyYAML is not installed. \'--yaml\' option will be ignored ...', file=sys.stderr) @@ -156,14 +136,8 @@ def main(args=None): # pylint:disable=too-many-branches for filename in options.get('filename'): filenames.append(filename) if options.get('input_file'): - if six.PY2: - input_file = open(options.get('input_file'), 'r') - else: - input_file = open(options.get('input_file'), 'r', encoding='utf-8') - try: + with open(options.get('input_file'), 'r', encoding='utf-8') as input_file: filenames.extend([line.strip() for line in input_file.readlines()]) - finally: - input_file.close() filenames = list(filter(lambda f: f, filenames)) diff --git a/libs/common/guessit/__version__.py b/libs/common/guessit/__version__.py index e505897b..71a00623 100644 --- a/libs/common/guessit/__version__.py +++ b/libs/common/guessit/__version__.py @@ -4,4 +4,4 @@ Version module """ # pragma: no cover -__version__ = '3.1.1' +__version__ = '3.5.0' diff --git a/libs/common/guessit/api.py b/libs/common/guessit/api.py index 8e306340..bc25b0dd 100644 --- a/libs/common/guessit/api.py +++ b/libs/common/guessit/api.py @@ -4,15 +4,12 @@ API functions that can be used by external software """ -try: - from collections import OrderedDict -except ImportError: # pragma: no-cover - from ordereddict import OrderedDict # pylint:disable=import-error - import os import traceback +from collections import OrderedDict +from copy import deepcopy +from pathlib import Path -import six from rebulk.introspector import introspect from .__version__ import __version__ @@ -26,24 +23,23 @@ class GuessitException(Exception): """ def __init__(self, string, options): - super(GuessitException, self).__init__("An internal error has occured in guessit.\n" - "===================== Guessit Exception Report =====================\n" - "version=%s\n" - "string=%s\n" - "options=%s\n" - "--------------------------------------------------------------------\n" - "%s" - "--------------------------------------------------------------------\n" - "Please report at " - "https://github.com/guessit-io/guessit/issues.\n" - "====================================================================" % - (__version__, str(string), str(options), traceback.format_exc())) + super().__init__("An internal error has occurred in guessit.\n" + "===================== Guessit Exception Report =====================\n" + f"version={__version__}\n" + f"string={str(string)}\n" + f"options={str(options)}\n" + "--------------------------------------------------------------------\n" + f"{traceback.format_exc()}" + "--------------------------------------------------------------------\n" + "Please report at " + "https://github.com/guessit-io/guessit/issues.\n" + "====================================================================") self.string = string self.options = options -def configure(options=None, rules_builder=rebulk_builder, force=False): +def configure(options=None, rules_builder=None, force=False): """ Load configuration files and initialize rebulk rules if required. @@ -58,6 +54,13 @@ def configure(options=None, rules_builder=rebulk_builder, force=False): default_api.configure(options, rules_builder=rules_builder, force=force) +def reset(): + """ + Reset api internal state. + """ + default_api.reset() + + def guessit(string, options=None): """ Retrieves all matches from string as a dict @@ -95,7 +98,7 @@ def suggested_expected(titles, options=None): return default_api.suggested_expected(titles, options) -class GuessItApi(object): +class GuessItApi: """ An api class that can be configured with custom Rebulk configuration. """ @@ -107,15 +110,19 @@ class GuessItApi(object): self.load_config_options = None self.advanced_config = None + def reset(self): + """ + Reset api internal state. + """ + self.__init__() # pylint:disable=unnecessary-dunder-call + @classmethod def _fix_encoding(cls, value): if isinstance(value, list): return [cls._fix_encoding(item) for item in value] if isinstance(value, dict): return {cls._fix_encoding(k): cls._fix_encoding(v) for k, v in value.items()} - if six.PY2 and isinstance(value, six.text_type): - return value.encode('utf-8') - if six.PY3 and isinstance(value, six.binary_type): + if isinstance(value, bytes): return value.decode('ascii') return value @@ -126,7 +133,7 @@ class GuessItApi(object): return False return True - def configure(self, options=None, rules_builder=rebulk_builder, force=False, sanitize_options=True): + def configure(self, options=None, rules_builder=None, force=False, sanitize_options=True): """ Load configuration files and initialize rebulk rules if required. @@ -136,9 +143,14 @@ class GuessItApi(object): :type rules_builder: :param force: :type force: bool + :param sanitize_options: + :type force: bool :return: :rtype: dict """ + if not rules_builder: + rules_builder = rebulk_builder + if sanitize_options: options = parse_options(options, True) options = self._fix_encoding(options) @@ -159,7 +171,7 @@ class GuessItApi(object): self.advanced_config != advanced_config if should_build_rebulk: - self.advanced_config = advanced_config + self.advanced_config = deepcopy(advanced_config) self.rebulk = rules_builder(advanced_config) self.config = config @@ -175,16 +187,12 @@ class GuessItApi(object): :return: :rtype: """ - try: - from pathlib import Path - if isinstance(string, Path): - try: - # Handle path-like object - string = os.fspath(string) - except AttributeError: - string = str(string) - except ImportError: - pass + if isinstance(string, Path): + try: + # Handle path-like object + string = os.fspath(string) + except AttributeError: + string = str(string) try: options = parse_options(options, True) @@ -194,32 +202,27 @@ class GuessItApi(object): result_decode = False result_encode = False - if six.PY2: - if isinstance(string, six.text_type): - string = string.encode("utf-8") - result_decode = True - elif isinstance(string, six.binary_type): - string = six.binary_type(string) - if six.PY3: - if isinstance(string, six.binary_type): - string = string.decode('ascii') - result_encode = True - elif isinstance(string, six.text_type): - string = six.text_type(string) + if isinstance(string, bytes): + string = string.decode('ascii') + result_encode = True matches = self.rebulk.matches(string, options) if result_decode: for match in matches: - if isinstance(match.value, six.binary_type): + if isinstance(match.value, bytes): match.value = match.value.decode("utf-8") if result_encode: for match in matches: - if isinstance(match.value, six.text_type): + if isinstance(match.value, str): match.value = match.value.encode("ascii") - return matches.to_dict(options.get('advanced', False), options.get('single_value', False), - options.get('enforce_list', False)) - except: - raise GuessitException(string, options) + matches_dict = matches.to_dict(options.get('advanced', False), options.get('single_value', False), + options.get('enforce_list', False)) + output_input_string = options.get('output_input_string', False) + if output_input_string: + matches_dict['input_string'] = matches.input_string + return matches_dict + except Exception as err: + raise GuessitException(string, options) from err def properties(self, options=None): """ @@ -235,8 +238,8 @@ class GuessItApi(object): options = merge_options(config, options) unordered = introspect(self.rebulk, options).properties ordered = OrderedDict() - for k in sorted(unordered.keys(), key=six.text_type): - ordered[k] = list(sorted(unordered[k], key=six.text_type)) + for k in sorted(unordered.keys(), key=str): + ordered[k] = list(sorted(unordered[k], key=str)) if hasattr(self.rebulk, 'customize_properties'): ordered = self.rebulk.customize_properties(ordered) return ordered diff --git a/libs/common/guessit/backports.py b/libs/common/guessit/backports.py deleted file mode 100644 index c149a6b5..00000000 --- a/libs/common/guessit/backports.py +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -""" -Backports -""" -# pragma: no-cover -# pylint: skip-file - -def cmp_to_key(mycmp): - """functools.cmp_to_key backport""" - class KeyClass(object): - """Key class""" - def __init__(self, obj, *args): # pylint: disable=unused-argument - self.obj = obj - def __lt__(self, other): - return mycmp(self.obj, other.obj) < 0 - def __gt__(self, other): - return mycmp(self.obj, other.obj) > 0 - def __eq__(self, other): - return mycmp(self.obj, other.obj) == 0 - def __le__(self, other): - return mycmp(self.obj, other.obj) <= 0 - def __ge__(self, other): - return mycmp(self.obj, other.obj) >= 0 - def __ne__(self, other): - return mycmp(self.obj, other.obj) != 0 - return KeyClass diff --git a/libs/common/guessit/config/__init__.py b/libs/common/guessit/config/__init__.py new file mode 100644 index 00000000..bdfe58b9 --- /dev/null +++ b/libs/common/guessit/config/__init__.py @@ -0,0 +1,152 @@ +""" +Config module. +""" +from importlib import import_module +from typing import Any, List + +from rebulk import Rebulk + +_regex_prefix = 're:' +_import_prefix = 'import:' +_import_cache = {} +_eval_prefix = 'eval:' +_eval_cache = {} +_pattern_types = ('regex', 'string') +_default_module_names = { + 'validator': 'guessit.rules.common.validators', + 'formatter': 'guessit.rules.common.formatters' +} + + +def _process_option(name: str, value: Any): + if name in ('validator', 'conflict_solver', 'formatter'): + if isinstance(value, dict): + return {item_key: _process_option(name, item_value) for item_key, item_value in value.items()} + if value is not None: + return _process_option_executable(value, _default_module_names.get(name)) + return value + + +def _import(value: str, default_module_name=None): + if '.' in value: + module_name, target = value.rsplit(':', 1) + else: + module_name = default_module_name + target = value + import_id = module_name + ":" + target + if import_id in _import_cache: + return _import_cache[import_id] + + mod = import_module(module_name) + + imported = mod + for item in target.split("."): + imported = getattr(imported, item) + + _import_cache[import_id] = imported + + return imported + + +def _eval(value: str): + compiled = _eval_cache.get(value) + if not compiled: + compiled = compile(value, '', 'eval') + return eval(compiled) # pylint:disable=eval-used + + +def _process_option_executable(value: str, default_module_name=None): + if value.startswith(_import_prefix): + value = value[len(_import_prefix):] + return _import(value, default_module_name) + if value.startswith(_eval_prefix): + value = value[len(_eval_prefix):] + return _eval(value) + if value.startswith('lambda ') or value.startswith('lambda:'): + return _eval(value) + return value + + +def _process_callable_entry(callable_spec: str, rebulk: Rebulk, entry: dict): + _process_option_executable(callable_spec)(rebulk, **entry) + + +def _build_entry_decl(entry, options, value): + entry_decl = dict(options.get(None, {})) + if not value.startswith('_'): + entry_decl['value'] = value + if isinstance(entry, str): + if entry.startswith(_regex_prefix): + entry_decl["regex"] = [entry[len(_regex_prefix):]] + else: + entry_decl["string"] = [entry] + else: + entry_decl.update(entry) + if "pattern" in entry_decl: + legacy_pattern = entry.pop("pattern") + if legacy_pattern.startswith(_regex_prefix): + entry_decl["regex"] = [legacy_pattern[len(_regex_prefix):]] + else: + entry_decl["string"] = [legacy_pattern] + return entry_decl + + +def load_patterns(rebulk: Rebulk, + pattern_type: str, + patterns: List[str], + options: dict = None): + """ + Load patterns for a prepared config entry + :param rebulk: Rebulk builder to use. + :param pattern_type: Pattern type. + :param patterns: Patterns + :param options: kwargs options to pass to rebulk pattern function. + :return: + """ + default_options = options.get(None) if options else None + item_options = dict(default_options) if default_options else {} + pattern_type_option = options.get(pattern_type) + if pattern_type_option: + item_options.update(pattern_type_option) + item_options = {name: _process_option(name, value) for name, value in item_options.items()} + getattr(rebulk, pattern_type)(*patterns, **item_options) + + +def load_config_patterns(rebulk: Rebulk, + config: dict, + options: dict = None): + """ + Load patterns defined in given config. + :param rebulk: Rebulk builder to use. + :param config: dict containing pattern definition. + :param options: Additional pattern options to use. + :type options: Dict[Dict[str, str]] A dict where key is the pattern type (regex, string, functional) and value is + the default kwargs options to pass. + :return: + """ + if options is None: + options = {} + + for value, raw_entries in config.items(): + entries = raw_entries if isinstance(raw_entries, list) else [raw_entries] + for entry in entries: + if isinstance(entry, dict) and "callable" in entry.keys(): + _process_callable_entry(entry.pop("callable"), rebulk, entry) + continue + entry_decl = _build_entry_decl(entry, options, value) + + for pattern_type in _pattern_types: + patterns = entry_decl.get(pattern_type) + if not patterns: + continue + if not isinstance(patterns, list): + patterns = [patterns] + patterns_entry_decl = dict(entry_decl) + + for pattern_type_to_remove in _pattern_types: + patterns_entry_decl.pop(pattern_type_to_remove, None) + + current_pattern_options = dict(options) + current_pattern_options[None] = patterns_entry_decl + + load_patterns(rebulk, pattern_type, patterns, current_pattern_options) diff --git a/libs/common/guessit/config/options.json b/libs/common/guessit/config/options.json index da7c7030..a5a00d2d 100644 --- a/libs/common/guessit/config/options.json +++ b/libs/common/guessit/config/options.json @@ -52,27 +52,103 @@ "ending": ")]}" }, "audio_codec": { + "audio_codec": { + "MP3": {"string": ["MP3", "LAME"],"regex": ["LAME(?:\\d)+-?(?:\\d)+"]}, + "MP2": "MP2", + "Dolby Digital": {"string": ["Dolby", "DolbyDigital"], "regex": ["Dolby-Digital", "DD", "AC-?3D?"]}, + "Dolby Atmos": {"string": ["Atmos"], "regex": ["Dolby-?Atmos"]}, + "AAC": "AAC", + "Dolby Digital Plus": {"string": ["DDP", "DD+"], "regex": ["E-?AC-?3"]}, + "FLAC": "Flac", + "DTS": "DTS", + "DTS-HD": {"regex": ["DTS-?HD", "DTS(?=-?MA)"], "conflict_solver": "lambda match, other: other if other.name == 'audio_codec' else '__default__'"}, + "Dolby TrueHD": {"regex": ["True-?HD"] }, + "Opus": "Opus", + "Vorbis": "Vorbis", + "PCM": "PCM", + "LPCM": "LPCM" + }, "audio_channels": { "1.0": [ "1ch", - "mono" + "mono", + "re:(1[\\W_]0(?:ch)?)(?=[^\\d]|$)" ], "2.0": [ "2ch", "stereo", - "re:(2[\\W_]0(?:ch)?)(?=[^\\d]|$)" + {"regex": "(2[\\W_]0(?:ch)?)(?=[^\\d]|$)", "children": true}, + {"string": "20", "validator": "import:seps_after", "tags": "weak-audio_channels"} ], "5.1": [ "5ch", "6ch", - "re:(5[\\W_][01](?:ch)?)(?=[^\\d]|$)", - "re:(6[\\W_]0(?:ch)?)(?=[^\\d]|$)" + {"regex": "(5[\\W_][01](?:ch)?)(?=[^\\d]|$)", "children": true}, + {"regex": "(6[\\W_]0(?:ch)?)(?=[^\\d]|$)", "children": true}, + {"regex": "5[01]", "validator": "import:seps_after", "tags": "weak-audio_channels"} ], "7.1": [ "7ch", "8ch", - "re:(7[\\W_][01](?:ch)?)(?=[^\\d]|$)" + {"regex": "(7[\\W_][01](?:ch)?)(?=[^\\d]|$)", "children": true}, + {"regex": "7[01]", "validator": "import:seps_after", "tags": "weak-audio_channels"} ] + }, + "audio_profile": { + "Master Audio": {"string": "MA", "tags": ["audio_profile.rule", "DTS-HD"]}, + "High Resolution Audio": {"string": ["HR", "HRA"], "tags": ["audio_profile.rule", "DTS-HD"]}, + "Extended Surround": {"string": "ES", "tags": ["audio_profile.rule", "DTS"]}, + "High Efficiency": {"string": "HE", "tags": ["audio_profile.rule", "AAC"]}, + "Low Complexity": {"string": "LC", "tags": ["audio_profile.rule", "AAC"]}, + "High Quality": {"string": "HQ", "tags": ["audio_profile.rule", "Dolby Digital"]}, + "EX": {"string": "EX", "tags": ["audio_profile.rule", "Dolby Digital"]} + } + }, + "bit_rate": { + "bit_rate": { + "_": { + "regex": ["\\d+-?[kmg]b(ps|its?)", "\\d+\\.\\d+-?[kmg]b(ps|its?)"], + "conflict_solver": "lambda match, other: match if other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags else other", + "formatter": "import:guessit.rules.common.quantity:BitRate.fromstring", + "tags": ["release-group-prefix"] + } + } + }, + "bonus": { + "bonus": { + "_": { + "regex": "x(\\d+)", + "private_parent": true, + "children": true, + "formatter": "eval:int", + "validator": {"__parent__": "import:seps_surround"}, + "validate_all": true, + "conflict_solver": "lambda match, conflicting: match if conflicting.name in ('video_codec', 'episode') and 'weak-episode' not in conflicting.tags else '__default__'" + } + } + }, + "cd": { + "_cd_of_cd_count": { + "regex": "cd-?(?P\\d+)(?:-?of-?(?P\\d+))?", + "validator": { + "cd": "lambda match: 0 < match.value < 100", + "cd_count": "lambda match: 0 < match.value < 100" + }, + "formatter": {"cd": "eval:int", "cd_count": "eval:int"}, + "children": true, + "private_parent": true, + "properties": {"cd": [null], "cd_count": [null]} + }, + "_cd_count": { + "regex": "(?P\\d+)-?cds?", + "validator": { + "cd": "lambda match: 0 < match.value < 100", + "cd_count": "lambda match: 0 < match.value < 100" + }, + "formatter": {"cd_count": "eval:int"}, + "children": true, + "private_parent": true, + "properties": {"cd": [null], "cd_count": [null]} } }, "container": { @@ -149,6 +225,40 @@ ] } }, + "edition": { + "edition": { + "Collector": {"string": ["collector"], "regex": ["collector'?s?-edition", "edition-collector"]}, + "Special": [ + {"regex": ["special-edition", "edition-special"], "conflict_solver": "lambda match, other: other if other.name == 'episode_details' and other.value == 'Special' else '__default__'"}, + {"string": "se", "tags": "has-neighbor"} + ], + "Director's Definitive Cut": "ddc", + "Criterion": {"string": ["CC", "Criterion"], "regex": ["criterion-edition", "edition-criterion"] }, + "Deluxe": {"string": ["deluxe"], "regex": ["deluxe-edition", "edition-deluxe"] }, + "Limited": {"string": ["limited"], "regex": ["limited-edition"], "tags": ["has-neighbor", "release-group-prefix"]}, + "Theatrical": {"string": ["theatrical"], "regex": ["theatrical-cut", "theatrical-edition"]}, + "Director's Cut": {"string": ["DC"], "regex": ["director'?s?-cut", "director'?s?-cut-edition", "edition-director'?s?-cut"]}, + "Extended": {"string": ["extended"], "regex": ["extended-?cut", "extended-?version"], "tags": ["has-neighbor", "release-group-prefix"]}, + "Alternative Cut": {"regex": ["alternat(e|ive)(?:-?Cut)?"], "tags": ["has-neighbor", "release-group-prefix"]}, + "Remastered": [ + {"string": "Remastered", "tags": ["has-neighbor", "release-group-prefix"]}, + {"regex": "4k-remaster(?:ed)?", "tags": ["release-group-prefix"]} + ], + "Restored": [ + {"string": "Restored", "tags": ["has-neighbor", "release-group-prefix"]}, + {"regex": "4k-restore(?:d)?", "tags": ["release-group-prefix"]} + ], + "Uncensored": {"string": "Uncensored", "tags": ["has-neighbor", "release-group-prefix"]}, + "Uncut": {"string": "Uncut", "tags": ["has-neighbor", "release-group-prefix"]}, + "Unrated": {"string": "Unrated", "tags": ["has-neighbor", "release-group-prefix"]}, + "Festival": {"string": "Festival", "tags": ["has-neighbor-before", "has-neighbor-after"]}, + "IMAX": {"string": ["imax"], "regex": ["imax-edition"]}, + "Fan": {"regex": ["fan-edit(?:ion)?", "fan-collection"]}, + "Ultimate": {"regex": ["ultimate-edition"]}, + "_Ultimate_Collector": {"regex": ["ultimate-collector'?s?-edition"], "value": ["Ultimate", "Collector"]}, + "_Ultimate_Fan": {"regex": ["ultimate-fan-edit(?:ion)?", "ultimate-fan-collection"], "value": ["Ultimate", "Fan"]} + } + }, "episodes": { "season_max_range": 100, "episode_max_range": 100, @@ -211,6 +321,11 @@ "All" ] }, + "film": { + "film": { + "_f": {"regex": "f(\\d{1,2})", "name": "film", "validate_all": true, "validator": {"__parent__": "import:seps_surround"}, "private_parent": true, "children": true, "formatter": "eval:int"} + } + }, "language": { "synonyms": { "ell": [ @@ -270,12 +385,15 @@ ], "mul": [ "multi", + "multiple", "dl" ] }, "subtitle_affixes": [ "sub", "subs", + "subtitle", + "subtitles", "esub", "esubs", "subbed", @@ -298,8 +416,7 @@ "legendas", "legendado", "subtitulado", - "soft", - "subtitles" + "soft" ], "subtitle_suffixes": [ "subforced", @@ -323,6 +440,95 @@ "true" ] }, + "other": { + "other": { + "Audio Fixed": {"regex": ["Audio-?Fix", "Audio-?Fixed"]}, + "Sync Fixed": {"regex": ["Sync-?Fix", "Sync-?Fixed"]}, + "Dual Audio": {"string": ["Dual"], "regex": ["Dual-?Audio"]}, + "Widescreen": {"string": ["ws"], "regex": ["wide-?screen"]}, + "Reencoded": {"regex": ["Re-?Enc(?:oded)?"]}, + "_repack_with_count": {"regex": ["Repack(?P\\d*)", "Rerip(?P\\d*)"], "value": {"other": "Proper"}, "tags": ["streaming_service.prefix", "streaming_service.suffix"]}, + "Proper": [ + {"string": "Proper", "tags": ["has-neighbor", "streaming_service.prefix", "streaming_service.suffix"]}, + {"regex": ["Real-Proper", "Real-Repack", "Real-Rerip"], "tags": ["streaming_service.prefix", "streaming_service.suffix", "real"]}, + {"string": "Real", "tags": ["has-neighbor", "streaming_service.prefix", "streaming_service.suffix", "real"]} + ], + "Fix": [ + {"string": ["Fix", "Fixed"], "tags": ["has-neighbor-before", "has-neighbor-after", "streaming_service.prefix", "streaming_service.suffix"]}, + {"string": ["Dirfix", "Nfofix", "Prooffix"], "tags": ["streaming_service.prefix", "streaming_service.suffix"]}, + {"regex": ["(?:Proof-?)?Sample-?Fix"], "tags": ["streaming_service.prefix", "streaming_service.suffix"]} + ], + "Fan Subtitled": {"string": "Fansub", "tags": "has-neighbor"}, + "Fast Subtitled": {"string": "Fastsub", "tags": "has-neighbor"}, + "Region 5": "R5", + "Region C": "RC", + "Preair": {"regex": "Pre-?Air"}, + "PS Vita": [ + {"regex": "(?:PS-?)Vita"}, + {"string": "Vita", "tags": "has-neighbor"} + ], + "_HdRip": {"value": {"other": "HD", "another": "Rip"}, "regex": ["(HD)(?PRip)"], "private_parent": true, "children": true, "validator":{"__parent__": "import:seps_surround"}, "validate_all": true}, + "Screener": [ + "Screener", + {"regex": "Scr(?:eener)?", "validator": null, "tags": ["other.validate.screener", "source-prefix", "source-suffix"]} + ], + "Remux": "Remux", + "Hybrid": "Hybrid", + "PAL": "PAL", + "SECAM": "SECAM", + "NTSC": "NTSC", + "XXX": "XXX", + "2in1": "2in1", + "3D": {"string": "3D", "tags": "has-neighbor"}, + "High Quality": {"string": "HQ", "tags": "uhdbluray-neighbor"}, + "High Resolution": "HR", + "Line Dubbed": "LD", + "Mic Dubbed": "MD", + "Micro HD": ["mHD", "HDLight"], + "Low Definition": "LDTV", + "High Frame Rate": "HFR", + "Variable Frame Rate": "VFR", + "HD": {"string": "HD", "validator": null, "tags": ["streaming_service.prefix", "streaming_service.suffix"]}, + "Full HD": {"string": ["FHD"],"regex": ["Full-?HD"], "validator": null, "tags": ["streaming_service.prefix", "streaming_service.suffix"]}, + "Ultra HD": {"string": ["UHD"],"regex": ["Ultra-?(?:HD)?"], "validator": null, "tags": ["streaming_service.prefix", "streaming_service.suffix"]}, + "Upscaled": {"regex": "Upscaled?"}, + "Complete": {"string": ["Complet", "Complete"], "tags": ["has-neighbor", "release-group-prefix"]}, + "Classic": {"string": "Classic", "tags": ["has-neighbor", "release-group-prefix"]}, + "Bonus": {"string": "Bonus", "tags": ["has-neighbor", "release-group-prefix"]}, + "Trailer": {"string": "Trailer", "tags": ["has-neighbor", "release-group-prefix"]}, + "Retail": {"string": "Retail", "tags": ["has-neighbor", "release-group-prefix"]}, + "Colorized": {"string": "Colorized", "tags": ["has-neighbor", "release-group-prefix"]}, + "Internal": {"string": "Internal", "tags": ["has-neighbor", "release-group-prefix"]}, + "Line Audio": {"string": "LiNE", "tags": ["has-neighbor-before", "has-neighbor-after", "release-group-prefix"]}, + "Read NFO": {"regex": "Read-?NFO"}, + "Converted": {"string": "CONVERT", "tags": "has-neighbor"}, + "Documentary": {"string": ["DOCU", "DOKU"], "tags": "has-neighbor"}, + "Open Matte": {"regex": "(?:OM|Open-?Matte)", "tags": "has-neighbor"}, + "Straight to Video": {"string": "STV", "tags": "has-neighbor"}, + "Original Aspect Ratio": {"string": "OAR", "tags": "has-neighbor"}, + "East Coast Feed": {"regex": "(?:Live-)?(?:Episode-)?East-?(?:Coast-)?Feed"}, + "West Coast Feed": {"regex": "(?:Live-)?(?:Episode-)?West-?(?:Coast-)?Feed"}, + "Original Video": {"string": ["VO", "OV"], "tags": "has-neighbor"}, + "Original Animated Video": {"string": ["Ova", "Oav"]}, + "Original Net Animation": "Ona", + "Original Animation DVD": "Oad", + "Mux": {"string": "Mux", "validator": "import:seps_after", "tags": ["other.validate.mux", "video-codec-prefix", "source-suffix"]}, + "Hardcoded Subtitles": ["HC", "vost"], + "Standard Dynamic Range": {"string": "SDR", "tags": "uhdbluray-neighbor"}, + "HDR10": {"regex": "HDR(?:10)?", "tags": "uhdbluray-neighbor"}, + "Dolby Vision": {"regex": "Dolby-?Vision", "tags": "uhdbluray-neighbor"}, + "BT.2020": {"regex": "BT-?2020","tags": "uhdbluray-neighbor"}, + "Sample": {"string": "Sample", "tags": ["at-end", "not-a-release-group"]}, + "Extras": [ + {"string": "Extras", "tags": "has-neighbor"}, + {"regex": "Digital-?Extras?"} + ], + "Proof": {"string": "Proof", "tags": ["at-end", "not-a-release-group"]}, + "Obfuscated": {"string": ["Obfuscated", "Scrambled"], "tags": ["at-end", "not-a-release-group"]}, + "Repost": {"string": ["xpost", "postbot", "asrequested"], "tags": "not-a-release-group"}, + "_complete_words": {"callable": "import:guessit.rules.properties.other:complete_words", "season_words": ["seasons?", "series?"], "complete_article_words": ["The"]} + } + }, "part": { "prefixes": [ "pt", @@ -342,21 +548,22 @@ }, "screen_size": { "frame_rates": [ - "23.976", - "24", - "25", - "29.970", - "30", - "48", - "50", - "60", - "120" + "23\\.976", + "24(?:\\.0{1,3})?", + "25(?:\\.0{1,3})?", + "29\\.970", + "30(?:\\.0{1,3})?", + "48(?:\\.0{1,3})?", + "50(?:\\.0{1,3})?", + "60(?:\\.0{1,3})?", + "120(?:\\.0{1,3})?" ], "min_ar": 1.333, "max_ar": 1.898, "interlaced": [ "360", "480", + "540", "576", "900", "1080" @@ -375,6 +582,10 @@ "4320" ] }, + "source": { + "rip_prefix": "(?PRip)-?", + "rip_suffix": "-?(?PRip)" + }, "website": { "safe_tlds": [ "com", @@ -395,6 +606,7 @@ ] }, "streaming_service": { + "9Now": "9NOW", "A&E": [ "AE", "A&E" @@ -416,11 +628,21 @@ "Animal Planet": "ANPL", "AnimeLab": "ANLB", "AOL": "AOL", + "AppleTV": [ + "ATVP", + "ATV+", + "APTV" + ], "ARD": "ARD", "BBC iPlayer": [ "iP", "re:BBC-?iPlayer" ], + "Binge": "BNGE", + "Blackpills": "BKPL", + "BluTV": "BLU", + "Boomerang": "BOOM", + "Disney+": "DSNP", "BravoTV": "BRAV", "Canal+": "CNLP", "Cartoon Network": "CN", @@ -431,11 +653,15 @@ "CC", "re:Comedy-?Central" ], - "Channel 4": "4OD", + "Channel 4": [ + "ALL4", + "4OD" + ], "CHRGD": "CHGD", "Cinemax": "CMAX", "Country Music Television": "CMT", "Comedians in Cars Getting Coffee": "CCGC", + "Crave": "CRAV", "Crunchy Roll": [ "CR", "re:Crunchy-?Roll" @@ -457,6 +683,7 @@ "DISC", "Discovery" ], + "Discovery Plus": "DSCP", "Disney": [ "DSNY", "Disney" @@ -469,12 +696,17 @@ "El Trece": "ETTV", "ESPN": "ESPN", "Esquire": "ESQ", + "Facebook Watch": "FBWatch", "Family": "FAM", "Family Jr": "FJR", + "Fandor": "FANDOR", "Food Network": "FOOD", "Fox": "FOX", + "Fox Premium": "FOXP", + "Foxtel": "FXTL", "Freeform": "FREE", "FYI Network": "FYI", + "GagaOOLala": "Gaga", "Global": "GLBL", "GloboSat Play": "GLOB", "Hallmark": "HLMK", @@ -482,6 +714,7 @@ "HBO", "re:HBO-?Go" ], + "HBO Max": "HMAX", "HGTV": "HGTV", "History": [ "HIST", @@ -490,7 +723,13 @@ "Hulu": "HULU", "Investigation Discovery": "ID", "IFC": "IFC", - "iTunes": "iTunes", + "hoichoi": "HoiChoi", + "iflix": "IFX", + "iQIYI": "iQIYI", + "iTunes": [ + "iTunes", + {"pattern": "iT", "ignore_case": false} + ], "ITV": "ITV", "Knowledge Network": "KNOW", "Lifetime": "LIFE", @@ -501,6 +740,9 @@ ], "MSNBC": "MNBC", "MTV": "MTV", + "MUBI": "MUBI", + "National Audiovisual Institute": "INA", + "National Film Board": "NFB", "National Geographic": [ "NATG", "re:National-?Geographic" @@ -519,24 +761,38 @@ "NHL GameCenter": "GC", "Nickelodeon": [ "NICK", - "Nickelodeon" + "Nickelodeon", + "NICKAPP" ], "Norsk Rikskringkasting": "NRK", "OnDemandKorea": [ "ODK", "OnDemandKorea" ], + "Opto": "OPTO", + "Oprah Winfrey Network": "OWN", "PBS": "PBS", "PBS Kids": "PBSK", + "Peacock": [ + "PCOK", + "Peacock" + ], "Playstation Network": "PSN", "Pluzz": "PLUZ", + "PokerGO": "POGO", + "Rakuten TV": "RKTN", + "The Roku Channel": "ROKU", "RTE One": "RTE", - "SBS (AU)": "SBS", + "RUUTU": "RUUTU", + "SBS": "SBS", + "Science Channel": "SCI", "SeeSo": [ "SESO", "SeeSo" ], "Shomi": "SHMI", + "Showtime": "SHO", + "Sony": "SONY", "Spike": "SPIK", "Spike TV": [ "SPKE", @@ -564,7 +820,9 @@ "TVL", "re:TV-?Land" ], + "TVNZ": "TVNZ", "UFC": "UFC", + "UFC Fight Pass": "FP", "UKTV": "UKTV", "Univision": "UNIV", "USA Network": "USAN", diff --git a/libs/common/guessit/data/__init__.py b/libs/common/guessit/data/__init__.py new file mode 100644 index 00000000..86fd56d8 --- /dev/null +++ b/libs/common/guessit/data/__init__.py @@ -0,0 +1,5 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +Data +""" diff --git a/libs/common/guessit/tlds-alpha-by-domain.txt b/libs/common/guessit/data/tlds-alpha-by-domain.txt similarity index 100% rename from libs/common/guessit/tlds-alpha-by-domain.txt rename to libs/common/guessit/data/tlds-alpha-by-domain.txt diff --git a/libs/common/guessit/monkeypatch.py b/libs/common/guessit/monkeypatch.py index 33e7c46e..14ddf6e8 100644 --- a/libs/common/guessit/monkeypatch.py +++ b/libs/common/guessit/monkeypatch.py @@ -4,10 +4,7 @@ Monkeypatch initialisation functions """ -try: - from collections import OrderedDict -except ImportError: # pragma: no-cover - from ordereddict import OrderedDict # pylint:disable=import-error +from collections import OrderedDict from rebulk.match import Match diff --git a/libs/common/guessit/options.py b/libs/common/guessit/options.py index 8fa6825c..1e1ede1a 100644 --- a/libs/common/guessit/options.py +++ b/libs/common/guessit/options.py @@ -6,12 +6,13 @@ Options import copy import json import os -import pkgutil import shlex - from argparse import ArgumentParser -import six +try: + from importlib.resources import read_text +except ImportError: + from importlib_resources import read_text def build_argument_parser(): @@ -68,6 +69,8 @@ def build_argument_parser(): help='Display information for filename guesses as json output') output_opts.add_argument('-y', '--yaml', dest='yaml', action='store_true', default=None, help='Display information for filename guesses as yaml output') + output_opts.add_argument('-i', '--output-input-string', dest='output_input_string', action='store_true', + default=False, help='Add input_string property in the output') conf_opts = opts.add_argument_group("Configuration") conf_opts.add_argument('-c', '--config', dest='config', action='append', default=None, @@ -108,7 +111,7 @@ def parse_options(options=None, api=False): :return: :rtype: """ - if isinstance(options, six.string_types): + if isinstance(options, str): args = shlex.split(options) options = vars(argument_parser.parse_args(args)) elif options is None: @@ -142,7 +145,7 @@ def load_config(options): configurations = [] if not options.get('no_default_config'): - default_options_data = pkgutil.get_data('guessit', 'config/options.json').decode('utf-8') + default_options_data = read_text('guessit.config', 'options.json') default_options = json.loads(default_options_data) configurations.append(default_options) @@ -153,7 +156,7 @@ def load_config(options): cwd = os.getcwd() yaml_supported = False try: - import yaml # pylint:disable=unused-variable,unused-import + import yaml # pylint:disable=unused-variable,unused-import,import-outside-toplevel yaml_supported = True except ImportError: pass @@ -176,7 +179,7 @@ def load_config(options): if 'advanced_config' not in config: # Guessit doesn't work without advanced_config, so we use default if no configuration files provides it. - default_options_data = pkgutil.get_data('guessit', 'config/options.json').decode('utf-8') + default_options_data = read_text('guessit.config', 'options.json') default_options = json.loads(default_options_data) config['advanced_config'] = default_options['advanced_config'] @@ -225,7 +228,7 @@ def merge_option_value(option, value, merged): if value is not None and option != 'pristine': if option in merged.keys() and isinstance(merged[option], list): for val in value: - if val not in merged[option]: + if val not in merged[option] and val is not None: merged[option].append(val) elif option in merged.keys() and isinstance(merged[option], dict): merged[option] = merge_options(merged[option], value) @@ -246,17 +249,16 @@ def load_config_file(filepath): :rtype: """ if filepath.endswith('.json'): - with open(filepath) as config_file_data: + with open(filepath, encoding='utf-8') as config_file_data: return json.load(config_file_data) if filepath.endswith('.yaml') or filepath.endswith('.yml'): try: - import yaml - with open(filepath) as config_file_data: + import yaml # pylint:disable=import-outside-toplevel + with open(filepath, encoding='utf-8') as config_file_data: return yaml.load(config_file_data, yaml.SafeLoader) - except ImportError: # pragma: no cover + except ImportError as err: # pragma: no cover raise ConfigurationException('Configuration file extension is not supported. ' - 'PyYAML should be installed to support "%s" file' % ( - filepath,)) + f'PyYAML should be installed to support "{filepath}" file') from err try: # Try to load input as JSON @@ -264,7 +266,7 @@ def load_config_file(filepath): except: # pylint: disable=bare-except pass - raise ConfigurationException('Configuration file extension is not supported for "%s" file.' % (filepath,)) + raise ConfigurationException(f'Configuration file extension is not supported for "{filepath}" file.') def get_options_file_locations(homedir, cwd, yaml_supported=False): diff --git a/libs/common/guessit/reutils.py b/libs/common/guessit/reutils.py index 0b654d27..e6fc29e5 100644 --- a/libs/common/guessit/reutils.py +++ b/libs/common/guessit/reutils.py @@ -25,11 +25,11 @@ def build_or_pattern(patterns, name=None, escape=False): if not or_pattern: or_pattern.append('(?') if name: - or_pattern.append('P<' + name + '>') + or_pattern.append(f'P<{name}>') else: or_pattern.append(':') else: or_pattern.append('|') - or_pattern.append('(?:%s)' % re.escape(pattern) if escape else pattern) + or_pattern.append(f'(?:{re.escape(pattern)})' if escape else pattern) or_pattern.append(')') return ''.join(or_pattern) diff --git a/libs/common/guessit/rules/__init__.py b/libs/common/guessit/rules/__init__.py index f16bc4e0..d975cf58 100644 --- a/libs/common/guessit/rules/__init__.py +++ b/libs/common/guessit/rules/__init__.py @@ -26,7 +26,7 @@ from .properties.other import other from .properties.size import size from .properties.bit_rate import bit_rate from .properties.edition import edition -from .properties.cds import cds +from .properties.cd import cd from .properties.bonus import bonus from .properties.film import film from .properties.part import part @@ -71,7 +71,7 @@ def rebulk_builder(config): rebulk.rebulk(size(_config('size'))) rebulk.rebulk(bit_rate(_config('bit_rate'))) rebulk.rebulk(edition(_config('edition'))) - rebulk.rebulk(cds(_config('cds'))) + rebulk.rebulk(cd(_config('cd'))) rebulk.rebulk(bonus(_config('bonus'))) rebulk.rebulk(film(_config('film'))) rebulk.rebulk(part(_config('part'))) diff --git a/libs/common/guessit/rules/common/__init__.py b/libs/common/guessit/rules/common/__init__.py index 444dc72a..17ce2331 100644 --- a/libs/common/guessit/rules/common/__init__.py +++ b/libs/common/guessit/rules/common/__init__.py @@ -3,7 +3,7 @@ """ Common module """ -import re +from rebulk.remodule import re seps = r' [](){}+*|=-_~#/\\.,;:' # list of tags/words separators seps_no_groups = seps.replace('[](){}', '') @@ -13,3 +13,10 @@ title_seps = r'-+/\|' # separators for title dash = (r'-', r'['+re.escape(seps_no_fs)+']') # abbreviation used by many rebulk objects. alt_dash = (r'@', r'['+re.escape(seps_no_fs)+']') # abbreviation used by many rebulk objects. + + +def optional(pattern): + """ + Make a regex pattern optional + """ + return '(?:' + pattern + ')?' diff --git a/libs/common/guessit/rules/common/comparators.py b/libs/common/guessit/rules/common/comparators.py index f46f0c11..27749314 100644 --- a/libs/common/guessit/rules/common/comparators.py +++ b/libs/common/guessit/rules/common/comparators.py @@ -3,10 +3,8 @@ """ Comparators """ -try: - from functools import cmp_to_key -except ImportError: - from ...backports import cmp_to_key + +from functools import cmp_to_key def marker_comparator_predicate(match): @@ -14,10 +12,10 @@ def marker_comparator_predicate(match): Match predicate used in comparator """ return ( - not match.private - and match.name not in ('proper_count', 'title') - and not (match.name == 'container' and 'extension' in match.tags) - and not (match.name == 'other' and match.value == 'Rip') + not match.private + and match.name not in ('proper_count', 'title') + and not (match.name == 'container' and 'extension' in match.tags) + and not (match.name == 'other' and match.value == 'Rip') ) diff --git a/libs/common/guessit/rules/common/date.py b/libs/common/guessit/rules/common/date.py index e513af9f..1e114569 100644 --- a/libs/common/guessit/rules/common/date.py +++ b/libs/common/guessit/rules/common/date.py @@ -11,13 +11,21 @@ _dsep = r'[-/ \.]' _dsep_bis = r'[-/ \.x]' date_regexps = [ + # pylint:disable=consider-using-f-string re.compile(r'%s((\d{8}))%s' % (_dsep, _dsep), re.IGNORECASE), + # pylint:disable=consider-using-f-string re.compile(r'%s((\d{6}))%s' % (_dsep, _dsep), re.IGNORECASE), + # pylint:disable=consider-using-f-string re.compile(r'(?:^|[^\d])((\d{2})%s(\d{1,2})%s(\d{1,2}))(?:$|[^\d])' % (_dsep, _dsep), re.IGNORECASE), + # pylint:disable=consider-using-f-string re.compile(r'(?:^|[^\d])((\d{1,2})%s(\d{1,2})%s(\d{2}))(?:$|[^\d])' % (_dsep, _dsep), re.IGNORECASE), + # pylint:disable=consider-using-f-string re.compile(r'(?:^|[^\d])((\d{4})%s(\d{1,2})%s(\d{1,2}))(?:$|[^\d])' % (_dsep_bis, _dsep), re.IGNORECASE), + # pylint:disable=consider-using-f-string re.compile(r'(?:^|[^\d])((\d{1,2})%s(\d{1,2})%s(\d{4}))(?:$|[^\d])' % (_dsep, _dsep_bis), re.IGNORECASE), + # pylint:disable=consider-using-f-string re.compile(r'(?:^|[^\d])((\d{1,2}(?:st|nd|rd|th)?%s(?:[a-z]{3,10})%s\d{4}))(?:$|[^\d])' % (_dsep, _dsep), + # pylint:disable=consider-using-f-string re.IGNORECASE)] diff --git a/libs/common/guessit/rules/common/expected.py b/libs/common/guessit/rules/common/expected.py index eae562a2..19f2f887 100644 --- a/libs/common/guessit/rules/common/expected.py +++ b/libs/common/guessit/rules/common/expected.py @@ -3,7 +3,7 @@ """ Expected property factory """ -import re +from rebulk.remodule import re from rebulk import Rebulk from rebulk.utils import find_all diff --git a/libs/common/guessit/rules/common/numeral.py b/libs/common/guessit/rules/common/numeral.py index 7c064fdb..fa698322 100644 --- a/libs/common/guessit/rules/common/numeral.py +++ b/libs/common/guessit/rules/common/numeral.py @@ -81,7 +81,7 @@ def __parse_roman(value): :rtype: """ if not __romanNumeralPattern.search(value): - raise ValueError('Invalid Roman numeral: %s' % value) + raise ValueError(f'Invalid Roman numeral: {value}') result = 0 index = 0 diff --git a/libs/common/guessit/rules/common/quantity.py b/libs/common/guessit/rules/common/quantity.py index bbd41fbb..aed15e71 100644 --- a/libs/common/guessit/rules/common/quantity.py +++ b/libs/common/guessit/rules/common/quantity.py @@ -3,15 +3,14 @@ """ Quantities: Size """ -import re from abc import abstractmethod -import six +from rebulk.remodule import re from ..common import seps -class Quantity(object): +class Quantity: """ Represent a quantity object with magnitude and units. """ @@ -50,7 +49,7 @@ class Quantity(object): return hash(str(self)) def __eq__(self, other): - if isinstance(other, six.string_types): + if isinstance(other, str): return str(self) == other if not isinstance(other, self.__class__): return NotImplemented @@ -60,10 +59,10 @@ class Quantity(object): return not self == other def __repr__(self): - return '<{0} [{1}]>'.format(self.__class__.__name__, self) + return f'<{self.__class__.__name__} [{self}]>' def __str__(self): - return '{0}{1}'.format(self.magnitude, self.units) + return f'{self.magnitude}{self.units}' class Size(Quantity): diff --git a/libs/common/guessit/rules/markers/groups.py b/libs/common/guessit/rules/markers/groups.py index 4716d15d..ac66f044 100644 --- a/libs/common/guessit/rules/markers/groups.py +++ b/libs/common/guessit/rules/markers/groups.py @@ -5,6 +5,7 @@ Groups markers (...), [...] and {...} """ from rebulk import Rebulk +from ...options import ConfigurationException def groups(config): """ @@ -21,6 +22,9 @@ def groups(config): starting = config['starting'] ending = config['ending'] + if len(starting) != len(ending): + raise ConfigurationException("Starting and ending groups must have the same length") + def mark_groups(input_string): """ Functional pattern to mark groups (...), [...] and {...}. @@ -28,7 +32,7 @@ def groups(config): :param input_string: :return: """ - openings = ([], [], []) + openings = ([], ) * len(starting) i = 0 ret = [] diff --git a/libs/common/guessit/rules/processors.py b/libs/common/guessit/rules/processors.py index 5b018140..069e75d2 100644 --- a/libs/common/guessit/rules/processors.py +++ b/libs/common/guessit/rules/processors.py @@ -6,8 +6,6 @@ Processors from collections import defaultdict import copy -import six - from rebulk import Rebulk, Rule, CustomRule, POST_PROCESS, PRE_PROCESS, AppendMatch, RemoveMatch from .common import seps_no_groups @@ -68,7 +66,7 @@ class EquivalentHoles(Rule): for name in matches.names: for hole in list(holes): for current_match in matches.named(name): - if isinstance(current_match.value, six.string_types) and \ + if isinstance(current_match.value, str) and \ hole.value.lower() == current_match.value.lower(): if 'equivalent-ignore' in current_match.tags: continue @@ -96,7 +94,7 @@ class RemoveAmbiguous(Rule): consequence = RemoveMatch def __init__(self, sort_function=marker_sorted, predicate=None): - super(RemoveAmbiguous, self).__init__() + super().__init__() self.sort_function = sort_function self.predicate = predicate @@ -131,7 +129,7 @@ class RemoveLessSpecificSeasonEpisode(RemoveAmbiguous): keep the one tagged as 'SxxExx' or in the rightmost filepart. """ def __init__(self, name): - super(RemoveLessSpecificSeasonEpisode, self).__init__( + super().__init__( sort_function=(lambda markers, matches: marker_sorted(list(reversed(markers)), matches, lambda match: match.name == name and 'SxxExx' in match.tags)), diff --git a/libs/common/guessit/rules/properties/audio_codec.py b/libs/common/guessit/rules/properties/audio_codec.py index 815caff9..36ad3561 100644 --- a/libs/common/guessit/rules/properties/audio_codec.py +++ b/libs/common/guessit/rules/properties/audio_codec.py @@ -9,6 +9,7 @@ from rebulk.remodule import re from ..common import dash from ..common.pattern import is_disabled from ..common.validators import seps_before, seps_after +from ...config import load_config_patterns audio_properties = ['audio_codec', 'audio_profile', 'audio_channels'] @@ -22,8 +23,8 @@ def audio_codec(config): # pylint:disable=unused-argument :return: Created Rebulk object :rtype: Rebulk """ - rebulk = Rebulk()\ - .regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])\ + rebulk = Rebulk() \ + .regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]) \ .string_defaults(ignore_case=True) def audio_codec_priority(match1, match2): @@ -46,46 +47,19 @@ def audio_codec(config): # pylint:disable=unused-argument conflict_solver=audio_codec_priority, disabled=lambda context: is_disabled(context, 'audio_codec')) - rebulk.regex("MP3", "LAME", r"LAME(?:\d)+-?(?:\d)+", value="MP3") - rebulk.string("MP2", value="MP2") - rebulk.regex('Dolby', 'DolbyDigital', 'Dolby-Digital', 'DD', 'AC3D?', value='Dolby Digital') - rebulk.regex('Dolby-?Atmos', 'Atmos', value='Dolby Atmos') - rebulk.string("AAC", value="AAC") - rebulk.string('EAC3', 'DDP', 'DD+', value='Dolby Digital Plus') - rebulk.string("Flac", value="FLAC") - rebulk.string("DTS", value="DTS") - rebulk.regex('DTS-?HD', 'DTS(?=-?MA)', value='DTS-HD', - conflict_solver=lambda match, other: other if other.name == 'audio_codec' else '__default__') - rebulk.regex('True-?HD', value='Dolby TrueHD') - rebulk.string('Opus', value='Opus') - rebulk.string('Vorbis', value='Vorbis') - rebulk.string('PCM', value='PCM') - rebulk.string('LPCM', value='LPCM') + load_config_patterns(rebulk, config.get('audio_codec')) rebulk.defaults(clear=True, name='audio_profile', disabled=lambda context: is_disabled(context, 'audio_profile')) - rebulk.string('MA', value='Master Audio', tags=['audio_profile.rule', 'DTS-HD']) - rebulk.string('HR', 'HRA', value='High Resolution Audio', tags=['audio_profile.rule', 'DTS-HD']) - rebulk.string('ES', value='Extended Surround', tags=['audio_profile.rule', 'DTS']) - rebulk.string('HE', value='High Efficiency', tags=['audio_profile.rule', 'AAC']) - rebulk.string('LC', value='Low Complexity', tags=['audio_profile.rule', 'AAC']) - rebulk.string('HQ', value='High Quality', tags=['audio_profile.rule', 'Dolby Digital']) - rebulk.string('EX', value='EX', tags=['audio_profile.rule', 'Dolby Digital']) + + load_config_patterns(rebulk, config.get('audio_profile')) rebulk.defaults(clear=True, name="audio_channels", disabled=lambda context: is_disabled(context, 'audio_channels')) - rebulk.regex('7[01]', value='7.1', validator=seps_after, tags='weak-audio_channels') - rebulk.regex('5[01]', value='5.1', validator=seps_after, tags='weak-audio_channels') - rebulk.string('20', value='2.0', validator=seps_after, tags='weak-audio_channels') - for value, items in config.get('audio_channels').items(): - for item in items: - if item.startswith('re:'): - rebulk.regex(item[3:], value=value, children=True) - else: - rebulk.string(item, value=value) + load_config_patterns(rebulk, config.get('audio_channels')) rebulk.rules(DtsHDRule, DtsRule, AacRule, DolbyDigitalRule, AudioValidatorRule, HqConflictRule, AudioChannelsValidatorRule) @@ -130,7 +104,7 @@ class AudioProfileRule(Rule): consequence = RemoveMatch def __init__(self, codec): - super(AudioProfileRule, self).__init__() + super().__init__() self.codec = codec def enabled(self, context): @@ -139,20 +113,20 @@ class AudioProfileRule(Rule): def when(self, matches, context): profile_list = matches.named('audio_profile', lambda match: 'audio_profile.rule' in match.tags and - self.codec in match.tags) + self.codec in match.tags) ret = [] for profile in profile_list: codec = matches.at_span(profile.span, lambda match: match.name == 'audio_codec' and - match.value == self.codec, 0) + match.value == self.codec, 0) if not codec: codec = matches.previous(profile, lambda match: match.name == 'audio_codec' and - match.value == self.codec) + match.value == self.codec) if not codec: codec = matches.next(profile, lambda match: match.name == 'audio_codec' and - match.value == self.codec) + match.value == self.codec) if not codec: ret.append(profile) if codec: @@ -166,7 +140,7 @@ class DtsHDRule(AudioProfileRule): """ def __init__(self): - super(DtsHDRule, self).__init__('DTS-HD') + super().__init__('DTS-HD') class DtsRule(AudioProfileRule): @@ -175,7 +149,7 @@ class DtsRule(AudioProfileRule): """ def __init__(self): - super(DtsRule, self).__init__('DTS') + super().__init__('DTS') class AacRule(AudioProfileRule): @@ -184,7 +158,7 @@ class AacRule(AudioProfileRule): """ def __init__(self): - super(AacRule, self).__init__('AAC') + super().__init__('AAC') class DolbyDigitalRule(AudioProfileRule): @@ -193,7 +167,7 @@ class DolbyDigitalRule(AudioProfileRule): """ def __init__(self): - super(DolbyDigitalRule, self).__init__('Dolby Digital') + super().__init__('Dolby Digital') class HqConflictRule(Rule): diff --git a/libs/common/guessit/rules/properties/bit_rate.py b/libs/common/guessit/rules/properties/bit_rate.py index d279c9f1..e82c223a 100644 --- a/libs/common/guessit/rules/properties/bit_rate.py +++ b/libs/common/guessit/rules/properties/bit_rate.py @@ -3,15 +3,14 @@ """ video_bit_rate and audio_bit_rate properties """ -import re - from rebulk import Rebulk +from rebulk.remodule import re from rebulk.rules import Rule, RemoveMatch, RenameMatch from ..common import dash, seps from ..common.pattern import is_disabled -from ..common.quantity import BitRate from ..common.validators import seps_surround +from ...config import load_config_patterns def bit_rate(config): # pylint:disable=unused-argument @@ -27,13 +26,8 @@ def bit_rate(config): # pylint:disable=unused-argument and is_disabled(context, 'video_bit_rate'))) rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]) rebulk.defaults(name='audio_bit_rate', validator=seps_surround) - rebulk.regex(r'\d+-?[kmg]b(ps|its?)', r'\d+\.\d+-?[kmg]b(ps|its?)', - conflict_solver=( - lambda match, other: match - if other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags - else other - ), - formatter=BitRate.fromstring, tags=['release-group-prefix']) + + load_config_patterns(rebulk, config.get('bit_rate')) rebulk.rules(BitRateTypeRule) diff --git a/libs/common/guessit/rules/properties/bonus.py b/libs/common/guessit/rules/properties/bonus.py index 54087aa3..c9e31571 100644 --- a/libs/common/guessit/rules/properties/bonus.py +++ b/libs/common/guessit/rules/properties/bonus.py @@ -3,14 +3,13 @@ """ bonus property """ -from rebulk.remodule import re - from rebulk import Rebulk, AppendMatch, Rule +from rebulk.remodule import re from .title import TitleFromPosition from ..common.formatters import cleanup from ..common.pattern import is_disabled -from ..common.validators import seps_surround +from ...config import load_config_patterns def bonus(config): # pylint:disable=unused-argument @@ -23,14 +22,9 @@ def bonus(config): # pylint:disable=unused-argument :rtype: Rebulk """ rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'bonus')) - rebulk = rebulk.regex_defaults(flags=re.IGNORECASE) + rebulk = rebulk.regex_defaults(name='bonus', flags=re.IGNORECASE) - rebulk.regex(r'x(\d+)', name='bonus', private_parent=True, children=True, formatter=int, - validator={'__parent__': seps_surround}, - validate_all=True, - conflict_solver=lambda match, conflicting: match - if conflicting.name in ('video_codec', 'episode') and 'weak-episode' not in conflicting.tags - else '__default__') + load_config_patterns(rebulk, config.get('bonus')) rebulk.rules(BonusTitleRule) diff --git a/libs/common/guessit/rules/properties/cd.py b/libs/common/guessit/rules/properties/cd.py new file mode 100644 index 00000000..6ede3641 --- /dev/null +++ b/libs/common/guessit/rules/properties/cd.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +cd and cd_count properties +""" +from rebulk import Rebulk +from rebulk.remodule import re + +from ..common import dash +from ..common.pattern import is_disabled +from ...config import load_config_patterns + + +def cd(config): # pylint:disable=unused-argument,invalid-name + """ + Builder for rebulk object. + + :param config: rule configuration + :type config: dict + :return: Created Rebulk object + :rtype: Rebulk + """ + rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'cd')) + rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]) + + load_config_patterns(rebulk, config) + + return rebulk diff --git a/libs/common/guessit/rules/properties/cds.py b/libs/common/guessit/rules/properties/cds.py deleted file mode 100644 index 873df6fe..00000000 --- a/libs/common/guessit/rules/properties/cds.py +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -""" -cd and cd_count properties -""" -from rebulk.remodule import re - -from rebulk import Rebulk - -from ..common import dash -from ..common.pattern import is_disabled - - -def cds(config): # pylint:disable=unused-argument - """ - Builder for rebulk object. - - :param config: rule configuration - :type config: dict - :return: Created Rebulk object - :rtype: Rebulk - """ - rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'cd')) - rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]) - - rebulk.regex(r'cd-?(?P\d+)(?:-?of-?(?P\d+))?', - validator={'cd': lambda match: 0 < match.value < 100, - 'cd_count': lambda match: 0 < match.value < 100}, - formatter={'cd': int, 'cd_count': int}, - children=True, - private_parent=True, - properties={'cd': [None], 'cd_count': [None]}) - rebulk.regex(r'(?P\d+)-?cds?', - validator={'cd': lambda match: 0 < match.value < 100, - 'cd_count': lambda match: 0 < match.value < 100}, - formatter={'cd_count': int}, - children=True, - private_parent=True, - properties={'cd': [None], 'cd_count': [None]}) - - return rebulk diff --git a/libs/common/guessit/rules/properties/country.py b/libs/common/guessit/rules/properties/country.py index 172c2990..f115614a 100644 --- a/libs/common/guessit/rules/properties/country.py +++ b/libs/common/guessit/rules/properties/country.py @@ -65,7 +65,7 @@ class GuessitCountryConverter(babelfish.CountryReverseConverter): # pylint: dis return 'UK' return str(babelfish.Country(alpha2)) - def reverse(self, name): # pylint:disable=arguments-differ + def reverse(self, name): # pylint:disable=arguments-renamed # exceptions come first, as they need to override a potential match # with any of the other guessers try: @@ -87,7 +87,7 @@ class GuessitCountryConverter(babelfish.CountryReverseConverter): # pylint: dis raise babelfish.CountryReverseError(name) -class CountryFinder(object): +class CountryFinder: """Helper class to search and return country matches.""" def __init__(self, allowed_countries, common_words): diff --git a/libs/common/guessit/rules/properties/crc.py b/libs/common/guessit/rules/properties/crc.py index eedee93d..d8421574 100644 --- a/libs/common/guessit/rules/properties/crc.py +++ b/libs/common/guessit/rules/properties/crc.py @@ -35,9 +35,9 @@ def crc(config): # pylint:disable=unused-argument return rebulk -_DIGIT = 0 -_LETTER = 1 -_OTHER = 2 +_digit = 0 +_letter = 1 +_other = 2 _idnum = re.compile(r'(?P[a-zA-Z0-9-]{20,})') # 1.0, (0, 0)) @@ -61,18 +61,18 @@ def guess_idnumber(string): letter_count = 0 last_letter = None - last = _LETTER + last = _letter for c in result['uuid']: if c in '0123456789': - ci = _DIGIT + ci = _digit elif c in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ': - ci = _LETTER + ci = _letter if c != last_letter: switch_letter_count += 1 last_letter = c letter_count += 1 else: - ci = _OTHER + ci = _other if ci != last: switch_count += 1 diff --git a/libs/common/guessit/rules/properties/edition.py b/libs/common/guessit/rules/properties/edition.py index 822aa4ee..32eea945 100644 --- a/libs/common/guessit/rules/properties/edition.py +++ b/libs/common/guessit/rules/properties/edition.py @@ -3,12 +3,13 @@ """ edition property """ +from rebulk import Rebulk from rebulk.remodule import re -from rebulk import Rebulk from ..common import dash from ..common.pattern import is_disabled from ..common.validators import seps_surround +from ...config import load_config_patterns def edition(config): # pylint:disable=unused-argument @@ -21,32 +22,9 @@ def edition(config): # pylint:disable=unused-argument :rtype: Rebulk """ rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'edition')) - rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True) + rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True) rebulk.defaults(name='edition', validator=seps_surround) - rebulk.regex('collector', "collector'?s?-edition", 'edition-collector', value='Collector') - rebulk.regex('special-edition', 'edition-special', value='Special', - conflict_solver=lambda match, other: other - if other.name == 'episode_details' and other.value == 'Special' - else '__default__') - rebulk.string('se', value='Special', tags='has-neighbor') - rebulk.string('ddc', value="Director's Definitive Cut") - rebulk.regex('criterion-edition', 'edition-criterion', 'CC', value='Criterion') - rebulk.regex('deluxe', 'deluxe-edition', 'edition-deluxe', value='Deluxe') - rebulk.regex('limited', 'limited-edition', value='Limited', tags=['has-neighbor', 'release-group-prefix']) - rebulk.regex(r'theatrical-cut', r'theatrical-edition', r'theatrical', value='Theatrical') - rebulk.regex(r"director'?s?-cut", r"director'?s?-cut-edition", r"edition-director'?s?-cut", 'DC', - value="Director's Cut") - rebulk.regex('extended', 'extended-?cut', 'extended-?version', - value='Extended', tags=['has-neighbor', 'release-group-prefix']) - rebulk.regex('alternat(e|ive)(?:-?Cut)?', value='Alternative Cut', tags=['has-neighbor', 'release-group-prefix']) - for value in ('Remastered', 'Uncensored', 'Uncut', 'Unrated'): - rebulk.string(value, value=value, tags=['has-neighbor', 'release-group-prefix']) - rebulk.string('Festival', value='Festival', tags=['has-neighbor-before', 'has-neighbor-after']) - rebulk.regex('imax', 'imax-edition', value='IMAX') - rebulk.regex('fan-edit(?:ion)?', 'fan-collection', value='Fan') - rebulk.regex('ultimate-edition', value='Ultimate') - rebulk.regex("ultimate-collector'?s?-edition", value=['Ultimate', 'Collector']) - rebulk.regex('ultimate-fan-edit(?:ion)?', 'ultimate-fan-collection', value=['Ultimate', 'Fan']) + load_config_patterns(rebulk, config.get('edition')) return rebulk diff --git a/libs/common/guessit/rules/properties/episode_title.py b/libs/common/guessit/rules/properties/episode_title.py index ece8921d..2c4fab66 100644 --- a/libs/common/guessit/rules/properties/episode_title.py +++ b/libs/common/guessit/rules/properties/episode_title.py @@ -47,7 +47,7 @@ class RemoveConflictsWithEpisodeTitle(Rule): consequence = RemoveMatch def __init__(self, previous_names): - super(RemoveConflictsWithEpisodeTitle, self).__init__() + super().__init__() self.previous_names = previous_names self.next_names = ('streaming_service', 'screen_size', 'source', 'video_codec', 'audio_codec', 'other', 'container') @@ -129,7 +129,7 @@ class EpisodeTitleFromPosition(TitleBaseRule): dependency = TitleToEpisodeTitle def __init__(self, previous_names): - super(EpisodeTitleFromPosition, self).__init__('episode_title', ['title']) + super().__init__('episode_title', ['title']) self.previous_names = previous_names def hole_filter(self, hole, matches): @@ -150,12 +150,12 @@ class EpisodeTitleFromPosition(TitleBaseRule): def should_remove(self, match, matches, filepart, hole, context): if match.name == 'episode_details': return False - return super(EpisodeTitleFromPosition, self).should_remove(match, matches, filepart, hole, context) + return super().should_remove(match, matches, filepart, hole, context) def when(self, matches, context): # pylint:disable=inconsistent-return-statements if matches.named('episode_title'): return - return super(EpisodeTitleFromPosition, self).when(matches, context) + return super().when(matches, context) class AlternativeTitleReplace(Rule): @@ -166,7 +166,7 @@ class AlternativeTitleReplace(Rule): consequence = RenameMatch def __init__(self, previous_names): - super(AlternativeTitleReplace, self).__init__() + super().__init__() self.previous_names = previous_names def when(self, matches, context): # pylint:disable=inconsistent-return-statements diff --git a/libs/common/guessit/rules/properties/episodes.py b/libs/common/guessit/rules/properties/episodes.py index 345c785d..0fbffc27 100644 --- a/libs/common/guessit/rules/properties/episodes.py +++ b/libs/common/guessit/rules/properties/episodes.py @@ -338,7 +338,6 @@ def episodes(config): rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator']) - # TODO: List of words # detached of X count (season/episode) rebulk.regex(r'(?P\d+)-?' + build_or_pattern(of_words) + r'-?(?P\d+)-?' + build_or_pattern(episode_words) + '?', @@ -479,7 +478,7 @@ class SeePatternRange(Rule): consequence = [RemoveMatch, AppendMatch] def __init__(self, range_separators): - super(SeePatternRange, self).__init__() + super().__init__() self.range_separators = range_separators def when(self, matches, context): @@ -516,7 +515,7 @@ class AbstractSeparatorRange(Rule): consequence = [RemoveMatch, AppendMatch] def __init__(self, range_separators, property_name): - super(AbstractSeparatorRange, self).__init__() + super().__init__() self.range_separators = range_separators self.property_name = property_name @@ -608,7 +607,7 @@ class EpisodeNumberSeparatorRange(AbstractSeparatorRange): """ def __init__(self, range_separators): - super(EpisodeNumberSeparatorRange, self).__init__(range_separators, "episode") + super().__init__(range_separators, "episode") class SeasonSeparatorRange(AbstractSeparatorRange): @@ -617,7 +616,7 @@ class SeasonSeparatorRange(AbstractSeparatorRange): """ def __init__(self, range_separators): - super(SeasonSeparatorRange, self).__init__(range_separators, "season") + super().__init__(range_separators, "season") class RemoveWeakIfMovie(Rule): @@ -662,7 +661,7 @@ class RemoveWeak(Rule): consequence = RemoveMatch, AppendMatch def __init__(self, episode_words): - super(RemoveWeak, self).__init__() + super().__init__() self.episode_words = episode_words def when(self, matches, context): @@ -843,7 +842,7 @@ class RemoveDetachedEpisodeNumber(Rule): episode_numbers[0].value < 10 and \ episode_numbers[1].value - episode_numbers[0].value != 1: parent = episode_numbers[0] - while parent: # TODO: Add a feature in rebulk to avoid this ... + while parent: ret.append(parent) parent = parent.parent return ret diff --git a/libs/common/guessit/rules/properties/film.py b/libs/common/guessit/rules/properties/film.py index 3c7e6c0f..27166bc7 100644 --- a/libs/common/guessit/rules/properties/film.py +++ b/libs/common/guessit/rules/properties/film.py @@ -6,9 +6,11 @@ film property from rebulk import Rebulk, AppendMatch, Rule from rebulk.remodule import re +from ..common import dash from ..common.formatters import cleanup from ..common.pattern import is_disabled from ..common.validators import seps_surround +from ...config import load_config_patterns def film(config): # pylint:disable=unused-argument @@ -17,10 +19,11 @@ def film(config): # pylint:disable=unused-argument :return: Created Rebulk object :rtype: Rebulk """ - rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, validate_all=True, validator={'__parent__': seps_surround}) + rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'film')) + rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True) + rebulk.defaults(name='film', validator=seps_surround) - rebulk.regex(r'f(\d{1,2})', name='film', private_parent=True, children=True, formatter=int, - disabled=lambda context: is_disabled(context, 'film')) + load_config_patterns(rebulk, config.get('film')) rebulk.rules(FilmTitleRule) diff --git a/libs/common/guessit/rules/properties/language.py b/libs/common/guessit/rules/properties/language.py index 3f83bc34..3c41e107 100644 --- a/libs/common/guessit/rules/properties/language.py +++ b/libs/common/guessit/rules/properties/language.py @@ -13,8 +13,8 @@ from rebulk.remodule import re from ..common import seps from ..common.pattern import is_disabled -from ..common.words import iter_words from ..common.validators import seps_surround +from ..common.words import iter_words def language(config, common_words): @@ -64,7 +64,8 @@ def language(config, common_words): SubtitlePrefixLanguageRule, SubtitleSuffixLanguageRule, RemoveLanguage, - RemoveInvalidLanguages(common_words)) + RemoveInvalidLanguages(common_words), + RemoveUndeterminedLanguages) babelfish.language_converters['guessit'] = GuessitConverter(config['synonyms']) @@ -102,7 +103,7 @@ class GuessitConverter(babelfish.LanguageReverseConverter): # pylint: disable=m def convert(self, alpha3, country=None, script=None): return str(babelfish.Language(alpha3, country, script)) - def reverse(self, name): # pylint:disable=arguments-differ + def reverse(self, name): # pylint:disable=arguments-renamed name = name.lower() # exceptions come first, as they need to override a potential match # with any of the other guessers @@ -136,7 +137,7 @@ def length_comparator(value): _LanguageMatch = namedtuple('_LanguageMatch', ['property_name', 'word', 'lang']) -class LanguageWord(object): +class LanguageWord: """ Extension to the Word namedtuple in order to create compound words. @@ -165,7 +166,7 @@ class LanguageWord(object): return LanguageWord(self.start, self.next_word.end, value, self.input_string, self.next_word.next_word) def __repr__(self): - return '<({start},{end}): {value}'.format(start=self.start, end=self.end, value=self.value) + return f'<({self.start},{self.end}): {self.value}' def to_rebulk_match(language_match): @@ -190,7 +191,7 @@ def to_rebulk_match(language_match): } -class LanguageFinder(object): +class LanguageFinder: """ Helper class to search and return language matches: 'language' and 'subtitle_language' properties """ @@ -226,7 +227,7 @@ class LanguageFinder(object): key = match.property_name if match.lang == UNDETERMINED: undetermined_map[key].add(match) - elif match.lang == 'mul': + elif match.lang == MULTIPLE: multi_map[key].add(match) else: regular_lang_map[key].add(match) @@ -291,7 +292,7 @@ class LanguageFinder(object): if match: yield match - def find_match_for_word(self, word, fallback_word, affixes, is_affix, strip_affix): # pylint:disable=inconsistent-return-statements + def find_match_for_word(self, word, fallback_word, affixes, is_affix, strip_affix): """ Return the language match for the given word and affixes. """ @@ -322,6 +323,7 @@ class LanguageFinder(object): if match: return match + return None def find_language_match_for_word(self, word, key='language'): # pylint:disable=inconsistent-return-statements """ @@ -396,7 +398,7 @@ class SubtitlePrefixLanguageRule(Rule): def then(self, matches, when_response, context): to_rename, to_remove = when_response - super(SubtitlePrefixLanguageRule, self).then(matches, to_remove, context) + super().then(matches, to_remove, context) for prefix, match in to_rename: # Remove suffix equivalent of prefix. suffix = copy.copy(prefix) @@ -435,7 +437,7 @@ class SubtitleSuffixLanguageRule(Rule): def then(self, matches, when_response, context): to_rename, to_remove = when_response - super(SubtitleSuffixLanguageRule, self).then(matches, to_remove, context) + super().then(matches, to_remove, context) for match in to_rename: matches.remove(match) match.name = 'subtitle_language' @@ -488,7 +490,7 @@ class RemoveInvalidLanguages(Rule): def __init__(self, common_words): """Constructor.""" - super(RemoveInvalidLanguages, self).__init__() + super().__init__() self.common_words = common_words def when(self, matches, context): @@ -508,3 +510,22 @@ class RemoveInvalidLanguages(Rule): to_remove.append(match) return to_remove + + +class RemoveUndeterminedLanguages(Rule): + """Remove "und" language matches when next other language if found.""" + + consequence = RemoveMatch + priority = 32 + + def when(self, matches, context): + to_remove = [] + for match in matches.range(0, len(matches.input_string), + predicate=lambda m: m.name in ('language', 'subtitle_language')): + if match.value == "und": + previous = matches.previous(match, index=0) + next_ = matches.next(match, index=0) + if previous and previous.name == 'language' or next_ and next_.name == 'language': + to_remove.append(match) + + return to_remove diff --git a/libs/common/guessit/rules/properties/other.py b/libs/common/guessit/rules/properties/other.py index c7dc9a88..a199e760 100644 --- a/libs/common/guessit/rules/properties/other.py +++ b/libs/common/guessit/rules/properties/other.py @@ -3,15 +3,16 @@ """ other property """ -import copy from rebulk import Rebulk, Rule, RemoveMatch, RenameMatch, POST_PROCESS, AppendMatch +from rebulk.match import Match from rebulk.remodule import re from ..common import dash from ..common import seps from ..common.pattern import is_disabled from ..common.validators import seps_after, seps_before, seps_surround, and_ +from ...config import load_config_patterns from ...reutils import build_or_pattern from ...rules.common.formatters import raw_cleanup @@ -29,34 +30,21 @@ def other(config): # pylint:disable=unused-argument,too-many-statements rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True) rebulk.defaults(name="other", validator=seps_surround) - rebulk.regex('Audio-?Fix', 'Audio-?Fixed', value='Audio Fixed') - rebulk.regex('Sync-?Fix', 'Sync-?Fixed', value='Sync Fixed') - rebulk.regex('Dual', 'Dual-?Audio', value='Dual Audio') - rebulk.regex('ws', 'wide-?screen', value='Widescreen') - rebulk.regex('Re-?Enc(?:oded)?', value='Reencoded') + load_config_patterns(rebulk, config.get('other')) - rebulk.string('Repack', 'Rerip', value='Proper', - tags=['streaming_service.prefix', 'streaming_service.suffix']) - rebulk.string('Proper', value='Proper', - tags=['has-neighbor', 'streaming_service.prefix', 'streaming_service.suffix']) + rebulk.rules(RenameAnotherToOther, ValidateHasNeighbor, ValidateHasNeighborAfter, ValidateHasNeighborBefore, + ValidateScreenerRule, ValidateMuxRule, ValidateHardcodedSubs, ValidateStreamingServiceNeighbor, + ValidateAtEnd, ValidateReal, ProperCountRule) - rebulk.regex('Real-Proper', 'Real-Repack', 'Real-Rerip', value='Proper', - tags=['streaming_service.prefix', 'streaming_service.suffix', 'real']) - rebulk.regex('Real', value='Proper', - tags=['has-neighbor', 'streaming_service.prefix', 'streaming_service.suffix', 'real']) + return rebulk - rebulk.string('Fix', 'Fixed', value='Fix', tags=['has-neighbor-before', 'has-neighbor-after', - 'streaming_service.prefix', 'streaming_service.suffix']) - rebulk.string('Dirfix', 'Nfofix', 'Prooffix', value='Fix', - tags=['streaming_service.prefix', 'streaming_service.suffix']) - rebulk.regex('(?:Proof-?)?Sample-?Fix', value='Fix', - tags=['streaming_service.prefix', 'streaming_service.suffix']) - rebulk.string('Fansub', value='Fan Subtitled', tags='has-neighbor') - rebulk.string('Fastsub', value='Fast Subtitled', tags='has-neighbor') - - season_words = build_or_pattern(["seasons?", "series?"]) - complete_articles = build_or_pattern(["The"]) +def complete_words(rebulk: Rebulk, season_words, complete_article_words): + """ + Custom pattern to find complete seasons from words. + """ + season_words_pattern = build_or_pattern(season_words) + complete_article_words_pattern = build_or_pattern(complete_article_words) def validate_complete(match): """ @@ -71,82 +59,13 @@ def other(config): # pylint:disable=unused-argument,too-many-statements return False return True - rebulk.regex('(?P' + complete_articles + '-)?' + - '(?P' + season_words + '-)?' + - 'Complete' + '(?P-' + season_words + ')?', + rebulk.regex('(?P' + complete_article_words_pattern + '-)?' + + '(?P' + season_words_pattern + '-)?' + + 'Complete' + '(?P-' + season_words_pattern + ')?', private_names=['completeArticle', 'completeWordsBefore', 'completeWordsAfter'], value={'other': 'Complete'}, tags=['release-group-prefix'], validator={'__parent__': and_(seps_surround, validate_complete)}) - rebulk.string('R5', value='Region 5') - rebulk.string('RC', value='Region C') - rebulk.regex('Pre-?Air', value='Preair') - rebulk.regex('(?:PS-?)Vita', value='PS Vita') - rebulk.regex('Vita', value='PS Vita', tags='has-neighbor') - rebulk.regex('(HD)(?PRip)', value={'other': 'HD', 'another': 'Rip'}, - private_parent=True, children=True, validator={'__parent__': seps_surround}, validate_all=True) - - for value in ('Screener', 'Remux', 'PAL', 'SECAM', 'NTSC', 'XXX'): - rebulk.string(value, value=value) - rebulk.string('3D', value='3D', tags='has-neighbor') - - rebulk.string('HQ', value='High Quality', tags='uhdbluray-neighbor') - rebulk.string('HR', value='High Resolution') - rebulk.string('LD', value='Line Dubbed') - rebulk.string('MD', value='Mic Dubbed') - rebulk.string('mHD', 'HDLight', value='Micro HD') - rebulk.string('LDTV', value='Low Definition') - rebulk.string('HFR', value='High Frame Rate') - rebulk.string('VFR', value='Variable Frame Rate') - rebulk.string('HD', value='HD', validator=None, - tags=['streaming_service.prefix', 'streaming_service.suffix']) - rebulk.regex('Full-?HD', 'FHD', value='Full HD', validator=None, - tags=['streaming_service.prefix', 'streaming_service.suffix']) - rebulk.regex('Ultra-?(?:HD)?', 'UHD', value='Ultra HD', validator=None, - tags=['streaming_service.prefix', 'streaming_service.suffix']) - rebulk.regex('Upscaled?', value='Upscaled') - - for value in ('Complete', 'Classic', 'Bonus', 'Trailer', 'Retail', - 'Colorized', 'Internal'): - rebulk.string(value, value=value, tags=['has-neighbor', 'release-group-prefix']) - rebulk.regex('LiNE', value='Line Audio', tags=['has-neighbor-before', 'has-neighbor-after', 'release-group-prefix']) - rebulk.regex('Read-?NFO', value='Read NFO') - rebulk.string('CONVERT', value='Converted', tags='has-neighbor') - rebulk.string('DOCU', 'DOKU', value='Documentary', tags='has-neighbor') - rebulk.string('OM', value='Open Matte', tags='has-neighbor') - rebulk.string('STV', value='Straight to Video', tags='has-neighbor') - rebulk.string('OAR', value='Original Aspect Ratio', tags='has-neighbor') - rebulk.string('Complet', value='Complete', tags=['has-neighbor', 'release-group-prefix']) - - for coast in ('East', 'West'): - rebulk.regex(r'(?:Live-)?(?:Episode-)?' + coast + '-?(?:Coast-)?Feed', value=coast + ' Coast Feed') - - rebulk.string('VO', 'OV', value='Original Video', tags='has-neighbor') - rebulk.string('Ova', 'Oav', value='Original Animated Video') - - rebulk.regex('Scr(?:eener)?', value='Screener', validator=None, - tags=['other.validate.screener', 'source-prefix', 'source-suffix']) - rebulk.string('Mux', value='Mux', validator=seps_after, - tags=['other.validate.mux', 'video-codec-prefix', 'source-suffix']) - rebulk.string('HC', 'vost', value='Hardcoded Subtitles') - - rebulk.string('SDR', value='Standard Dynamic Range', tags='uhdbluray-neighbor') - rebulk.regex('HDR(?:10)?', value='HDR10', tags='uhdbluray-neighbor') - rebulk.regex('Dolby-?Vision', value='Dolby Vision', tags='uhdbluray-neighbor') - rebulk.regex('BT-?2020', value='BT.2020', tags='uhdbluray-neighbor') - - rebulk.string('Sample', value='Sample', tags=['at-end', 'not-a-release-group']) - rebulk.string('Extras', value='Extras', tags='has-neighbor') - rebulk.regex('Digital-?Extras?', value='Extras') - rebulk.string('Proof', value='Proof', tags=['at-end', 'not-a-release-group']) - rebulk.string('Obfuscated', 'Scrambled', value='Obfuscated', tags=['at-end', 'not-a-release-group']) - rebulk.string('xpost', 'postbot', 'asrequested', value='Repost', tags='not-a-release-group') - - rebulk.rules(RenameAnotherToOther, ValidateHasNeighbor, ValidateHasNeighborAfter, ValidateHasNeighborBefore, - ValidateScreenerRule, ValidateMuxRule, ValidateHardcodedSubs, ValidateStreamingServiceNeighbor, - ValidateAtEnd, ValidateReal, ProperCountRule) - - return rebulk class ProperCountRule(Rule): @@ -165,15 +84,30 @@ class ProperCountRule(Rule): raws = {} # Count distinct raw values for proper in propers: raws[raw_cleanup(proper.raw)] = proper - proper_count_match = copy.copy(propers[-1]) - proper_count_match.name = 'proper_count' value = 0 - for raw in raws.values(): - value += 2 if 'real' in raw.tags else 1 + start = None + end = None + proper_count_matches = [] + + for proper in raws.values(): + if not start or start > proper.start: + start = proper.start + if not end or end < proper.end: + end = proper.end + if proper.children.named('proper_count', 0): + value += int(proper.children.named('proper_count', 0).value) + elif 'real' in proper.tags: + value += 2 + else: + value += 1 + + proper_count_match = Match(name='proper_count', start=start, end=end, input_string=matches.input_string) proper_count_match.value = value - return proper_count_match + proper_count_matches.append(proper_count_match) + + return proper_count_matches class RenameAnotherToOther(Rule): @@ -360,7 +294,7 @@ class ValidateAtEnd(Rule): predicate=lambda m: m.name == 'other' and 'at-end' in m.tags): if (matches.holes(match.end, filepart.end, predicate=lambda m: m.value.strip(seps)) or matches.range(match.end, filepart.end, predicate=lambda m: m.name not in ( - 'other', 'container'))): + 'other', 'container'))): to_remove.append(match) return to_remove diff --git a/libs/common/guessit/rules/properties/release_group.py b/libs/common/guessit/rules/properties/release_group.py index ecff808b..09e845f5 100644 --- a/libs/common/guessit/rules/properties/release_group.py +++ b/libs/common/guessit/rules/properties/release_group.py @@ -98,7 +98,7 @@ class DashSeparatedReleaseGroup(Rule): def __init__(self, value_formatter): """Default constructor.""" - super(DashSeparatedReleaseGroup, self).__init__() + super().__init__() self.value_formatter = value_formatter @classmethod @@ -212,7 +212,7 @@ class SceneReleaseGroup(Rule): def __init__(self, value_formatter): """Default constructor.""" - super(SceneReleaseGroup, self).__init__() + super().__init__() self.value_formatter = value_formatter @staticmethod @@ -321,7 +321,6 @@ class AnimeReleaseGroup(Rule): for filepart in marker_sorted(matches.markers.named('path'), matches): - # pylint:disable=bad-continuation empty_group = matches.markers.range(filepart.start, filepart.end, lambda marker: (marker.name == 'group' diff --git a/libs/common/guessit/rules/properties/screen_size.py b/libs/common/guessit/rules/properties/screen_size.py index 77d5d052..770a9247 100644 --- a/libs/common/guessit/rules/properties/screen_size.py +++ b/libs/common/guessit/rules/properties/screen_size.py @@ -26,7 +26,7 @@ def screen_size(config): """ interlaced = frozenset(config['interlaced']) progressive = frozenset(config['progressive']) - frame_rates = [re.escape(rate) for rate in config['frame_rates']] + frame_rates = frozenset(config['frame_rates']) min_ar = config['min_ar'] max_ar = config['max_ar'] @@ -45,11 +45,12 @@ def screen_size(config): rebulk.regex(res_pattern + progressive_pattern + r'(?Pp)' + frame_rate_pattern + '?') rebulk.regex(res_pattern + progressive_pattern + r'(?Pp)?(?:hd)') rebulk.regex(res_pattern + progressive_pattern + r'(?Pp)?x?') - rebulk.string('4k', value='2160p') + rebulk.string('4k', value='2160p', + conflict_solver=lambda match, other: '__default__' if other.name == 'screen_size' else match) rebulk.regex(r'(?P\d{3,4})-?(?:x|\*)-?(?P\d{3,4})', conflict_solver=lambda match, other: '__default__' if other.name == 'screen_size' else other) - rebulk.regex(frame_rate_pattern + '(p|fps)', name='frame_rate', + rebulk.regex(frame_rate_pattern + '-?(?:p|fps)', name='frame_rate', formatter=FrameRate.fromstring, disabled=lambda context: is_disabled(context, 'frame_rate')) rebulk.rules(PostProcessScreenSize(progressive, min_ar, max_ar), ScreenSizeOnlyOne, ResolveScreenSizeConflicts) @@ -69,7 +70,7 @@ class PostProcessScreenSize(Rule): consequence = AppendMatch def __init__(self, standard_heights, min_ar, max_ar): - super(PostProcessScreenSize, self).__init__() + super().__init__() self.standard_heights = standard_heights self.min_ar = min_ar self.max_ar = max_ar @@ -89,7 +90,7 @@ class PostProcessScreenSize(Rule): scan_type = (values.get('scan_type') or 'p').lower() height = values['height'] if 'width' not in values: - match.value = '{0}{1}'.format(height, scan_type) + match.value = f'{height}{scan_type}' continue width = values['width'] @@ -102,9 +103,9 @@ class PostProcessScreenSize(Rule): to_append.append(aspect_ratio) if height in self.standard_heights and self.min_ar < calculated_ar < self.max_ar: - match.value = '{0}{1}'.format(height, scan_type) + match.value = f'{height}{scan_type}' else: - match.value = '{0}x{1}'.format(width, height) + match.value = f'{width}x{height}' return to_append diff --git a/libs/common/guessit/rules/properties/size.py b/libs/common/guessit/rules/properties/size.py index c61580c0..0a097002 100644 --- a/libs/common/guessit/rules/properties/size.py +++ b/libs/common/guessit/rules/properties/size.py @@ -3,7 +3,7 @@ """ size property """ -import re +from rebulk.remodule import re from rebulk import Rebulk diff --git a/libs/common/guessit/rules/properties/source.py b/libs/common/guessit/rules/properties/source.py index 2fe55618..610729f9 100644 --- a/libs/common/guessit/rules/properties/source.py +++ b/libs/common/guessit/rules/properties/source.py @@ -5,12 +5,11 @@ source property """ import copy +from rebulk import AppendMatch, Rebulk, RemoveMatch, Rule from rebulk.remodule import re -from rebulk import AppendMatch, Rebulk, RemoveMatch, Rule - from .audio_codec import HqConflictRule -from ..common import dash, seps +from ..common import dash, seps, optional from ..common.pattern import is_disabled from ..common.validators import seps_before, seps_after, or_ @@ -31,78 +30,74 @@ def source(config): # pylint:disable=unused-argument validate_all=True, validator={'__parent__': or_(seps_before, seps_after)}) - rip_prefix = '(?PRip)-?' - rip_suffix = '-?(?PRip)' - rip_optional_suffix = '(?:' + rip_suffix + ')?' + rip_prefix = config['rip_prefix'] + rip_suffix = config['rip_suffix'] - def build_source_pattern(*patterns, **kwargs): + def build_source_pattern(*patterns, prefix='', suffix=''): """Helper pattern to build source pattern.""" - prefix_format = kwargs.get('prefix') or '' - suffix_format = kwargs.get('suffix') or '' - - string_format = prefix_format + '({0})' + suffix_format - return [string_format.format(pattern) for pattern in patterns] + return [prefix + f'({pattern})' + suffix for pattern in patterns] def demote_other(match, other): # pylint: disable=unused-argument """Default conflict solver with 'other' property.""" - return other if other.name == 'other' or other.name == 'release_group' else '__default__' + return other if other.name in ['other', 'release_group'] else '__default__' - rebulk.regex(*build_source_pattern('VHS', suffix=rip_optional_suffix), + rebulk.regex(*build_source_pattern('VHS', suffix=optional(rip_suffix)), value={'source': 'VHS', 'other': 'Rip'}) - rebulk.regex(*build_source_pattern('CAM', suffix=rip_optional_suffix), + rebulk.regex(*build_source_pattern('CAM', suffix=optional(rip_suffix)), value={'source': 'Camera', 'other': 'Rip'}) - rebulk.regex(*build_source_pattern('HD-?CAM', suffix=rip_optional_suffix), + rebulk.regex(*build_source_pattern('HD-?CAM', suffix=optional(rip_suffix)), value={'source': 'HD Camera', 'other': 'Rip'}) - rebulk.regex(*build_source_pattern('TELESYNC', 'TS', suffix=rip_optional_suffix), - value={'source': 'Telesync', 'other': 'Rip'}) - rebulk.regex(*build_source_pattern('HD-?TELESYNC', 'HD-?TS', suffix=rip_optional_suffix), + # For TS, we remove 'streaming_service.suffix' tag to avoid "Shots" being guessed as Showtime and TS. + rebulk.regex(*build_source_pattern('TELESYNC', 'TS', suffix=optional(rip_suffix)), + value={'source': 'Telesync', 'other': 'Rip'}, tags=['video-codec-prefix'], overrides=["tags"]) + rebulk.regex(*build_source_pattern('HD-?TELESYNC', 'HD-?TS', suffix=optional(rip_suffix)), value={'source': 'HD Telesync', 'other': 'Rip'}) rebulk.regex(*build_source_pattern('WORKPRINT', 'WP'), value='Workprint') - rebulk.regex(*build_source_pattern('TELECINE', 'TC', suffix=rip_optional_suffix), + rebulk.regex(*build_source_pattern('TELECINE', 'TC', suffix=optional(rip_suffix)), value={'source': 'Telecine', 'other': 'Rip'}) - rebulk.regex(*build_source_pattern('HD-?TELECINE', 'HD-?TC', suffix=rip_optional_suffix), + rebulk.regex(*build_source_pattern('HD-?TELECINE', 'HD-?TC', suffix=optional(rip_suffix)), value={'source': 'HD Telecine', 'other': 'Rip'}) - rebulk.regex(*build_source_pattern('PPV', suffix=rip_optional_suffix), + rebulk.regex(*build_source_pattern('PPV', suffix=optional(rip_suffix)), value={'source': 'Pay-per-view', 'other': 'Rip'}) - rebulk.regex(*build_source_pattern('SD-?TV', suffix=rip_optional_suffix), + rebulk.regex(*build_source_pattern('SD-?TV', suffix=optional(rip_suffix)), value={'source': 'TV', 'other': 'Rip'}) rebulk.regex(*build_source_pattern('TV', suffix=rip_suffix), # TV is too common to allow matching value={'source': 'TV', 'other': 'Rip'}) rebulk.regex(*build_source_pattern('TV', 'SD-?TV', prefix=rip_prefix), value={'source': 'TV', 'other': 'Rip'}) rebulk.regex(*build_source_pattern('TV-?(?=Dub)'), value='TV') - rebulk.regex(*build_source_pattern('DVB', 'PD-?TV', suffix=rip_optional_suffix), + rebulk.regex(*build_source_pattern('DVB', 'PD-?TV', suffix=optional(rip_suffix)), value={'source': 'Digital TV', 'other': 'Rip'}) - rebulk.regex(*build_source_pattern('DVD', suffix=rip_optional_suffix), + rebulk.regex(*build_source_pattern('DVD', suffix=optional(rip_suffix)), value={'source': 'DVD', 'other': 'Rip'}) - rebulk.regex(*build_source_pattern('DM', suffix=rip_optional_suffix), + rebulk.regex(*build_source_pattern('DM', suffix=optional(rip_suffix)), value={'source': 'Digital Master', 'other': 'Rip'}) rebulk.regex(*build_source_pattern('VIDEO-?TS', 'DVD-?R(?:$|(?!E))', # 'DVD-?R(?:$|^E)' => DVD-Real ... 'DVD-?9', 'DVD-?5'), value='DVD') - rebulk.regex(*build_source_pattern('HD-?TV', suffix=rip_optional_suffix), conflict_solver=demote_other, + rebulk.regex(*build_source_pattern('HD-?TV', suffix=optional(rip_suffix)), conflict_solver=demote_other, value={'source': 'HDTV', 'other': 'Rip'}) rebulk.regex(*build_source_pattern('TV-?HD', suffix=rip_suffix), conflict_solver=demote_other, value={'source': 'HDTV', 'other': 'Rip'}) rebulk.regex(*build_source_pattern('TV', suffix='-?(?PRip-?HD)'), conflict_solver=demote_other, value={'source': 'HDTV', 'other': 'Rip'}) - rebulk.regex(*build_source_pattern('VOD', suffix=rip_optional_suffix), + rebulk.regex(*build_source_pattern('VOD', suffix=optional(rip_suffix)), value={'source': 'Video on Demand', 'other': 'Rip'}) rebulk.regex(*build_source_pattern('WEB', 'WEB-?DL', suffix=rip_suffix), value={'source': 'Web', 'other': 'Rip'}) # WEBCap is a synonym to WEBRip, mostly used by non english - rebulk.regex(*build_source_pattern('WEB-?(?PCap)', suffix=rip_optional_suffix), + rebulk.regex(*build_source_pattern('WEB-?(?PCap)', suffix=optional(rip_suffix)), value={'source': 'Web', 'other': 'Rip', 'another': 'Rip'}) rebulk.regex(*build_source_pattern('WEB-?DL', 'WEB-?U?HD', 'DL-?WEB', 'DL(?=-?Mux)'), value={'source': 'Web'}) rebulk.regex('(WEB)', value='Web', tags='weak.source') - rebulk.regex(*build_source_pattern('HD-?DVD', suffix=rip_optional_suffix), + rebulk.regex(*build_source_pattern('HD-?DVD', suffix=optional(rip_suffix)), value={'source': 'HD-DVD', 'other': 'Rip'}) - rebulk.regex(*build_source_pattern('Blu-?ray', 'BD', 'BD[59]', 'BD25', 'BD50', suffix=rip_optional_suffix), + rebulk.regex(*build_source_pattern('Blu-?ray', 'BD', 'BD[59]', 'BD25', 'BD50', suffix=optional(rip_suffix)), value={'source': 'Blu-ray', 'other': 'Rip'}) rebulk.regex(*build_source_pattern('(?PBR)-?(?=Scr(?:eener)?)', '(?PBR)-?(?=Mux)'), # BRRip value={'source': 'Blu-ray', 'another': 'Reencoded'}) @@ -112,12 +107,12 @@ def source(config): # pylint:disable=unused-argument rebulk.regex(*build_source_pattern('Ultra-?Blu-?ray', 'Blu-?ray-?Ultra'), value='Ultra HD Blu-ray') rebulk.regex(*build_source_pattern('AHDTV'), value='Analog HDTV') - rebulk.regex(*build_source_pattern('UHD-?TV', suffix=rip_optional_suffix), conflict_solver=demote_other, + rebulk.regex(*build_source_pattern('UHD-?TV', suffix=optional(rip_suffix)), conflict_solver=demote_other, value={'source': 'Ultra HDTV', 'other': 'Rip'}) rebulk.regex(*build_source_pattern('UHD', suffix=rip_suffix), conflict_solver=demote_other, value={'source': 'Ultra HDTV', 'other': 'Rip'}) - rebulk.regex(*build_source_pattern('DSR', 'DTH', suffix=rip_optional_suffix), + rebulk.regex(*build_source_pattern('DSR', 'DTH', suffix=optional(rip_suffix)), value={'source': 'Satellite', 'other': 'Rip'}) rebulk.regex(*build_source_pattern('DSR?', 'SAT', suffix=rip_suffix), value={'source': 'Satellite', 'other': 'Rip'}) @@ -145,11 +140,11 @@ class UltraHdBlurayRule(Rule): def validate_range(cls, matches, start, end): """Validate no holes or invalid matches exist in the specified range.""" return ( - not matches.holes(start, end, predicate=lambda m: m.value.strip(seps)) and - not matches.range(start, end, predicate=( - lambda m: not m.private and ( - m.name not in ('screen_size', 'color_depth') and ( - m.name != 'other' or 'uhdbluray-neighbor' not in m.tags)))) + not matches.holes(start, end, predicate=lambda m: m.value.strip(seps)) and + not matches.range(start, end, predicate=( + lambda m: not m.private and ( + m.name not in ('screen_size', 'color_depth') and ( + m.name != 'other' or 'uhdbluray-neighbor' not in m.tags)))) ) def when(self, matches, context): diff --git a/libs/common/guessit/rules/properties/streaming_service.py b/libs/common/guessit/rules/properties/streaming_service.py index f467f20a..955418ac 100644 --- a/libs/common/guessit/rules/properties/streaming_service.py +++ b/libs/common/guessit/rules/properties/streaming_service.py @@ -3,12 +3,13 @@ """ streaming_service property """ -import re +from rebulk.remodule import re from rebulk import Rebulk from rebulk.rules import Rule, RemoveMatch from ..common.pattern import is_disabled +from ...config import load_config_patterns from ...rules.common import seps, dash from ...rules.common.validators import seps_before, seps_after @@ -25,13 +26,7 @@ def streaming_service(config): # pylint: disable=too-many-statements,unused-arg rebulk = rebulk.string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]) rebulk.defaults(name='streaming_service', tags=['source-prefix']) - for value, items in config.items(): - patterns = items if isinstance(items, list) else [items] - for pattern in patterns: - if pattern.startswith('re:'): - rebulk.regex(pattern, value=value) - else: - rebulk.string(pattern, value=value) + load_config_patterns(rebulk, config) rebulk.rules(ValidateStreamingService) diff --git a/libs/common/guessit/rules/properties/title.py b/libs/common/guessit/rules/properties/title.py index 0d263016..77f5bd11 100644 --- a/libs/common/guessit/rules/properties/title.py +++ b/libs/common/guessit/rules/properties/title.py @@ -49,11 +49,11 @@ class TitleBaseRule(Rule): """ Add title match in existing matches """ - # pylint:disable=no-self-use,unused-argument + # pylint:disable=unused-argument consequence = [AppendMatch, RemoveMatch] def __init__(self, match_name, match_tags=None, alternative_match_name=None): - super(TitleBaseRule, self).__init__() + super().__init__() self.match_name = match_name self.match_tags = match_tags self.alternative_match_name = alternative_match_name @@ -205,7 +205,7 @@ class TitleBaseRule(Rule): for ignored_match in ignored_matches: if ignored_match not in to_keep: starting = matches.chain_after(hole.start, seps, - predicate=lambda m: m == ignored_match) + predicate=lambda m, im=ignored_match: m == im) if starting: should_keep = self.should_keep(ignored_match, to_keep, matches, filepart, hole, True) if should_keep: @@ -299,7 +299,7 @@ class TitleFromPosition(TitleBaseRule): properties = {'title': [None], 'alternative_title': [None]} def __init__(self): - super(TitleFromPosition, self).__init__('title', ['title'], 'alternative_title') + super().__init__('title', ['title'], 'alternative_title') def enabled(self, context): return not is_disabled(context, 'alternative_title') diff --git a/libs/common/guessit/rules/properties/website.py b/libs/common/guessit/rules/properties/website.py index c1965311..e5cea22a 100644 --- a/libs/common/guessit/rules/properties/website.py +++ b/libs/common/guessit/rules/properties/website.py @@ -3,7 +3,11 @@ """ Website property. """ -from pkg_resources import resource_stream # @UnresolvedImport +try: + from importlib.resources import files # @UnresolvedImport +except ImportError: + from importlib_resources import files # @UnresolvedImport + from rebulk.remodule import re from rebulk import Rebulk, Rule, RemoveMatch @@ -27,11 +31,12 @@ def website(config): rebulk = rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True) rebulk.defaults(name="website") - with resource_stream('guessit', 'tlds-alpha-by-domain.txt') as tld_file: + with files('guessit.data') as data_files: + tld_file = data_files.joinpath('tlds-alpha-by-domain.txt').read_text(encoding='utf-8') tlds = [ - tld.strip().decode('utf-8') - for tld in tld_file.readlines() - if b'--' not in tld + tld.strip() + for tld in tld_file.split('\n') + if '--' not in tld ][1:] # All registered domain extension safe_tlds = config['safe_tlds'] # For sure a website extension @@ -40,15 +45,15 @@ def website(config): website_prefixes = config['prefixes'] rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) + - r'\.)+(?:[a-z-]+\.)+(?:'+build_or_pattern(tlds) + + r'\.)+(?:[a-z-0-9-]+\.)+(?:'+build_or_pattern(tlds) + r'))(?:[^a-z0-9]|$)', children=True) rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) + - r'\.)*[a-z-]+\.(?:'+build_or_pattern(safe_tlds) + + r'\.)*[a-z0-9-]+\.(?:'+build_or_pattern(safe_tlds) + r'))(?:[^a-z0-9]|$)', safe_subdomains=safe_subdomains, safe_tlds=safe_tlds, children=True) rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) + - r'\.)*[a-z-]+\.(?:'+build_or_pattern(safe_prefix) + + r'\.)*[a-z0-9-]+\.(?:'+build_or_pattern(safe_prefix) + r'\.)+(?:'+build_or_pattern(tlds) + r'))(?:[^a-z0-9]|$)', safe_subdomains=safe_subdomains, safe_prefix=safe_prefix, tlds=tlds, children=True) diff --git a/libs/common/guessit/test/__init__.py b/libs/common/guessit/test/__init__.py index e5be370e..c5db3ab8 100644 --- a/libs/common/guessit/test/__init__.py +++ b/libs/common/guessit/test/__init__.py @@ -1,3 +1,3 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name +# pylint: disable=pointless-statement, missing-docstring, invalid-name diff --git a/libs/common/guessit/test/episodes.yml b/libs/common/guessit/test/episodes.yml index 4bbbde4a..52e29eca 100644 --- a/libs/common/guessit/test/episodes.yml +++ b/libs/common/guessit/test/episodes.yml @@ -4690,4 +4690,69 @@ release_group: NOGPR container: mp4 mimetype: video/mp4 - type: episode \ No newline at end of file + type: episode + +? "Seitokai Yakuindomo - 14 OAD [BDRip 1920x1080 x264 FLAC].mkv" +: title: Seitokai Yakuindomo + episode: 14 + source: Blu-ray + other: [Original Animation DVD, Rip] + screen_size: 1080p + aspect_ratio: 1.778 + video_codec: H.264 + audio_codec: FLAC + container: mkv + mimetype: video/x-matroska + type: episode + +? "[EveTaku] Kyouso Giga ONA v2 [540p][128BAC43].mkv" +: release_group: EveTaku + title: Kyouso Giga + other: Original Net Animation + version: 2 + screen_size: 540p + crc32: 128BAC43 + container: mkv + mimetype: video/x-matroska + type: episode + +? '[Erai-raws] Fumetsu no Anata e - 03 [720p][Multiple Subtitle].mkv' +: release_group: Erai-raws + title: Fumetsu no Anata e + episode: 3 + screen_size: 720p + subtitle_language: mul + container: mkv + mimetype: video/x-matroska + type: episode + +? Mom.S06E08.Jell-O.Shots.and.the.Truth.About.Santa.1080p.AMZN.WEB-DL.DDP5.1.H.264-NTb.mkv +: title: Mom + season: 6 + episode: 8 + episode_title: Jell-O Shots and the Truth About Santa + screen_size: 1080p + streaming_service: Amazon Prime + source: Web + audio_codec: Dolby Digital Plus + audio_channels: '5.1' + video_codec: H.264 + release_group: NTb + container: mkv + mimetype: video/x-matroska + type: episode + +? Archer.2009.S12E05.Shots.720p.HULU.WEB-DL.DDP5.1.H.264-NOGRP +: title: Archer + year: 2009 + season: 12 + episode: 5 + episode_title: Shots + screen_size: 720p + streaming_service: Hulu + source: Web + audio_codec: Dolby Digital Plus + audio_channels: '5.1' + video_codec: H.264 + release_group: NOGRP + type: episode diff --git a/libs/common/guessit/test/movies.yml b/libs/common/guessit/test/movies.yml index a534ca0f..ff4232a2 100644 --- a/libs/common/guessit/test/movies.yml +++ b/libs/common/guessit/test/movies.yml @@ -50,6 +50,16 @@ video_codec: Xvid release_group: PUKKA +? Enter.the.Void.2009.2in1.1080p.BluRay.DD5.1.x264-EbP.mkv +: title: Enter the Void + year: 2009 + other: 2in1 + screen_size: 1080p + source: Blu-ray + audio_codec: Dolby Digital + video_codec: H.264 + release_group: EbP + ? "[XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv" : title: Le Prestige source: DVD @@ -79,6 +89,11 @@ year: 1985 cd: 2 +? Movies/Picnic.at.Hanging.Rock.1975.Criterion.Collection.1080p.BluRay.x264.DTS-WiKi +: title: Picnic at Hanging Rock + edition: Criterion + year: 1975 + ? Movies/Persepolis (2007)/[XCT] Persepolis [H264+Aac-128(Fr-Eng)+ST(Fr-Eng)+Ind].mkv : title: Persepolis year: 2007 @@ -1045,6 +1060,36 @@ release_group: Seven type: movie +? Maze.Runner.The.Scorch.Trials.OM.2015.WEB-DLRip.by.Seven +: title: Maze Runner The Scorch Trials + other: [Open Matte, Rip] + year: 2015 + source: Web + release_group: Seven + type: movie + +? Foo Bar 2015 Open Matte 1080p WEB-DL DD+5.1 H.264 +: title: Foo Bar + year: 2015 + other: Open Matte + screen_size: 1080p + source: Web + audio_codec: Dolby Digital Plus + audio_channels: '5.1' + video_codec: H.264 + type: movie + +? foo.bar.2015.open.matte.1080p.web-dl.dd+5.1.h.264 +: title: foo bar + year: 2015 + other: Open Matte + screen_size: 1080p + source: Web + audio_codec: Dolby Digital Plus + audio_channels: '5.1' + video_codec: H.264 + type: movie + ? Kampen Om Tungtvannet aka The Heavy Water War COMPLETE 720p x265 HEVC-Lund : title: Kampen Om Tungtvannet aka The Heavy Water War other: Complete @@ -1314,6 +1359,22 @@ release_group: ETRG type: movie +? Heathers.1988.1080p.BluRay.ARROW.4K.RESTORED.Plus.Comm.DTS.x264-MaG +: title: Heathers + edition: Restored + year: 1988 + screen_size: 1080p + release_group: MaG + type: movie + +? The.Woman.2011.1080p.BluRay.4K.REMASTERED.Remux.AVC.DTS-HD.MA.5.1-PTP.mkv +: title: The Woman + edition: Remastered + year: 2011 + screen_size: 1080p + release_group: PTP + type: movie + ? Delibal 2015 720p Upscale DVDRip x264 DD5.1 AC3 : title: Delibal year: 2015 @@ -1752,6 +1813,7 @@ year: 2018 other: - 3D + - Hybrid - Proper - Remux proper_count: 1 @@ -1784,3 +1846,16 @@ release_group: EVO container: mkv type: movie + +? Kes.1969.1080p.BluRay.FLAC1.0.x264-DON.mkv +: title: Kes + year: 1969 + screen_size: 1080p + source: Blu-ray + audio_codec: FLAC + audio_channels: '1.0' + video_codec: H.264 + release_group: DON + container: mkv + mimetype: video/x-matroska + type: movie diff --git a/libs/common/guessit/test/rules/__init__.py b/libs/common/guessit/test/rules/__init__.py index e5be370e..c5db3ab8 100644 --- a/libs/common/guessit/test/rules/__init__.py +++ b/libs/common/guessit/test/rules/__init__.py @@ -1,3 +1,3 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name +# pylint: disable=pointless-statement, missing-docstring, invalid-name diff --git a/libs/common/guessit/test/rules/audio_codec.yml b/libs/common/guessit/test/rules/audio_codec.yml index 9e381c34..6c937b83 100644 --- a/libs/common/guessit/test/rules/audio_codec.yml +++ b/libs/common/guessit/test/rules/audio_codec.yml @@ -15,11 +15,15 @@ ? +DD ? +Dolby Digital ? +AC3 +? +AC-3 : audio_codec: Dolby Digital ? +DDP ? +DD+ ? +EAC3 +? +EAC-3 +? +E-AC-3 +? +E-AC3 : audio_codec: Dolby Digital Plus ? +DolbyAtmos @@ -88,6 +92,7 @@ ? +stereo : audio_channels: '2.0' +? +1.0 ? +1ch ? +mono : audio_channels: '1.0' diff --git a/libs/common/guessit/test/rules/cds.yml b/libs/common/guessit/test/rules/cd.yml similarity index 100% rename from libs/common/guessit/test/rules/cds.yml rename to libs/common/guessit/test/rules/cd.yml diff --git a/libs/common/guessit/test/rules/edition.yml b/libs/common/guessit/test/rules/edition.yml index 4b7fd986..d1d5277f 100644 --- a/libs/common/guessit/test/rules/edition.yml +++ b/libs/common/guessit/test/rules/edition.yml @@ -15,9 +15,9 @@ : edition: Special ? Criterion Edition +? Criterion Collection ? Edition Criterion ? CC -? -Criterion : edition: Criterion ? Deluxe diff --git a/libs/common/guessit/test/rules/other.yml b/libs/common/guessit/test/rules/other.yml index 447f1787..b1790753 100644 --- a/libs/common/guessit/test/rules/other.yml +++ b/libs/common/guessit/test/rules/other.yml @@ -80,6 +80,9 @@ ? Remux : other: Remux +? Hybrid +: other: Hybrid + ? 3D.2019 : other: 3D @@ -167,3 +170,7 @@ ? Upscale : other: Upscaled +? REPACK5 +? ReRip5 +: other: Proper + proper_count: 5 \ No newline at end of file diff --git a/libs/common/guessit/test/rules/processors_test.py b/libs/common/guessit/test/rules/processors_test.py index c22e968c..57f04beb 100644 --- a/libs/common/guessit/test/rules/processors_test.py +++ b/libs/common/guessit/test/rules/processors_test.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name, pointless-string-statement +# pylint: disable=pointless-statement, missing-docstring, invalid-name, pointless-string-statement from rebulk.match import Matches, Match diff --git a/libs/common/guessit/test/rules/screen_size.yml b/libs/common/guessit/test/rules/screen_size.yml index 25d8374f..f814ef00 100644 --- a/libs/common/guessit/test/rules/screen_size.yml +++ b/libs/common/guessit/test/rules/screen_size.yml @@ -81,6 +81,15 @@ ? +852x480i : screen_size: 480i +? +540p +? +540px +? -540i +? -540 +: screen_size: 540p + +? +540i +: screen_size: 540i + ? +576p ? +576px ? -576i diff --git a/libs/common/guessit/test/rules/website.yml b/libs/common/guessit/test/rules/website.yml index 11d434d2..8d3d396e 100644 --- a/libs/common/guessit/test/rules/website.yml +++ b/libs/common/guessit/test/rules/website.yml @@ -21,3 +21,6 @@ ? Dark.Net.S01E06.720p.HDTV.x264-BATV Dark.Net.2015.720p.HDTV.x264-BATV : title: Dark Net + +? www.4MovieRulz.be - Ginny Weds Sunny (2020) 1080p Hindi Proper HDRip x264 DD5.1 - 2.4GB ESub.mkv +: website: www.4MovieRulz.be diff --git a/libs/common/guessit/test/streaming_services.yaml b/libs/common/guessit/test/streaming_services.yaml index adf52e71..1573e92a 100644 --- a/libs/common/guessit/test/streaming_services.yaml +++ b/libs/common/guessit/test/streaming_services.yaml @@ -463,6 +463,7 @@ # Streaming service: Nickelodeon ? Show.Name.S01.1080p.NICK.WEBRip.AAC.2.0.x264-monkee ? Show.Name.S01.1080p.Nickelodeon.WEBRip.AAC.2.0.x264-monkee +? Show.Name.S01.1080p.NICKAPP.WEBRip.AAC.2.0.x264-monkee : title: Show Name season: 1 screen_size: 1080p @@ -577,13 +578,13 @@ release_group: BTW type: episode -# Streaming service: RTÉ One +# Streaming service: RTE One ? Show.Name.S10E01.576p.RTE.WEBRip.AAC2.0.H.264-RTN : title: Show Name season: 10 episode: 1 screen_size: 576p - streaming_service: RTÉ One + streaming_service: RTE One source: Web other: Rip audio_codec: AAC @@ -647,6 +648,19 @@ type: episode video_codec: H.264 +? Junior.Bake.Off.S06E03.1080p.ALL4.WEB-DL.AAC2.0.x264-NTb +: title: Junior Bake Off + season: 6 + episode: 3 + screen_size: 1080p + streaming_service: Channel 4 + source: Web + audio_codec: AAC + audio_channels: '2.0' + video_codec: H.264 + release_group: NTb + type: episode + ? The.Toy.Box.S01E01.720p.AMBC.WEBRip.AAC2.0.x264-BTN : audio_channels: '2.0' audio_codec: AAC @@ -818,7 +832,6 @@ episode: 0 episode_details: Pilot episode_title: Pilot - language: zh other: - Proper - Rip @@ -862,7 +875,6 @@ ? What.The.Fuck.France.S01E01.Le.doublage.CNLP.WEBRip.AAC2.0.x264-TURTLE : audio_channels: '2.0' audio_codec: AAC - country: FR episode: 1 episode_title: Le doublage other: Rip @@ -870,7 +882,7 @@ season: 1 source: Web streaming_service: Canal+ - title: What The Fuck + title: What The Fuck France type: episode video_codec: H.264 @@ -943,14 +955,13 @@ ? The.Amazing.Race.Canada.S03.720p.CTV.WEBRip.AAC2.0.H.264-BTW : audio_channels: '2.0' audio_codec: AAC - country: CA other: Rip release_group: BTW screen_size: 720p season: 3 source: Web streaming_service: CTV - title: The Amazing Race + title: The Amazing Race Canada type: episode video_codec: H.264 @@ -1240,13 +1251,12 @@ ? Big.Brother.Canada.S05.GLBL.WEBRip.AAC2.0.H.264-RTN : audio_channels: '2.0' audio_codec: AAC - country: CA other: Rip release_group: RTN season: 5 source: Web streaming_service: Global - title: Big Brother + title: Big Brother Canada type: episode video_codec: H.264 @@ -1330,7 +1340,6 @@ ? Handmade.in.Japan.S01E01.720p.iP.WEBRip.AAC2.0.H.264-SUP : audio_channels: '2.0' audio_codec: AAC - country: JP episode: 1 other: Rip release_group: SUP @@ -1338,7 +1347,7 @@ season: 1 source: Web streaming_service: BBC iPlayer - title: Handmade in + title: Handmade in Japan type: episode video_codec: H.264 @@ -1463,9 +1472,8 @@ ? Bunsen.is.a.Beast.S01E23.Guinea.Some.Lovin.1080p.NICK.WEBRip.AAC2.0.x264-TVSmash : audio_channels: '2.0' audio_codec: AAC - country: GN episode: 23 - episode_title: Some Lovin + episode_title: Guinea Some Lovin other: Rip release_group: TVSmash screen_size: 1080p @@ -1538,13 +1546,14 @@ episode_title: The Masquerade other: Rip part: 2 - release_group: VP9-BTW + release_group: BTW screen_size: 1080p season: 2 source: Web streaming_service: YouTube Red title: Escape The Night type: episode + video_codec: VP9 ? Escape.The.Night.S02E02.The.Masquerade.Part.II.2160p.RED.WEBRip.AAC5.1.VP9-BTW : audio_channels: '5.1' @@ -1553,13 +1562,14 @@ episode_title: The Masquerade other: Rip part: 2 - release_group: VP9-BTW + release_group: BTW screen_size: 2160p season: 2 source: Web streaming_service: YouTube Red title: Escape The Night type: episode + video_codec: VP9 ? Escape.The.Night.S02E02.The.Masquerade.Part.II.720p.RED.WEBRip.AAC5.1.VP9-BTW : audio_channels: '5.1' @@ -1568,13 +1578,14 @@ episode_title: The Masquerade other: Rip part: 2 - release_group: VP9-BTW + release_group: BTW screen_size: 720p season: 2 source: Web streaming_service: YouTube Red title: Escape The Night type: episode + video_codec: VP9 ? The.Family.Law.S02E01.720p.SBS.WEB-DL.AAC2.0.H.264-BTN : audio_channels: '2.0' @@ -1584,7 +1595,7 @@ screen_size: 720p season: 2 source: Web - streaming_service: SBS (AU) + streaming_service: SBS title: The Family Law type: episode video_codec: H.264 @@ -1892,10 +1903,421 @@ season: 1 source: Web streaming_service: Vimeo - title: '555' + # title: '555' type: episode video_codec: H.264 +? Good.Bones.S02E01.Decaying.Duplex.Transformed.720p.9NOW.WEB-DL.AAC2.0.x264-RTN +: title: Good Bones + season: 2 + episode: 1 + episode_title: Decaying Duplex Transformed + screen_size: 720p + streaming_service: 9Now + source: Web + audio_codec: AAC + audio_channels: '2.0' + video_codec: H.264 + release_group: RTN + type: episode + +? Satisfaction.S01E01.Running.Girl.1080p.BNGE.WEB-DL.DD5.1.H.264-NTb +: title: Satisfaction + season: 1 + episode: 1 + episode_title: Running Girl + screen_size: 1080p + streaming_service: Binge + source: Web + audio_codec: Dolby Digital + audio_channels: '5.1' + video_codec: H.264 + release_group: NTb + type: episode + +? Virgin.S01E01.Birthday.Party.1440p.BKPL.WEB-DL.H.264-LiGHT +: title: Virgin + season: 1 + episode: 1 + episode_title: Birthday Party + screen_size: 1440p + streaming_service: Blackpills + source: Web + video_codec: H.264 + release_group: LiGHT + type: episode + +? Ciplak S01E05 1080p BLU WEBRip AAC2.0 H.264 TURG +: title: Ciplak + season: 1 + episode: 5 + screen_size: 1080p + streaming_service: BluTV + source: Web + other: Rip + audio_codec: AAC + audio_channels: '2.0' + video_codec: H.264 + release_group: TURG + type: episode + +? Scooby.Doo.and.Guess.Who.S01E01.Revenge.of.the.Swamp.Monster.1080p.BOOM.WEB-DL.AAC2.0.H.264-QOQ +: title: Scooby Doo and Guess Who + season: 1 + episode: 1 + episode_title: Revenge of the Swamp Monster + screen_size: 1080p + streaming_service: Boomerang + source: Web + audio_codec: AAC + audio_channels: '2.0' + video_codec: H.264 + release_group: QOQ + type: episode + +? The.Beaverton.S01E01.1080p.CRAV.WEB-DL.DD5.1.H.264-NTb +: title: The Beaverton + season: 1 + episode: 1 + screen_size: 1080p + streaming_service: Crave + source: Web + audio_codec: Dolby Digital + audio_channels: '5.1' + video_codec: H.264 + release_group: NTb + type: episode + +? Gold.Rush.Freddy.Dodges.Mine.Rescue.S01E06.Sink.or.Swim.1080p.DSCP.WEB-DL.AAC2.0.X264-RTN +: title: Gold Rush Freddy Dodges Mine Rescue + season: 1 + episode: 6 + episode_title: Sink or Swim + screen_size: 1080p + streaming_service: Discovery Plus + source: Web + audio_codec: AAC + audio_channels: '2.0' + video_codec: H.264 + release_group: RTN + type: episode + +? The.Mandalorian.S01E01.Chapter.1.2160p.DSNP.WEB-DL.DDP5.1.Atmos.DV.HEVC-MZABI +: title: The Mandalorian + season: 1 + episode: 1 + episode_title: Chapter 1 + screen_size: 2160p + streaming_service: Disney+ + source: Web + audio_codec: + - Dolby Digital Plus + - Dolby Atmos + audio_channels: '5.1' + video_codec: 'H.265' + release_group: 'MZABI' + type: episode + +? Sorry.For.Your.Loss.S01E01.One.Fun.Thing.1080p.FBWatch.WEB-DL.AAC2.0.x264-SAMUEL98 +: title: Sorry For Your Loss + season: 1 + episode: 1 + episode_title: One Fun Thing + screen_size: 1080p + streaming_service: Facebook Watch + source: Web + audio_codec: AAC + audio_channels: '2.0' + video_codec: H.264 + release_group: SAMUEL98 + type: episode + +? Heartland.1979.720p.FANDOR.WEB-DL.AAC2.0.H.264-Cinefeel +: title: Heartland + year: 1979 + screen_size: 720p + streaming_service: Fandor + source: Web + audio_codec: AAC + audio_channels: '2.0' + video_codec: H.264 + release_group: Cinefeel + +? Rio.Heroes.S01E01.Sem.Regras.720p.FOXP.WEB-DL.AAC2.0.x264-BTW +: title: Rio Heroes + season: 1 + episode: 1 + episode_title: Sem Regras + screen_size: 720p + streaming_service: Fox Premium + source: Web + audio_codec: AAC + audio_channels: '2.0' + video_codec: H.264 + release_group: BTW + type: episode + +? Deadline.Gallipoli.S01E01.1080p.FXTL.WEB-DL.DDP5.1.H.264-NTb +: title: Deadline Gallipoli + season: 1 + episode: 1 + screen_size: 1080p + streaming_service: Foxtel + source: Web + audio_codec: Dolby Digital Plus + audio_channels: '5.1' + video_codec: 'H.264' + release_group: NTb + type: episode + +? My Bromance 2 - 5 Years Later S01E01 1080p Gaga WEB-DL AAC2.0 H.264-FRE3LOV +: title: My Bromance 2 + alternative_title: 5 Years Later + season: 1 + episode: 1 + screen_size: 1080p + streaming_service: GagaOOLala + source: Web + audio_codec: AAC + audio_channels: '2.0' + video_codec: H.264 + release_group: FRE3LOV + type: episode + +? Cheeni.2020.1080p.HoiChoi.WEB-DL.AAC.2.0.H.264-iDC +: title: Cheeni + year: 2020 + screen_size: 1080p + streaming_service: hoichoi + source: Web + audio_codec: AAC + audio_channels: '2.0' + video_codec: H.264 + release_group: iDC + type: movie + +? Surau.dan.Silek.2017.720p.IFX.WEB-DL.AAC.x264-RsA +: title: Surau dan Silek + year: 2017 + screen_size: 720p + streaming_service: iflix + source: Web + audio_codec: AAC + video_codec: H.264 + release_group: RsA + type: movie + +? She.Would.Never.Know.S01E10.1080p.iQIYI.WEB-DL.AAC.2.0.x265-SH3LBY +: title: She Would Never Know + season: 1 + episode: 10 + screen_size: 1080p + streaming_service: iQIYI + source: Web + audio_codec: AAC + audio_channels: '2.0' + video_codec: H.265 + release_group: SH3LBY + type: episode + +? Direktoren.for.det.hele.2006.SUBBED.1080p.MUBI.WEB-DL.AAC2.0.H264-CMYK +: title: Direktoren for det hele + year: 2006 + screen_size: 1080p + streaming_service: MUBI + source: Web + audio_codec: AAC + audio_channels: '2.0' + video_codec: H.264 + release_group: CMYK + type: movie + +? Les.trois.couronnes.du.matelot.1983.1080p.INA.WEB-DL.AAC2.0.x264-KG +: title: Les trois couronnes du matelot + year: 1983 + screen_size: 1080p + streaming_service: National Audiovisual Institute + source: Web + audio_codec: AAC + audio_channels: '2.0' + video_codec: H.264 + release_group: KG + type: movie + +? Caninabis.Junkie.Dog.1979.480p.NFB.WEB-DL.AAC2.0.H.264-QiQ +: title: Caninabis Junkie Dog + year: 1979 + screen_size: 480p + streaming_service: National Film Board + source: Web + audio_codec: AAC + audio_channels: '2.0' + video_codec: H.264 + release_group: QiQ + type: movie + + +? First.Ladies.S01E04.1080p.OPTO.WEB-DL.H264-CKTV +: title: First Ladies + season: 1 + episode: 4 + screen_size: 1080p + streaming_service: Opto + source: Web + video_codec: H.264 + release_group: CKTV + type: episode + +? Queen.Sugar.S05E01.720p.OWN.WEB-DL.AAC2.0.H.264-BTN +: title: Queen Sugar + season: 5 + episode: 1 + screen_size: 720p + streaming_service: Oprah Winfrey Network + source: Web + audio_codec: AAC + audio_channels: '2.0' + video_codec: H.264 + release_group: BTN + type: episode + +? Saved.by.the.Bell.2020.S01E01.Pilot.1080p.PCOK.WEB-DL.DDP5.1.x264-NTb +: title: Saved by the Bell + year: 2020 + season: 1 + episode: 1 + episode_title: Pilot + screen_size: 1080p + streaming_service: Peacock + source: Web + audio_codec: Dolby Digital Plus + audio_channels: '5.1' + video_codec: H.264 + release_group: NTb + type: episode + +? Where's.Waldo.2019.S01E01.1080p.Peacock.WEB_DL.DD+5.1.H.264-SH3LBY +: title: Where's Waldo + year: 2019 + season: 1 + episode: 1 + screen_size: 1080p + streaming_service: Peacock + source: Web + audio_codec: Dolby Digital Plus + audio_channels: '5.1' + video_codec: H.264 + release_group: SH3LBY + type: episode + +? High.Stakes.Poker.S08E01.1080p.POGO.WEB-DL.AAC2.0.H.264-RAiSY +: title: High Stakes Poker + season: 8 + episode: 1 + screen_size: 1080p + streaming_service: PokerGO + source: Web + audio_codec: AAC + audio_channels: '2.0' + video_codec: H.264 + release_group: RAiSY + type: episode + +? Into.the.Darkness.2020.HDR.2160.RKTN.WEB-DL.x265-ROCCaT +: title: Into the Darkness + year: 2020 + other: HDR10 + screen_size: 2160p + streaming_service: Rakuten TV + source: Web + video_codec: H.265 + release_group: ROCCaT + type: movie + +? Cold.Case.S01E01.Look.Again.1080p.ROKU.WEB-DL.AAC2.0.H.264-ETHiCS +: title: Cold Case + season: 1 + episode: 1 + episode_title: Look Again + screen_size: 1080p + streaming_service: The Roku Channel + source: Web + audio_codec: AAC + audio_channels: '2.0' + video_codec: H.264 + release_group: ETHiCS + type: episode + +? Valmentaja.2018.720p.RUUTU.WEB-DL.AAC2.0.x264-D3 +: title: Valmentaja + year: 2018 + screen_size: 720p + streaming_service: RUUTU + source: Web + audio_codec: AAC + audio_channels: '2.0' + video_codec: H.264 + release_group: D3 + type: movie + +? Mysteries.of.the.Abandoned.S07E10.Mystery.of.the.Alien.Base.720p.SCI.WEBRip.AAC2.0.x264-BOOP +: title: Mysteries of the Abandoned + season: 7 + episode: 10 + episode_title: Mystery of the Alien Base + screen_size: 720p + streaming_service: Science Channel + source: Web + other: Rip + audio_codec: AAC + audio_channels: '2.0' + video_codec: H.264 + release_group: BOOP + type: episode + +? Powers (2015) S01E01 Pilot 2160p Sony WEBRip DTS-HD MA 5.1 x264-TrollUHD +: title: Powers + year: 2015 + season: 1 + episode: 1 + episode_title: Pilot + screen_size: 2160p + streaming_service: Sony + source: Web + other: Rip + audio_codec: DTS-HD + audio_profile: Master Audio + audio_channels: '5.1' + video_codec: H.264 + release_group: TrollUHD + type: episode + +? Wellington.Paranormal.S02E01.Taniwha.1080p.TVNZ.WEB-DL.AAC2.0.H.264-Dulus +: title: Wellington Paranormal + season: 2 + episode: 1 + episode_title: Taniwha + screen_size: 1080p + streaming_service: TVNZ + source: Web + audio_codec: AAC + audio_channels: '2.0' + video_codec: H.264 + release_group: Dulus + type: episode + +? UFC.Fight.Night.185.Prelims.FP.WEB-DL.H264-SHREDDiE +: title: UFC Fight Night + # NOTE: E185 exceeds the default configuration of E100 being the max. + season: 1 + episode: 85 + episode_title: Prelims + streaming_service: UFC Fight Pass + source: Web + video_codec: H.264 + release_group: SHREDDiE + type: episode + # All this below shouldn't match any streaming services ? London.2012.Olympics.CTV.Preview.Show.HDTV.x264-2HD : alternative_title: Olympics CTV Preview Show diff --git a/libs/common/guessit/test/test_api.py b/libs/common/guessit/test/test_api.py index 391dbced..e5598341 100644 --- a/libs/common/guessit/test/test_api.py +++ b/libs/common/guessit/test/test_api.py @@ -1,14 +1,15 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name, pointless-string-statement +# pylint: disable=pointless-statement, missing-docstring, invalid-name, pointless-string-statement import json import os -import sys +from pathlib import Path import pytest -import six +from pytest_mock import MockerFixture -from ..api import guessit, properties, suggested_expected, GuessitException +from .. import api +from ..api import guessit, properties, suggested_expected, GuessitException, default_api __location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__))) @@ -19,25 +20,19 @@ def test_default(): def test_forced_unicode(): - ret = guessit(u'Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv') - assert ret and 'title' in ret and isinstance(ret['title'], six.text_type) + ret = guessit('Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv') + assert ret and 'title' in ret and isinstance(ret['title'], str) def test_forced_binary(): ret = guessit(b'Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv') - assert ret and 'title' in ret and isinstance(ret['title'], six.binary_type) + assert ret and 'title' in ret and isinstance(ret['title'], bytes) -@pytest.mark.skipif(sys.version_info < (3, 4), reason="Path is not available") def test_pathlike_object(): - try: - from pathlib import Path - - path = Path('Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv') - ret = guessit(path) - assert ret and 'title' in ret - except ImportError: # pragma: no-cover - pass + path = Path('Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv') + ret = guessit(path) + assert ret and 'title' in ret def test_unicode_japanese(): @@ -51,16 +46,8 @@ def test_unicode_japanese_options(): def test_forced_unicode_japanese_options(): - ret = guessit(u"[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi", options={"expected_title": [u"阿维达"]}) - assert ret and 'title' in ret and ret['title'] == u"阿维达" - -# TODO: This doesn't compile on python 3, but should be tested on python 2. -""" -if six.PY2: - def test_forced_binary_japanese_options(): - ret = guessit(b"[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi", options={"expected_title": [b"阿维达"]}) - assert ret and 'title' in ret and ret['title'] == b"阿维达" -""" + ret = guessit("[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi", options={"expected_title": ["阿维达"]}) + assert ret and 'title' in ret and ret['title'] == "阿维达" def test_properties(): @@ -71,13 +58,59 @@ def test_properties(): def test_exception(): with pytest.raises(GuessitException) as excinfo: guessit(object()) - assert "An internal error has occured in guessit" in str(excinfo.value) + assert "An internal error has occurred in guessit" in str(excinfo.value) assert "Guessit Exception Report" in str(excinfo.value) assert "Please report at https://github.com/guessit-io/guessit/issues" in str(excinfo.value) def test_suggested_expected(): - with open(os.path.join(__location__, 'suggested.json'), 'r') as f: + with open(os.path.join(__location__, 'suggested.json'), 'r', encoding='utf-8') as f: content = json.load(f) actual = suggested_expected(content['titles']) assert actual == content['suggested'] + + +def test_should_rebuild_rebulk_on_advanced_config_change(mocker: MockerFixture): + api.reset() + rebulk_builder_spy = mocker.spy(api, 'rebulk_builder') + + string = "some.movie.trfr.mkv" + + result1 = default_api.guessit(string) + + assert result1.get('title') == 'some movie trfr' + assert 'subtitle_language' not in result1 + + rebulk_builder_spy.assert_called_once_with(mocker.ANY) + rebulk_builder_spy.reset_mock() + + result2 = default_api.guessit(string, {'advanced_config': {'language': {'subtitle_prefixes': ['tr']}}}) + + assert result2.get('title') == 'some movie' + assert str(result2.get('subtitle_language')) == 'fr' + + rebulk_builder_spy.assert_called_once_with(mocker.ANY) + rebulk_builder_spy.reset_mock() + + +def test_should_not_rebuild_rebulk_on_same_advanced_config(mocker: MockerFixture): + api.reset() + rebulk_builder_spy = mocker.spy(api, 'rebulk_builder') + + string = "some.movie.subfr.mkv" + + result1 = default_api.guessit(string) + + assert result1.get('title') == 'some movie' + assert str(result1.get('subtitle_language')) == 'fr' + + rebulk_builder_spy.assert_called_once_with(mocker.ANY) + rebulk_builder_spy.reset_mock() + + result2 = default_api.guessit(string) + + assert result2.get('title') == 'some movie' + assert str(result2.get('subtitle_language')) == 'fr' + + assert rebulk_builder_spy.call_count == 0 + rebulk_builder_spy.reset_mock() diff --git a/libs/common/guessit/test/test_api_unicode_literals.py b/libs/common/guessit/test/test_api_unicode_literals.py index 826f7cd1..fa6227d8 100644 --- a/libs/common/guessit/test/test_api_unicode_literals.py +++ b/libs/common/guessit/test/test_api_unicode_literals.py @@ -1,14 +1,11 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name, pointless-string-statement +# pylint: disable=pointless-statement, missing-docstring, invalid-name, pointless-string-statement -from __future__ import unicode_literals - import os import pytest -import six from ..api import guessit, properties, GuessitException @@ -21,13 +18,13 @@ def test_default(): def test_forced_unicode(): - ret = guessit(u'Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv') - assert ret and 'title' in ret and isinstance(ret['title'], six.text_type) + ret = guessit('Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv') + assert ret and 'title' in ret and isinstance(ret['title'], str) def test_forced_binary(): ret = guessit(b'Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv') - assert ret and 'title' in ret and isinstance(ret['title'], six.binary_type) + assert ret and 'title' in ret and isinstance(ret['title'], bytes) def test_unicode_japanese(): @@ -41,24 +38,18 @@ def test_unicode_japanese_options(): def test_forced_unicode_japanese_options(): - ret = guessit(u"[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi", options={"expected_title": [u"阿维达"]}) - assert ret and 'title' in ret and ret['title'] == u"阿维达" - -# TODO: This doesn't compile on python 3, but should be tested on python 2. -""" -if six.PY2: - def test_forced_binary_japanese_options(): - ret = guessit(b"[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi", options={"expected_title": [b"阿维达"]}) - assert ret and 'title' in ret and ret['title'] == b"阿维达" -""" + ret = guessit("[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi", options={"expected_title": ["阿维达"]}) + assert ret and 'title' in ret and ret['title'] == "阿维达" -def test_ensure_standard_string_class(): +def test_ensure_custom_string_class(): class CustomStr(str): pass - ret = guessit(CustomStr('1080p'), options={'advanced': True}) - assert ret and 'screen_size' in ret and not isinstance(ret['screen_size'].input_string, CustomStr) + ret = guessit(CustomStr('some.title.1080p.mkv'), options={'advanced': True}) + assert ret and 'screen_size' in ret and isinstance(ret['screen_size'].input_string, CustomStr) + assert ret and 'title' in ret and isinstance(ret['title'].input_string, CustomStr) + assert ret and 'container' in ret and isinstance(ret['container'].input_string, CustomStr) def test_properties(): @@ -69,6 +60,6 @@ def test_properties(): def test_exception(): with pytest.raises(GuessitException) as excinfo: guessit(object()) - assert "An internal error has occured in guessit" in str(excinfo.value) + assert "An internal error has occurred in guessit" in str(excinfo.value) assert "Guessit Exception Report" in str(excinfo.value) assert "Please report at https://github.com/guessit-io/guessit/issues" in str(excinfo.value) diff --git a/libs/common/guessit/test/test_benchmark.py b/libs/common/guessit/test/test_benchmark.py index 34386e30..0c561ee1 100644 --- a/libs/common/guessit/test/test_benchmark.py +++ b/libs/common/guessit/test/test_benchmark.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# pylint: disable=no-self-use,pointless-statement,missing-docstring,invalid-name,line-too-long +# pylint: disable=pointless-statement,missing-docstring,invalid-name,line-too-long import time import pytest @@ -34,7 +34,7 @@ def case4(): warmup=False ) @pytest.mark.skipif(True, reason="Disabled") -class TestBenchmark(object): +class TestBenchmark: def test_case1(self, benchmark): ret = benchmark(case1) assert ret diff --git a/libs/common/guessit/test/test_main.py b/libs/common/guessit/test/test_main.py index cbdba7aa..90b81690 100644 --- a/libs/common/guessit/test/test_main.py +++ b/libs/common/guessit/test/test_main.py @@ -1,16 +1,25 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name - +# pylint: disable=pointless-statement, missing-docstring, invalid-name +import json import os +import sys import pytest +from _pytest.capture import CaptureFixture from ..__main__ import main __location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__))) +# Prevent output from spamming the console +@pytest.fixture(scope="function", autouse=True) +def no_stdout(monkeypatch): + with open(os.devnull, "w") as f: # pylint:disable=unspecified-encoding + monkeypatch.setattr(sys, "stdout", f) + yield + def test_main_no_args(): main([]) @@ -24,7 +33,7 @@ def test_main_unicode(): def test_main_forced_unicode(): - main([u'Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv']) + main(['Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv']) def test_main_verbose(): @@ -70,3 +79,22 @@ def test_main_help(): def test_main_version(): main(['--version']) + + +def test_json_output_input_string(capsys: CaptureFixture): + main(['--json', '--output-input-string', 'test.avi']) + + outerr = capsys.readouterr() + data = json.loads(outerr.out) + + assert 'input_string' in data + assert data['input_string'] == 'test.avi' + + +def test_json_no_output_input_string(capsys: CaptureFixture): + main(['--json', 'test.avi']) + + outerr = capsys.readouterr() + data = json.loads(outerr.out) + + assert 'input_string' not in data diff --git a/libs/common/guessit/test/test_options.py b/libs/common/guessit/test/test_options.py index 4f019b34..de13180f 100644 --- a/libs/common/guessit/test/test_options.py +++ b/libs/common/guessit/test/test_options.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name, pointless-string-statement +# pylint: disable=pointless-statement, missing-docstring, invalid-name, pointless-string-statement import os import pytest diff --git a/libs/common/guessit/test/test_yml.py b/libs/common/guessit/test/test_yml.py index 040796de..6adced19 100644 --- a/libs/common/guessit/test/test_yml.py +++ b/libs/common/guessit/test/test_yml.py @@ -1,13 +1,10 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name +# pylint: disable=pointless-statement, missing-docstring, invalid-name import logging import os -# io.open supports encoding= in python 2.7 -from io import open # pylint: disable=redefined-builtin import babelfish -import six # pylint:disable=wrong-import-order import yaml # pylint:disable=wrong-import-order from rebulk.remodule import re from rebulk.utils import is_iterable @@ -21,7 +18,7 @@ logger = logging.getLogger(__name__) __location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__))) -class EntryResult(object): +class EntryResult: def __init__(self, string, negates=False): self.string = string self.negates = negates @@ -53,16 +50,16 @@ class EntryResult(object): if self.ok: return self.string + ': OK!' if self.warning: - return '%s%s: WARNING! (valid=%i, extra=%i)' % ('-' if self.negates else '', self.string, len(self.valid), - len(self.extra)) + return f'{"-" if self.negates else ""}{self.string}: ' \ + f'WARNING! (valid={len(self.valid)}, extra={self.extra})' if self.error: - return '%s%s: ERROR! (valid=%i, missing=%i, different=%i, extra=%i, others=%i)' % \ - ('-' if self.negates else '', self.string, len(self.valid), len(self.missing), len(self.different), - len(self.extra), len(self.others)) + return f'{"-" if self.negates else ""}{self.string}: ' \ + f'ERROR! (valid={len(self.valid)}, extra={self.extra}, ' \ + f'missing={self.missing}, different={self.different}, others={self.others})' - return '%s%s: UNKOWN! (valid=%i, missing=%i, different=%i, extra=%i, others=%i)' % \ - ('-' if self.negates else '', self.string, len(self.valid), len(self.missing), len(self.different), - len(self.extra), len(self.others)) + return f'{"-" if self.negates else ""}{self.string}: ' \ + f'UNKOWN! (valid={len(self.valid)}, extra={self.extra}, ' \ + f'missing={self.missing}, different={self.different}, others={self.others})' @property def details(self): @@ -110,14 +107,14 @@ def files_and_ids(predicate=None): for filename in filenames: name, ext = os.path.splitext(filename) filepath = os.path.join(dirpath_rel, filename) - if ext == '.yml' and (not predicate or predicate(filepath)): + if ext in ['.yml', '.yaml'] and (not predicate or predicate(filepath)): files.append(filepath) ids.append(os.path.join(dirpath_rel, name)) return files, ids -class TestYml(object): +class TestYml: """ Run tests from yaml files. Multiple input strings having same expected results can be chained. @@ -161,7 +158,7 @@ class TestYml(object): for string, expected in data.items(): TestYml.set_default(expected, default) - string = TestYml.fix_encoding(string, expected) + string = TestYml.fix_encoding(string) entries.append((filename, string, expected)) unique_id = self._get_unique_id(entry_set, '[' + filename + '] ' + str(string)) @@ -178,17 +175,7 @@ class TestYml(object): expected[k] = v @classmethod - def fix_encoding(cls, string, expected): - if six.PY2: - if isinstance(string, six.text_type): - string = string.encode('utf-8') - converts = [] - for k, v in expected.items(): - if isinstance(v, six.text_type): - v = v.encode('utf-8') - converts.append((k, v)) - for k, v in converts: - expected[k] = v + def fix_encoding(cls, string): if not isinstance(string, str): string = str(string) return string diff --git a/libs/common/guessit/yamlutils.py b/libs/common/guessit/yamlutils.py index d04be641..983abda7 100644 --- a/libs/common/guessit/yamlutils.py +++ b/libs/common/guessit/yamlutils.py @@ -4,12 +4,9 @@ Options """ -try: - from collections import OrderedDict -except ImportError: # pragma: no-cover - from ordereddict import OrderedDict # pylint:disable=import-error -import babelfish +from collections import OrderedDict +import babelfish import yaml # pylint:disable=wrong-import-order from .rules.common.quantity import BitRate, FrameRate, Size @@ -24,8 +21,8 @@ class OrderedDictYAMLLoader(yaml.SafeLoader): def __init__(self, *args, **kwargs): yaml.SafeLoader.__init__(self, *args, **kwargs) - self.add_constructor(u'tag:yaml.org,2002:map', type(self).construct_yaml_map) - self.add_constructor(u'tag:yaml.org,2002:omap', type(self).construct_yaml_map) + self.add_constructor('tag:yaml.org,2002:map', type(self).construct_yaml_map) + self.add_constructor('tag:yaml.org,2002:omap', type(self).construct_yaml_map) def construct_yaml_map(self, node): data = OrderedDict() @@ -38,7 +35,7 @@ class OrderedDictYAMLLoader(yaml.SafeLoader): self.flatten_mapping(node) else: # pragma: no cover raise yaml.constructor.ConstructorError(None, None, - 'expected a mapping node, but found %s' % node.id, node.start_mark) + f'expected a mapping node, but found {node.id}', node.start_mark) mapping = OrderedDict() for key_node, value_node in node.value: @@ -47,8 +44,8 @@ class OrderedDictYAMLLoader(yaml.SafeLoader): hash(key) except TypeError as exc: # pragma: no cover raise yaml.constructor.ConstructorError('while constructing a mapping', - node.start_mark, 'found unacceptable key (%s)' - % exc, key_node.start_mark) + node.start_mark, f'found unacceptable key ({exc})' + , key_node.start_mark) value = self.construct_object(value_node, deep=deep) mapping[key] = value return mapping diff --git a/libs/common/idna/__init__.py b/libs/common/idna/__init__.py index 847bf935..a40eeafc 100644 --- a/libs/common/idna/__init__.py +++ b/libs/common/idna/__init__.py @@ -1,2 +1,44 @@ from .package_data import __version__ -from .core import * +from .core import ( + IDNABidiError, + IDNAError, + InvalidCodepoint, + InvalidCodepointContext, + alabel, + check_bidi, + check_hyphen_ok, + check_initial_combiner, + check_label, + check_nfc, + decode, + encode, + ulabel, + uts46_remap, + valid_contextj, + valid_contexto, + valid_label_length, + valid_string_length, +) +from .intranges import intranges_contain + +__all__ = [ + "IDNABidiError", + "IDNAError", + "InvalidCodepoint", + "InvalidCodepointContext", + "alabel", + "check_bidi", + "check_hyphen_ok", + "check_initial_combiner", + "check_label", + "check_nfc", + "decode", + "encode", + "intranges_contain", + "ulabel", + "uts46_remap", + "valid_contextj", + "valid_contexto", + "valid_label_length", + "valid_string_length", +] diff --git a/libs/common/idna/codec.py b/libs/common/idna/codec.py index 98c65ead..1ca9ba62 100644 --- a/libs/common/idna/codec.py +++ b/libs/common/idna/codec.py @@ -1,41 +1,40 @@ from .core import encode, decode, alabel, ulabel, IDNAError import codecs import re +from typing import Tuple, Optional -_unicode_dots_re = re.compile(u'[\u002e\u3002\uff0e\uff61]') +_unicode_dots_re = re.compile('[\u002e\u3002\uff0e\uff61]') class Codec(codecs.Codec): - def encode(self, data, errors='strict'): - + def encode(self, data: str, errors: str = 'strict') -> Tuple[bytes, int]: if errors != 'strict': - raise IDNAError("Unsupported error handling \"{0}\"".format(errors)) + raise IDNAError('Unsupported error handling \"{}\"'.format(errors)) if not data: - return "", 0 + return b"", 0 return encode(data), len(data) - def decode(self, data, errors='strict'): - + def decode(self, data: bytes, errors: str = 'strict') -> Tuple[str, int]: if errors != 'strict': - raise IDNAError("Unsupported error handling \"{0}\"".format(errors)) + raise IDNAError('Unsupported error handling \"{}\"'.format(errors)) if not data: - return u"", 0 + return '', 0 return decode(data), len(data) class IncrementalEncoder(codecs.BufferedIncrementalEncoder): - def _buffer_encode(self, data, errors, final): + def _buffer_encode(self, data: str, errors: str, final: bool) -> Tuple[str, int]: # type: ignore if errors != 'strict': - raise IDNAError("Unsupported error handling \"{0}\"".format(errors)) + raise IDNAError('Unsupported error handling \"{}\"'.format(errors)) if not data: - return ("", 0) + return "", 0 labels = _unicode_dots_re.split(data) - trailing_dot = u'' + trailing_dot = '' if labels: if not labels[-1]: trailing_dot = '.' @@ -55,37 +54,29 @@ class IncrementalEncoder(codecs.BufferedIncrementalEncoder): size += len(label) # Join with U+002E - result = ".".join(result) + trailing_dot + result_str = '.'.join(result) + trailing_dot # type: ignore size += len(trailing_dot) - return (result, size) + return result_str, size class IncrementalDecoder(codecs.BufferedIncrementalDecoder): - def _buffer_decode(self, data, errors, final): + def _buffer_decode(self, data: str, errors: str, final: bool) -> Tuple[str, int]: # type: ignore if errors != 'strict': - raise IDNAError("Unsupported error handling \"{0}\"".format(errors)) + raise IDNAError('Unsupported error handling \"{}\"'.format(errors)) if not data: - return (u"", 0) + return ('', 0) - # IDNA allows decoding to operate on Unicode strings, too. - if isinstance(data, unicode): - labels = _unicode_dots_re.split(data) - else: - # Must be ASCII string - data = str(data) - unicode(data, "ascii") - labels = data.split(".") - - trailing_dot = u'' + labels = _unicode_dots_re.split(data) + trailing_dot = '' if labels: if not labels[-1]: - trailing_dot = u'.' + trailing_dot = '.' del labels[-1] elif not final: # Keep potentially unfinished label until the next call del labels[-1] if labels: - trailing_dot = u'.' + trailing_dot = '.' result = [] size = 0 @@ -95,22 +86,25 @@ class IncrementalDecoder(codecs.BufferedIncrementalDecoder): size += 1 size += len(label) - result = u".".join(result) + trailing_dot + result_str = '.'.join(result) + trailing_dot size += len(trailing_dot) - return (result, size) + return (result_str, size) class StreamWriter(Codec, codecs.StreamWriter): pass + class StreamReader(Codec, codecs.StreamReader): pass -def getregentry(): + +def getregentry() -> codecs.CodecInfo: + # Compatibility as a search_function for codecs.register() return codecs.CodecInfo( name='idna', - encode=Codec().encode, - decode=Codec().decode, + encode=Codec().encode, # type: ignore + decode=Codec().decode, # type: ignore incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamwriter=StreamWriter, diff --git a/libs/common/idna/compat.py b/libs/common/idna/compat.py index 4d47f336..786e6bda 100644 --- a/libs/common/idna/compat.py +++ b/libs/common/idna/compat.py @@ -1,12 +1,13 @@ from .core import * from .codec import * +from typing import Any, Union -def ToASCII(label): +def ToASCII(label: str) -> bytes: return encode(label) -def ToUnicode(label): +def ToUnicode(label: Union[bytes, bytearray]) -> str: return decode(label) -def nameprep(s): - raise NotImplementedError("IDNA 2008 does not utilise nameprep protocol") +def nameprep(s: Any) -> None: + raise NotImplementedError('IDNA 2008 does not utilise nameprep protocol') diff --git a/libs/common/idna/core.py b/libs/common/idna/core.py index 41ec5c71..4f300371 100644 --- a/libs/common/idna/core.py +++ b/libs/common/idna/core.py @@ -2,16 +2,12 @@ from . import idnadata import bisect import unicodedata import re -import sys +from typing import Union, Optional from .intranges import intranges_contain _virama_combining_class = 9 _alabel_prefix = b'xn--' -_unicode_dots_re = re.compile(u'[\u002e\u3002\uff0e\uff61]') - -if sys.version_info[0] >= 3: - unicode = str - unichr = chr +_unicode_dots_re = re.compile('[\u002e\u3002\uff0e\uff61]') class IDNAError(UnicodeError): """ Base exception for all IDNA-encoding related problems """ @@ -33,46 +29,43 @@ class InvalidCodepointContext(IDNAError): pass -def _combining_class(cp): - v = unicodedata.combining(unichr(cp)) +def _combining_class(cp: int) -> int: + v = unicodedata.combining(chr(cp)) if v == 0: - if not unicodedata.name(unichr(cp)): - raise ValueError("Unknown character in unicodedata") + if not unicodedata.name(chr(cp)): + raise ValueError('Unknown character in unicodedata') return v -def _is_script(cp, script): +def _is_script(cp: str, script: str) -> bool: return intranges_contain(ord(cp), idnadata.scripts[script]) -def _punycode(s): +def _punycode(s: str) -> bytes: return s.encode('punycode') -def _unot(s): - return 'U+{0:04X}'.format(s) +def _unot(s: int) -> str: + return 'U+{:04X}'.format(s) -def valid_label_length(label): - +def valid_label_length(label: Union[bytes, str]) -> bool: if len(label) > 63: return False return True -def valid_string_length(label, trailing_dot): - +def valid_string_length(label: Union[bytes, str], trailing_dot: bool) -> bool: if len(label) > (254 if trailing_dot else 253): return False return True -def check_bidi(label, check_ltr=False): - +def check_bidi(label: str, check_ltr: bool = False) -> bool: # Bidi rules should only be applied if string contains RTL characters bidi_label = False for (idx, cp) in enumerate(label, 1): direction = unicodedata.bidirectional(cp) if direction == '': # String likely comes from a newer version of Unicode - raise IDNABidiError('Unknown directionality in label {0} at position {1}'.format(repr(label), idx)) + raise IDNABidiError('Unknown directionality in label {} at position {}'.format(repr(label), idx)) if direction in ['R', 'AL', 'AN']: bidi_label = True if not bidi_label and not check_ltr: @@ -85,17 +78,17 @@ def check_bidi(label, check_ltr=False): elif direction == 'L': rtl = False else: - raise IDNABidiError('First codepoint in label {0} must be directionality L, R or AL'.format(repr(label))) + raise IDNABidiError('First codepoint in label {} must be directionality L, R or AL'.format(repr(label))) valid_ending = False - number_type = False + number_type = None # type: Optional[str] for (idx, cp) in enumerate(label, 1): direction = unicodedata.bidirectional(cp) if rtl: # Bidi rule 2 if not direction in ['R', 'AL', 'AN', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM']: - raise IDNABidiError('Invalid direction for codepoint at position {0} in a right-to-left label'.format(idx)) + raise IDNABidiError('Invalid direction for codepoint at position {} in a right-to-left label'.format(idx)) # Bidi rule 3 if direction in ['R', 'AL', 'EN', 'AN']: valid_ending = True @@ -111,7 +104,7 @@ def check_bidi(label, check_ltr=False): else: # Bidi rule 5 if not direction in ['L', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM']: - raise IDNABidiError('Invalid direction for codepoint at position {0} in a left-to-right label'.format(idx)) + raise IDNABidiError('Invalid direction for codepoint at position {} in a left-to-right label'.format(idx)) # Bidi rule 6 if direction in ['L', 'EN']: valid_ending = True @@ -124,15 +117,13 @@ def check_bidi(label, check_ltr=False): return True -def check_initial_combiner(label): - +def check_initial_combiner(label: str) -> bool: if unicodedata.category(label[0])[0] == 'M': raise IDNAError('Label begins with an illegal combining character') return True -def check_hyphen_ok(label): - +def check_hyphen_ok(label: str) -> bool: if label[2:4] == '--': raise IDNAError('Label has disallowed hyphens in 3rd and 4th position') if label[0] == '-' or label[-1] == '-': @@ -140,14 +131,12 @@ def check_hyphen_ok(label): return True -def check_nfc(label): - +def check_nfc(label: str) -> None: if unicodedata.normalize('NFC', label) != label: raise IDNAError('Label must be in Normalization Form C') -def valid_contextj(label, pos): - +def valid_contextj(label: str, pos: int) -> bool: cp_value = ord(label[pos]) if cp_value == 0x200c: @@ -190,8 +179,7 @@ def valid_contextj(label, pos): return False -def valid_contexto(label, pos, exception=False): - +def valid_contexto(label: str, pos: int, exception: bool = False) -> bool: cp_value = ord(label[pos]) if cp_value == 0x00b7: @@ -212,7 +200,7 @@ def valid_contexto(label, pos, exception=False): elif cp_value == 0x30fb: for cp in label: - if cp == u'\u30fb': + if cp == '\u30fb': continue if _is_script(cp, 'Hiragana') or _is_script(cp, 'Katakana') or _is_script(cp, 'Han'): return True @@ -230,9 +218,10 @@ def valid_contexto(label, pos, exception=False): return False return True + return False -def check_label(label): +def check_label(label: Union[str, bytes, bytearray]) -> None: if isinstance(label, (bytes, bytearray)): label = label.decode('utf-8') if len(label) == 0: @@ -249,102 +238,111 @@ def check_label(label): elif intranges_contain(cp_value, idnadata.codepoint_classes['CONTEXTJ']): try: if not valid_contextj(label, pos): - raise InvalidCodepointContext('Joiner {0} not allowed at position {1} in {2}'.format( + raise InvalidCodepointContext('Joiner {} not allowed at position {} in {}'.format( _unot(cp_value), pos+1, repr(label))) except ValueError: - raise IDNAError('Unknown codepoint adjacent to joiner {0} at position {1} in {2}'.format( + raise IDNAError('Unknown codepoint adjacent to joiner {} at position {} in {}'.format( _unot(cp_value), pos+1, repr(label))) elif intranges_contain(cp_value, idnadata.codepoint_classes['CONTEXTO']): if not valid_contexto(label, pos): - raise InvalidCodepointContext('Codepoint {0} not allowed at position {1} in {2}'.format(_unot(cp_value), pos+1, repr(label))) + raise InvalidCodepointContext('Codepoint {} not allowed at position {} in {}'.format(_unot(cp_value), pos+1, repr(label))) else: - raise InvalidCodepoint('Codepoint {0} at position {1} of {2} not allowed'.format(_unot(cp_value), pos+1, repr(label))) + raise InvalidCodepoint('Codepoint {} at position {} of {} not allowed'.format(_unot(cp_value), pos+1, repr(label))) check_bidi(label) -def alabel(label): - +def alabel(label: str) -> bytes: try: - label = label.encode('ascii') - ulabel(label) - if not valid_label_length(label): + label_bytes = label.encode('ascii') + ulabel(label_bytes) + if not valid_label_length(label_bytes): raise IDNAError('Label too long') - return label + return label_bytes except UnicodeEncodeError: pass if not label: raise IDNAError('No Input') - label = unicode(label) + label = str(label) check_label(label) - label = _punycode(label) - label = _alabel_prefix + label + label_bytes = _punycode(label) + label_bytes = _alabel_prefix + label_bytes - if not valid_label_length(label): + if not valid_label_length(label_bytes): raise IDNAError('Label too long') - return label + return label_bytes -def ulabel(label): - +def ulabel(label: Union[str, bytes, bytearray]) -> str: if not isinstance(label, (bytes, bytearray)): try: - label = label.encode('ascii') + label_bytes = label.encode('ascii') except UnicodeEncodeError: check_label(label) return label + else: + label_bytes = label - label = label.lower() - if label.startswith(_alabel_prefix): - label = label[len(_alabel_prefix):] - if not label: + label_bytes = label_bytes.lower() + if label_bytes.startswith(_alabel_prefix): + label_bytes = label_bytes[len(_alabel_prefix):] + if not label_bytes: raise IDNAError('Malformed A-label, no Punycode eligible content found') - if label.decode('ascii')[-1] == '-': + if label_bytes.decode('ascii')[-1] == '-': raise IDNAError('A-label must not end with a hyphen') else: - check_label(label) - return label.decode('ascii') + check_label(label_bytes) + return label_bytes.decode('ascii') - label = label.decode('punycode') + try: + label = label_bytes.decode('punycode') + except UnicodeError: + raise IDNAError('Invalid A-label') check_label(label) return label -def uts46_remap(domain, std3_rules=True, transitional=False): +def uts46_remap(domain: str, std3_rules: bool = True, transitional: bool = False) -> str: """Re-map the characters in the string according to UTS46 processing.""" from .uts46data import uts46data - output = u"" - try: - for pos, char in enumerate(domain): - code_point = ord(char) + output = '' + + for pos, char in enumerate(domain): + code_point = ord(char) + try: uts46row = uts46data[code_point if code_point < 256 else - bisect.bisect_left(uts46data, (code_point, "Z")) - 1] + bisect.bisect_left(uts46data, (code_point, 'Z')) - 1] status = uts46row[1] - replacement = uts46row[2] if len(uts46row) == 3 else None - if (status == "V" or - (status == "D" and not transitional) or - (status == "3" and not std3_rules and replacement is None)): + replacement = None # type: Optional[str] + if len(uts46row) == 3: + replacement = uts46row[2] # type: ignore + if (status == 'V' or + (status == 'D' and not transitional) or + (status == '3' and not std3_rules and replacement is None)): output += char - elif replacement is not None and (status == "M" or - (status == "3" and not std3_rules) or - (status == "D" and transitional)): + elif replacement is not None and (status == 'M' or + (status == '3' and not std3_rules) or + (status == 'D' and transitional)): output += replacement - elif status != "I": + elif status != 'I': raise IndexError() - return unicodedata.normalize("NFC", output) - except IndexError: - raise InvalidCodepoint( - "Codepoint {0} not allowed at position {1} in {2}".format( - _unot(code_point), pos + 1, repr(domain))) + except IndexError: + raise InvalidCodepoint( + 'Codepoint {} not allowed at position {} in {}'.format( + _unot(code_point), pos + 1, repr(domain))) + + return unicodedata.normalize('NFC', output) -def encode(s, strict=False, uts46=False, std3_rules=False, transitional=False): - +def encode(s: Union[str, bytes, bytearray], strict: bool = False, uts46: bool = False, std3_rules: bool = False, transitional: bool = False) -> bytes: if isinstance(s, (bytes, bytearray)): - s = s.decode("ascii") + try: + s = s.decode('ascii') + except UnicodeDecodeError: + raise IDNAError('should pass a unicode string to the function rather than a byte string.') if uts46: s = uts46_remap(s, std3_rules, transitional) trailing_dot = False @@ -372,10 +370,12 @@ def encode(s, strict=False, uts46=False, std3_rules=False, transitional=False): return s -def decode(s, strict=False, uts46=False, std3_rules=False): - - if isinstance(s, (bytes, bytearray)): - s = s.decode("ascii") +def decode(s: Union[str, bytes, bytearray], strict: bool = False, uts46: bool = False, std3_rules: bool = False) -> str: + try: + if isinstance(s, (bytes, bytearray)): + s = s.decode('ascii') + except UnicodeDecodeError: + raise IDNAError('Invalid ASCII in A-label') if uts46: s = uts46_remap(s, std3_rules, False) trailing_dot = False @@ -383,7 +383,7 @@ def decode(s, strict=False, uts46=False, std3_rules=False): if not strict: labels = _unicode_dots_re.split(s) else: - labels = s.split(u'.') + labels = s.split('.') if not labels or labels == ['']: raise IDNAError('Empty domain') if not labels[-1]: @@ -396,5 +396,5 @@ def decode(s, strict=False, uts46=False, std3_rules=False): else: raise IDNAError('Empty label') if trailing_dot: - result.append(u'') - return u'.'.join(result) + result.append('') + return '.'.join(result) diff --git a/libs/common/idna/idnadata.py b/libs/common/idna/idnadata.py index a284e4c8..67db4625 100644 --- a/libs/common/idna/idnadata.py +++ b/libs/common/idna/idnadata.py @@ -1,6 +1,6 @@ # This file is automatically generated by tools/idna-data -__version__ = "13.0.0" +__version__ = '15.0.0' scripts = { 'Greek': ( 0x37000000374, @@ -49,17 +49,19 @@ scripts = { 0x30210000302a, 0x30380000303c, 0x340000004dc0, - 0x4e0000009ffd, + 0x4e000000a000, 0xf9000000fa6e, 0xfa700000fada, + 0x16fe200016fe4, 0x16ff000016ff2, - 0x200000002a6de, - 0x2a7000002b735, + 0x200000002a6e0, + 0x2a7000002b73a, 0x2b7400002b81e, 0x2b8200002cea2, 0x2ceb00002ebe1, 0x2f8000002fa1e, 0x300000003134b, + 0x31350000323b0, ), 'Hebrew': ( 0x591000005c8, @@ -75,7 +77,8 @@ scripts = { 'Hiragana': ( 0x304100003097, 0x309d000030a0, - 0x1b0010001b11f, + 0x1b0010001b120, + 0x1b1320001b133, 0x1b1500001b153, 0x1f2000001f201, ), @@ -87,7 +90,12 @@ scripts = { 0x330000003358, 0xff660000ff70, 0xff710000ff9e, + 0x1aff00001aff4, + 0x1aff50001affc, + 0x1affd0001afff, 0x1b0000001b001, + 0x1b1200001b123, + 0x1b1550001b156, 0x1b1640001b168, ), } @@ -405,6 +413,39 @@ joining_types = { 0x868: 68, 0x869: 82, 0x86a: 82, + 0x870: 82, + 0x871: 82, + 0x872: 82, + 0x873: 82, + 0x874: 82, + 0x875: 82, + 0x876: 82, + 0x877: 82, + 0x878: 82, + 0x879: 82, + 0x87a: 82, + 0x87b: 82, + 0x87c: 82, + 0x87d: 82, + 0x87e: 82, + 0x87f: 82, + 0x880: 82, + 0x881: 82, + 0x882: 82, + 0x883: 67, + 0x884: 67, + 0x885: 67, + 0x886: 68, + 0x887: 85, + 0x888: 85, + 0x889: 68, + 0x88a: 68, + 0x88b: 68, + 0x88c: 68, + 0x88d: 68, + 0x88e: 82, + 0x890: 85, + 0x891: 85, 0x8a0: 68, 0x8a1: 68, 0x8a2: 68, @@ -426,6 +467,7 @@ joining_types = { 0x8b2: 82, 0x8b3: 68, 0x8b4: 68, + 0x8b5: 68, 0x8b6: 68, 0x8b7: 68, 0x8b8: 68, @@ -444,6 +486,7 @@ joining_types = { 0x8c5: 68, 0x8c6: 68, 0x8c7: 68, + 0x8c8: 68, 0x8e2: 85, 0x1806: 85, 0x1807: 68, @@ -768,6 +811,24 @@ joining_types = { 0x10f52: 68, 0x10f53: 68, 0x10f54: 82, + 0x10f70: 68, + 0x10f71: 68, + 0x10f72: 68, + 0x10f73: 68, + 0x10f74: 82, + 0x10f75: 82, + 0x10f76: 68, + 0x10f77: 68, + 0x10f78: 68, + 0x10f79: 68, + 0x10f7a: 68, + 0x10f7b: 68, + 0x10f7c: 68, + 0x10f7d: 68, + 0x10f7e: 68, + 0x10f7f: 68, + 0x10f80: 68, + 0x10f81: 68, 0x10fb0: 68, 0x10fb1: 85, 0x10fb2: 68, @@ -1168,9 +1229,9 @@ codepoint_classes = { 0x8000000082e, 0x8400000085c, 0x8600000086b, - 0x8a0000008b5, - 0x8b6000008c8, - 0x8d3000008e2, + 0x87000000888, + 0x8890000088f, + 0x898000008e2, 0x8e300000958, 0x96000000964, 0x96600000970, @@ -1252,11 +1313,12 @@ codepoint_classes = { 0xc0e00000c11, 0xc1200000c29, 0xc2a00000c3a, - 0xc3d00000c45, + 0xc3c00000c45, 0xc4600000c49, 0xc4a00000c4e, 0xc5500000c57, 0xc5800000c5b, + 0xc5d00000c5e, 0xc6000000c64, 0xc6600000c70, 0xc8000000c84, @@ -1269,10 +1331,10 @@ codepoint_classes = { 0xcc600000cc9, 0xcca00000cce, 0xcd500000cd7, - 0xcde00000cdf, + 0xcdd00000cdf, 0xce000000ce4, 0xce600000cf0, - 0xcf100000cf3, + 0xcf100000cf4, 0xd0000000d0d, 0xd0e00000d11, 0xd1200000d45, @@ -1307,7 +1369,7 @@ codepoint_classes = { 0xeb400000ebe, 0xec000000ec5, 0xec600000ec7, - 0xec800000ece, + 0xec800000ecf, 0xed000000eda, 0xede00000ee0, 0xf0000000f01, @@ -1366,9 +1428,8 @@ codepoint_classes = { 0x16810000169b, 0x16a0000016eb, 0x16f1000016f9, - 0x17000000170d, - 0x170e00001715, - 0x172000001735, + 0x170000001716, + 0x171f00001735, 0x174000001754, 0x17600000176d, 0x176e00001771, @@ -1397,8 +1458,8 @@ codepoint_classes = { 0x1a9000001a9a, 0x1aa700001aa8, 0x1ab000001abe, - 0x1abf00001ac1, - 0x1b0000001b4c, + 0x1abf00001acf, + 0x1b0000001b4d, 0x1b5000001b5a, 0x1b6b00001b74, 0x1b8000001bf4, @@ -1413,8 +1474,7 @@ codepoint_classes = { 0x1d4e00001d4f, 0x1d6b00001d78, 0x1d7900001d9b, - 0x1dc000001dfa, - 0x1dfb00001e00, + 0x1dc000001e00, 0x1e0100001e02, 0x1e0300001e04, 0x1e0500001e06, @@ -1563,7 +1623,7 @@ codepoint_classes = { 0x1ff600001ff7, 0x214e0000214f, 0x218400002185, - 0x2c3000002c5f, + 0x2c3000002c60, 0x2c6100002c62, 0x2c6500002c67, 0x2c6800002c69, @@ -1652,8 +1712,7 @@ codepoint_classes = { 0x31a0000031c0, 0x31f000003200, 0x340000004dc0, - 0x4e0000009ffd, - 0xa0000000a48d, + 0x4e000000a48d, 0xa4d00000a4fe, 0xa5000000a60d, 0xa6100000a62c, @@ -1766,9 +1825,16 @@ codepoint_classes = { 0xa7bb0000a7bc, 0xa7bd0000a7be, 0xa7bf0000a7c0, + 0xa7c10000a7c2, 0xa7c30000a7c4, 0xa7c80000a7c9, 0xa7ca0000a7cb, + 0xa7d10000a7d2, + 0xa7d30000a7d4, + 0xa7d50000a7d6, + 0xa7d70000a7d8, + 0xa7d90000a7da, + 0xa7f20000a7f5, 0xa7f60000a7f8, 0xa7fa0000a828, 0xa82c0000a82d, @@ -1796,7 +1862,7 @@ codepoint_classes = { 0xab200000ab27, 0xab280000ab2f, 0xab300000ab5b, - 0xab600000ab6a, + 0xab600000ab69, 0xabc00000abeb, 0xabec0000abee, 0xabf00000abfa, @@ -1834,9 +1900,16 @@ codepoint_classes = { 0x104d8000104fc, 0x1050000010528, 0x1053000010564, + 0x10597000105a2, + 0x105a3000105b2, + 0x105b3000105ba, + 0x105bb000105bd, 0x1060000010737, 0x1074000010756, 0x1076000010768, + 0x1078000010786, + 0x10787000107b1, + 0x107b2000107bb, 0x1080000010806, 0x1080800010809, 0x1080a00010836, @@ -1873,14 +1946,16 @@ codepoint_classes = { 0x10e8000010eaa, 0x10eab00010ead, 0x10eb000010eb2, - 0x10f0000010f1d, + 0x10efd00010f1d, 0x10f2700010f28, 0x10f3000010f51, + 0x10f7000010f86, 0x10fb000010fc5, 0x10fe000010ff7, 0x1100000011047, - 0x1106600011070, + 0x1106600011076, 0x1107f000110bb, + 0x110c2000110c3, 0x110d0000110e9, 0x110f0000110fa, 0x1110000011135, @@ -1894,7 +1969,7 @@ codepoint_classes = { 0x111dc000111dd, 0x1120000011212, 0x1121300011238, - 0x1123e0001123f, + 0x1123e00011242, 0x1128000011287, 0x1128800011289, 0x1128a0001128e, @@ -1934,6 +2009,7 @@ codepoint_classes = { 0x117000001171b, 0x1171d0001172c, 0x117300001173a, + 0x1174000011747, 0x118000001183b, 0x118c0000118ea, 0x118ff00011907, @@ -1952,7 +2028,7 @@ codepoint_classes = { 0x11a4700011a48, 0x11a5000011a9a, 0x11a9d00011a9e, - 0x11ac000011af9, + 0x11ab000011af9, 0x11c0000011c09, 0x11c0a00011c37, 0x11c3800011c41, @@ -1974,14 +2050,22 @@ codepoint_classes = { 0x11d9300011d99, 0x11da000011daa, 0x11ee000011ef7, + 0x11f0000011f11, + 0x11f1200011f3b, + 0x11f3e00011f43, + 0x11f5000011f5a, 0x11fb000011fb1, 0x120000001239a, 0x1248000012544, - 0x130000001342f, + 0x12f9000012ff1, + 0x1300000013430, + 0x1344000013456, 0x1440000014647, 0x1680000016a39, 0x16a4000016a5f, 0x16a6000016a6a, + 0x16a7000016abf, + 0x16ac000016aca, 0x16ad000016aee, 0x16af000016af5, 0x16b0000016b37, @@ -1999,8 +2083,13 @@ codepoint_classes = { 0x17000000187f8, 0x1880000018cd6, 0x18d0000018d09, - 0x1b0000001b11f, + 0x1aff00001aff4, + 0x1aff50001affc, + 0x1affd0001afff, + 0x1b0000001b123, + 0x1b1320001b133, 0x1b1500001b153, + 0x1b1550001b156, 0x1b1640001b168, 0x1b1700001b2fc, 0x1bc000001bc6b, @@ -2008,33 +2097,45 @@ codepoint_classes = { 0x1bc800001bc89, 0x1bc900001bc9a, 0x1bc9d0001bc9f, + 0x1cf000001cf2e, + 0x1cf300001cf47, 0x1da000001da37, 0x1da3b0001da6d, 0x1da750001da76, 0x1da840001da85, 0x1da9b0001daa0, 0x1daa10001dab0, + 0x1df000001df1f, + 0x1df250001df2b, 0x1e0000001e007, 0x1e0080001e019, 0x1e01b0001e022, 0x1e0230001e025, 0x1e0260001e02b, + 0x1e0300001e06e, + 0x1e08f0001e090, 0x1e1000001e12d, 0x1e1300001e13e, 0x1e1400001e14a, 0x1e14e0001e14f, + 0x1e2900001e2af, 0x1e2c00001e2fa, + 0x1e4d00001e4fa, + 0x1e7e00001e7e7, + 0x1e7e80001e7ec, + 0x1e7ed0001e7ef, + 0x1e7f00001e7ff, 0x1e8000001e8c5, 0x1e8d00001e8d7, 0x1e9220001e94c, 0x1e9500001e95a, - 0x1fbf00001fbfa, - 0x200000002a6de, - 0x2a7000002b735, + 0x200000002a6e0, + 0x2a7000002b73a, 0x2b7400002b81e, 0x2b8200002cea2, 0x2ceb00002ebe1, 0x300000003134b, + 0x31350000323b0, ), 'CONTEXTJ': ( 0x200c0000200e, diff --git a/libs/common/idna/intranges.py b/libs/common/idna/intranges.py index fa8a7356..6a43b047 100644 --- a/libs/common/idna/intranges.py +++ b/libs/common/idna/intranges.py @@ -6,8 +6,9 @@ in the original list?" in time O(log(# runs)). """ import bisect +from typing import List, Tuple -def intranges_from_list(list_): +def intranges_from_list(list_: List[int]) -> Tuple[int, ...]: """Represent a list of integers as a sequence of ranges: ((start_0, end_0), (start_1, end_1), ...), such that the original integers are exactly those x such that start_i <= x < end_i for some i. @@ -28,14 +29,14 @@ def intranges_from_list(list_): return tuple(ranges) -def _encode_range(start, end): +def _encode_range(start: int, end: int) -> int: return (start << 32) | end -def _decode_range(r): +def _decode_range(r: int) -> Tuple[int, int]: return (r >> 32), (r & ((1 << 32) - 1)) -def intranges_contain(int_, ranges): +def intranges_contain(int_: int, ranges: Tuple[int, ...]) -> bool: """Determine if `int_` falls into one of the ranges in `ranges`.""" tuple_ = _encode_range(int_, 0) pos = bisect.bisect_left(ranges, tuple_) diff --git a/libs/common/idna/package_data.py b/libs/common/idna/package_data.py index ce1c521d..8501893b 100644 --- a/libs/common/idna/package_data.py +++ b/libs/common/idna/package_data.py @@ -1,2 +1,2 @@ -__version__ = '2.10' +__version__ = '3.4' diff --git a/libs/common/idna/py.typed b/libs/common/idna/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/libs/common/idna/uts46data.py b/libs/common/idna/uts46data.py index 3766dd49..186796c1 100644 --- a/libs/common/idna/uts46data.py +++ b/libs/common/idna/uts46data.py @@ -1,11 +1,14 @@ # This file is automatically generated by tools/idna-data # vim: set fileencoding=utf-8 : +from typing import List, Tuple, Union + + """IDNA Mapping Table from UTS46.""" -__version__ = "13.0.0" -def _seg_0(): +__version__ = '15.0.0' +def _seg_0() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ (0x0, '3'), (0x1, '3'), @@ -72,32 +75,32 @@ def _seg_0(): (0x3E, '3'), (0x3F, '3'), (0x40, '3'), - (0x41, 'M', u'a'), - (0x42, 'M', u'b'), - (0x43, 'M', u'c'), - (0x44, 'M', u'd'), - (0x45, 'M', u'e'), - (0x46, 'M', u'f'), - (0x47, 'M', u'g'), - (0x48, 'M', u'h'), - (0x49, 'M', u'i'), - (0x4A, 'M', u'j'), - (0x4B, 'M', u'k'), - (0x4C, 'M', u'l'), - (0x4D, 'M', u'm'), - (0x4E, 'M', u'n'), - (0x4F, 'M', u'o'), - (0x50, 'M', u'p'), - (0x51, 'M', u'q'), - (0x52, 'M', u'r'), - (0x53, 'M', u's'), - (0x54, 'M', u't'), - (0x55, 'M', u'u'), - (0x56, 'M', u'v'), - (0x57, 'M', u'w'), - (0x58, 'M', u'x'), - (0x59, 'M', u'y'), - (0x5A, 'M', u'z'), + (0x41, 'M', 'a'), + (0x42, 'M', 'b'), + (0x43, 'M', 'c'), + (0x44, 'M', 'd'), + (0x45, 'M', 'e'), + (0x46, 'M', 'f'), + (0x47, 'M', 'g'), + (0x48, 'M', 'h'), + (0x49, 'M', 'i'), + (0x4A, 'M', 'j'), + (0x4B, 'M', 'k'), + (0x4C, 'M', 'l'), + (0x4D, 'M', 'm'), + (0x4E, 'M', 'n'), + (0x4F, 'M', 'o'), + (0x50, 'M', 'p'), + (0x51, 'M', 'q'), + (0x52, 'M', 'r'), + (0x53, 'M', 's'), + (0x54, 'M', 't'), + (0x55, 'M', 'u'), + (0x56, 'M', 'v'), + (0x57, 'M', 'w'), + (0x58, 'M', 'x'), + (0x59, 'M', 'y'), + (0x5A, 'M', 'z'), (0x5B, '3'), (0x5C, '3'), (0x5D, '3'), @@ -109,7 +112,7 @@ def _seg_0(): (0x63, 'V'), ] -def _seg_1(): +def _seg_1() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ (0x64, 'V'), (0x65, 'V'), @@ -171,7 +174,7 @@ def _seg_1(): (0x9D, 'X'), (0x9E, 'X'), (0x9F, 'X'), - (0xA0, '3', u' '), + (0xA0, '3', ' '), (0xA1, 'V'), (0xA2, 'V'), (0xA3, 'V'), @@ -179,66 +182,66 @@ def _seg_1(): (0xA5, 'V'), (0xA6, 'V'), (0xA7, 'V'), - (0xA8, '3', u' ̈'), + (0xA8, '3', ' ̈'), (0xA9, 'V'), - (0xAA, 'M', u'a'), + (0xAA, 'M', 'a'), (0xAB, 'V'), (0xAC, 'V'), (0xAD, 'I'), (0xAE, 'V'), - (0xAF, '3', u' ̄'), + (0xAF, '3', ' ̄'), (0xB0, 'V'), (0xB1, 'V'), - (0xB2, 'M', u'2'), - (0xB3, 'M', u'3'), - (0xB4, '3', u' ́'), - (0xB5, 'M', u'μ'), + (0xB2, 'M', '2'), + (0xB3, 'M', '3'), + (0xB4, '3', ' ́'), + (0xB5, 'M', 'μ'), (0xB6, 'V'), (0xB7, 'V'), - (0xB8, '3', u' ̧'), - (0xB9, 'M', u'1'), - (0xBA, 'M', u'o'), + (0xB8, '3', ' ̧'), + (0xB9, 'M', '1'), + (0xBA, 'M', 'o'), (0xBB, 'V'), - (0xBC, 'M', u'1⁄4'), - (0xBD, 'M', u'1⁄2'), - (0xBE, 'M', u'3⁄4'), + (0xBC, 'M', '1⁄4'), + (0xBD, 'M', '1⁄2'), + (0xBE, 'M', '3⁄4'), (0xBF, 'V'), - (0xC0, 'M', u'à'), - (0xC1, 'M', u'á'), - (0xC2, 'M', u'â'), - (0xC3, 'M', u'ã'), - (0xC4, 'M', u'ä'), - (0xC5, 'M', u'å'), - (0xC6, 'M', u'æ'), - (0xC7, 'M', u'ç'), + (0xC0, 'M', 'à'), + (0xC1, 'M', 'á'), + (0xC2, 'M', 'â'), + (0xC3, 'M', 'ã'), + (0xC4, 'M', 'ä'), + (0xC5, 'M', 'å'), + (0xC6, 'M', 'æ'), + (0xC7, 'M', 'ç'), ] -def _seg_2(): +def _seg_2() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0xC8, 'M', u'è'), - (0xC9, 'M', u'é'), - (0xCA, 'M', u'ê'), - (0xCB, 'M', u'ë'), - (0xCC, 'M', u'ì'), - (0xCD, 'M', u'í'), - (0xCE, 'M', u'î'), - (0xCF, 'M', u'ï'), - (0xD0, 'M', u'ð'), - (0xD1, 'M', u'ñ'), - (0xD2, 'M', u'ò'), - (0xD3, 'M', u'ó'), - (0xD4, 'M', u'ô'), - (0xD5, 'M', u'õ'), - (0xD6, 'M', u'ö'), + (0xC8, 'M', 'è'), + (0xC9, 'M', 'é'), + (0xCA, 'M', 'ê'), + (0xCB, 'M', 'ë'), + (0xCC, 'M', 'ì'), + (0xCD, 'M', 'í'), + (0xCE, 'M', 'î'), + (0xCF, 'M', 'ï'), + (0xD0, 'M', 'ð'), + (0xD1, 'M', 'ñ'), + (0xD2, 'M', 'ò'), + (0xD3, 'M', 'ó'), + (0xD4, 'M', 'ô'), + (0xD5, 'M', 'õ'), + (0xD6, 'M', 'ö'), (0xD7, 'V'), - (0xD8, 'M', u'ø'), - (0xD9, 'M', u'ù'), - (0xDA, 'M', u'ú'), - (0xDB, 'M', u'û'), - (0xDC, 'M', u'ü'), - (0xDD, 'M', u'ý'), - (0xDE, 'M', u'þ'), - (0xDF, 'D', u'ss'), + (0xD8, 'M', 'ø'), + (0xD9, 'M', 'ù'), + (0xDA, 'M', 'ú'), + (0xDB, 'M', 'û'), + (0xDC, 'M', 'ü'), + (0xDD, 'M', 'ý'), + (0xDE, 'M', 'þ'), + (0xDF, 'D', 'ss'), (0xE0, 'V'), (0xE1, 'V'), (0xE2, 'V'), @@ -271,765 +274,765 @@ def _seg_2(): (0xFD, 'V'), (0xFE, 'V'), (0xFF, 'V'), - (0x100, 'M', u'ā'), + (0x100, 'M', 'ā'), (0x101, 'V'), - (0x102, 'M', u'ă'), + (0x102, 'M', 'ă'), (0x103, 'V'), - (0x104, 'M', u'ą'), + (0x104, 'M', 'ą'), (0x105, 'V'), - (0x106, 'M', u'ć'), + (0x106, 'M', 'ć'), (0x107, 'V'), - (0x108, 'M', u'ĉ'), + (0x108, 'M', 'ĉ'), (0x109, 'V'), - (0x10A, 'M', u'ċ'), + (0x10A, 'M', 'ċ'), (0x10B, 'V'), - (0x10C, 'M', u'č'), + (0x10C, 'M', 'č'), (0x10D, 'V'), - (0x10E, 'M', u'ď'), + (0x10E, 'M', 'ď'), (0x10F, 'V'), - (0x110, 'M', u'đ'), + (0x110, 'M', 'đ'), (0x111, 'V'), - (0x112, 'M', u'ē'), + (0x112, 'M', 'ē'), (0x113, 'V'), - (0x114, 'M', u'ĕ'), + (0x114, 'M', 'ĕ'), (0x115, 'V'), - (0x116, 'M', u'ė'), + (0x116, 'M', 'ė'), (0x117, 'V'), - (0x118, 'M', u'ę'), + (0x118, 'M', 'ę'), (0x119, 'V'), - (0x11A, 'M', u'ě'), + (0x11A, 'M', 'ě'), (0x11B, 'V'), - (0x11C, 'M', u'ĝ'), + (0x11C, 'M', 'ĝ'), (0x11D, 'V'), - (0x11E, 'M', u'ğ'), + (0x11E, 'M', 'ğ'), (0x11F, 'V'), - (0x120, 'M', u'ġ'), + (0x120, 'M', 'ġ'), (0x121, 'V'), - (0x122, 'M', u'ģ'), + (0x122, 'M', 'ģ'), (0x123, 'V'), - (0x124, 'M', u'ĥ'), + (0x124, 'M', 'ĥ'), (0x125, 'V'), - (0x126, 'M', u'ħ'), + (0x126, 'M', 'ħ'), (0x127, 'V'), - (0x128, 'M', u'ĩ'), + (0x128, 'M', 'ĩ'), (0x129, 'V'), - (0x12A, 'M', u'ī'), + (0x12A, 'M', 'ī'), (0x12B, 'V'), ] -def _seg_3(): +def _seg_3() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0x12C, 'M', u'ĭ'), + (0x12C, 'M', 'ĭ'), (0x12D, 'V'), - (0x12E, 'M', u'į'), + (0x12E, 'M', 'į'), (0x12F, 'V'), - (0x130, 'M', u'i̇'), + (0x130, 'M', 'i̇'), (0x131, 'V'), - (0x132, 'M', u'ij'), - (0x134, 'M', u'ĵ'), + (0x132, 'M', 'ij'), + (0x134, 'M', 'ĵ'), (0x135, 'V'), - (0x136, 'M', u'ķ'), + (0x136, 'M', 'ķ'), (0x137, 'V'), - (0x139, 'M', u'ĺ'), + (0x139, 'M', 'ĺ'), (0x13A, 'V'), - (0x13B, 'M', u'ļ'), + (0x13B, 'M', 'ļ'), (0x13C, 'V'), - (0x13D, 'M', u'ľ'), + (0x13D, 'M', 'ľ'), (0x13E, 'V'), - (0x13F, 'M', u'l·'), - (0x141, 'M', u'ł'), + (0x13F, 'M', 'l·'), + (0x141, 'M', 'ł'), (0x142, 'V'), - (0x143, 'M', u'ń'), + (0x143, 'M', 'ń'), (0x144, 'V'), - (0x145, 'M', u'ņ'), + (0x145, 'M', 'ņ'), (0x146, 'V'), - (0x147, 'M', u'ň'), + (0x147, 'M', 'ň'), (0x148, 'V'), - (0x149, 'M', u'ʼn'), - (0x14A, 'M', u'ŋ'), + (0x149, 'M', 'ʼn'), + (0x14A, 'M', 'ŋ'), (0x14B, 'V'), - (0x14C, 'M', u'ō'), + (0x14C, 'M', 'ō'), (0x14D, 'V'), - (0x14E, 'M', u'ŏ'), + (0x14E, 'M', 'ŏ'), (0x14F, 'V'), - (0x150, 'M', u'ő'), + (0x150, 'M', 'ő'), (0x151, 'V'), - (0x152, 'M', u'œ'), + (0x152, 'M', 'œ'), (0x153, 'V'), - (0x154, 'M', u'ŕ'), + (0x154, 'M', 'ŕ'), (0x155, 'V'), - (0x156, 'M', u'ŗ'), + (0x156, 'M', 'ŗ'), (0x157, 'V'), - (0x158, 'M', u'ř'), + (0x158, 'M', 'ř'), (0x159, 'V'), - (0x15A, 'M', u'ś'), + (0x15A, 'M', 'ś'), (0x15B, 'V'), - (0x15C, 'M', u'ŝ'), + (0x15C, 'M', 'ŝ'), (0x15D, 'V'), - (0x15E, 'M', u'ş'), + (0x15E, 'M', 'ş'), (0x15F, 'V'), - (0x160, 'M', u'š'), + (0x160, 'M', 'š'), (0x161, 'V'), - (0x162, 'M', u'ţ'), + (0x162, 'M', 'ţ'), (0x163, 'V'), - (0x164, 'M', u'ť'), + (0x164, 'M', 'ť'), (0x165, 'V'), - (0x166, 'M', u'ŧ'), + (0x166, 'M', 'ŧ'), (0x167, 'V'), - (0x168, 'M', u'ũ'), + (0x168, 'M', 'ũ'), (0x169, 'V'), - (0x16A, 'M', u'ū'), + (0x16A, 'M', 'ū'), (0x16B, 'V'), - (0x16C, 'M', u'ŭ'), + (0x16C, 'M', 'ŭ'), (0x16D, 'V'), - (0x16E, 'M', u'ů'), + (0x16E, 'M', 'ů'), (0x16F, 'V'), - (0x170, 'M', u'ű'), + (0x170, 'M', 'ű'), (0x171, 'V'), - (0x172, 'M', u'ų'), + (0x172, 'M', 'ų'), (0x173, 'V'), - (0x174, 'M', u'ŵ'), + (0x174, 'M', 'ŵ'), (0x175, 'V'), - (0x176, 'M', u'ŷ'), + (0x176, 'M', 'ŷ'), (0x177, 'V'), - (0x178, 'M', u'ÿ'), - (0x179, 'M', u'ź'), + (0x178, 'M', 'ÿ'), + (0x179, 'M', 'ź'), (0x17A, 'V'), - (0x17B, 'M', u'ż'), + (0x17B, 'M', 'ż'), (0x17C, 'V'), - (0x17D, 'M', u'ž'), + (0x17D, 'M', 'ž'), (0x17E, 'V'), - (0x17F, 'M', u's'), + (0x17F, 'M', 's'), (0x180, 'V'), - (0x181, 'M', u'ɓ'), - (0x182, 'M', u'ƃ'), + (0x181, 'M', 'ɓ'), + (0x182, 'M', 'ƃ'), (0x183, 'V'), - (0x184, 'M', u'ƅ'), + (0x184, 'M', 'ƅ'), (0x185, 'V'), - (0x186, 'M', u'ɔ'), - (0x187, 'M', u'ƈ'), + (0x186, 'M', 'ɔ'), + (0x187, 'M', 'ƈ'), (0x188, 'V'), - (0x189, 'M', u'ɖ'), - (0x18A, 'M', u'ɗ'), - (0x18B, 'M', u'ƌ'), + (0x189, 'M', 'ɖ'), + (0x18A, 'M', 'ɗ'), + (0x18B, 'M', 'ƌ'), (0x18C, 'V'), - (0x18E, 'M', u'ǝ'), - (0x18F, 'M', u'ə'), - (0x190, 'M', u'ɛ'), - (0x191, 'M', u'ƒ'), + (0x18E, 'M', 'ǝ'), + (0x18F, 'M', 'ə'), + (0x190, 'M', 'ɛ'), + (0x191, 'M', 'ƒ'), (0x192, 'V'), - (0x193, 'M', u'ɠ'), + (0x193, 'M', 'ɠ'), ] -def _seg_4(): +def _seg_4() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0x194, 'M', u'ɣ'), + (0x194, 'M', 'ɣ'), (0x195, 'V'), - (0x196, 'M', u'ɩ'), - (0x197, 'M', u'ɨ'), - (0x198, 'M', u'ƙ'), + (0x196, 'M', 'ɩ'), + (0x197, 'M', 'ɨ'), + (0x198, 'M', 'ƙ'), (0x199, 'V'), - (0x19C, 'M', u'ɯ'), - (0x19D, 'M', u'ɲ'), + (0x19C, 'M', 'ɯ'), + (0x19D, 'M', 'ɲ'), (0x19E, 'V'), - (0x19F, 'M', u'ɵ'), - (0x1A0, 'M', u'ơ'), + (0x19F, 'M', 'ɵ'), + (0x1A0, 'M', 'ơ'), (0x1A1, 'V'), - (0x1A2, 'M', u'ƣ'), + (0x1A2, 'M', 'ƣ'), (0x1A3, 'V'), - (0x1A4, 'M', u'ƥ'), + (0x1A4, 'M', 'ƥ'), (0x1A5, 'V'), - (0x1A6, 'M', u'ʀ'), - (0x1A7, 'M', u'ƨ'), + (0x1A6, 'M', 'ʀ'), + (0x1A7, 'M', 'ƨ'), (0x1A8, 'V'), - (0x1A9, 'M', u'ʃ'), + (0x1A9, 'M', 'ʃ'), (0x1AA, 'V'), - (0x1AC, 'M', u'ƭ'), + (0x1AC, 'M', 'ƭ'), (0x1AD, 'V'), - (0x1AE, 'M', u'ʈ'), - (0x1AF, 'M', u'ư'), + (0x1AE, 'M', 'ʈ'), + (0x1AF, 'M', 'ư'), (0x1B0, 'V'), - (0x1B1, 'M', u'ʊ'), - (0x1B2, 'M', u'ʋ'), - (0x1B3, 'M', u'ƴ'), + (0x1B1, 'M', 'ʊ'), + (0x1B2, 'M', 'ʋ'), + (0x1B3, 'M', 'ƴ'), (0x1B4, 'V'), - (0x1B5, 'M', u'ƶ'), + (0x1B5, 'M', 'ƶ'), (0x1B6, 'V'), - (0x1B7, 'M', u'ʒ'), - (0x1B8, 'M', u'ƹ'), + (0x1B7, 'M', 'ʒ'), + (0x1B8, 'M', 'ƹ'), (0x1B9, 'V'), - (0x1BC, 'M', u'ƽ'), + (0x1BC, 'M', 'ƽ'), (0x1BD, 'V'), - (0x1C4, 'M', u'dž'), - (0x1C7, 'M', u'lj'), - (0x1CA, 'M', u'nj'), - (0x1CD, 'M', u'ǎ'), + (0x1C4, 'M', 'dž'), + (0x1C7, 'M', 'lj'), + (0x1CA, 'M', 'nj'), + (0x1CD, 'M', 'ǎ'), (0x1CE, 'V'), - (0x1CF, 'M', u'ǐ'), + (0x1CF, 'M', 'ǐ'), (0x1D0, 'V'), - (0x1D1, 'M', u'ǒ'), + (0x1D1, 'M', 'ǒ'), (0x1D2, 'V'), - (0x1D3, 'M', u'ǔ'), + (0x1D3, 'M', 'ǔ'), (0x1D4, 'V'), - (0x1D5, 'M', u'ǖ'), + (0x1D5, 'M', 'ǖ'), (0x1D6, 'V'), - (0x1D7, 'M', u'ǘ'), + (0x1D7, 'M', 'ǘ'), (0x1D8, 'V'), - (0x1D9, 'M', u'ǚ'), + (0x1D9, 'M', 'ǚ'), (0x1DA, 'V'), - (0x1DB, 'M', u'ǜ'), + (0x1DB, 'M', 'ǜ'), (0x1DC, 'V'), - (0x1DE, 'M', u'ǟ'), + (0x1DE, 'M', 'ǟ'), (0x1DF, 'V'), - (0x1E0, 'M', u'ǡ'), + (0x1E0, 'M', 'ǡ'), (0x1E1, 'V'), - (0x1E2, 'M', u'ǣ'), + (0x1E2, 'M', 'ǣ'), (0x1E3, 'V'), - (0x1E4, 'M', u'ǥ'), + (0x1E4, 'M', 'ǥ'), (0x1E5, 'V'), - (0x1E6, 'M', u'ǧ'), + (0x1E6, 'M', 'ǧ'), (0x1E7, 'V'), - (0x1E8, 'M', u'ǩ'), + (0x1E8, 'M', 'ǩ'), (0x1E9, 'V'), - (0x1EA, 'M', u'ǫ'), + (0x1EA, 'M', 'ǫ'), (0x1EB, 'V'), - (0x1EC, 'M', u'ǭ'), + (0x1EC, 'M', 'ǭ'), (0x1ED, 'V'), - (0x1EE, 'M', u'ǯ'), + (0x1EE, 'M', 'ǯ'), (0x1EF, 'V'), - (0x1F1, 'M', u'dz'), - (0x1F4, 'M', u'ǵ'), + (0x1F1, 'M', 'dz'), + (0x1F4, 'M', 'ǵ'), (0x1F5, 'V'), - (0x1F6, 'M', u'ƕ'), - (0x1F7, 'M', u'ƿ'), - (0x1F8, 'M', u'ǹ'), + (0x1F6, 'M', 'ƕ'), + (0x1F7, 'M', 'ƿ'), + (0x1F8, 'M', 'ǹ'), (0x1F9, 'V'), - (0x1FA, 'M', u'ǻ'), + (0x1FA, 'M', 'ǻ'), (0x1FB, 'V'), - (0x1FC, 'M', u'ǽ'), + (0x1FC, 'M', 'ǽ'), (0x1FD, 'V'), - (0x1FE, 'M', u'ǿ'), + (0x1FE, 'M', 'ǿ'), (0x1FF, 'V'), - (0x200, 'M', u'ȁ'), + (0x200, 'M', 'ȁ'), (0x201, 'V'), - (0x202, 'M', u'ȃ'), + (0x202, 'M', 'ȃ'), (0x203, 'V'), - (0x204, 'M', u'ȅ'), + (0x204, 'M', 'ȅ'), (0x205, 'V'), - (0x206, 'M', u'ȇ'), + (0x206, 'M', 'ȇ'), (0x207, 'V'), - (0x208, 'M', u'ȉ'), + (0x208, 'M', 'ȉ'), (0x209, 'V'), - (0x20A, 'M', u'ȋ'), + (0x20A, 'M', 'ȋ'), (0x20B, 'V'), - (0x20C, 'M', u'ȍ'), + (0x20C, 'M', 'ȍ'), ] -def _seg_5(): +def _seg_5() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ (0x20D, 'V'), - (0x20E, 'M', u'ȏ'), + (0x20E, 'M', 'ȏ'), (0x20F, 'V'), - (0x210, 'M', u'ȑ'), + (0x210, 'M', 'ȑ'), (0x211, 'V'), - (0x212, 'M', u'ȓ'), + (0x212, 'M', 'ȓ'), (0x213, 'V'), - (0x214, 'M', u'ȕ'), + (0x214, 'M', 'ȕ'), (0x215, 'V'), - (0x216, 'M', u'ȗ'), + (0x216, 'M', 'ȗ'), (0x217, 'V'), - (0x218, 'M', u'ș'), + (0x218, 'M', 'ș'), (0x219, 'V'), - (0x21A, 'M', u'ț'), + (0x21A, 'M', 'ț'), (0x21B, 'V'), - (0x21C, 'M', u'ȝ'), + (0x21C, 'M', 'ȝ'), (0x21D, 'V'), - (0x21E, 'M', u'ȟ'), + (0x21E, 'M', 'ȟ'), (0x21F, 'V'), - (0x220, 'M', u'ƞ'), + (0x220, 'M', 'ƞ'), (0x221, 'V'), - (0x222, 'M', u'ȣ'), + (0x222, 'M', 'ȣ'), (0x223, 'V'), - (0x224, 'M', u'ȥ'), + (0x224, 'M', 'ȥ'), (0x225, 'V'), - (0x226, 'M', u'ȧ'), + (0x226, 'M', 'ȧ'), (0x227, 'V'), - (0x228, 'M', u'ȩ'), + (0x228, 'M', 'ȩ'), (0x229, 'V'), - (0x22A, 'M', u'ȫ'), + (0x22A, 'M', 'ȫ'), (0x22B, 'V'), - (0x22C, 'M', u'ȭ'), + (0x22C, 'M', 'ȭ'), (0x22D, 'V'), - (0x22E, 'M', u'ȯ'), + (0x22E, 'M', 'ȯ'), (0x22F, 'V'), - (0x230, 'M', u'ȱ'), + (0x230, 'M', 'ȱ'), (0x231, 'V'), - (0x232, 'M', u'ȳ'), + (0x232, 'M', 'ȳ'), (0x233, 'V'), - (0x23A, 'M', u'ⱥ'), - (0x23B, 'M', u'ȼ'), + (0x23A, 'M', 'ⱥ'), + (0x23B, 'M', 'ȼ'), (0x23C, 'V'), - (0x23D, 'M', u'ƚ'), - (0x23E, 'M', u'ⱦ'), + (0x23D, 'M', 'ƚ'), + (0x23E, 'M', 'ⱦ'), (0x23F, 'V'), - (0x241, 'M', u'ɂ'), + (0x241, 'M', 'ɂ'), (0x242, 'V'), - (0x243, 'M', u'ƀ'), - (0x244, 'M', u'ʉ'), - (0x245, 'M', u'ʌ'), - (0x246, 'M', u'ɇ'), + (0x243, 'M', 'ƀ'), + (0x244, 'M', 'ʉ'), + (0x245, 'M', 'ʌ'), + (0x246, 'M', 'ɇ'), (0x247, 'V'), - (0x248, 'M', u'ɉ'), + (0x248, 'M', 'ɉ'), (0x249, 'V'), - (0x24A, 'M', u'ɋ'), + (0x24A, 'M', 'ɋ'), (0x24B, 'V'), - (0x24C, 'M', u'ɍ'), + (0x24C, 'M', 'ɍ'), (0x24D, 'V'), - (0x24E, 'M', u'ɏ'), + (0x24E, 'M', 'ɏ'), (0x24F, 'V'), - (0x2B0, 'M', u'h'), - (0x2B1, 'M', u'ɦ'), - (0x2B2, 'M', u'j'), - (0x2B3, 'M', u'r'), - (0x2B4, 'M', u'ɹ'), - (0x2B5, 'M', u'ɻ'), - (0x2B6, 'M', u'ʁ'), - (0x2B7, 'M', u'w'), - (0x2B8, 'M', u'y'), + (0x2B0, 'M', 'h'), + (0x2B1, 'M', 'ɦ'), + (0x2B2, 'M', 'j'), + (0x2B3, 'M', 'r'), + (0x2B4, 'M', 'ɹ'), + (0x2B5, 'M', 'ɻ'), + (0x2B6, 'M', 'ʁ'), + (0x2B7, 'M', 'w'), + (0x2B8, 'M', 'y'), (0x2B9, 'V'), - (0x2D8, '3', u' ̆'), - (0x2D9, '3', u' ̇'), - (0x2DA, '3', u' ̊'), - (0x2DB, '3', u' ̨'), - (0x2DC, '3', u' ̃'), - (0x2DD, '3', u' ̋'), + (0x2D8, '3', ' ̆'), + (0x2D9, '3', ' ̇'), + (0x2DA, '3', ' ̊'), + (0x2DB, '3', ' ̨'), + (0x2DC, '3', ' ̃'), + (0x2DD, '3', ' ̋'), (0x2DE, 'V'), - (0x2E0, 'M', u'ɣ'), - (0x2E1, 'M', u'l'), - (0x2E2, 'M', u's'), - (0x2E3, 'M', u'x'), - (0x2E4, 'M', u'ʕ'), + (0x2E0, 'M', 'ɣ'), + (0x2E1, 'M', 'l'), + (0x2E2, 'M', 's'), + (0x2E3, 'M', 'x'), + (0x2E4, 'M', 'ʕ'), (0x2E5, 'V'), - (0x340, 'M', u'̀'), - (0x341, 'M', u'́'), + (0x340, 'M', '̀'), + (0x341, 'M', '́'), (0x342, 'V'), - (0x343, 'M', u'̓'), - (0x344, 'M', u'̈́'), - (0x345, 'M', u'ι'), + (0x343, 'M', '̓'), + (0x344, 'M', '̈́'), + (0x345, 'M', 'ι'), (0x346, 'V'), (0x34F, 'I'), (0x350, 'V'), - (0x370, 'M', u'ͱ'), + (0x370, 'M', 'ͱ'), (0x371, 'V'), - (0x372, 'M', u'ͳ'), + (0x372, 'M', 'ͳ'), (0x373, 'V'), - (0x374, 'M', u'ʹ'), + (0x374, 'M', 'ʹ'), (0x375, 'V'), - (0x376, 'M', u'ͷ'), + (0x376, 'M', 'ͷ'), (0x377, 'V'), ] -def _seg_6(): +def _seg_6() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ (0x378, 'X'), - (0x37A, '3', u' ι'), + (0x37A, '3', ' ι'), (0x37B, 'V'), - (0x37E, '3', u';'), - (0x37F, 'M', u'ϳ'), + (0x37E, '3', ';'), + (0x37F, 'M', 'ϳ'), (0x380, 'X'), - (0x384, '3', u' ́'), - (0x385, '3', u' ̈́'), - (0x386, 'M', u'ά'), - (0x387, 'M', u'·'), - (0x388, 'M', u'έ'), - (0x389, 'M', u'ή'), - (0x38A, 'M', u'ί'), + (0x384, '3', ' ́'), + (0x385, '3', ' ̈́'), + (0x386, 'M', 'ά'), + (0x387, 'M', '·'), + (0x388, 'M', 'έ'), + (0x389, 'M', 'ή'), + (0x38A, 'M', 'ί'), (0x38B, 'X'), - (0x38C, 'M', u'ό'), + (0x38C, 'M', 'ό'), (0x38D, 'X'), - (0x38E, 'M', u'ύ'), - (0x38F, 'M', u'ώ'), + (0x38E, 'M', 'ύ'), + (0x38F, 'M', 'ώ'), (0x390, 'V'), - (0x391, 'M', u'α'), - (0x392, 'M', u'β'), - (0x393, 'M', u'γ'), - (0x394, 'M', u'δ'), - (0x395, 'M', u'ε'), - (0x396, 'M', u'ζ'), - (0x397, 'M', u'η'), - (0x398, 'M', u'θ'), - (0x399, 'M', u'ι'), - (0x39A, 'M', u'κ'), - (0x39B, 'M', u'λ'), - (0x39C, 'M', u'μ'), - (0x39D, 'M', u'ν'), - (0x39E, 'M', u'ξ'), - (0x39F, 'M', u'ο'), - (0x3A0, 'M', u'π'), - (0x3A1, 'M', u'ρ'), + (0x391, 'M', 'α'), + (0x392, 'M', 'β'), + (0x393, 'M', 'γ'), + (0x394, 'M', 'δ'), + (0x395, 'M', 'ε'), + (0x396, 'M', 'ζ'), + (0x397, 'M', 'η'), + (0x398, 'M', 'θ'), + (0x399, 'M', 'ι'), + (0x39A, 'M', 'κ'), + (0x39B, 'M', 'λ'), + (0x39C, 'M', 'μ'), + (0x39D, 'M', 'ν'), + (0x39E, 'M', 'ξ'), + (0x39F, 'M', 'ο'), + (0x3A0, 'M', 'π'), + (0x3A1, 'M', 'ρ'), (0x3A2, 'X'), - (0x3A3, 'M', u'σ'), - (0x3A4, 'M', u'τ'), - (0x3A5, 'M', u'υ'), - (0x3A6, 'M', u'φ'), - (0x3A7, 'M', u'χ'), - (0x3A8, 'M', u'ψ'), - (0x3A9, 'M', u'ω'), - (0x3AA, 'M', u'ϊ'), - (0x3AB, 'M', u'ϋ'), + (0x3A3, 'M', 'σ'), + (0x3A4, 'M', 'τ'), + (0x3A5, 'M', 'υ'), + (0x3A6, 'M', 'φ'), + (0x3A7, 'M', 'χ'), + (0x3A8, 'M', 'ψ'), + (0x3A9, 'M', 'ω'), + (0x3AA, 'M', 'ϊ'), + (0x3AB, 'M', 'ϋ'), (0x3AC, 'V'), - (0x3C2, 'D', u'σ'), + (0x3C2, 'D', 'σ'), (0x3C3, 'V'), - (0x3CF, 'M', u'ϗ'), - (0x3D0, 'M', u'β'), - (0x3D1, 'M', u'θ'), - (0x3D2, 'M', u'υ'), - (0x3D3, 'M', u'ύ'), - (0x3D4, 'M', u'ϋ'), - (0x3D5, 'M', u'φ'), - (0x3D6, 'M', u'π'), + (0x3CF, 'M', 'ϗ'), + (0x3D0, 'M', 'β'), + (0x3D1, 'M', 'θ'), + (0x3D2, 'M', 'υ'), + (0x3D3, 'M', 'ύ'), + (0x3D4, 'M', 'ϋ'), + (0x3D5, 'M', 'φ'), + (0x3D6, 'M', 'π'), (0x3D7, 'V'), - (0x3D8, 'M', u'ϙ'), + (0x3D8, 'M', 'ϙ'), (0x3D9, 'V'), - (0x3DA, 'M', u'ϛ'), + (0x3DA, 'M', 'ϛ'), (0x3DB, 'V'), - (0x3DC, 'M', u'ϝ'), + (0x3DC, 'M', 'ϝ'), (0x3DD, 'V'), - (0x3DE, 'M', u'ϟ'), + (0x3DE, 'M', 'ϟ'), (0x3DF, 'V'), - (0x3E0, 'M', u'ϡ'), + (0x3E0, 'M', 'ϡ'), (0x3E1, 'V'), - (0x3E2, 'M', u'ϣ'), + (0x3E2, 'M', 'ϣ'), (0x3E3, 'V'), - (0x3E4, 'M', u'ϥ'), + (0x3E4, 'M', 'ϥ'), (0x3E5, 'V'), - (0x3E6, 'M', u'ϧ'), + (0x3E6, 'M', 'ϧ'), (0x3E7, 'V'), - (0x3E8, 'M', u'ϩ'), + (0x3E8, 'M', 'ϩ'), (0x3E9, 'V'), - (0x3EA, 'M', u'ϫ'), + (0x3EA, 'M', 'ϫ'), (0x3EB, 'V'), - (0x3EC, 'M', u'ϭ'), + (0x3EC, 'M', 'ϭ'), (0x3ED, 'V'), - (0x3EE, 'M', u'ϯ'), + (0x3EE, 'M', 'ϯ'), (0x3EF, 'V'), - (0x3F0, 'M', u'κ'), - (0x3F1, 'M', u'ρ'), - (0x3F2, 'M', u'σ'), + (0x3F0, 'M', 'κ'), + (0x3F1, 'M', 'ρ'), + (0x3F2, 'M', 'σ'), (0x3F3, 'V'), - (0x3F4, 'M', u'θ'), - (0x3F5, 'M', u'ε'), + (0x3F4, 'M', 'θ'), + (0x3F5, 'M', 'ε'), (0x3F6, 'V'), - (0x3F7, 'M', u'ϸ'), + (0x3F7, 'M', 'ϸ'), (0x3F8, 'V'), - (0x3F9, 'M', u'σ'), - (0x3FA, 'M', u'ϻ'), + (0x3F9, 'M', 'σ'), + (0x3FA, 'M', 'ϻ'), (0x3FB, 'V'), - (0x3FD, 'M', u'ͻ'), - (0x3FE, 'M', u'ͼ'), - (0x3FF, 'M', u'ͽ'), - (0x400, 'M', u'ѐ'), - (0x401, 'M', u'ё'), - (0x402, 'M', u'ђ'), + (0x3FD, 'M', 'ͻ'), + (0x3FE, 'M', 'ͼ'), + (0x3FF, 'M', 'ͽ'), + (0x400, 'M', 'ѐ'), + (0x401, 'M', 'ё'), + (0x402, 'M', 'ђ'), ] -def _seg_7(): +def _seg_7() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0x403, 'M', u'ѓ'), - (0x404, 'M', u'є'), - (0x405, 'M', u'ѕ'), - (0x406, 'M', u'і'), - (0x407, 'M', u'ї'), - (0x408, 'M', u'ј'), - (0x409, 'M', u'љ'), - (0x40A, 'M', u'њ'), - (0x40B, 'M', u'ћ'), - (0x40C, 'M', u'ќ'), - (0x40D, 'M', u'ѝ'), - (0x40E, 'M', u'ў'), - (0x40F, 'M', u'џ'), - (0x410, 'M', u'а'), - (0x411, 'M', u'б'), - (0x412, 'M', u'в'), - (0x413, 'M', u'г'), - (0x414, 'M', u'д'), - (0x415, 'M', u'е'), - (0x416, 'M', u'ж'), - (0x417, 'M', u'з'), - (0x418, 'M', u'и'), - (0x419, 'M', u'й'), - (0x41A, 'M', u'к'), - (0x41B, 'M', u'л'), - (0x41C, 'M', u'м'), - (0x41D, 'M', u'н'), - (0x41E, 'M', u'о'), - (0x41F, 'M', u'п'), - (0x420, 'M', u'р'), - (0x421, 'M', u'с'), - (0x422, 'M', u'т'), - (0x423, 'M', u'у'), - (0x424, 'M', u'ф'), - (0x425, 'M', u'х'), - (0x426, 'M', u'ц'), - (0x427, 'M', u'ч'), - (0x428, 'M', u'ш'), - (0x429, 'M', u'щ'), - (0x42A, 'M', u'ъ'), - (0x42B, 'M', u'ы'), - (0x42C, 'M', u'ь'), - (0x42D, 'M', u'э'), - (0x42E, 'M', u'ю'), - (0x42F, 'M', u'я'), + (0x403, 'M', 'ѓ'), + (0x404, 'M', 'є'), + (0x405, 'M', 'ѕ'), + (0x406, 'M', 'і'), + (0x407, 'M', 'ї'), + (0x408, 'M', 'ј'), + (0x409, 'M', 'љ'), + (0x40A, 'M', 'њ'), + (0x40B, 'M', 'ћ'), + (0x40C, 'M', 'ќ'), + (0x40D, 'M', 'ѝ'), + (0x40E, 'M', 'ў'), + (0x40F, 'M', 'џ'), + (0x410, 'M', 'а'), + (0x411, 'M', 'б'), + (0x412, 'M', 'в'), + (0x413, 'M', 'г'), + (0x414, 'M', 'д'), + (0x415, 'M', 'е'), + (0x416, 'M', 'ж'), + (0x417, 'M', 'з'), + (0x418, 'M', 'и'), + (0x419, 'M', 'й'), + (0x41A, 'M', 'к'), + (0x41B, 'M', 'л'), + (0x41C, 'M', 'м'), + (0x41D, 'M', 'н'), + (0x41E, 'M', 'о'), + (0x41F, 'M', 'п'), + (0x420, 'M', 'р'), + (0x421, 'M', 'с'), + (0x422, 'M', 'т'), + (0x423, 'M', 'у'), + (0x424, 'M', 'ф'), + (0x425, 'M', 'х'), + (0x426, 'M', 'ц'), + (0x427, 'M', 'ч'), + (0x428, 'M', 'ш'), + (0x429, 'M', 'щ'), + (0x42A, 'M', 'ъ'), + (0x42B, 'M', 'ы'), + (0x42C, 'M', 'ь'), + (0x42D, 'M', 'э'), + (0x42E, 'M', 'ю'), + (0x42F, 'M', 'я'), (0x430, 'V'), - (0x460, 'M', u'ѡ'), + (0x460, 'M', 'ѡ'), (0x461, 'V'), - (0x462, 'M', u'ѣ'), + (0x462, 'M', 'ѣ'), (0x463, 'V'), - (0x464, 'M', u'ѥ'), + (0x464, 'M', 'ѥ'), (0x465, 'V'), - (0x466, 'M', u'ѧ'), + (0x466, 'M', 'ѧ'), (0x467, 'V'), - (0x468, 'M', u'ѩ'), + (0x468, 'M', 'ѩ'), (0x469, 'V'), - (0x46A, 'M', u'ѫ'), + (0x46A, 'M', 'ѫ'), (0x46B, 'V'), - (0x46C, 'M', u'ѭ'), + (0x46C, 'M', 'ѭ'), (0x46D, 'V'), - (0x46E, 'M', u'ѯ'), + (0x46E, 'M', 'ѯ'), (0x46F, 'V'), - (0x470, 'M', u'ѱ'), + (0x470, 'M', 'ѱ'), (0x471, 'V'), - (0x472, 'M', u'ѳ'), + (0x472, 'M', 'ѳ'), (0x473, 'V'), - (0x474, 'M', u'ѵ'), + (0x474, 'M', 'ѵ'), (0x475, 'V'), - (0x476, 'M', u'ѷ'), + (0x476, 'M', 'ѷ'), (0x477, 'V'), - (0x478, 'M', u'ѹ'), + (0x478, 'M', 'ѹ'), (0x479, 'V'), - (0x47A, 'M', u'ѻ'), + (0x47A, 'M', 'ѻ'), (0x47B, 'V'), - (0x47C, 'M', u'ѽ'), + (0x47C, 'M', 'ѽ'), (0x47D, 'V'), - (0x47E, 'M', u'ѿ'), + (0x47E, 'M', 'ѿ'), (0x47F, 'V'), - (0x480, 'M', u'ҁ'), + (0x480, 'M', 'ҁ'), (0x481, 'V'), - (0x48A, 'M', u'ҋ'), + (0x48A, 'M', 'ҋ'), (0x48B, 'V'), - (0x48C, 'M', u'ҍ'), + (0x48C, 'M', 'ҍ'), (0x48D, 'V'), - (0x48E, 'M', u'ҏ'), + (0x48E, 'M', 'ҏ'), (0x48F, 'V'), - (0x490, 'M', u'ґ'), + (0x490, 'M', 'ґ'), (0x491, 'V'), - (0x492, 'M', u'ғ'), + (0x492, 'M', 'ғ'), (0x493, 'V'), - (0x494, 'M', u'ҕ'), + (0x494, 'M', 'ҕ'), (0x495, 'V'), - (0x496, 'M', u'җ'), + (0x496, 'M', 'җ'), (0x497, 'V'), - (0x498, 'M', u'ҙ'), + (0x498, 'M', 'ҙ'), (0x499, 'V'), - (0x49A, 'M', u'қ'), + (0x49A, 'M', 'қ'), (0x49B, 'V'), - (0x49C, 'M', u'ҝ'), + (0x49C, 'M', 'ҝ'), (0x49D, 'V'), ] -def _seg_8(): +def _seg_8() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0x49E, 'M', u'ҟ'), + (0x49E, 'M', 'ҟ'), (0x49F, 'V'), - (0x4A0, 'M', u'ҡ'), + (0x4A0, 'M', 'ҡ'), (0x4A1, 'V'), - (0x4A2, 'M', u'ң'), + (0x4A2, 'M', 'ң'), (0x4A3, 'V'), - (0x4A4, 'M', u'ҥ'), + (0x4A4, 'M', 'ҥ'), (0x4A5, 'V'), - (0x4A6, 'M', u'ҧ'), + (0x4A6, 'M', 'ҧ'), (0x4A7, 'V'), - (0x4A8, 'M', u'ҩ'), + (0x4A8, 'M', 'ҩ'), (0x4A9, 'V'), - (0x4AA, 'M', u'ҫ'), + (0x4AA, 'M', 'ҫ'), (0x4AB, 'V'), - (0x4AC, 'M', u'ҭ'), + (0x4AC, 'M', 'ҭ'), (0x4AD, 'V'), - (0x4AE, 'M', u'ү'), + (0x4AE, 'M', 'ү'), (0x4AF, 'V'), - (0x4B0, 'M', u'ұ'), + (0x4B0, 'M', 'ұ'), (0x4B1, 'V'), - (0x4B2, 'M', u'ҳ'), + (0x4B2, 'M', 'ҳ'), (0x4B3, 'V'), - (0x4B4, 'M', u'ҵ'), + (0x4B4, 'M', 'ҵ'), (0x4B5, 'V'), - (0x4B6, 'M', u'ҷ'), + (0x4B6, 'M', 'ҷ'), (0x4B7, 'V'), - (0x4B8, 'M', u'ҹ'), + (0x4B8, 'M', 'ҹ'), (0x4B9, 'V'), - (0x4BA, 'M', u'һ'), + (0x4BA, 'M', 'һ'), (0x4BB, 'V'), - (0x4BC, 'M', u'ҽ'), + (0x4BC, 'M', 'ҽ'), (0x4BD, 'V'), - (0x4BE, 'M', u'ҿ'), + (0x4BE, 'M', 'ҿ'), (0x4BF, 'V'), (0x4C0, 'X'), - (0x4C1, 'M', u'ӂ'), + (0x4C1, 'M', 'ӂ'), (0x4C2, 'V'), - (0x4C3, 'M', u'ӄ'), + (0x4C3, 'M', 'ӄ'), (0x4C4, 'V'), - (0x4C5, 'M', u'ӆ'), + (0x4C5, 'M', 'ӆ'), (0x4C6, 'V'), - (0x4C7, 'M', u'ӈ'), + (0x4C7, 'M', 'ӈ'), (0x4C8, 'V'), - (0x4C9, 'M', u'ӊ'), + (0x4C9, 'M', 'ӊ'), (0x4CA, 'V'), - (0x4CB, 'M', u'ӌ'), + (0x4CB, 'M', 'ӌ'), (0x4CC, 'V'), - (0x4CD, 'M', u'ӎ'), + (0x4CD, 'M', 'ӎ'), (0x4CE, 'V'), - (0x4D0, 'M', u'ӑ'), + (0x4D0, 'M', 'ӑ'), (0x4D1, 'V'), - (0x4D2, 'M', u'ӓ'), + (0x4D2, 'M', 'ӓ'), (0x4D3, 'V'), - (0x4D4, 'M', u'ӕ'), + (0x4D4, 'M', 'ӕ'), (0x4D5, 'V'), - (0x4D6, 'M', u'ӗ'), + (0x4D6, 'M', 'ӗ'), (0x4D7, 'V'), - (0x4D8, 'M', u'ә'), + (0x4D8, 'M', 'ә'), (0x4D9, 'V'), - (0x4DA, 'M', u'ӛ'), + (0x4DA, 'M', 'ӛ'), (0x4DB, 'V'), - (0x4DC, 'M', u'ӝ'), + (0x4DC, 'M', 'ӝ'), (0x4DD, 'V'), - (0x4DE, 'M', u'ӟ'), + (0x4DE, 'M', 'ӟ'), (0x4DF, 'V'), - (0x4E0, 'M', u'ӡ'), + (0x4E0, 'M', 'ӡ'), (0x4E1, 'V'), - (0x4E2, 'M', u'ӣ'), + (0x4E2, 'M', 'ӣ'), (0x4E3, 'V'), - (0x4E4, 'M', u'ӥ'), + (0x4E4, 'M', 'ӥ'), (0x4E5, 'V'), - (0x4E6, 'M', u'ӧ'), + (0x4E6, 'M', 'ӧ'), (0x4E7, 'V'), - (0x4E8, 'M', u'ө'), + (0x4E8, 'M', 'ө'), (0x4E9, 'V'), - (0x4EA, 'M', u'ӫ'), + (0x4EA, 'M', 'ӫ'), (0x4EB, 'V'), - (0x4EC, 'M', u'ӭ'), + (0x4EC, 'M', 'ӭ'), (0x4ED, 'V'), - (0x4EE, 'M', u'ӯ'), + (0x4EE, 'M', 'ӯ'), (0x4EF, 'V'), - (0x4F0, 'M', u'ӱ'), + (0x4F0, 'M', 'ӱ'), (0x4F1, 'V'), - (0x4F2, 'M', u'ӳ'), + (0x4F2, 'M', 'ӳ'), (0x4F3, 'V'), - (0x4F4, 'M', u'ӵ'), + (0x4F4, 'M', 'ӵ'), (0x4F5, 'V'), - (0x4F6, 'M', u'ӷ'), + (0x4F6, 'M', 'ӷ'), (0x4F7, 'V'), - (0x4F8, 'M', u'ӹ'), + (0x4F8, 'M', 'ӹ'), (0x4F9, 'V'), - (0x4FA, 'M', u'ӻ'), + (0x4FA, 'M', 'ӻ'), (0x4FB, 'V'), - (0x4FC, 'M', u'ӽ'), + (0x4FC, 'M', 'ӽ'), (0x4FD, 'V'), - (0x4FE, 'M', u'ӿ'), + (0x4FE, 'M', 'ӿ'), (0x4FF, 'V'), - (0x500, 'M', u'ԁ'), + (0x500, 'M', 'ԁ'), (0x501, 'V'), - (0x502, 'M', u'ԃ'), + (0x502, 'M', 'ԃ'), ] -def _seg_9(): +def _seg_9() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ (0x503, 'V'), - (0x504, 'M', u'ԅ'), + (0x504, 'M', 'ԅ'), (0x505, 'V'), - (0x506, 'M', u'ԇ'), + (0x506, 'M', 'ԇ'), (0x507, 'V'), - (0x508, 'M', u'ԉ'), + (0x508, 'M', 'ԉ'), (0x509, 'V'), - (0x50A, 'M', u'ԋ'), + (0x50A, 'M', 'ԋ'), (0x50B, 'V'), - (0x50C, 'M', u'ԍ'), + (0x50C, 'M', 'ԍ'), (0x50D, 'V'), - (0x50E, 'M', u'ԏ'), + (0x50E, 'M', 'ԏ'), (0x50F, 'V'), - (0x510, 'M', u'ԑ'), + (0x510, 'M', 'ԑ'), (0x511, 'V'), - (0x512, 'M', u'ԓ'), + (0x512, 'M', 'ԓ'), (0x513, 'V'), - (0x514, 'M', u'ԕ'), + (0x514, 'M', 'ԕ'), (0x515, 'V'), - (0x516, 'M', u'ԗ'), + (0x516, 'M', 'ԗ'), (0x517, 'V'), - (0x518, 'M', u'ԙ'), + (0x518, 'M', 'ԙ'), (0x519, 'V'), - (0x51A, 'M', u'ԛ'), + (0x51A, 'M', 'ԛ'), (0x51B, 'V'), - (0x51C, 'M', u'ԝ'), + (0x51C, 'M', 'ԝ'), (0x51D, 'V'), - (0x51E, 'M', u'ԟ'), + (0x51E, 'M', 'ԟ'), (0x51F, 'V'), - (0x520, 'M', u'ԡ'), + (0x520, 'M', 'ԡ'), (0x521, 'V'), - (0x522, 'M', u'ԣ'), + (0x522, 'M', 'ԣ'), (0x523, 'V'), - (0x524, 'M', u'ԥ'), + (0x524, 'M', 'ԥ'), (0x525, 'V'), - (0x526, 'M', u'ԧ'), + (0x526, 'M', 'ԧ'), (0x527, 'V'), - (0x528, 'M', u'ԩ'), + (0x528, 'M', 'ԩ'), (0x529, 'V'), - (0x52A, 'M', u'ԫ'), + (0x52A, 'M', 'ԫ'), (0x52B, 'V'), - (0x52C, 'M', u'ԭ'), + (0x52C, 'M', 'ԭ'), (0x52D, 'V'), - (0x52E, 'M', u'ԯ'), + (0x52E, 'M', 'ԯ'), (0x52F, 'V'), (0x530, 'X'), - (0x531, 'M', u'ա'), - (0x532, 'M', u'բ'), - (0x533, 'M', u'գ'), - (0x534, 'M', u'դ'), - (0x535, 'M', u'ե'), - (0x536, 'M', u'զ'), - (0x537, 'M', u'է'), - (0x538, 'M', u'ը'), - (0x539, 'M', u'թ'), - (0x53A, 'M', u'ժ'), - (0x53B, 'M', u'ի'), - (0x53C, 'M', u'լ'), - (0x53D, 'M', u'խ'), - (0x53E, 'M', u'ծ'), - (0x53F, 'M', u'կ'), - (0x540, 'M', u'հ'), - (0x541, 'M', u'ձ'), - (0x542, 'M', u'ղ'), - (0x543, 'M', u'ճ'), - (0x544, 'M', u'մ'), - (0x545, 'M', u'յ'), - (0x546, 'M', u'ն'), - (0x547, 'M', u'շ'), - (0x548, 'M', u'ո'), - (0x549, 'M', u'չ'), - (0x54A, 'M', u'պ'), - (0x54B, 'M', u'ջ'), - (0x54C, 'M', u'ռ'), - (0x54D, 'M', u'ս'), - (0x54E, 'M', u'վ'), - (0x54F, 'M', u'տ'), - (0x550, 'M', u'ր'), - (0x551, 'M', u'ց'), - (0x552, 'M', u'ւ'), - (0x553, 'M', u'փ'), - (0x554, 'M', u'ք'), - (0x555, 'M', u'օ'), - (0x556, 'M', u'ֆ'), + (0x531, 'M', 'ա'), + (0x532, 'M', 'բ'), + (0x533, 'M', 'գ'), + (0x534, 'M', 'դ'), + (0x535, 'M', 'ե'), + (0x536, 'M', 'զ'), + (0x537, 'M', 'է'), + (0x538, 'M', 'ը'), + (0x539, 'M', 'թ'), + (0x53A, 'M', 'ժ'), + (0x53B, 'M', 'ի'), + (0x53C, 'M', 'լ'), + (0x53D, 'M', 'խ'), + (0x53E, 'M', 'ծ'), + (0x53F, 'M', 'կ'), + (0x540, 'M', 'հ'), + (0x541, 'M', 'ձ'), + (0x542, 'M', 'ղ'), + (0x543, 'M', 'ճ'), + (0x544, 'M', 'մ'), + (0x545, 'M', 'յ'), + (0x546, 'M', 'ն'), + (0x547, 'M', 'շ'), + (0x548, 'M', 'ո'), + (0x549, 'M', 'չ'), + (0x54A, 'M', 'պ'), + (0x54B, 'M', 'ջ'), + (0x54C, 'M', 'ռ'), + (0x54D, 'M', 'ս'), + (0x54E, 'M', 'վ'), + (0x54F, 'M', 'տ'), + (0x550, 'M', 'ր'), + (0x551, 'M', 'ց'), + (0x552, 'M', 'ւ'), + (0x553, 'M', 'փ'), + (0x554, 'M', 'ք'), + (0x555, 'M', 'օ'), + (0x556, 'M', 'ֆ'), (0x557, 'X'), (0x559, 'V'), - (0x587, 'M', u'եւ'), + (0x587, 'M', 'եւ'), (0x588, 'V'), (0x58B, 'X'), (0x58D, 'V'), @@ -1042,15 +1045,15 @@ def _seg_9(): (0x5F5, 'X'), (0x606, 'V'), (0x61C, 'X'), - (0x61E, 'V'), + (0x61D, 'V'), ] -def _seg_10(): +def _seg_10() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0x675, 'M', u'اٴ'), - (0x676, 'M', u'وٴ'), - (0x677, 'M', u'ۇٴ'), - (0x678, 'M', u'يٴ'), + (0x675, 'M', 'اٴ'), + (0x676, 'M', 'وٴ'), + (0x677, 'M', 'ۇٴ'), + (0x678, 'M', 'يٴ'), (0x679, 'V'), (0x6DD, 'X'), (0x6DE, 'V'), @@ -1071,21 +1074,19 @@ def _seg_10(): (0x85F, 'X'), (0x860, 'V'), (0x86B, 'X'), - (0x8A0, 'V'), - (0x8B5, 'X'), - (0x8B6, 'V'), - (0x8C8, 'X'), - (0x8D3, 'V'), + (0x870, 'V'), + (0x88F, 'X'), + (0x898, 'V'), (0x8E2, 'X'), (0x8E3, 'V'), - (0x958, 'M', u'क़'), - (0x959, 'M', u'ख़'), - (0x95A, 'M', u'ग़'), - (0x95B, 'M', u'ज़'), - (0x95C, 'M', u'ड़'), - (0x95D, 'M', u'ढ़'), - (0x95E, 'M', u'फ़'), - (0x95F, 'M', u'य़'), + (0x958, 'M', 'क़'), + (0x959, 'M', 'ख़'), + (0x95A, 'M', 'ग़'), + (0x95B, 'M', 'ज़'), + (0x95C, 'M', 'ड़'), + (0x95D, 'M', 'ढ़'), + (0x95E, 'M', 'फ़'), + (0x95F, 'M', 'य़'), (0x960, 'V'), (0x984, 'X'), (0x985, 'V'), @@ -1108,10 +1109,10 @@ def _seg_10(): (0x9CF, 'X'), (0x9D7, 'V'), (0x9D8, 'X'), - (0x9DC, 'M', u'ড়'), - (0x9DD, 'M', u'ঢ়'), + (0x9DC, 'M', 'ড়'), + (0x9DD, 'M', 'ঢ়'), (0x9DE, 'X'), - (0x9DF, 'M', u'য়'), + (0x9DF, 'M', 'য়'), (0x9E0, 'V'), (0x9E4, 'X'), (0x9E6, 'V'), @@ -1127,10 +1128,10 @@ def _seg_10(): (0xA2A, 'V'), (0xA31, 'X'), (0xA32, 'V'), - (0xA33, 'M', u'ਲ਼'), + (0xA33, 'M', 'ਲ਼'), (0xA34, 'X'), (0xA35, 'V'), - (0xA36, 'M', u'ਸ਼'), + (0xA36, 'M', 'ਸ਼'), (0xA37, 'X'), (0xA38, 'V'), (0xA3A, 'X'), @@ -1144,16 +1145,16 @@ def _seg_10(): (0xA4E, 'X'), (0xA51, 'V'), (0xA52, 'X'), - (0xA59, 'M', u'ਖ਼'), - (0xA5A, 'M', u'ਗ਼'), - (0xA5B, 'M', u'ਜ਼'), - ] - -def _seg_11(): - return [ + (0xA59, 'M', 'ਖ਼'), + (0xA5A, 'M', 'ਗ਼'), + (0xA5B, 'M', 'ਜ਼'), (0xA5C, 'V'), (0xA5D, 'X'), - (0xA5E, 'M', u'ਫ਼'), + ] + +def _seg_11() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xA5E, 'M', 'ਫ਼'), (0xA5F, 'X'), (0xA66, 'V'), (0xA77, 'X'), @@ -1207,8 +1208,8 @@ def _seg_11(): (0xB4E, 'X'), (0xB55, 'V'), (0xB58, 'X'), - (0xB5C, 'M', u'ଡ଼'), - (0xB5D, 'M', u'ଢ଼'), + (0xB5C, 'M', 'ଡ଼'), + (0xB5D, 'M', 'ଢ଼'), (0xB5E, 'X'), (0xB5F, 'V'), (0xB64, 'X'), @@ -1251,14 +1252,14 @@ def _seg_11(): (0xC0E, 'V'), (0xC11, 'X'), (0xC12, 'V'), - ] - -def _seg_12(): - return [ (0xC29, 'X'), (0xC2A, 'V'), + ] + +def _seg_12() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ (0xC3A, 'X'), - (0xC3D, 'V'), + (0xC3C, 'V'), (0xC45, 'X'), (0xC46, 'V'), (0xC49, 'X'), @@ -1268,6 +1269,8 @@ def _seg_12(): (0xC57, 'X'), (0xC58, 'V'), (0xC5B, 'X'), + (0xC5D, 'V'), + (0xC5E, 'X'), (0xC60, 'V'), (0xC64, 'X'), (0xC66, 'V'), @@ -1290,14 +1293,14 @@ def _seg_12(): (0xCCE, 'X'), (0xCD5, 'V'), (0xCD7, 'X'), - (0xCDE, 'V'), + (0xCDD, 'V'), (0xCDF, 'X'), (0xCE0, 'V'), (0xCE4, 'X'), (0xCE6, 'V'), (0xCF0, 'X'), (0xCF1, 'V'), - (0xCF3, 'X'), + (0xCF4, 'X'), (0xD00, 'V'), (0xD0D, 'X'), (0xD0E, 'V'), @@ -1337,7 +1340,7 @@ def _seg_12(): (0xDF2, 'V'), (0xDF5, 'X'), (0xE01, 'V'), - (0xE33, 'M', u'ํา'), + (0xE33, 'M', 'ํา'), (0xE34, 'V'), (0xE3B, 'X'), (0xE3F, 'V'), @@ -1353,11 +1356,11 @@ def _seg_12(): (0xEA5, 'V'), (0xEA6, 'X'), (0xEA7, 'V'), - (0xEB3, 'M', u'ໍາ'), + (0xEB3, 'M', 'ໍາ'), (0xEB4, 'V'), ] -def _seg_13(): +def _seg_13() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ (0xEBE, 'X'), (0xEC0, 'V'), @@ -1365,55 +1368,55 @@ def _seg_13(): (0xEC6, 'V'), (0xEC7, 'X'), (0xEC8, 'V'), - (0xECE, 'X'), + (0xECF, 'X'), (0xED0, 'V'), (0xEDA, 'X'), - (0xEDC, 'M', u'ຫນ'), - (0xEDD, 'M', u'ຫມ'), + (0xEDC, 'M', 'ຫນ'), + (0xEDD, 'M', 'ຫມ'), (0xEDE, 'V'), (0xEE0, 'X'), (0xF00, 'V'), - (0xF0C, 'M', u'་'), + (0xF0C, 'M', '་'), (0xF0D, 'V'), - (0xF43, 'M', u'གྷ'), + (0xF43, 'M', 'གྷ'), (0xF44, 'V'), (0xF48, 'X'), (0xF49, 'V'), - (0xF4D, 'M', u'ཌྷ'), + (0xF4D, 'M', 'ཌྷ'), (0xF4E, 'V'), - (0xF52, 'M', u'དྷ'), + (0xF52, 'M', 'དྷ'), (0xF53, 'V'), - (0xF57, 'M', u'བྷ'), + (0xF57, 'M', 'བྷ'), (0xF58, 'V'), - (0xF5C, 'M', u'ཛྷ'), + (0xF5C, 'M', 'ཛྷ'), (0xF5D, 'V'), - (0xF69, 'M', u'ཀྵ'), + (0xF69, 'M', 'ཀྵ'), (0xF6A, 'V'), (0xF6D, 'X'), (0xF71, 'V'), - (0xF73, 'M', u'ཱི'), + (0xF73, 'M', 'ཱི'), (0xF74, 'V'), - (0xF75, 'M', u'ཱུ'), - (0xF76, 'M', u'ྲྀ'), - (0xF77, 'M', u'ྲཱྀ'), - (0xF78, 'M', u'ླྀ'), - (0xF79, 'M', u'ླཱྀ'), + (0xF75, 'M', 'ཱུ'), + (0xF76, 'M', 'ྲྀ'), + (0xF77, 'M', 'ྲཱྀ'), + (0xF78, 'M', 'ླྀ'), + (0xF79, 'M', 'ླཱྀ'), (0xF7A, 'V'), - (0xF81, 'M', u'ཱྀ'), + (0xF81, 'M', 'ཱྀ'), (0xF82, 'V'), - (0xF93, 'M', u'ྒྷ'), + (0xF93, 'M', 'ྒྷ'), (0xF94, 'V'), (0xF98, 'X'), (0xF99, 'V'), - (0xF9D, 'M', u'ྜྷ'), + (0xF9D, 'M', 'ྜྷ'), (0xF9E, 'V'), - (0xFA2, 'M', u'ྡྷ'), + (0xFA2, 'M', 'ྡྷ'), (0xFA3, 'V'), - (0xFA7, 'M', u'ྦྷ'), + (0xFA7, 'M', 'ྦྷ'), (0xFA8, 'V'), - (0xFAC, 'M', u'ྫྷ'), + (0xFAC, 'M', 'ྫྷ'), (0xFAD, 'V'), - (0xFB9, 'M', u'ྐྵ'), + (0xFB9, 'M', 'ྐྵ'), (0xFBA, 'V'), (0xFBD, 'X'), (0xFBE, 'V'), @@ -1422,12 +1425,12 @@ def _seg_13(): (0xFDB, 'X'), (0x1000, 'V'), (0x10A0, 'X'), - (0x10C7, 'M', u'ⴧ'), + (0x10C7, 'M', 'ⴧ'), (0x10C8, 'X'), - (0x10CD, 'M', u'ⴭ'), + (0x10CD, 'M', 'ⴭ'), (0x10CE, 'X'), (0x10D0, 'V'), - (0x10FC, 'M', u'ნ'), + (0x10FC, 'M', 'ნ'), (0x10FD, 'V'), (0x115F, 'X'), (0x1161, 'V'), @@ -1461,7 +1464,7 @@ def _seg_13(): (0x1312, 'V'), ] -def _seg_14(): +def _seg_14() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ (0x1316, 'X'), (0x1318, 'V'), @@ -1472,12 +1475,12 @@ def _seg_14(): (0x139A, 'X'), (0x13A0, 'V'), (0x13F6, 'X'), - (0x13F8, 'M', u'Ᏸ'), - (0x13F9, 'M', u'Ᏹ'), - (0x13FA, 'M', u'Ᏺ'), - (0x13FB, 'M', u'Ᏻ'), - (0x13FC, 'M', u'Ᏼ'), - (0x13FD, 'M', u'Ᏽ'), + (0x13F8, 'M', 'Ᏸ'), + (0x13F9, 'M', 'Ᏹ'), + (0x13FA, 'M', 'Ᏺ'), + (0x13FB, 'M', 'Ᏻ'), + (0x13FC, 'M', 'Ᏼ'), + (0x13FD, 'M', 'Ᏽ'), (0x13FE, 'X'), (0x1400, 'V'), (0x1680, 'X'), @@ -1486,10 +1489,8 @@ def _seg_14(): (0x16A0, 'V'), (0x16F9, 'X'), (0x1700, 'V'), - (0x170D, 'X'), - (0x170E, 'V'), - (0x1715, 'X'), - (0x1720, 'V'), + (0x1716, 'X'), + (0x171F, 'V'), (0x1737, 'X'), (0x1740, 'V'), (0x1754, 'X'), @@ -1512,6 +1513,7 @@ def _seg_14(): (0x1807, 'V'), (0x180B, 'I'), (0x180E, 'X'), + (0x180F, 'I'), (0x1810, 'V'), (0x181A, 'X'), (0x1820, 'V'), @@ -1551,11 +1553,11 @@ def _seg_14(): (0x1AA0, 'V'), (0x1AAE, 'X'), (0x1AB0, 'V'), - (0x1AC1, 'X'), + (0x1ACF, 'X'), (0x1B00, 'V'), - (0x1B4C, 'X'), + (0x1B4D, 'X'), (0x1B50, 'V'), - (0x1B7D, 'X'), + (0x1B7F, 'X'), (0x1B80, 'V'), (0x1BF4, 'X'), (0x1BFC, 'V'), @@ -1563,1196 +1565,1193 @@ def _seg_14(): (0x1C3B, 'V'), (0x1C4A, 'X'), (0x1C4D, 'V'), + (0x1C80, 'M', 'в'), ] -def _seg_15(): +def _seg_15() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0x1C80, 'M', u'в'), - (0x1C81, 'M', u'д'), - (0x1C82, 'M', u'о'), - (0x1C83, 'M', u'с'), - (0x1C84, 'M', u'т'), - (0x1C86, 'M', u'ъ'), - (0x1C87, 'M', u'ѣ'), - (0x1C88, 'M', u'ꙋ'), + (0x1C81, 'M', 'д'), + (0x1C82, 'M', 'о'), + (0x1C83, 'M', 'с'), + (0x1C84, 'M', 'т'), + (0x1C86, 'M', 'ъ'), + (0x1C87, 'M', 'ѣ'), + (0x1C88, 'M', 'ꙋ'), (0x1C89, 'X'), - (0x1C90, 'M', u'ა'), - (0x1C91, 'M', u'ბ'), - (0x1C92, 'M', u'გ'), - (0x1C93, 'M', u'დ'), - (0x1C94, 'M', u'ე'), - (0x1C95, 'M', u'ვ'), - (0x1C96, 'M', u'ზ'), - (0x1C97, 'M', u'თ'), - (0x1C98, 'M', u'ი'), - (0x1C99, 'M', u'კ'), - (0x1C9A, 'M', u'ლ'), - (0x1C9B, 'M', u'მ'), - (0x1C9C, 'M', u'ნ'), - (0x1C9D, 'M', u'ო'), - (0x1C9E, 'M', u'პ'), - (0x1C9F, 'M', u'ჟ'), - (0x1CA0, 'M', u'რ'), - (0x1CA1, 'M', u'ს'), - (0x1CA2, 'M', u'ტ'), - (0x1CA3, 'M', u'უ'), - (0x1CA4, 'M', u'ფ'), - (0x1CA5, 'M', u'ქ'), - (0x1CA6, 'M', u'ღ'), - (0x1CA7, 'M', u'ყ'), - (0x1CA8, 'M', u'შ'), - (0x1CA9, 'M', u'ჩ'), - (0x1CAA, 'M', u'ც'), - (0x1CAB, 'M', u'ძ'), - (0x1CAC, 'M', u'წ'), - (0x1CAD, 'M', u'ჭ'), - (0x1CAE, 'M', u'ხ'), - (0x1CAF, 'M', u'ჯ'), - (0x1CB0, 'M', u'ჰ'), - (0x1CB1, 'M', u'ჱ'), - (0x1CB2, 'M', u'ჲ'), - (0x1CB3, 'M', u'ჳ'), - (0x1CB4, 'M', u'ჴ'), - (0x1CB5, 'M', u'ჵ'), - (0x1CB6, 'M', u'ჶ'), - (0x1CB7, 'M', u'ჷ'), - (0x1CB8, 'M', u'ჸ'), - (0x1CB9, 'M', u'ჹ'), - (0x1CBA, 'M', u'ჺ'), + (0x1C90, 'M', 'ა'), + (0x1C91, 'M', 'ბ'), + (0x1C92, 'M', 'გ'), + (0x1C93, 'M', 'დ'), + (0x1C94, 'M', 'ე'), + (0x1C95, 'M', 'ვ'), + (0x1C96, 'M', 'ზ'), + (0x1C97, 'M', 'თ'), + (0x1C98, 'M', 'ი'), + (0x1C99, 'M', 'კ'), + (0x1C9A, 'M', 'ლ'), + (0x1C9B, 'M', 'მ'), + (0x1C9C, 'M', 'ნ'), + (0x1C9D, 'M', 'ო'), + (0x1C9E, 'M', 'პ'), + (0x1C9F, 'M', 'ჟ'), + (0x1CA0, 'M', 'რ'), + (0x1CA1, 'M', 'ს'), + (0x1CA2, 'M', 'ტ'), + (0x1CA3, 'M', 'უ'), + (0x1CA4, 'M', 'ფ'), + (0x1CA5, 'M', 'ქ'), + (0x1CA6, 'M', 'ღ'), + (0x1CA7, 'M', 'ყ'), + (0x1CA8, 'M', 'შ'), + (0x1CA9, 'M', 'ჩ'), + (0x1CAA, 'M', 'ც'), + (0x1CAB, 'M', 'ძ'), + (0x1CAC, 'M', 'წ'), + (0x1CAD, 'M', 'ჭ'), + (0x1CAE, 'M', 'ხ'), + (0x1CAF, 'M', 'ჯ'), + (0x1CB0, 'M', 'ჰ'), + (0x1CB1, 'M', 'ჱ'), + (0x1CB2, 'M', 'ჲ'), + (0x1CB3, 'M', 'ჳ'), + (0x1CB4, 'M', 'ჴ'), + (0x1CB5, 'M', 'ჵ'), + (0x1CB6, 'M', 'ჶ'), + (0x1CB7, 'M', 'ჷ'), + (0x1CB8, 'M', 'ჸ'), + (0x1CB9, 'M', 'ჹ'), + (0x1CBA, 'M', 'ჺ'), (0x1CBB, 'X'), - (0x1CBD, 'M', u'ჽ'), - (0x1CBE, 'M', u'ჾ'), - (0x1CBF, 'M', u'ჿ'), + (0x1CBD, 'M', 'ჽ'), + (0x1CBE, 'M', 'ჾ'), + (0x1CBF, 'M', 'ჿ'), (0x1CC0, 'V'), (0x1CC8, 'X'), (0x1CD0, 'V'), (0x1CFB, 'X'), (0x1D00, 'V'), - (0x1D2C, 'M', u'a'), - (0x1D2D, 'M', u'æ'), - (0x1D2E, 'M', u'b'), + (0x1D2C, 'M', 'a'), + (0x1D2D, 'M', 'æ'), + (0x1D2E, 'M', 'b'), (0x1D2F, 'V'), - (0x1D30, 'M', u'd'), - (0x1D31, 'M', u'e'), - (0x1D32, 'M', u'ǝ'), - (0x1D33, 'M', u'g'), - (0x1D34, 'M', u'h'), - (0x1D35, 'M', u'i'), - (0x1D36, 'M', u'j'), - (0x1D37, 'M', u'k'), - (0x1D38, 'M', u'l'), - (0x1D39, 'M', u'm'), - (0x1D3A, 'M', u'n'), + (0x1D30, 'M', 'd'), + (0x1D31, 'M', 'e'), + (0x1D32, 'M', 'ǝ'), + (0x1D33, 'M', 'g'), + (0x1D34, 'M', 'h'), + (0x1D35, 'M', 'i'), + (0x1D36, 'M', 'j'), + (0x1D37, 'M', 'k'), + (0x1D38, 'M', 'l'), + (0x1D39, 'M', 'm'), + (0x1D3A, 'M', 'n'), (0x1D3B, 'V'), - (0x1D3C, 'M', u'o'), - (0x1D3D, 'M', u'ȣ'), - (0x1D3E, 'M', u'p'), - (0x1D3F, 'M', u'r'), - (0x1D40, 'M', u't'), - (0x1D41, 'M', u'u'), - (0x1D42, 'M', u'w'), - (0x1D43, 'M', u'a'), - (0x1D44, 'M', u'ɐ'), - (0x1D45, 'M', u'ɑ'), - (0x1D46, 'M', u'ᴂ'), - (0x1D47, 'M', u'b'), - (0x1D48, 'M', u'd'), - (0x1D49, 'M', u'e'), - (0x1D4A, 'M', u'ə'), - (0x1D4B, 'M', u'ɛ'), - (0x1D4C, 'M', u'ɜ'), - (0x1D4D, 'M', u'g'), + (0x1D3C, 'M', 'o'), + (0x1D3D, 'M', 'ȣ'), + (0x1D3E, 'M', 'p'), + (0x1D3F, 'M', 'r'), + (0x1D40, 'M', 't'), + (0x1D41, 'M', 'u'), + (0x1D42, 'M', 'w'), + (0x1D43, 'M', 'a'), + (0x1D44, 'M', 'ɐ'), + (0x1D45, 'M', 'ɑ'), + (0x1D46, 'M', 'ᴂ'), + (0x1D47, 'M', 'b'), + (0x1D48, 'M', 'd'), + (0x1D49, 'M', 'e'), + (0x1D4A, 'M', 'ə'), + (0x1D4B, 'M', 'ɛ'), + (0x1D4C, 'M', 'ɜ'), + (0x1D4D, 'M', 'g'), (0x1D4E, 'V'), - (0x1D4F, 'M', u'k'), - (0x1D50, 'M', u'm'), - (0x1D51, 'M', u'ŋ'), - (0x1D52, 'M', u'o'), + (0x1D4F, 'M', 'k'), + (0x1D50, 'M', 'm'), + (0x1D51, 'M', 'ŋ'), + (0x1D52, 'M', 'o'), + (0x1D53, 'M', 'ɔ'), ] -def _seg_16(): +def _seg_16() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0x1D53, 'M', u'ɔ'), - (0x1D54, 'M', u'ᴖ'), - (0x1D55, 'M', u'ᴗ'), - (0x1D56, 'M', u'p'), - (0x1D57, 'M', u't'), - (0x1D58, 'M', u'u'), - (0x1D59, 'M', u'ᴝ'), - (0x1D5A, 'M', u'ɯ'), - (0x1D5B, 'M', u'v'), - (0x1D5C, 'M', u'ᴥ'), - (0x1D5D, 'M', u'β'), - (0x1D5E, 'M', u'γ'), - (0x1D5F, 'M', u'δ'), - (0x1D60, 'M', u'φ'), - (0x1D61, 'M', u'χ'), - (0x1D62, 'M', u'i'), - (0x1D63, 'M', u'r'), - (0x1D64, 'M', u'u'), - (0x1D65, 'M', u'v'), - (0x1D66, 'M', u'β'), - (0x1D67, 'M', u'γ'), - (0x1D68, 'M', u'ρ'), - (0x1D69, 'M', u'φ'), - (0x1D6A, 'M', u'χ'), + (0x1D54, 'M', 'ᴖ'), + (0x1D55, 'M', 'ᴗ'), + (0x1D56, 'M', 'p'), + (0x1D57, 'M', 't'), + (0x1D58, 'M', 'u'), + (0x1D59, 'M', 'ᴝ'), + (0x1D5A, 'M', 'ɯ'), + (0x1D5B, 'M', 'v'), + (0x1D5C, 'M', 'ᴥ'), + (0x1D5D, 'M', 'β'), + (0x1D5E, 'M', 'γ'), + (0x1D5F, 'M', 'δ'), + (0x1D60, 'M', 'φ'), + (0x1D61, 'M', 'χ'), + (0x1D62, 'M', 'i'), + (0x1D63, 'M', 'r'), + (0x1D64, 'M', 'u'), + (0x1D65, 'M', 'v'), + (0x1D66, 'M', 'β'), + (0x1D67, 'M', 'γ'), + (0x1D68, 'M', 'ρ'), + (0x1D69, 'M', 'φ'), + (0x1D6A, 'M', 'χ'), (0x1D6B, 'V'), - (0x1D78, 'M', u'н'), + (0x1D78, 'M', 'н'), (0x1D79, 'V'), - (0x1D9B, 'M', u'ɒ'), - (0x1D9C, 'M', u'c'), - (0x1D9D, 'M', u'ɕ'), - (0x1D9E, 'M', u'ð'), - (0x1D9F, 'M', u'ɜ'), - (0x1DA0, 'M', u'f'), - (0x1DA1, 'M', u'ɟ'), - (0x1DA2, 'M', u'ɡ'), - (0x1DA3, 'M', u'ɥ'), - (0x1DA4, 'M', u'ɨ'), - (0x1DA5, 'M', u'ɩ'), - (0x1DA6, 'M', u'ɪ'), - (0x1DA7, 'M', u'ᵻ'), - (0x1DA8, 'M', u'ʝ'), - (0x1DA9, 'M', u'ɭ'), - (0x1DAA, 'M', u'ᶅ'), - (0x1DAB, 'M', u'ʟ'), - (0x1DAC, 'M', u'ɱ'), - (0x1DAD, 'M', u'ɰ'), - (0x1DAE, 'M', u'ɲ'), - (0x1DAF, 'M', u'ɳ'), - (0x1DB0, 'M', u'ɴ'), - (0x1DB1, 'M', u'ɵ'), - (0x1DB2, 'M', u'ɸ'), - (0x1DB3, 'M', u'ʂ'), - (0x1DB4, 'M', u'ʃ'), - (0x1DB5, 'M', u'ƫ'), - (0x1DB6, 'M', u'ʉ'), - (0x1DB7, 'M', u'ʊ'), - (0x1DB8, 'M', u'ᴜ'), - (0x1DB9, 'M', u'ʋ'), - (0x1DBA, 'M', u'ʌ'), - (0x1DBB, 'M', u'z'), - (0x1DBC, 'M', u'ʐ'), - (0x1DBD, 'M', u'ʑ'), - (0x1DBE, 'M', u'ʒ'), - (0x1DBF, 'M', u'θ'), + (0x1D9B, 'M', 'ɒ'), + (0x1D9C, 'M', 'c'), + (0x1D9D, 'M', 'ɕ'), + (0x1D9E, 'M', 'ð'), + (0x1D9F, 'M', 'ɜ'), + (0x1DA0, 'M', 'f'), + (0x1DA1, 'M', 'ɟ'), + (0x1DA2, 'M', 'ɡ'), + (0x1DA3, 'M', 'ɥ'), + (0x1DA4, 'M', 'ɨ'), + (0x1DA5, 'M', 'ɩ'), + (0x1DA6, 'M', 'ɪ'), + (0x1DA7, 'M', 'ᵻ'), + (0x1DA8, 'M', 'ʝ'), + (0x1DA9, 'M', 'ɭ'), + (0x1DAA, 'M', 'ᶅ'), + (0x1DAB, 'M', 'ʟ'), + (0x1DAC, 'M', 'ɱ'), + (0x1DAD, 'M', 'ɰ'), + (0x1DAE, 'M', 'ɲ'), + (0x1DAF, 'M', 'ɳ'), + (0x1DB0, 'M', 'ɴ'), + (0x1DB1, 'M', 'ɵ'), + (0x1DB2, 'M', 'ɸ'), + (0x1DB3, 'M', 'ʂ'), + (0x1DB4, 'M', 'ʃ'), + (0x1DB5, 'M', 'ƫ'), + (0x1DB6, 'M', 'ʉ'), + (0x1DB7, 'M', 'ʊ'), + (0x1DB8, 'M', 'ᴜ'), + (0x1DB9, 'M', 'ʋ'), + (0x1DBA, 'M', 'ʌ'), + (0x1DBB, 'M', 'z'), + (0x1DBC, 'M', 'ʐ'), + (0x1DBD, 'M', 'ʑ'), + (0x1DBE, 'M', 'ʒ'), + (0x1DBF, 'M', 'θ'), (0x1DC0, 'V'), - (0x1DFA, 'X'), - (0x1DFB, 'V'), - (0x1E00, 'M', u'ḁ'), + (0x1E00, 'M', 'ḁ'), (0x1E01, 'V'), - (0x1E02, 'M', u'ḃ'), + (0x1E02, 'M', 'ḃ'), (0x1E03, 'V'), - (0x1E04, 'M', u'ḅ'), + (0x1E04, 'M', 'ḅ'), (0x1E05, 'V'), - (0x1E06, 'M', u'ḇ'), + (0x1E06, 'M', 'ḇ'), (0x1E07, 'V'), - (0x1E08, 'M', u'ḉ'), + (0x1E08, 'M', 'ḉ'), (0x1E09, 'V'), - (0x1E0A, 'M', u'ḋ'), + (0x1E0A, 'M', 'ḋ'), (0x1E0B, 'V'), - (0x1E0C, 'M', u'ḍ'), + (0x1E0C, 'M', 'ḍ'), (0x1E0D, 'V'), - (0x1E0E, 'M', u'ḏ'), + (0x1E0E, 'M', 'ḏ'), (0x1E0F, 'V'), - (0x1E10, 'M', u'ḑ'), + (0x1E10, 'M', 'ḑ'), (0x1E11, 'V'), - (0x1E12, 'M', u'ḓ'), + (0x1E12, 'M', 'ḓ'), (0x1E13, 'V'), - (0x1E14, 'M', u'ḕ'), + (0x1E14, 'M', 'ḕ'), (0x1E15, 'V'), - (0x1E16, 'M', u'ḗ'), + (0x1E16, 'M', 'ḗ'), (0x1E17, 'V'), - (0x1E18, 'M', u'ḙ'), + (0x1E18, 'M', 'ḙ'), (0x1E19, 'V'), - (0x1E1A, 'M', u'ḛ'), + (0x1E1A, 'M', 'ḛ'), (0x1E1B, 'V'), - (0x1E1C, 'M', u'ḝ'), + (0x1E1C, 'M', 'ḝ'), (0x1E1D, 'V'), - (0x1E1E, 'M', u'ḟ'), + (0x1E1E, 'M', 'ḟ'), (0x1E1F, 'V'), - (0x1E20, 'M', u'ḡ'), - ] - -def _seg_17(): - return [ + (0x1E20, 'M', 'ḡ'), (0x1E21, 'V'), - (0x1E22, 'M', u'ḣ'), + (0x1E22, 'M', 'ḣ'), (0x1E23, 'V'), - (0x1E24, 'M', u'ḥ'), + ] + +def _seg_17() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1E24, 'M', 'ḥ'), (0x1E25, 'V'), - (0x1E26, 'M', u'ḧ'), + (0x1E26, 'M', 'ḧ'), (0x1E27, 'V'), - (0x1E28, 'M', u'ḩ'), + (0x1E28, 'M', 'ḩ'), (0x1E29, 'V'), - (0x1E2A, 'M', u'ḫ'), + (0x1E2A, 'M', 'ḫ'), (0x1E2B, 'V'), - (0x1E2C, 'M', u'ḭ'), + (0x1E2C, 'M', 'ḭ'), (0x1E2D, 'V'), - (0x1E2E, 'M', u'ḯ'), + (0x1E2E, 'M', 'ḯ'), (0x1E2F, 'V'), - (0x1E30, 'M', u'ḱ'), + (0x1E30, 'M', 'ḱ'), (0x1E31, 'V'), - (0x1E32, 'M', u'ḳ'), + (0x1E32, 'M', 'ḳ'), (0x1E33, 'V'), - (0x1E34, 'M', u'ḵ'), + (0x1E34, 'M', 'ḵ'), (0x1E35, 'V'), - (0x1E36, 'M', u'ḷ'), + (0x1E36, 'M', 'ḷ'), (0x1E37, 'V'), - (0x1E38, 'M', u'ḹ'), + (0x1E38, 'M', 'ḹ'), (0x1E39, 'V'), - (0x1E3A, 'M', u'ḻ'), + (0x1E3A, 'M', 'ḻ'), (0x1E3B, 'V'), - (0x1E3C, 'M', u'ḽ'), + (0x1E3C, 'M', 'ḽ'), (0x1E3D, 'V'), - (0x1E3E, 'M', u'ḿ'), + (0x1E3E, 'M', 'ḿ'), (0x1E3F, 'V'), - (0x1E40, 'M', u'ṁ'), + (0x1E40, 'M', 'ṁ'), (0x1E41, 'V'), - (0x1E42, 'M', u'ṃ'), + (0x1E42, 'M', 'ṃ'), (0x1E43, 'V'), - (0x1E44, 'M', u'ṅ'), + (0x1E44, 'M', 'ṅ'), (0x1E45, 'V'), - (0x1E46, 'M', u'ṇ'), + (0x1E46, 'M', 'ṇ'), (0x1E47, 'V'), - (0x1E48, 'M', u'ṉ'), + (0x1E48, 'M', 'ṉ'), (0x1E49, 'V'), - (0x1E4A, 'M', u'ṋ'), + (0x1E4A, 'M', 'ṋ'), (0x1E4B, 'V'), - (0x1E4C, 'M', u'ṍ'), + (0x1E4C, 'M', 'ṍ'), (0x1E4D, 'V'), - (0x1E4E, 'M', u'ṏ'), + (0x1E4E, 'M', 'ṏ'), (0x1E4F, 'V'), - (0x1E50, 'M', u'ṑ'), + (0x1E50, 'M', 'ṑ'), (0x1E51, 'V'), - (0x1E52, 'M', u'ṓ'), + (0x1E52, 'M', 'ṓ'), (0x1E53, 'V'), - (0x1E54, 'M', u'ṕ'), + (0x1E54, 'M', 'ṕ'), (0x1E55, 'V'), - (0x1E56, 'M', u'ṗ'), + (0x1E56, 'M', 'ṗ'), (0x1E57, 'V'), - (0x1E58, 'M', u'ṙ'), + (0x1E58, 'M', 'ṙ'), (0x1E59, 'V'), - (0x1E5A, 'M', u'ṛ'), + (0x1E5A, 'M', 'ṛ'), (0x1E5B, 'V'), - (0x1E5C, 'M', u'ṝ'), + (0x1E5C, 'M', 'ṝ'), (0x1E5D, 'V'), - (0x1E5E, 'M', u'ṟ'), + (0x1E5E, 'M', 'ṟ'), (0x1E5F, 'V'), - (0x1E60, 'M', u'ṡ'), + (0x1E60, 'M', 'ṡ'), (0x1E61, 'V'), - (0x1E62, 'M', u'ṣ'), + (0x1E62, 'M', 'ṣ'), (0x1E63, 'V'), - (0x1E64, 'M', u'ṥ'), + (0x1E64, 'M', 'ṥ'), (0x1E65, 'V'), - (0x1E66, 'M', u'ṧ'), + (0x1E66, 'M', 'ṧ'), (0x1E67, 'V'), - (0x1E68, 'M', u'ṩ'), + (0x1E68, 'M', 'ṩ'), (0x1E69, 'V'), - (0x1E6A, 'M', u'ṫ'), + (0x1E6A, 'M', 'ṫ'), (0x1E6B, 'V'), - (0x1E6C, 'M', u'ṭ'), + (0x1E6C, 'M', 'ṭ'), (0x1E6D, 'V'), - (0x1E6E, 'M', u'ṯ'), + (0x1E6E, 'M', 'ṯ'), (0x1E6F, 'V'), - (0x1E70, 'M', u'ṱ'), + (0x1E70, 'M', 'ṱ'), (0x1E71, 'V'), - (0x1E72, 'M', u'ṳ'), + (0x1E72, 'M', 'ṳ'), (0x1E73, 'V'), - (0x1E74, 'M', u'ṵ'), + (0x1E74, 'M', 'ṵ'), (0x1E75, 'V'), - (0x1E76, 'M', u'ṷ'), + (0x1E76, 'M', 'ṷ'), (0x1E77, 'V'), - (0x1E78, 'M', u'ṹ'), + (0x1E78, 'M', 'ṹ'), (0x1E79, 'V'), - (0x1E7A, 'M', u'ṻ'), + (0x1E7A, 'M', 'ṻ'), (0x1E7B, 'V'), - (0x1E7C, 'M', u'ṽ'), + (0x1E7C, 'M', 'ṽ'), (0x1E7D, 'V'), - (0x1E7E, 'M', u'ṿ'), + (0x1E7E, 'M', 'ṿ'), (0x1E7F, 'V'), - (0x1E80, 'M', u'ẁ'), + (0x1E80, 'M', 'ẁ'), (0x1E81, 'V'), - (0x1E82, 'M', u'ẃ'), + (0x1E82, 'M', 'ẃ'), (0x1E83, 'V'), - (0x1E84, 'M', u'ẅ'), - ] - -def _seg_18(): - return [ + (0x1E84, 'M', 'ẅ'), (0x1E85, 'V'), - (0x1E86, 'M', u'ẇ'), + (0x1E86, 'M', 'ẇ'), (0x1E87, 'V'), - (0x1E88, 'M', u'ẉ'), - (0x1E89, 'V'), - (0x1E8A, 'M', u'ẋ'), - (0x1E8B, 'V'), - (0x1E8C, 'M', u'ẍ'), - (0x1E8D, 'V'), - (0x1E8E, 'M', u'ẏ'), - (0x1E8F, 'V'), - (0x1E90, 'M', u'ẑ'), - (0x1E91, 'V'), - (0x1E92, 'M', u'ẓ'), - (0x1E93, 'V'), - (0x1E94, 'M', u'ẕ'), - (0x1E95, 'V'), - (0x1E9A, 'M', u'aʾ'), - (0x1E9B, 'M', u'ṡ'), - (0x1E9C, 'V'), - (0x1E9E, 'M', u'ss'), - (0x1E9F, 'V'), - (0x1EA0, 'M', u'ạ'), - (0x1EA1, 'V'), - (0x1EA2, 'M', u'ả'), - (0x1EA3, 'V'), - (0x1EA4, 'M', u'ấ'), - (0x1EA5, 'V'), - (0x1EA6, 'M', u'ầ'), - (0x1EA7, 'V'), - (0x1EA8, 'M', u'ẩ'), - (0x1EA9, 'V'), - (0x1EAA, 'M', u'ẫ'), - (0x1EAB, 'V'), - (0x1EAC, 'M', u'ậ'), - (0x1EAD, 'V'), - (0x1EAE, 'M', u'ắ'), - (0x1EAF, 'V'), - (0x1EB0, 'M', u'ằ'), - (0x1EB1, 'V'), - (0x1EB2, 'M', u'ẳ'), - (0x1EB3, 'V'), - (0x1EB4, 'M', u'ẵ'), - (0x1EB5, 'V'), - (0x1EB6, 'M', u'ặ'), - (0x1EB7, 'V'), - (0x1EB8, 'M', u'ẹ'), - (0x1EB9, 'V'), - (0x1EBA, 'M', u'ẻ'), - (0x1EBB, 'V'), - (0x1EBC, 'M', u'ẽ'), - (0x1EBD, 'V'), - (0x1EBE, 'M', u'ế'), - (0x1EBF, 'V'), - (0x1EC0, 'M', u'ề'), - (0x1EC1, 'V'), - (0x1EC2, 'M', u'ể'), - (0x1EC3, 'V'), - (0x1EC4, 'M', u'ễ'), - (0x1EC5, 'V'), - (0x1EC6, 'M', u'ệ'), - (0x1EC7, 'V'), - (0x1EC8, 'M', u'ỉ'), - (0x1EC9, 'V'), - (0x1ECA, 'M', u'ị'), - (0x1ECB, 'V'), - (0x1ECC, 'M', u'ọ'), - (0x1ECD, 'V'), - (0x1ECE, 'M', u'ỏ'), - (0x1ECF, 'V'), - (0x1ED0, 'M', u'ố'), - (0x1ED1, 'V'), - (0x1ED2, 'M', u'ồ'), - (0x1ED3, 'V'), - (0x1ED4, 'M', u'ổ'), - (0x1ED5, 'V'), - (0x1ED6, 'M', u'ỗ'), - (0x1ED7, 'V'), - (0x1ED8, 'M', u'ộ'), - (0x1ED9, 'V'), - (0x1EDA, 'M', u'ớ'), - (0x1EDB, 'V'), - (0x1EDC, 'M', u'ờ'), - (0x1EDD, 'V'), - (0x1EDE, 'M', u'ở'), - (0x1EDF, 'V'), - (0x1EE0, 'M', u'ỡ'), - (0x1EE1, 'V'), - (0x1EE2, 'M', u'ợ'), - (0x1EE3, 'V'), - (0x1EE4, 'M', u'ụ'), - (0x1EE5, 'V'), - (0x1EE6, 'M', u'ủ'), - (0x1EE7, 'V'), - (0x1EE8, 'M', u'ứ'), - (0x1EE9, 'V'), - (0x1EEA, 'M', u'ừ'), - (0x1EEB, 'V'), - (0x1EEC, 'M', u'ử'), - (0x1EED, 'V'), ] -def _seg_19(): +def _seg_18() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0x1EEE, 'M', u'ữ'), + (0x1E88, 'M', 'ẉ'), + (0x1E89, 'V'), + (0x1E8A, 'M', 'ẋ'), + (0x1E8B, 'V'), + (0x1E8C, 'M', 'ẍ'), + (0x1E8D, 'V'), + (0x1E8E, 'M', 'ẏ'), + (0x1E8F, 'V'), + (0x1E90, 'M', 'ẑ'), + (0x1E91, 'V'), + (0x1E92, 'M', 'ẓ'), + (0x1E93, 'V'), + (0x1E94, 'M', 'ẕ'), + (0x1E95, 'V'), + (0x1E9A, 'M', 'aʾ'), + (0x1E9B, 'M', 'ṡ'), + (0x1E9C, 'V'), + (0x1E9E, 'M', 'ss'), + (0x1E9F, 'V'), + (0x1EA0, 'M', 'ạ'), + (0x1EA1, 'V'), + (0x1EA2, 'M', 'ả'), + (0x1EA3, 'V'), + (0x1EA4, 'M', 'ấ'), + (0x1EA5, 'V'), + (0x1EA6, 'M', 'ầ'), + (0x1EA7, 'V'), + (0x1EA8, 'M', 'ẩ'), + (0x1EA9, 'V'), + (0x1EAA, 'M', 'ẫ'), + (0x1EAB, 'V'), + (0x1EAC, 'M', 'ậ'), + (0x1EAD, 'V'), + (0x1EAE, 'M', 'ắ'), + (0x1EAF, 'V'), + (0x1EB0, 'M', 'ằ'), + (0x1EB1, 'V'), + (0x1EB2, 'M', 'ẳ'), + (0x1EB3, 'V'), + (0x1EB4, 'M', 'ẵ'), + (0x1EB5, 'V'), + (0x1EB6, 'M', 'ặ'), + (0x1EB7, 'V'), + (0x1EB8, 'M', 'ẹ'), + (0x1EB9, 'V'), + (0x1EBA, 'M', 'ẻ'), + (0x1EBB, 'V'), + (0x1EBC, 'M', 'ẽ'), + (0x1EBD, 'V'), + (0x1EBE, 'M', 'ế'), + (0x1EBF, 'V'), + (0x1EC0, 'M', 'ề'), + (0x1EC1, 'V'), + (0x1EC2, 'M', 'ể'), + (0x1EC3, 'V'), + (0x1EC4, 'M', 'ễ'), + (0x1EC5, 'V'), + (0x1EC6, 'M', 'ệ'), + (0x1EC7, 'V'), + (0x1EC8, 'M', 'ỉ'), + (0x1EC9, 'V'), + (0x1ECA, 'M', 'ị'), + (0x1ECB, 'V'), + (0x1ECC, 'M', 'ọ'), + (0x1ECD, 'V'), + (0x1ECE, 'M', 'ỏ'), + (0x1ECF, 'V'), + (0x1ED0, 'M', 'ố'), + (0x1ED1, 'V'), + (0x1ED2, 'M', 'ồ'), + (0x1ED3, 'V'), + (0x1ED4, 'M', 'ổ'), + (0x1ED5, 'V'), + (0x1ED6, 'M', 'ỗ'), + (0x1ED7, 'V'), + (0x1ED8, 'M', 'ộ'), + (0x1ED9, 'V'), + (0x1EDA, 'M', 'ớ'), + (0x1EDB, 'V'), + (0x1EDC, 'M', 'ờ'), + (0x1EDD, 'V'), + (0x1EDE, 'M', 'ở'), + (0x1EDF, 'V'), + (0x1EE0, 'M', 'ỡ'), + (0x1EE1, 'V'), + (0x1EE2, 'M', 'ợ'), + (0x1EE3, 'V'), + (0x1EE4, 'M', 'ụ'), + (0x1EE5, 'V'), + (0x1EE6, 'M', 'ủ'), + (0x1EE7, 'V'), + (0x1EE8, 'M', 'ứ'), + (0x1EE9, 'V'), + (0x1EEA, 'M', 'ừ'), + (0x1EEB, 'V'), + (0x1EEC, 'M', 'ử'), + (0x1EED, 'V'), + (0x1EEE, 'M', 'ữ'), (0x1EEF, 'V'), - (0x1EF0, 'M', u'ự'), + (0x1EF0, 'M', 'ự'), + ] + +def _seg_19() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ (0x1EF1, 'V'), - (0x1EF2, 'M', u'ỳ'), + (0x1EF2, 'M', 'ỳ'), (0x1EF3, 'V'), - (0x1EF4, 'M', u'ỵ'), + (0x1EF4, 'M', 'ỵ'), (0x1EF5, 'V'), - (0x1EF6, 'M', u'ỷ'), + (0x1EF6, 'M', 'ỷ'), (0x1EF7, 'V'), - (0x1EF8, 'M', u'ỹ'), + (0x1EF8, 'M', 'ỹ'), (0x1EF9, 'V'), - (0x1EFA, 'M', u'ỻ'), + (0x1EFA, 'M', 'ỻ'), (0x1EFB, 'V'), - (0x1EFC, 'M', u'ỽ'), + (0x1EFC, 'M', 'ỽ'), (0x1EFD, 'V'), - (0x1EFE, 'M', u'ỿ'), + (0x1EFE, 'M', 'ỿ'), (0x1EFF, 'V'), - (0x1F08, 'M', u'ἀ'), - (0x1F09, 'M', u'ἁ'), - (0x1F0A, 'M', u'ἂ'), - (0x1F0B, 'M', u'ἃ'), - (0x1F0C, 'M', u'ἄ'), - (0x1F0D, 'M', u'ἅ'), - (0x1F0E, 'M', u'ἆ'), - (0x1F0F, 'M', u'ἇ'), + (0x1F08, 'M', 'ἀ'), + (0x1F09, 'M', 'ἁ'), + (0x1F0A, 'M', 'ἂ'), + (0x1F0B, 'M', 'ἃ'), + (0x1F0C, 'M', 'ἄ'), + (0x1F0D, 'M', 'ἅ'), + (0x1F0E, 'M', 'ἆ'), + (0x1F0F, 'M', 'ἇ'), (0x1F10, 'V'), (0x1F16, 'X'), - (0x1F18, 'M', u'ἐ'), - (0x1F19, 'M', u'ἑ'), - (0x1F1A, 'M', u'ἒ'), - (0x1F1B, 'M', u'ἓ'), - (0x1F1C, 'M', u'ἔ'), - (0x1F1D, 'M', u'ἕ'), + (0x1F18, 'M', 'ἐ'), + (0x1F19, 'M', 'ἑ'), + (0x1F1A, 'M', 'ἒ'), + (0x1F1B, 'M', 'ἓ'), + (0x1F1C, 'M', 'ἔ'), + (0x1F1D, 'M', 'ἕ'), (0x1F1E, 'X'), (0x1F20, 'V'), - (0x1F28, 'M', u'ἠ'), - (0x1F29, 'M', u'ἡ'), - (0x1F2A, 'M', u'ἢ'), - (0x1F2B, 'M', u'ἣ'), - (0x1F2C, 'M', u'ἤ'), - (0x1F2D, 'M', u'ἥ'), - (0x1F2E, 'M', u'ἦ'), - (0x1F2F, 'M', u'ἧ'), + (0x1F28, 'M', 'ἠ'), + (0x1F29, 'M', 'ἡ'), + (0x1F2A, 'M', 'ἢ'), + (0x1F2B, 'M', 'ἣ'), + (0x1F2C, 'M', 'ἤ'), + (0x1F2D, 'M', 'ἥ'), + (0x1F2E, 'M', 'ἦ'), + (0x1F2F, 'M', 'ἧ'), (0x1F30, 'V'), - (0x1F38, 'M', u'ἰ'), - (0x1F39, 'M', u'ἱ'), - (0x1F3A, 'M', u'ἲ'), - (0x1F3B, 'M', u'ἳ'), - (0x1F3C, 'M', u'ἴ'), - (0x1F3D, 'M', u'ἵ'), - (0x1F3E, 'M', u'ἶ'), - (0x1F3F, 'M', u'ἷ'), + (0x1F38, 'M', 'ἰ'), + (0x1F39, 'M', 'ἱ'), + (0x1F3A, 'M', 'ἲ'), + (0x1F3B, 'M', 'ἳ'), + (0x1F3C, 'M', 'ἴ'), + (0x1F3D, 'M', 'ἵ'), + (0x1F3E, 'M', 'ἶ'), + (0x1F3F, 'M', 'ἷ'), (0x1F40, 'V'), (0x1F46, 'X'), - (0x1F48, 'M', u'ὀ'), - (0x1F49, 'M', u'ὁ'), - (0x1F4A, 'M', u'ὂ'), - (0x1F4B, 'M', u'ὃ'), - (0x1F4C, 'M', u'ὄ'), - (0x1F4D, 'M', u'ὅ'), + (0x1F48, 'M', 'ὀ'), + (0x1F49, 'M', 'ὁ'), + (0x1F4A, 'M', 'ὂ'), + (0x1F4B, 'M', 'ὃ'), + (0x1F4C, 'M', 'ὄ'), + (0x1F4D, 'M', 'ὅ'), (0x1F4E, 'X'), (0x1F50, 'V'), (0x1F58, 'X'), - (0x1F59, 'M', u'ὑ'), + (0x1F59, 'M', 'ὑ'), (0x1F5A, 'X'), - (0x1F5B, 'M', u'ὓ'), + (0x1F5B, 'M', 'ὓ'), (0x1F5C, 'X'), - (0x1F5D, 'M', u'ὕ'), + (0x1F5D, 'M', 'ὕ'), (0x1F5E, 'X'), - (0x1F5F, 'M', u'ὗ'), + (0x1F5F, 'M', 'ὗ'), (0x1F60, 'V'), - (0x1F68, 'M', u'ὠ'), - (0x1F69, 'M', u'ὡ'), - (0x1F6A, 'M', u'ὢ'), - (0x1F6B, 'M', u'ὣ'), - (0x1F6C, 'M', u'ὤ'), - (0x1F6D, 'M', u'ὥ'), - (0x1F6E, 'M', u'ὦ'), - (0x1F6F, 'M', u'ὧ'), + (0x1F68, 'M', 'ὠ'), + (0x1F69, 'M', 'ὡ'), + (0x1F6A, 'M', 'ὢ'), + (0x1F6B, 'M', 'ὣ'), + (0x1F6C, 'M', 'ὤ'), + (0x1F6D, 'M', 'ὥ'), + (0x1F6E, 'M', 'ὦ'), + (0x1F6F, 'M', 'ὧ'), (0x1F70, 'V'), - (0x1F71, 'M', u'ά'), + (0x1F71, 'M', 'ά'), (0x1F72, 'V'), - (0x1F73, 'M', u'έ'), + (0x1F73, 'M', 'έ'), (0x1F74, 'V'), - (0x1F75, 'M', u'ή'), + (0x1F75, 'M', 'ή'), (0x1F76, 'V'), - (0x1F77, 'M', u'ί'), + (0x1F77, 'M', 'ί'), (0x1F78, 'V'), - (0x1F79, 'M', u'ό'), + (0x1F79, 'M', 'ό'), (0x1F7A, 'V'), - (0x1F7B, 'M', u'ύ'), + (0x1F7B, 'M', 'ύ'), (0x1F7C, 'V'), - (0x1F7D, 'M', u'ώ'), + (0x1F7D, 'M', 'ώ'), (0x1F7E, 'X'), - (0x1F80, 'M', u'ἀι'), - (0x1F81, 'M', u'ἁι'), - (0x1F82, 'M', u'ἂι'), - (0x1F83, 'M', u'ἃι'), - (0x1F84, 'M', u'ἄι'), + (0x1F80, 'M', 'ἀι'), + (0x1F81, 'M', 'ἁι'), + (0x1F82, 'M', 'ἂι'), + (0x1F83, 'M', 'ἃι'), + (0x1F84, 'M', 'ἄι'), + (0x1F85, 'M', 'ἅι'), + (0x1F86, 'M', 'ἆι'), + (0x1F87, 'M', 'ἇι'), ] -def _seg_20(): +def _seg_20() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0x1F85, 'M', u'ἅι'), - (0x1F86, 'M', u'ἆι'), - (0x1F87, 'M', u'ἇι'), - (0x1F88, 'M', u'ἀι'), - (0x1F89, 'M', u'ἁι'), - (0x1F8A, 'M', u'ἂι'), - (0x1F8B, 'M', u'ἃι'), - (0x1F8C, 'M', u'ἄι'), - (0x1F8D, 'M', u'ἅι'), - (0x1F8E, 'M', u'ἆι'), - (0x1F8F, 'M', u'ἇι'), - (0x1F90, 'M', u'ἠι'), - (0x1F91, 'M', u'ἡι'), - (0x1F92, 'M', u'ἢι'), - (0x1F93, 'M', u'ἣι'), - (0x1F94, 'M', u'ἤι'), - (0x1F95, 'M', u'ἥι'), - (0x1F96, 'M', u'ἦι'), - (0x1F97, 'M', u'ἧι'), - (0x1F98, 'M', u'ἠι'), - (0x1F99, 'M', u'ἡι'), - (0x1F9A, 'M', u'ἢι'), - (0x1F9B, 'M', u'ἣι'), - (0x1F9C, 'M', u'ἤι'), - (0x1F9D, 'M', u'ἥι'), - (0x1F9E, 'M', u'ἦι'), - (0x1F9F, 'M', u'ἧι'), - (0x1FA0, 'M', u'ὠι'), - (0x1FA1, 'M', u'ὡι'), - (0x1FA2, 'M', u'ὢι'), - (0x1FA3, 'M', u'ὣι'), - (0x1FA4, 'M', u'ὤι'), - (0x1FA5, 'M', u'ὥι'), - (0x1FA6, 'M', u'ὦι'), - (0x1FA7, 'M', u'ὧι'), - (0x1FA8, 'M', u'ὠι'), - (0x1FA9, 'M', u'ὡι'), - (0x1FAA, 'M', u'ὢι'), - (0x1FAB, 'M', u'ὣι'), - (0x1FAC, 'M', u'ὤι'), - (0x1FAD, 'M', u'ὥι'), - (0x1FAE, 'M', u'ὦι'), - (0x1FAF, 'M', u'ὧι'), + (0x1F88, 'M', 'ἀι'), + (0x1F89, 'M', 'ἁι'), + (0x1F8A, 'M', 'ἂι'), + (0x1F8B, 'M', 'ἃι'), + (0x1F8C, 'M', 'ἄι'), + (0x1F8D, 'M', 'ἅι'), + (0x1F8E, 'M', 'ἆι'), + (0x1F8F, 'M', 'ἇι'), + (0x1F90, 'M', 'ἠι'), + (0x1F91, 'M', 'ἡι'), + (0x1F92, 'M', 'ἢι'), + (0x1F93, 'M', 'ἣι'), + (0x1F94, 'M', 'ἤι'), + (0x1F95, 'M', 'ἥι'), + (0x1F96, 'M', 'ἦι'), + (0x1F97, 'M', 'ἧι'), + (0x1F98, 'M', 'ἠι'), + (0x1F99, 'M', 'ἡι'), + (0x1F9A, 'M', 'ἢι'), + (0x1F9B, 'M', 'ἣι'), + (0x1F9C, 'M', 'ἤι'), + (0x1F9D, 'M', 'ἥι'), + (0x1F9E, 'M', 'ἦι'), + (0x1F9F, 'M', 'ἧι'), + (0x1FA0, 'M', 'ὠι'), + (0x1FA1, 'M', 'ὡι'), + (0x1FA2, 'M', 'ὢι'), + (0x1FA3, 'M', 'ὣι'), + (0x1FA4, 'M', 'ὤι'), + (0x1FA5, 'M', 'ὥι'), + (0x1FA6, 'M', 'ὦι'), + (0x1FA7, 'M', 'ὧι'), + (0x1FA8, 'M', 'ὠι'), + (0x1FA9, 'M', 'ὡι'), + (0x1FAA, 'M', 'ὢι'), + (0x1FAB, 'M', 'ὣι'), + (0x1FAC, 'M', 'ὤι'), + (0x1FAD, 'M', 'ὥι'), + (0x1FAE, 'M', 'ὦι'), + (0x1FAF, 'M', 'ὧι'), (0x1FB0, 'V'), - (0x1FB2, 'M', u'ὰι'), - (0x1FB3, 'M', u'αι'), - (0x1FB4, 'M', u'άι'), + (0x1FB2, 'M', 'ὰι'), + (0x1FB3, 'M', 'αι'), + (0x1FB4, 'M', 'άι'), (0x1FB5, 'X'), (0x1FB6, 'V'), - (0x1FB7, 'M', u'ᾶι'), - (0x1FB8, 'M', u'ᾰ'), - (0x1FB9, 'M', u'ᾱ'), - (0x1FBA, 'M', u'ὰ'), - (0x1FBB, 'M', u'ά'), - (0x1FBC, 'M', u'αι'), - (0x1FBD, '3', u' ̓'), - (0x1FBE, 'M', u'ι'), - (0x1FBF, '3', u' ̓'), - (0x1FC0, '3', u' ͂'), - (0x1FC1, '3', u' ̈͂'), - (0x1FC2, 'M', u'ὴι'), - (0x1FC3, 'M', u'ηι'), - (0x1FC4, 'M', u'ήι'), + (0x1FB7, 'M', 'ᾶι'), + (0x1FB8, 'M', 'ᾰ'), + (0x1FB9, 'M', 'ᾱ'), + (0x1FBA, 'M', 'ὰ'), + (0x1FBB, 'M', 'ά'), + (0x1FBC, 'M', 'αι'), + (0x1FBD, '3', ' ̓'), + (0x1FBE, 'M', 'ι'), + (0x1FBF, '3', ' ̓'), + (0x1FC0, '3', ' ͂'), + (0x1FC1, '3', ' ̈͂'), + (0x1FC2, 'M', 'ὴι'), + (0x1FC3, 'M', 'ηι'), + (0x1FC4, 'M', 'ήι'), (0x1FC5, 'X'), (0x1FC6, 'V'), - (0x1FC7, 'M', u'ῆι'), - (0x1FC8, 'M', u'ὲ'), - (0x1FC9, 'M', u'έ'), - (0x1FCA, 'M', u'ὴ'), - (0x1FCB, 'M', u'ή'), - (0x1FCC, 'M', u'ηι'), - (0x1FCD, '3', u' ̓̀'), - (0x1FCE, '3', u' ̓́'), - (0x1FCF, '3', u' ̓͂'), + (0x1FC7, 'M', 'ῆι'), + (0x1FC8, 'M', 'ὲ'), + (0x1FC9, 'M', 'έ'), + (0x1FCA, 'M', 'ὴ'), + (0x1FCB, 'M', 'ή'), + (0x1FCC, 'M', 'ηι'), + (0x1FCD, '3', ' ̓̀'), + (0x1FCE, '3', ' ̓́'), + (0x1FCF, '3', ' ̓͂'), (0x1FD0, 'V'), - (0x1FD3, 'M', u'ΐ'), + (0x1FD3, 'M', 'ΐ'), (0x1FD4, 'X'), (0x1FD6, 'V'), - (0x1FD8, 'M', u'ῐ'), - (0x1FD9, 'M', u'ῑ'), - (0x1FDA, 'M', u'ὶ'), - (0x1FDB, 'M', u'ί'), + (0x1FD8, 'M', 'ῐ'), + (0x1FD9, 'M', 'ῑ'), + (0x1FDA, 'M', 'ὶ'), + (0x1FDB, 'M', 'ί'), (0x1FDC, 'X'), - (0x1FDD, '3', u' ̔̀'), - (0x1FDE, '3', u' ̔́'), - (0x1FDF, '3', u' ̔͂'), + (0x1FDD, '3', ' ̔̀'), + (0x1FDE, '3', ' ̔́'), + (0x1FDF, '3', ' ̔͂'), (0x1FE0, 'V'), - (0x1FE3, 'M', u'ΰ'), + (0x1FE3, 'M', 'ΰ'), (0x1FE4, 'V'), - (0x1FE8, 'M', u'ῠ'), - (0x1FE9, 'M', u'ῡ'), - (0x1FEA, 'M', u'ὺ'), - (0x1FEB, 'M', u'ύ'), - (0x1FEC, 'M', u'ῥ'), - (0x1FED, '3', u' ̈̀'), - (0x1FEE, '3', u' ̈́'), - (0x1FEF, '3', u'`'), + (0x1FE8, 'M', 'ῠ'), + (0x1FE9, 'M', 'ῡ'), + (0x1FEA, 'M', 'ὺ'), + (0x1FEB, 'M', 'ύ'), + (0x1FEC, 'M', 'ῥ'), + (0x1FED, '3', ' ̈̀'), + (0x1FEE, '3', ' ̈́'), + (0x1FEF, '3', '`'), (0x1FF0, 'X'), - (0x1FF2, 'M', u'ὼι'), - (0x1FF3, 'M', u'ωι'), - ] - -def _seg_21(): - return [ - (0x1FF4, 'M', u'ώι'), + (0x1FF2, 'M', 'ὼι'), + (0x1FF3, 'M', 'ωι'), + (0x1FF4, 'M', 'ώι'), (0x1FF5, 'X'), (0x1FF6, 'V'), - (0x1FF7, 'M', u'ῶι'), - (0x1FF8, 'M', u'ὸ'), - (0x1FF9, 'M', u'ό'), - (0x1FFA, 'M', u'ὼ'), - (0x1FFB, 'M', u'ώ'), - (0x1FFC, 'M', u'ωι'), - (0x1FFD, '3', u' ́'), - (0x1FFE, '3', u' ̔'), + ] + +def _seg_21() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1FF7, 'M', 'ῶι'), + (0x1FF8, 'M', 'ὸ'), + (0x1FF9, 'M', 'ό'), + (0x1FFA, 'M', 'ὼ'), + (0x1FFB, 'M', 'ώ'), + (0x1FFC, 'M', 'ωι'), + (0x1FFD, '3', ' ́'), + (0x1FFE, '3', ' ̔'), (0x1FFF, 'X'), - (0x2000, '3', u' '), + (0x2000, '3', ' '), (0x200B, 'I'), - (0x200C, 'D', u''), + (0x200C, 'D', ''), (0x200E, 'X'), (0x2010, 'V'), - (0x2011, 'M', u'‐'), + (0x2011, 'M', '‐'), (0x2012, 'V'), - (0x2017, '3', u' ̳'), + (0x2017, '3', ' ̳'), (0x2018, 'V'), (0x2024, 'X'), (0x2027, 'V'), (0x2028, 'X'), - (0x202F, '3', u' '), + (0x202F, '3', ' '), (0x2030, 'V'), - (0x2033, 'M', u'′′'), - (0x2034, 'M', u'′′′'), + (0x2033, 'M', '′′'), + (0x2034, 'M', '′′′'), (0x2035, 'V'), - (0x2036, 'M', u'‵‵'), - (0x2037, 'M', u'‵‵‵'), + (0x2036, 'M', '‵‵'), + (0x2037, 'M', '‵‵‵'), (0x2038, 'V'), - (0x203C, '3', u'!!'), + (0x203C, '3', '!!'), (0x203D, 'V'), - (0x203E, '3', u' ̅'), + (0x203E, '3', ' ̅'), (0x203F, 'V'), - (0x2047, '3', u'??'), - (0x2048, '3', u'?!'), - (0x2049, '3', u'!?'), + (0x2047, '3', '??'), + (0x2048, '3', '?!'), + (0x2049, '3', '!?'), (0x204A, 'V'), - (0x2057, 'M', u'′′′′'), + (0x2057, 'M', '′′′′'), (0x2058, 'V'), - (0x205F, '3', u' '), + (0x205F, '3', ' '), (0x2060, 'I'), (0x2061, 'X'), (0x2064, 'I'), (0x2065, 'X'), - (0x2070, 'M', u'0'), - (0x2071, 'M', u'i'), + (0x2070, 'M', '0'), + (0x2071, 'M', 'i'), (0x2072, 'X'), - (0x2074, 'M', u'4'), - (0x2075, 'M', u'5'), - (0x2076, 'M', u'6'), - (0x2077, 'M', u'7'), - (0x2078, 'M', u'8'), - (0x2079, 'M', u'9'), - (0x207A, '3', u'+'), - (0x207B, 'M', u'−'), - (0x207C, '3', u'='), - (0x207D, '3', u'('), - (0x207E, '3', u')'), - (0x207F, 'M', u'n'), - (0x2080, 'M', u'0'), - (0x2081, 'M', u'1'), - (0x2082, 'M', u'2'), - (0x2083, 'M', u'3'), - (0x2084, 'M', u'4'), - (0x2085, 'M', u'5'), - (0x2086, 'M', u'6'), - (0x2087, 'M', u'7'), - (0x2088, 'M', u'8'), - (0x2089, 'M', u'9'), - (0x208A, '3', u'+'), - (0x208B, 'M', u'−'), - (0x208C, '3', u'='), - (0x208D, '3', u'('), - (0x208E, '3', u')'), + (0x2074, 'M', '4'), + (0x2075, 'M', '5'), + (0x2076, 'M', '6'), + (0x2077, 'M', '7'), + (0x2078, 'M', '8'), + (0x2079, 'M', '9'), + (0x207A, '3', '+'), + (0x207B, 'M', '−'), + (0x207C, '3', '='), + (0x207D, '3', '('), + (0x207E, '3', ')'), + (0x207F, 'M', 'n'), + (0x2080, 'M', '0'), + (0x2081, 'M', '1'), + (0x2082, 'M', '2'), + (0x2083, 'M', '3'), + (0x2084, 'M', '4'), + (0x2085, 'M', '5'), + (0x2086, 'M', '6'), + (0x2087, 'M', '7'), + (0x2088, 'M', '8'), + (0x2089, 'M', '9'), + (0x208A, '3', '+'), + (0x208B, 'M', '−'), + (0x208C, '3', '='), + (0x208D, '3', '('), + (0x208E, '3', ')'), (0x208F, 'X'), - (0x2090, 'M', u'a'), - (0x2091, 'M', u'e'), - (0x2092, 'M', u'o'), - (0x2093, 'M', u'x'), - (0x2094, 'M', u'ə'), - (0x2095, 'M', u'h'), - (0x2096, 'M', u'k'), - (0x2097, 'M', u'l'), - (0x2098, 'M', u'm'), - (0x2099, 'M', u'n'), - (0x209A, 'M', u'p'), - (0x209B, 'M', u's'), - (0x209C, 'M', u't'), + (0x2090, 'M', 'a'), + (0x2091, 'M', 'e'), + (0x2092, 'M', 'o'), + (0x2093, 'M', 'x'), + (0x2094, 'M', 'ə'), + (0x2095, 'M', 'h'), + (0x2096, 'M', 'k'), + (0x2097, 'M', 'l'), + (0x2098, 'M', 'm'), + (0x2099, 'M', 'n'), + (0x209A, 'M', 'p'), + (0x209B, 'M', 's'), + (0x209C, 'M', 't'), (0x209D, 'X'), (0x20A0, 'V'), - (0x20A8, 'M', u'rs'), + (0x20A8, 'M', 'rs'), (0x20A9, 'V'), - (0x20C0, 'X'), + (0x20C1, 'X'), (0x20D0, 'V'), (0x20F1, 'X'), - (0x2100, '3', u'a/c'), - (0x2101, '3', u'a/s'), - ] - -def _seg_22(): - return [ - (0x2102, 'M', u'c'), - (0x2103, 'M', u'°c'), + (0x2100, '3', 'a/c'), + (0x2101, '3', 'a/s'), + (0x2102, 'M', 'c'), + (0x2103, 'M', '°c'), (0x2104, 'V'), - (0x2105, '3', u'c/o'), - (0x2106, '3', u'c/u'), - (0x2107, 'M', u'ɛ'), - (0x2108, 'V'), - (0x2109, 'M', u'°f'), - (0x210A, 'M', u'g'), - (0x210B, 'M', u'h'), - (0x210F, 'M', u'ħ'), - (0x2110, 'M', u'i'), - (0x2112, 'M', u'l'), - (0x2114, 'V'), - (0x2115, 'M', u'n'), - (0x2116, 'M', u'no'), - (0x2117, 'V'), - (0x2119, 'M', u'p'), - (0x211A, 'M', u'q'), - (0x211B, 'M', u'r'), - (0x211E, 'V'), - (0x2120, 'M', u'sm'), - (0x2121, 'M', u'tel'), - (0x2122, 'M', u'tm'), - (0x2123, 'V'), - (0x2124, 'M', u'z'), - (0x2125, 'V'), - (0x2126, 'M', u'ω'), - (0x2127, 'V'), - (0x2128, 'M', u'z'), - (0x2129, 'V'), - (0x212A, 'M', u'k'), - (0x212B, 'M', u'å'), - (0x212C, 'M', u'b'), - (0x212D, 'M', u'c'), - (0x212E, 'V'), - (0x212F, 'M', u'e'), - (0x2131, 'M', u'f'), - (0x2132, 'X'), - (0x2133, 'M', u'm'), - (0x2134, 'M', u'o'), - (0x2135, 'M', u'א'), - (0x2136, 'M', u'ב'), - (0x2137, 'M', u'ג'), - (0x2138, 'M', u'ד'), - (0x2139, 'M', u'i'), - (0x213A, 'V'), - (0x213B, 'M', u'fax'), - (0x213C, 'M', u'π'), - (0x213D, 'M', u'γ'), - (0x213F, 'M', u'π'), - (0x2140, 'M', u'∑'), - (0x2141, 'V'), - (0x2145, 'M', u'd'), - (0x2147, 'M', u'e'), - (0x2148, 'M', u'i'), - (0x2149, 'M', u'j'), - (0x214A, 'V'), - (0x2150, 'M', u'1⁄7'), - (0x2151, 'M', u'1⁄9'), - (0x2152, 'M', u'1⁄10'), - (0x2153, 'M', u'1⁄3'), - (0x2154, 'M', u'2⁄3'), - (0x2155, 'M', u'1⁄5'), - (0x2156, 'M', u'2⁄5'), - (0x2157, 'M', u'3⁄5'), - (0x2158, 'M', u'4⁄5'), - (0x2159, 'M', u'1⁄6'), - (0x215A, 'M', u'5⁄6'), - (0x215B, 'M', u'1⁄8'), - (0x215C, 'M', u'3⁄8'), - (0x215D, 'M', u'5⁄8'), - (0x215E, 'M', u'7⁄8'), - (0x215F, 'M', u'1⁄'), - (0x2160, 'M', u'i'), - (0x2161, 'M', u'ii'), - (0x2162, 'M', u'iii'), - (0x2163, 'M', u'iv'), - (0x2164, 'M', u'v'), - (0x2165, 'M', u'vi'), - (0x2166, 'M', u'vii'), - (0x2167, 'M', u'viii'), - (0x2168, 'M', u'ix'), - (0x2169, 'M', u'x'), - (0x216A, 'M', u'xi'), - (0x216B, 'M', u'xii'), - (0x216C, 'M', u'l'), - (0x216D, 'M', u'c'), - (0x216E, 'M', u'd'), - (0x216F, 'M', u'm'), - (0x2170, 'M', u'i'), - (0x2171, 'M', u'ii'), - (0x2172, 'M', u'iii'), - (0x2173, 'M', u'iv'), - (0x2174, 'M', u'v'), - (0x2175, 'M', u'vi'), - (0x2176, 'M', u'vii'), - (0x2177, 'M', u'viii'), - (0x2178, 'M', u'ix'), - (0x2179, 'M', u'x'), ] -def _seg_23(): +def _seg_22() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0x217A, 'M', u'xi'), - (0x217B, 'M', u'xii'), - (0x217C, 'M', u'l'), - (0x217D, 'M', u'c'), - (0x217E, 'M', u'd'), - (0x217F, 'M', u'm'), + (0x2105, '3', 'c/o'), + (0x2106, '3', 'c/u'), + (0x2107, 'M', 'ɛ'), + (0x2108, 'V'), + (0x2109, 'M', '°f'), + (0x210A, 'M', 'g'), + (0x210B, 'M', 'h'), + (0x210F, 'M', 'ħ'), + (0x2110, 'M', 'i'), + (0x2112, 'M', 'l'), + (0x2114, 'V'), + (0x2115, 'M', 'n'), + (0x2116, 'M', 'no'), + (0x2117, 'V'), + (0x2119, 'M', 'p'), + (0x211A, 'M', 'q'), + (0x211B, 'M', 'r'), + (0x211E, 'V'), + (0x2120, 'M', 'sm'), + (0x2121, 'M', 'tel'), + (0x2122, 'M', 'tm'), + (0x2123, 'V'), + (0x2124, 'M', 'z'), + (0x2125, 'V'), + (0x2126, 'M', 'ω'), + (0x2127, 'V'), + (0x2128, 'M', 'z'), + (0x2129, 'V'), + (0x212A, 'M', 'k'), + (0x212B, 'M', 'å'), + (0x212C, 'M', 'b'), + (0x212D, 'M', 'c'), + (0x212E, 'V'), + (0x212F, 'M', 'e'), + (0x2131, 'M', 'f'), + (0x2132, 'X'), + (0x2133, 'M', 'm'), + (0x2134, 'M', 'o'), + (0x2135, 'M', 'א'), + (0x2136, 'M', 'ב'), + (0x2137, 'M', 'ג'), + (0x2138, 'M', 'ד'), + (0x2139, 'M', 'i'), + (0x213A, 'V'), + (0x213B, 'M', 'fax'), + (0x213C, 'M', 'π'), + (0x213D, 'M', 'γ'), + (0x213F, 'M', 'π'), + (0x2140, 'M', '∑'), + (0x2141, 'V'), + (0x2145, 'M', 'd'), + (0x2147, 'M', 'e'), + (0x2148, 'M', 'i'), + (0x2149, 'M', 'j'), + (0x214A, 'V'), + (0x2150, 'M', '1⁄7'), + (0x2151, 'M', '1⁄9'), + (0x2152, 'M', '1⁄10'), + (0x2153, 'M', '1⁄3'), + (0x2154, 'M', '2⁄3'), + (0x2155, 'M', '1⁄5'), + (0x2156, 'M', '2⁄5'), + (0x2157, 'M', '3⁄5'), + (0x2158, 'M', '4⁄5'), + (0x2159, 'M', '1⁄6'), + (0x215A, 'M', '5⁄6'), + (0x215B, 'M', '1⁄8'), + (0x215C, 'M', '3⁄8'), + (0x215D, 'M', '5⁄8'), + (0x215E, 'M', '7⁄8'), + (0x215F, 'M', '1⁄'), + (0x2160, 'M', 'i'), + (0x2161, 'M', 'ii'), + (0x2162, 'M', 'iii'), + (0x2163, 'M', 'iv'), + (0x2164, 'M', 'v'), + (0x2165, 'M', 'vi'), + (0x2166, 'M', 'vii'), + (0x2167, 'M', 'viii'), + (0x2168, 'M', 'ix'), + (0x2169, 'M', 'x'), + (0x216A, 'M', 'xi'), + (0x216B, 'M', 'xii'), + (0x216C, 'M', 'l'), + (0x216D, 'M', 'c'), + (0x216E, 'M', 'd'), + (0x216F, 'M', 'm'), + (0x2170, 'M', 'i'), + (0x2171, 'M', 'ii'), + (0x2172, 'M', 'iii'), + (0x2173, 'M', 'iv'), + (0x2174, 'M', 'v'), + (0x2175, 'M', 'vi'), + (0x2176, 'M', 'vii'), + (0x2177, 'M', 'viii'), + (0x2178, 'M', 'ix'), + (0x2179, 'M', 'x'), + (0x217A, 'M', 'xi'), + (0x217B, 'M', 'xii'), + (0x217C, 'M', 'l'), + ] + +def _seg_23() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x217D, 'M', 'c'), + (0x217E, 'M', 'd'), + (0x217F, 'M', 'm'), (0x2180, 'V'), (0x2183, 'X'), (0x2184, 'V'), - (0x2189, 'M', u'0⁄3'), + (0x2189, 'M', '0⁄3'), (0x218A, 'V'), (0x218C, 'X'), (0x2190, 'V'), - (0x222C, 'M', u'∫∫'), - (0x222D, 'M', u'∫∫∫'), + (0x222C, 'M', '∫∫'), + (0x222D, 'M', '∫∫∫'), (0x222E, 'V'), - (0x222F, 'M', u'∮∮'), - (0x2230, 'M', u'∮∮∮'), + (0x222F, 'M', '∮∮'), + (0x2230, 'M', '∮∮∮'), (0x2231, 'V'), (0x2260, '3'), (0x2261, 'V'), (0x226E, '3'), (0x2270, 'V'), - (0x2329, 'M', u'〈'), - (0x232A, 'M', u'〉'), + (0x2329, 'M', '〈'), + (0x232A, 'M', '〉'), (0x232B, 'V'), (0x2427, 'X'), (0x2440, 'V'), (0x244B, 'X'), - (0x2460, 'M', u'1'), - (0x2461, 'M', u'2'), - (0x2462, 'M', u'3'), - (0x2463, 'M', u'4'), - (0x2464, 'M', u'5'), - (0x2465, 'M', u'6'), - (0x2466, 'M', u'7'), - (0x2467, 'M', u'8'), - (0x2468, 'M', u'9'), - (0x2469, 'M', u'10'), - (0x246A, 'M', u'11'), - (0x246B, 'M', u'12'), - (0x246C, 'M', u'13'), - (0x246D, 'M', u'14'), - (0x246E, 'M', u'15'), - (0x246F, 'M', u'16'), - (0x2470, 'M', u'17'), - (0x2471, 'M', u'18'), - (0x2472, 'M', u'19'), - (0x2473, 'M', u'20'), - (0x2474, '3', u'(1)'), - (0x2475, '3', u'(2)'), - (0x2476, '3', u'(3)'), - (0x2477, '3', u'(4)'), - (0x2478, '3', u'(5)'), - (0x2479, '3', u'(6)'), - (0x247A, '3', u'(7)'), - (0x247B, '3', u'(8)'), - (0x247C, '3', u'(9)'), - (0x247D, '3', u'(10)'), - (0x247E, '3', u'(11)'), - (0x247F, '3', u'(12)'), - (0x2480, '3', u'(13)'), - (0x2481, '3', u'(14)'), - (0x2482, '3', u'(15)'), - (0x2483, '3', u'(16)'), - (0x2484, '3', u'(17)'), - (0x2485, '3', u'(18)'), - (0x2486, '3', u'(19)'), - (0x2487, '3', u'(20)'), + (0x2460, 'M', '1'), + (0x2461, 'M', '2'), + (0x2462, 'M', '3'), + (0x2463, 'M', '4'), + (0x2464, 'M', '5'), + (0x2465, 'M', '6'), + (0x2466, 'M', '7'), + (0x2467, 'M', '8'), + (0x2468, 'M', '9'), + (0x2469, 'M', '10'), + (0x246A, 'M', '11'), + (0x246B, 'M', '12'), + (0x246C, 'M', '13'), + (0x246D, 'M', '14'), + (0x246E, 'M', '15'), + (0x246F, 'M', '16'), + (0x2470, 'M', '17'), + (0x2471, 'M', '18'), + (0x2472, 'M', '19'), + (0x2473, 'M', '20'), + (0x2474, '3', '(1)'), + (0x2475, '3', '(2)'), + (0x2476, '3', '(3)'), + (0x2477, '3', '(4)'), + (0x2478, '3', '(5)'), + (0x2479, '3', '(6)'), + (0x247A, '3', '(7)'), + (0x247B, '3', '(8)'), + (0x247C, '3', '(9)'), + (0x247D, '3', '(10)'), + (0x247E, '3', '(11)'), + (0x247F, '3', '(12)'), + (0x2480, '3', '(13)'), + (0x2481, '3', '(14)'), + (0x2482, '3', '(15)'), + (0x2483, '3', '(16)'), + (0x2484, '3', '(17)'), + (0x2485, '3', '(18)'), + (0x2486, '3', '(19)'), + (0x2487, '3', '(20)'), (0x2488, 'X'), - (0x249C, '3', u'(a)'), - (0x249D, '3', u'(b)'), - (0x249E, '3', u'(c)'), - (0x249F, '3', u'(d)'), - (0x24A0, '3', u'(e)'), - (0x24A1, '3', u'(f)'), - (0x24A2, '3', u'(g)'), - (0x24A3, '3', u'(h)'), - (0x24A4, '3', u'(i)'), - (0x24A5, '3', u'(j)'), - (0x24A6, '3', u'(k)'), - (0x24A7, '3', u'(l)'), - (0x24A8, '3', u'(m)'), - (0x24A9, '3', u'(n)'), - (0x24AA, '3', u'(o)'), - (0x24AB, '3', u'(p)'), - (0x24AC, '3', u'(q)'), - (0x24AD, '3', u'(r)'), - (0x24AE, '3', u'(s)'), - (0x24AF, '3', u'(t)'), - (0x24B0, '3', u'(u)'), - (0x24B1, '3', u'(v)'), - (0x24B2, '3', u'(w)'), - (0x24B3, '3', u'(x)'), - (0x24B4, '3', u'(y)'), - (0x24B5, '3', u'(z)'), - (0x24B6, 'M', u'a'), - (0x24B7, 'M', u'b'), - (0x24B8, 'M', u'c'), - (0x24B9, 'M', u'd'), + (0x249C, '3', '(a)'), + (0x249D, '3', '(b)'), + (0x249E, '3', '(c)'), + (0x249F, '3', '(d)'), + (0x24A0, '3', '(e)'), + (0x24A1, '3', '(f)'), + (0x24A2, '3', '(g)'), + (0x24A3, '3', '(h)'), + (0x24A4, '3', '(i)'), + (0x24A5, '3', '(j)'), + (0x24A6, '3', '(k)'), + (0x24A7, '3', '(l)'), + (0x24A8, '3', '(m)'), + (0x24A9, '3', '(n)'), + (0x24AA, '3', '(o)'), + (0x24AB, '3', '(p)'), + (0x24AC, '3', '(q)'), + (0x24AD, '3', '(r)'), + (0x24AE, '3', '(s)'), + (0x24AF, '3', '(t)'), + (0x24B0, '3', '(u)'), + (0x24B1, '3', '(v)'), + (0x24B2, '3', '(w)'), + (0x24B3, '3', '(x)'), + (0x24B4, '3', '(y)'), + (0x24B5, '3', '(z)'), + (0x24B6, 'M', 'a'), + (0x24B7, 'M', 'b'), + (0x24B8, 'M', 'c'), + (0x24B9, 'M', 'd'), + (0x24BA, 'M', 'e'), + (0x24BB, 'M', 'f'), + (0x24BC, 'M', 'g'), ] -def _seg_24(): +def _seg_24() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0x24BA, 'M', u'e'), - (0x24BB, 'M', u'f'), - (0x24BC, 'M', u'g'), - (0x24BD, 'M', u'h'), - (0x24BE, 'M', u'i'), - (0x24BF, 'M', u'j'), - (0x24C0, 'M', u'k'), - (0x24C1, 'M', u'l'), - (0x24C2, 'M', u'm'), - (0x24C3, 'M', u'n'), - (0x24C4, 'M', u'o'), - (0x24C5, 'M', u'p'), - (0x24C6, 'M', u'q'), - (0x24C7, 'M', u'r'), - (0x24C8, 'M', u's'), - (0x24C9, 'M', u't'), - (0x24CA, 'M', u'u'), - (0x24CB, 'M', u'v'), - (0x24CC, 'M', u'w'), - (0x24CD, 'M', u'x'), - (0x24CE, 'M', u'y'), - (0x24CF, 'M', u'z'), - (0x24D0, 'M', u'a'), - (0x24D1, 'M', u'b'), - (0x24D2, 'M', u'c'), - (0x24D3, 'M', u'd'), - (0x24D4, 'M', u'e'), - (0x24D5, 'M', u'f'), - (0x24D6, 'M', u'g'), - (0x24D7, 'M', u'h'), - (0x24D8, 'M', u'i'), - (0x24D9, 'M', u'j'), - (0x24DA, 'M', u'k'), - (0x24DB, 'M', u'l'), - (0x24DC, 'M', u'm'), - (0x24DD, 'M', u'n'), - (0x24DE, 'M', u'o'), - (0x24DF, 'M', u'p'), - (0x24E0, 'M', u'q'), - (0x24E1, 'M', u'r'), - (0x24E2, 'M', u's'), - (0x24E3, 'M', u't'), - (0x24E4, 'M', u'u'), - (0x24E5, 'M', u'v'), - (0x24E6, 'M', u'w'), - (0x24E7, 'M', u'x'), - (0x24E8, 'M', u'y'), - (0x24E9, 'M', u'z'), - (0x24EA, 'M', u'0'), + (0x24BD, 'M', 'h'), + (0x24BE, 'M', 'i'), + (0x24BF, 'M', 'j'), + (0x24C0, 'M', 'k'), + (0x24C1, 'M', 'l'), + (0x24C2, 'M', 'm'), + (0x24C3, 'M', 'n'), + (0x24C4, 'M', 'o'), + (0x24C5, 'M', 'p'), + (0x24C6, 'M', 'q'), + (0x24C7, 'M', 'r'), + (0x24C8, 'M', 's'), + (0x24C9, 'M', 't'), + (0x24CA, 'M', 'u'), + (0x24CB, 'M', 'v'), + (0x24CC, 'M', 'w'), + (0x24CD, 'M', 'x'), + (0x24CE, 'M', 'y'), + (0x24CF, 'M', 'z'), + (0x24D0, 'M', 'a'), + (0x24D1, 'M', 'b'), + (0x24D2, 'M', 'c'), + (0x24D3, 'M', 'd'), + (0x24D4, 'M', 'e'), + (0x24D5, 'M', 'f'), + (0x24D6, 'M', 'g'), + (0x24D7, 'M', 'h'), + (0x24D8, 'M', 'i'), + (0x24D9, 'M', 'j'), + (0x24DA, 'M', 'k'), + (0x24DB, 'M', 'l'), + (0x24DC, 'M', 'm'), + (0x24DD, 'M', 'n'), + (0x24DE, 'M', 'o'), + (0x24DF, 'M', 'p'), + (0x24E0, 'M', 'q'), + (0x24E1, 'M', 'r'), + (0x24E2, 'M', 's'), + (0x24E3, 'M', 't'), + (0x24E4, 'M', 'u'), + (0x24E5, 'M', 'v'), + (0x24E6, 'M', 'w'), + (0x24E7, 'M', 'x'), + (0x24E8, 'M', 'y'), + (0x24E9, 'M', 'z'), + (0x24EA, 'M', '0'), (0x24EB, 'V'), - (0x2A0C, 'M', u'∫∫∫∫'), + (0x2A0C, 'M', '∫∫∫∫'), (0x2A0D, 'V'), - (0x2A74, '3', u'::='), - (0x2A75, '3', u'=='), - (0x2A76, '3', u'==='), + (0x2A74, '3', '::='), + (0x2A75, '3', '=='), + (0x2A76, '3', '==='), (0x2A77, 'V'), - (0x2ADC, 'M', u'⫝̸'), + (0x2ADC, 'M', '⫝̸'), (0x2ADD, 'V'), (0x2B74, 'X'), (0x2B76, 'V'), (0x2B96, 'X'), (0x2B97, 'V'), - (0x2C00, 'M', u'ⰰ'), - (0x2C01, 'M', u'ⰱ'), - (0x2C02, 'M', u'ⰲ'), - (0x2C03, 'M', u'ⰳ'), - (0x2C04, 'M', u'ⰴ'), - (0x2C05, 'M', u'ⰵ'), - (0x2C06, 'M', u'ⰶ'), - (0x2C07, 'M', u'ⰷ'), - (0x2C08, 'M', u'ⰸ'), - (0x2C09, 'M', u'ⰹ'), - (0x2C0A, 'M', u'ⰺ'), - (0x2C0B, 'M', u'ⰻ'), - (0x2C0C, 'M', u'ⰼ'), - (0x2C0D, 'M', u'ⰽ'), - (0x2C0E, 'M', u'ⰾ'), - (0x2C0F, 'M', u'ⰿ'), - (0x2C10, 'M', u'ⱀ'), - (0x2C11, 'M', u'ⱁ'), - (0x2C12, 'M', u'ⱂ'), - (0x2C13, 'M', u'ⱃ'), - (0x2C14, 'M', u'ⱄ'), - (0x2C15, 'M', u'ⱅ'), - (0x2C16, 'M', u'ⱆ'), - (0x2C17, 'M', u'ⱇ'), - (0x2C18, 'M', u'ⱈ'), - (0x2C19, 'M', u'ⱉ'), - (0x2C1A, 'M', u'ⱊ'), - (0x2C1B, 'M', u'ⱋ'), - (0x2C1C, 'M', u'ⱌ'), - (0x2C1D, 'M', u'ⱍ'), - (0x2C1E, 'M', u'ⱎ'), - (0x2C1F, 'M', u'ⱏ'), - (0x2C20, 'M', u'ⱐ'), - (0x2C21, 'M', u'ⱑ'), - (0x2C22, 'M', u'ⱒ'), - (0x2C23, 'M', u'ⱓ'), - (0x2C24, 'M', u'ⱔ'), - (0x2C25, 'M', u'ⱕ'), + (0x2C00, 'M', 'ⰰ'), + (0x2C01, 'M', 'ⰱ'), + (0x2C02, 'M', 'ⰲ'), + (0x2C03, 'M', 'ⰳ'), + (0x2C04, 'M', 'ⰴ'), + (0x2C05, 'M', 'ⰵ'), + (0x2C06, 'M', 'ⰶ'), + (0x2C07, 'M', 'ⰷ'), + (0x2C08, 'M', 'ⰸ'), + (0x2C09, 'M', 'ⰹ'), + (0x2C0A, 'M', 'ⰺ'), + (0x2C0B, 'M', 'ⰻ'), + (0x2C0C, 'M', 'ⰼ'), + (0x2C0D, 'M', 'ⰽ'), + (0x2C0E, 'M', 'ⰾ'), + (0x2C0F, 'M', 'ⰿ'), + (0x2C10, 'M', 'ⱀ'), + (0x2C11, 'M', 'ⱁ'), + (0x2C12, 'M', 'ⱂ'), + (0x2C13, 'M', 'ⱃ'), + (0x2C14, 'M', 'ⱄ'), + (0x2C15, 'M', 'ⱅ'), + (0x2C16, 'M', 'ⱆ'), + (0x2C17, 'M', 'ⱇ'), + (0x2C18, 'M', 'ⱈ'), + (0x2C19, 'M', 'ⱉ'), + (0x2C1A, 'M', 'ⱊ'), + (0x2C1B, 'M', 'ⱋ'), + (0x2C1C, 'M', 'ⱌ'), + (0x2C1D, 'M', 'ⱍ'), + (0x2C1E, 'M', 'ⱎ'), + (0x2C1F, 'M', 'ⱏ'), + (0x2C20, 'M', 'ⱐ'), + (0x2C21, 'M', 'ⱑ'), + (0x2C22, 'M', 'ⱒ'), + (0x2C23, 'M', 'ⱓ'), + (0x2C24, 'M', 'ⱔ'), + (0x2C25, 'M', 'ⱕ'), + (0x2C26, 'M', 'ⱖ'), + (0x2C27, 'M', 'ⱗ'), + (0x2C28, 'M', 'ⱘ'), ] -def _seg_25(): +def _seg_25() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0x2C26, 'M', u'ⱖ'), - (0x2C27, 'M', u'ⱗ'), - (0x2C28, 'M', u'ⱘ'), - (0x2C29, 'M', u'ⱙ'), - (0x2C2A, 'M', u'ⱚ'), - (0x2C2B, 'M', u'ⱛ'), - (0x2C2C, 'M', u'ⱜ'), - (0x2C2D, 'M', u'ⱝ'), - (0x2C2E, 'M', u'ⱞ'), - (0x2C2F, 'X'), + (0x2C29, 'M', 'ⱙ'), + (0x2C2A, 'M', 'ⱚ'), + (0x2C2B, 'M', 'ⱛ'), + (0x2C2C, 'M', 'ⱜ'), + (0x2C2D, 'M', 'ⱝ'), + (0x2C2E, 'M', 'ⱞ'), + (0x2C2F, 'M', 'ⱟ'), (0x2C30, 'V'), - (0x2C5F, 'X'), - (0x2C60, 'M', u'ⱡ'), + (0x2C60, 'M', 'ⱡ'), (0x2C61, 'V'), - (0x2C62, 'M', u'ɫ'), - (0x2C63, 'M', u'ᵽ'), - (0x2C64, 'M', u'ɽ'), + (0x2C62, 'M', 'ɫ'), + (0x2C63, 'M', 'ᵽ'), + (0x2C64, 'M', 'ɽ'), (0x2C65, 'V'), - (0x2C67, 'M', u'ⱨ'), + (0x2C67, 'M', 'ⱨ'), (0x2C68, 'V'), - (0x2C69, 'M', u'ⱪ'), + (0x2C69, 'M', 'ⱪ'), (0x2C6A, 'V'), - (0x2C6B, 'M', u'ⱬ'), + (0x2C6B, 'M', 'ⱬ'), (0x2C6C, 'V'), - (0x2C6D, 'M', u'ɑ'), - (0x2C6E, 'M', u'ɱ'), - (0x2C6F, 'M', u'ɐ'), - (0x2C70, 'M', u'ɒ'), + (0x2C6D, 'M', 'ɑ'), + (0x2C6E, 'M', 'ɱ'), + (0x2C6F, 'M', 'ɐ'), + (0x2C70, 'M', 'ɒ'), (0x2C71, 'V'), - (0x2C72, 'M', u'ⱳ'), + (0x2C72, 'M', 'ⱳ'), (0x2C73, 'V'), - (0x2C75, 'M', u'ⱶ'), + (0x2C75, 'M', 'ⱶ'), (0x2C76, 'V'), - (0x2C7C, 'M', u'j'), - (0x2C7D, 'M', u'v'), - (0x2C7E, 'M', u'ȿ'), - (0x2C7F, 'M', u'ɀ'), - (0x2C80, 'M', u'ⲁ'), + (0x2C7C, 'M', 'j'), + (0x2C7D, 'M', 'v'), + (0x2C7E, 'M', 'ȿ'), + (0x2C7F, 'M', 'ɀ'), + (0x2C80, 'M', 'ⲁ'), (0x2C81, 'V'), - (0x2C82, 'M', u'ⲃ'), + (0x2C82, 'M', 'ⲃ'), (0x2C83, 'V'), - (0x2C84, 'M', u'ⲅ'), + (0x2C84, 'M', 'ⲅ'), (0x2C85, 'V'), - (0x2C86, 'M', u'ⲇ'), + (0x2C86, 'M', 'ⲇ'), (0x2C87, 'V'), - (0x2C88, 'M', u'ⲉ'), + (0x2C88, 'M', 'ⲉ'), (0x2C89, 'V'), - (0x2C8A, 'M', u'ⲋ'), + (0x2C8A, 'M', 'ⲋ'), (0x2C8B, 'V'), - (0x2C8C, 'M', u'ⲍ'), + (0x2C8C, 'M', 'ⲍ'), (0x2C8D, 'V'), - (0x2C8E, 'M', u'ⲏ'), + (0x2C8E, 'M', 'ⲏ'), (0x2C8F, 'V'), - (0x2C90, 'M', u'ⲑ'), + (0x2C90, 'M', 'ⲑ'), (0x2C91, 'V'), - (0x2C92, 'M', u'ⲓ'), + (0x2C92, 'M', 'ⲓ'), (0x2C93, 'V'), - (0x2C94, 'M', u'ⲕ'), + (0x2C94, 'M', 'ⲕ'), (0x2C95, 'V'), - (0x2C96, 'M', u'ⲗ'), + (0x2C96, 'M', 'ⲗ'), (0x2C97, 'V'), - (0x2C98, 'M', u'ⲙ'), + (0x2C98, 'M', 'ⲙ'), (0x2C99, 'V'), - (0x2C9A, 'M', u'ⲛ'), + (0x2C9A, 'M', 'ⲛ'), (0x2C9B, 'V'), - (0x2C9C, 'M', u'ⲝ'), + (0x2C9C, 'M', 'ⲝ'), (0x2C9D, 'V'), - (0x2C9E, 'M', u'ⲟ'), + (0x2C9E, 'M', 'ⲟ'), (0x2C9F, 'V'), - (0x2CA0, 'M', u'ⲡ'), + (0x2CA0, 'M', 'ⲡ'), (0x2CA1, 'V'), - (0x2CA2, 'M', u'ⲣ'), + (0x2CA2, 'M', 'ⲣ'), (0x2CA3, 'V'), - (0x2CA4, 'M', u'ⲥ'), + (0x2CA4, 'M', 'ⲥ'), (0x2CA5, 'V'), - (0x2CA6, 'M', u'ⲧ'), + (0x2CA6, 'M', 'ⲧ'), (0x2CA7, 'V'), - (0x2CA8, 'M', u'ⲩ'), + (0x2CA8, 'M', 'ⲩ'), (0x2CA9, 'V'), - (0x2CAA, 'M', u'ⲫ'), + (0x2CAA, 'M', 'ⲫ'), (0x2CAB, 'V'), - (0x2CAC, 'M', u'ⲭ'), + (0x2CAC, 'M', 'ⲭ'), (0x2CAD, 'V'), - (0x2CAE, 'M', u'ⲯ'), + (0x2CAE, 'M', 'ⲯ'), (0x2CAF, 'V'), - (0x2CB0, 'M', u'ⲱ'), + (0x2CB0, 'M', 'ⲱ'), (0x2CB1, 'V'), - (0x2CB2, 'M', u'ⲳ'), + (0x2CB2, 'M', 'ⲳ'), (0x2CB3, 'V'), - (0x2CB4, 'M', u'ⲵ'), + (0x2CB4, 'M', 'ⲵ'), (0x2CB5, 'V'), - (0x2CB6, 'M', u'ⲷ'), + (0x2CB6, 'M', 'ⲷ'), (0x2CB7, 'V'), - (0x2CB8, 'M', u'ⲹ'), + (0x2CB8, 'M', 'ⲹ'), (0x2CB9, 'V'), - (0x2CBA, 'M', u'ⲻ'), + (0x2CBA, 'M', 'ⲻ'), (0x2CBB, 'V'), - (0x2CBC, 'M', u'ⲽ'), + (0x2CBC, 'M', 'ⲽ'), (0x2CBD, 'V'), - (0x2CBE, 'M', u'ⲿ'), + (0x2CBE, 'M', 'ⲿ'), + (0x2CBF, 'V'), + (0x2CC0, 'M', 'ⳁ'), + (0x2CC1, 'V'), + (0x2CC2, 'M', 'ⳃ'), ] -def _seg_26(): +def _seg_26() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0x2CBF, 'V'), - (0x2CC0, 'M', u'ⳁ'), - (0x2CC1, 'V'), - (0x2CC2, 'M', u'ⳃ'), (0x2CC3, 'V'), - (0x2CC4, 'M', u'ⳅ'), + (0x2CC4, 'M', 'ⳅ'), (0x2CC5, 'V'), - (0x2CC6, 'M', u'ⳇ'), + (0x2CC6, 'M', 'ⳇ'), (0x2CC7, 'V'), - (0x2CC8, 'M', u'ⳉ'), + (0x2CC8, 'M', 'ⳉ'), (0x2CC9, 'V'), - (0x2CCA, 'M', u'ⳋ'), + (0x2CCA, 'M', 'ⳋ'), (0x2CCB, 'V'), - (0x2CCC, 'M', u'ⳍ'), + (0x2CCC, 'M', 'ⳍ'), (0x2CCD, 'V'), - (0x2CCE, 'M', u'ⳏ'), + (0x2CCE, 'M', 'ⳏ'), (0x2CCF, 'V'), - (0x2CD0, 'M', u'ⳑ'), + (0x2CD0, 'M', 'ⳑ'), (0x2CD1, 'V'), - (0x2CD2, 'M', u'ⳓ'), + (0x2CD2, 'M', 'ⳓ'), (0x2CD3, 'V'), - (0x2CD4, 'M', u'ⳕ'), + (0x2CD4, 'M', 'ⳕ'), (0x2CD5, 'V'), - (0x2CD6, 'M', u'ⳗ'), + (0x2CD6, 'M', 'ⳗ'), (0x2CD7, 'V'), - (0x2CD8, 'M', u'ⳙ'), + (0x2CD8, 'M', 'ⳙ'), (0x2CD9, 'V'), - (0x2CDA, 'M', u'ⳛ'), + (0x2CDA, 'M', 'ⳛ'), (0x2CDB, 'V'), - (0x2CDC, 'M', u'ⳝ'), + (0x2CDC, 'M', 'ⳝ'), (0x2CDD, 'V'), - (0x2CDE, 'M', u'ⳟ'), + (0x2CDE, 'M', 'ⳟ'), (0x2CDF, 'V'), - (0x2CE0, 'M', u'ⳡ'), + (0x2CE0, 'M', 'ⳡ'), (0x2CE1, 'V'), - (0x2CE2, 'M', u'ⳣ'), + (0x2CE2, 'M', 'ⳣ'), (0x2CE3, 'V'), - (0x2CEB, 'M', u'ⳬ'), + (0x2CEB, 'M', 'ⳬ'), (0x2CEC, 'V'), - (0x2CED, 'M', u'ⳮ'), + (0x2CED, 'M', 'ⳮ'), (0x2CEE, 'V'), - (0x2CF2, 'M', u'ⳳ'), + (0x2CF2, 'M', 'ⳳ'), (0x2CF3, 'V'), (0x2CF4, 'X'), (0x2CF9, 'V'), @@ -2763,7 +2762,7 @@ def _seg_26(): (0x2D2E, 'X'), (0x2D30, 'V'), (0x2D68, 'X'), - (0x2D6F, 'M', u'ⵡ'), + (0x2D6F, 'M', 'ⵡ'), (0x2D70, 'V'), (0x2D71, 'X'), (0x2D7F, 'V'), @@ -2785,1159 +2784,1172 @@ def _seg_26(): (0x2DD8, 'V'), (0x2DDF, 'X'), (0x2DE0, 'V'), - (0x2E53, 'X'), + (0x2E5E, 'X'), (0x2E80, 'V'), (0x2E9A, 'X'), (0x2E9B, 'V'), - (0x2E9F, 'M', u'母'), + (0x2E9F, 'M', '母'), (0x2EA0, 'V'), - (0x2EF3, 'M', u'龟'), + (0x2EF3, 'M', '龟'), (0x2EF4, 'X'), - (0x2F00, 'M', u'一'), - (0x2F01, 'M', u'丨'), - (0x2F02, 'M', u'丶'), - (0x2F03, 'M', u'丿'), - (0x2F04, 'M', u'乙'), - (0x2F05, 'M', u'亅'), - (0x2F06, 'M', u'二'), - (0x2F07, 'M', u'亠'), - (0x2F08, 'M', u'人'), - (0x2F09, 'M', u'儿'), - (0x2F0A, 'M', u'入'), - (0x2F0B, 'M', u'八'), - (0x2F0C, 'M', u'冂'), - (0x2F0D, 'M', u'冖'), - (0x2F0E, 'M', u'冫'), - (0x2F0F, 'M', u'几'), - (0x2F10, 'M', u'凵'), - (0x2F11, 'M', u'刀'), + (0x2F00, 'M', '一'), + (0x2F01, 'M', '丨'), + (0x2F02, 'M', '丶'), + (0x2F03, 'M', '丿'), + (0x2F04, 'M', '乙'), + (0x2F05, 'M', '亅'), + (0x2F06, 'M', '二'), + (0x2F07, 'M', '亠'), + (0x2F08, 'M', '人'), + (0x2F09, 'M', '儿'), + (0x2F0A, 'M', '入'), + (0x2F0B, 'M', '八'), + (0x2F0C, 'M', '冂'), + (0x2F0D, 'M', '冖'), + (0x2F0E, 'M', '冫'), + (0x2F0F, 'M', '几'), + (0x2F10, 'M', '凵'), + (0x2F11, 'M', '刀'), + (0x2F12, 'M', '力'), + (0x2F13, 'M', '勹'), + (0x2F14, 'M', '匕'), + (0x2F15, 'M', '匚'), ] -def _seg_27(): +def _seg_27() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0x2F12, 'M', u'力'), - (0x2F13, 'M', u'勹'), - (0x2F14, 'M', u'匕'), - (0x2F15, 'M', u'匚'), - (0x2F16, 'M', u'匸'), - (0x2F17, 'M', u'十'), - (0x2F18, 'M', u'卜'), - (0x2F19, 'M', u'卩'), - (0x2F1A, 'M', u'厂'), - (0x2F1B, 'M', u'厶'), - (0x2F1C, 'M', u'又'), - (0x2F1D, 'M', u'口'), - (0x2F1E, 'M', u'囗'), - (0x2F1F, 'M', u'土'), - (0x2F20, 'M', u'士'), - (0x2F21, 'M', u'夂'), - (0x2F22, 'M', u'夊'), - (0x2F23, 'M', u'夕'), - (0x2F24, 'M', u'大'), - (0x2F25, 'M', u'女'), - (0x2F26, 'M', u'子'), - (0x2F27, 'M', u'宀'), - (0x2F28, 'M', u'寸'), - (0x2F29, 'M', u'小'), - (0x2F2A, 'M', u'尢'), - (0x2F2B, 'M', u'尸'), - (0x2F2C, 'M', u'屮'), - (0x2F2D, 'M', u'山'), - (0x2F2E, 'M', u'巛'), - (0x2F2F, 'M', u'工'), - (0x2F30, 'M', u'己'), - (0x2F31, 'M', u'巾'), - (0x2F32, 'M', u'干'), - (0x2F33, 'M', u'幺'), - (0x2F34, 'M', u'广'), - (0x2F35, 'M', u'廴'), - (0x2F36, 'M', u'廾'), - (0x2F37, 'M', u'弋'), - (0x2F38, 'M', u'弓'), - (0x2F39, 'M', u'彐'), - (0x2F3A, 'M', u'彡'), - (0x2F3B, 'M', u'彳'), - (0x2F3C, 'M', u'心'), - (0x2F3D, 'M', u'戈'), - (0x2F3E, 'M', u'戶'), - (0x2F3F, 'M', u'手'), - (0x2F40, 'M', u'支'), - (0x2F41, 'M', u'攴'), - (0x2F42, 'M', u'文'), - (0x2F43, 'M', u'斗'), - (0x2F44, 'M', u'斤'), - (0x2F45, 'M', u'方'), - (0x2F46, 'M', u'无'), - (0x2F47, 'M', u'日'), - (0x2F48, 'M', u'曰'), - (0x2F49, 'M', u'月'), - (0x2F4A, 'M', u'木'), - (0x2F4B, 'M', u'欠'), - (0x2F4C, 'M', u'止'), - (0x2F4D, 'M', u'歹'), - (0x2F4E, 'M', u'殳'), - (0x2F4F, 'M', u'毋'), - (0x2F50, 'M', u'比'), - (0x2F51, 'M', u'毛'), - (0x2F52, 'M', u'氏'), - (0x2F53, 'M', u'气'), - (0x2F54, 'M', u'水'), - (0x2F55, 'M', u'火'), - (0x2F56, 'M', u'爪'), - (0x2F57, 'M', u'父'), - (0x2F58, 'M', u'爻'), - (0x2F59, 'M', u'爿'), - (0x2F5A, 'M', u'片'), - (0x2F5B, 'M', u'牙'), - (0x2F5C, 'M', u'牛'), - (0x2F5D, 'M', u'犬'), - (0x2F5E, 'M', u'玄'), - (0x2F5F, 'M', u'玉'), - (0x2F60, 'M', u'瓜'), - (0x2F61, 'M', u'瓦'), - (0x2F62, 'M', u'甘'), - (0x2F63, 'M', u'生'), - (0x2F64, 'M', u'用'), - (0x2F65, 'M', u'田'), - (0x2F66, 'M', u'疋'), - (0x2F67, 'M', u'疒'), - (0x2F68, 'M', u'癶'), - (0x2F69, 'M', u'白'), - (0x2F6A, 'M', u'皮'), - (0x2F6B, 'M', u'皿'), - (0x2F6C, 'M', u'目'), - (0x2F6D, 'M', u'矛'), - (0x2F6E, 'M', u'矢'), - (0x2F6F, 'M', u'石'), - (0x2F70, 'M', u'示'), - (0x2F71, 'M', u'禸'), - (0x2F72, 'M', u'禾'), - (0x2F73, 'M', u'穴'), - (0x2F74, 'M', u'立'), - (0x2F75, 'M', u'竹'), + (0x2F16, 'M', '匸'), + (0x2F17, 'M', '十'), + (0x2F18, 'M', '卜'), + (0x2F19, 'M', '卩'), + (0x2F1A, 'M', '厂'), + (0x2F1B, 'M', '厶'), + (0x2F1C, 'M', '又'), + (0x2F1D, 'M', '口'), + (0x2F1E, 'M', '囗'), + (0x2F1F, 'M', '土'), + (0x2F20, 'M', '士'), + (0x2F21, 'M', '夂'), + (0x2F22, 'M', '夊'), + (0x2F23, 'M', '夕'), + (0x2F24, 'M', '大'), + (0x2F25, 'M', '女'), + (0x2F26, 'M', '子'), + (0x2F27, 'M', '宀'), + (0x2F28, 'M', '寸'), + (0x2F29, 'M', '小'), + (0x2F2A, 'M', '尢'), + (0x2F2B, 'M', '尸'), + (0x2F2C, 'M', '屮'), + (0x2F2D, 'M', '山'), + (0x2F2E, 'M', '巛'), + (0x2F2F, 'M', '工'), + (0x2F30, 'M', '己'), + (0x2F31, 'M', '巾'), + (0x2F32, 'M', '干'), + (0x2F33, 'M', '幺'), + (0x2F34, 'M', '广'), + (0x2F35, 'M', '廴'), + (0x2F36, 'M', '廾'), + (0x2F37, 'M', '弋'), + (0x2F38, 'M', '弓'), + (0x2F39, 'M', '彐'), + (0x2F3A, 'M', '彡'), + (0x2F3B, 'M', '彳'), + (0x2F3C, 'M', '心'), + (0x2F3D, 'M', '戈'), + (0x2F3E, 'M', '戶'), + (0x2F3F, 'M', '手'), + (0x2F40, 'M', '支'), + (0x2F41, 'M', '攴'), + (0x2F42, 'M', '文'), + (0x2F43, 'M', '斗'), + (0x2F44, 'M', '斤'), + (0x2F45, 'M', '方'), + (0x2F46, 'M', '无'), + (0x2F47, 'M', '日'), + (0x2F48, 'M', '曰'), + (0x2F49, 'M', '月'), + (0x2F4A, 'M', '木'), + (0x2F4B, 'M', '欠'), + (0x2F4C, 'M', '止'), + (0x2F4D, 'M', '歹'), + (0x2F4E, 'M', '殳'), + (0x2F4F, 'M', '毋'), + (0x2F50, 'M', '比'), + (0x2F51, 'M', '毛'), + (0x2F52, 'M', '氏'), + (0x2F53, 'M', '气'), + (0x2F54, 'M', '水'), + (0x2F55, 'M', '火'), + (0x2F56, 'M', '爪'), + (0x2F57, 'M', '父'), + (0x2F58, 'M', '爻'), + (0x2F59, 'M', '爿'), + (0x2F5A, 'M', '片'), + (0x2F5B, 'M', '牙'), + (0x2F5C, 'M', '牛'), + (0x2F5D, 'M', '犬'), + (0x2F5E, 'M', '玄'), + (0x2F5F, 'M', '玉'), + (0x2F60, 'M', '瓜'), + (0x2F61, 'M', '瓦'), + (0x2F62, 'M', '甘'), + (0x2F63, 'M', '生'), + (0x2F64, 'M', '用'), + (0x2F65, 'M', '田'), + (0x2F66, 'M', '疋'), + (0x2F67, 'M', '疒'), + (0x2F68, 'M', '癶'), + (0x2F69, 'M', '白'), + (0x2F6A, 'M', '皮'), + (0x2F6B, 'M', '皿'), + (0x2F6C, 'M', '目'), + (0x2F6D, 'M', '矛'), + (0x2F6E, 'M', '矢'), + (0x2F6F, 'M', '石'), + (0x2F70, 'M', '示'), + (0x2F71, 'M', '禸'), + (0x2F72, 'M', '禾'), + (0x2F73, 'M', '穴'), + (0x2F74, 'M', '立'), + (0x2F75, 'M', '竹'), + (0x2F76, 'M', '米'), + (0x2F77, 'M', '糸'), + (0x2F78, 'M', '缶'), + (0x2F79, 'M', '网'), ] -def _seg_28(): +def _seg_28() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0x2F76, 'M', u'米'), - (0x2F77, 'M', u'糸'), - (0x2F78, 'M', u'缶'), - (0x2F79, 'M', u'网'), - (0x2F7A, 'M', u'羊'), - (0x2F7B, 'M', u'羽'), - (0x2F7C, 'M', u'老'), - (0x2F7D, 'M', u'而'), - (0x2F7E, 'M', u'耒'), - (0x2F7F, 'M', u'耳'), - (0x2F80, 'M', u'聿'), - (0x2F81, 'M', u'肉'), - (0x2F82, 'M', u'臣'), - (0x2F83, 'M', u'自'), - (0x2F84, 'M', u'至'), - (0x2F85, 'M', u'臼'), - (0x2F86, 'M', u'舌'), - (0x2F87, 'M', u'舛'), - (0x2F88, 'M', u'舟'), - (0x2F89, 'M', u'艮'), - (0x2F8A, 'M', u'色'), - (0x2F8B, 'M', u'艸'), - (0x2F8C, 'M', u'虍'), - (0x2F8D, 'M', u'虫'), - (0x2F8E, 'M', u'血'), - (0x2F8F, 'M', u'行'), - (0x2F90, 'M', u'衣'), - (0x2F91, 'M', u'襾'), - (0x2F92, 'M', u'見'), - (0x2F93, 'M', u'角'), - (0x2F94, 'M', u'言'), - (0x2F95, 'M', u'谷'), - (0x2F96, 'M', u'豆'), - (0x2F97, 'M', u'豕'), - (0x2F98, 'M', u'豸'), - (0x2F99, 'M', u'貝'), - (0x2F9A, 'M', u'赤'), - (0x2F9B, 'M', u'走'), - (0x2F9C, 'M', u'足'), - (0x2F9D, 'M', u'身'), - (0x2F9E, 'M', u'車'), - (0x2F9F, 'M', u'辛'), - (0x2FA0, 'M', u'辰'), - (0x2FA1, 'M', u'辵'), - (0x2FA2, 'M', u'邑'), - (0x2FA3, 'M', u'酉'), - (0x2FA4, 'M', u'釆'), - (0x2FA5, 'M', u'里'), - (0x2FA6, 'M', u'金'), - (0x2FA7, 'M', u'長'), - (0x2FA8, 'M', u'門'), - (0x2FA9, 'M', u'阜'), - (0x2FAA, 'M', u'隶'), - (0x2FAB, 'M', u'隹'), - (0x2FAC, 'M', u'雨'), - (0x2FAD, 'M', u'靑'), - (0x2FAE, 'M', u'非'), - (0x2FAF, 'M', u'面'), - (0x2FB0, 'M', u'革'), - (0x2FB1, 'M', u'韋'), - (0x2FB2, 'M', u'韭'), - (0x2FB3, 'M', u'音'), - (0x2FB4, 'M', u'頁'), - (0x2FB5, 'M', u'風'), - (0x2FB6, 'M', u'飛'), - (0x2FB7, 'M', u'食'), - (0x2FB8, 'M', u'首'), - (0x2FB9, 'M', u'香'), - (0x2FBA, 'M', u'馬'), - (0x2FBB, 'M', u'骨'), - (0x2FBC, 'M', u'高'), - (0x2FBD, 'M', u'髟'), - (0x2FBE, 'M', u'鬥'), - (0x2FBF, 'M', u'鬯'), - (0x2FC0, 'M', u'鬲'), - (0x2FC1, 'M', u'鬼'), - (0x2FC2, 'M', u'魚'), - (0x2FC3, 'M', u'鳥'), - (0x2FC4, 'M', u'鹵'), - (0x2FC5, 'M', u'鹿'), - (0x2FC6, 'M', u'麥'), - (0x2FC7, 'M', u'麻'), - (0x2FC8, 'M', u'黃'), - (0x2FC9, 'M', u'黍'), - (0x2FCA, 'M', u'黑'), - (0x2FCB, 'M', u'黹'), - (0x2FCC, 'M', u'黽'), - (0x2FCD, 'M', u'鼎'), - (0x2FCE, 'M', u'鼓'), - (0x2FCF, 'M', u'鼠'), - (0x2FD0, 'M', u'鼻'), - (0x2FD1, 'M', u'齊'), - (0x2FD2, 'M', u'齒'), - (0x2FD3, 'M', u'龍'), - (0x2FD4, 'M', u'龜'), - (0x2FD5, 'M', u'龠'), + (0x2F7A, 'M', '羊'), + (0x2F7B, 'M', '羽'), + (0x2F7C, 'M', '老'), + (0x2F7D, 'M', '而'), + (0x2F7E, 'M', '耒'), + (0x2F7F, 'M', '耳'), + (0x2F80, 'M', '聿'), + (0x2F81, 'M', '肉'), + (0x2F82, 'M', '臣'), + (0x2F83, 'M', '自'), + (0x2F84, 'M', '至'), + (0x2F85, 'M', '臼'), + (0x2F86, 'M', '舌'), + (0x2F87, 'M', '舛'), + (0x2F88, 'M', '舟'), + (0x2F89, 'M', '艮'), + (0x2F8A, 'M', '色'), + (0x2F8B, 'M', '艸'), + (0x2F8C, 'M', '虍'), + (0x2F8D, 'M', '虫'), + (0x2F8E, 'M', '血'), + (0x2F8F, 'M', '行'), + (0x2F90, 'M', '衣'), + (0x2F91, 'M', '襾'), + (0x2F92, 'M', '見'), + (0x2F93, 'M', '角'), + (0x2F94, 'M', '言'), + (0x2F95, 'M', '谷'), + (0x2F96, 'M', '豆'), + (0x2F97, 'M', '豕'), + (0x2F98, 'M', '豸'), + (0x2F99, 'M', '貝'), + (0x2F9A, 'M', '赤'), + (0x2F9B, 'M', '走'), + (0x2F9C, 'M', '足'), + (0x2F9D, 'M', '身'), + (0x2F9E, 'M', '車'), + (0x2F9F, 'M', '辛'), + (0x2FA0, 'M', '辰'), + (0x2FA1, 'M', '辵'), + (0x2FA2, 'M', '邑'), + (0x2FA3, 'M', '酉'), + (0x2FA4, 'M', '釆'), + (0x2FA5, 'M', '里'), + (0x2FA6, 'M', '金'), + (0x2FA7, 'M', '長'), + (0x2FA8, 'M', '門'), + (0x2FA9, 'M', '阜'), + (0x2FAA, 'M', '隶'), + (0x2FAB, 'M', '隹'), + (0x2FAC, 'M', '雨'), + (0x2FAD, 'M', '靑'), + (0x2FAE, 'M', '非'), + (0x2FAF, 'M', '面'), + (0x2FB0, 'M', '革'), + (0x2FB1, 'M', '韋'), + (0x2FB2, 'M', '韭'), + (0x2FB3, 'M', '音'), + (0x2FB4, 'M', '頁'), + (0x2FB5, 'M', '風'), + (0x2FB6, 'M', '飛'), + (0x2FB7, 'M', '食'), + (0x2FB8, 'M', '首'), + (0x2FB9, 'M', '香'), + (0x2FBA, 'M', '馬'), + (0x2FBB, 'M', '骨'), + (0x2FBC, 'M', '高'), + (0x2FBD, 'M', '髟'), + (0x2FBE, 'M', '鬥'), + (0x2FBF, 'M', '鬯'), + (0x2FC0, 'M', '鬲'), + (0x2FC1, 'M', '鬼'), + (0x2FC2, 'M', '魚'), + (0x2FC3, 'M', '鳥'), + (0x2FC4, 'M', '鹵'), + (0x2FC5, 'M', '鹿'), + (0x2FC6, 'M', '麥'), + (0x2FC7, 'M', '麻'), + (0x2FC8, 'M', '黃'), + (0x2FC9, 'M', '黍'), + (0x2FCA, 'M', '黑'), + (0x2FCB, 'M', '黹'), + (0x2FCC, 'M', '黽'), + (0x2FCD, 'M', '鼎'), + (0x2FCE, 'M', '鼓'), + (0x2FCF, 'M', '鼠'), + (0x2FD0, 'M', '鼻'), + (0x2FD1, 'M', '齊'), + (0x2FD2, 'M', '齒'), + (0x2FD3, 'M', '龍'), + (0x2FD4, 'M', '龜'), + (0x2FD5, 'M', '龠'), (0x2FD6, 'X'), - (0x3000, '3', u' '), + (0x3000, '3', ' '), (0x3001, 'V'), - (0x3002, 'M', u'.'), + (0x3002, 'M', '.'), + (0x3003, 'V'), + (0x3036, 'M', '〒'), + (0x3037, 'V'), + (0x3038, 'M', '十'), ] -def _seg_29(): +def _seg_29() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0x3003, 'V'), - (0x3036, 'M', u'〒'), - (0x3037, 'V'), - (0x3038, 'M', u'十'), - (0x3039, 'M', u'卄'), - (0x303A, 'M', u'卅'), + (0x3039, 'M', '卄'), + (0x303A, 'M', '卅'), (0x303B, 'V'), (0x3040, 'X'), (0x3041, 'V'), (0x3097, 'X'), (0x3099, 'V'), - (0x309B, '3', u' ゙'), - (0x309C, '3', u' ゚'), + (0x309B, '3', ' ゙'), + (0x309C, '3', ' ゚'), (0x309D, 'V'), - (0x309F, 'M', u'より'), + (0x309F, 'M', 'より'), (0x30A0, 'V'), - (0x30FF, 'M', u'コト'), + (0x30FF, 'M', 'コト'), (0x3100, 'X'), (0x3105, 'V'), (0x3130, 'X'), - (0x3131, 'M', u'ᄀ'), - (0x3132, 'M', u'ᄁ'), - (0x3133, 'M', u'ᆪ'), - (0x3134, 'M', u'ᄂ'), - (0x3135, 'M', u'ᆬ'), - (0x3136, 'M', u'ᆭ'), - (0x3137, 'M', u'ᄃ'), - (0x3138, 'M', u'ᄄ'), - (0x3139, 'M', u'ᄅ'), - (0x313A, 'M', u'ᆰ'), - (0x313B, 'M', u'ᆱ'), - (0x313C, 'M', u'ᆲ'), - (0x313D, 'M', u'ᆳ'), - (0x313E, 'M', u'ᆴ'), - (0x313F, 'M', u'ᆵ'), - (0x3140, 'M', u'ᄚ'), - (0x3141, 'M', u'ᄆ'), - (0x3142, 'M', u'ᄇ'), - (0x3143, 'M', u'ᄈ'), - (0x3144, 'M', u'ᄡ'), - (0x3145, 'M', u'ᄉ'), - (0x3146, 'M', u'ᄊ'), - (0x3147, 'M', u'ᄋ'), - (0x3148, 'M', u'ᄌ'), - (0x3149, 'M', u'ᄍ'), - (0x314A, 'M', u'ᄎ'), - (0x314B, 'M', u'ᄏ'), - (0x314C, 'M', u'ᄐ'), - (0x314D, 'M', u'ᄑ'), - (0x314E, 'M', u'ᄒ'), - (0x314F, 'M', u'ᅡ'), - (0x3150, 'M', u'ᅢ'), - (0x3151, 'M', u'ᅣ'), - (0x3152, 'M', u'ᅤ'), - (0x3153, 'M', u'ᅥ'), - (0x3154, 'M', u'ᅦ'), - (0x3155, 'M', u'ᅧ'), - (0x3156, 'M', u'ᅨ'), - (0x3157, 'M', u'ᅩ'), - (0x3158, 'M', u'ᅪ'), - (0x3159, 'M', u'ᅫ'), - (0x315A, 'M', u'ᅬ'), - (0x315B, 'M', u'ᅭ'), - (0x315C, 'M', u'ᅮ'), - (0x315D, 'M', u'ᅯ'), - (0x315E, 'M', u'ᅰ'), - (0x315F, 'M', u'ᅱ'), - (0x3160, 'M', u'ᅲ'), - (0x3161, 'M', u'ᅳ'), - (0x3162, 'M', u'ᅴ'), - (0x3163, 'M', u'ᅵ'), + (0x3131, 'M', 'ᄀ'), + (0x3132, 'M', 'ᄁ'), + (0x3133, 'M', 'ᆪ'), + (0x3134, 'M', 'ᄂ'), + (0x3135, 'M', 'ᆬ'), + (0x3136, 'M', 'ᆭ'), + (0x3137, 'M', 'ᄃ'), + (0x3138, 'M', 'ᄄ'), + (0x3139, 'M', 'ᄅ'), + (0x313A, 'M', 'ᆰ'), + (0x313B, 'M', 'ᆱ'), + (0x313C, 'M', 'ᆲ'), + (0x313D, 'M', 'ᆳ'), + (0x313E, 'M', 'ᆴ'), + (0x313F, 'M', 'ᆵ'), + (0x3140, 'M', 'ᄚ'), + (0x3141, 'M', 'ᄆ'), + (0x3142, 'M', 'ᄇ'), + (0x3143, 'M', 'ᄈ'), + (0x3144, 'M', 'ᄡ'), + (0x3145, 'M', 'ᄉ'), + (0x3146, 'M', 'ᄊ'), + (0x3147, 'M', 'ᄋ'), + (0x3148, 'M', 'ᄌ'), + (0x3149, 'M', 'ᄍ'), + (0x314A, 'M', 'ᄎ'), + (0x314B, 'M', 'ᄏ'), + (0x314C, 'M', 'ᄐ'), + (0x314D, 'M', 'ᄑ'), + (0x314E, 'M', 'ᄒ'), + (0x314F, 'M', 'ᅡ'), + (0x3150, 'M', 'ᅢ'), + (0x3151, 'M', 'ᅣ'), + (0x3152, 'M', 'ᅤ'), + (0x3153, 'M', 'ᅥ'), + (0x3154, 'M', 'ᅦ'), + (0x3155, 'M', 'ᅧ'), + (0x3156, 'M', 'ᅨ'), + (0x3157, 'M', 'ᅩ'), + (0x3158, 'M', 'ᅪ'), + (0x3159, 'M', 'ᅫ'), + (0x315A, 'M', 'ᅬ'), + (0x315B, 'M', 'ᅭ'), + (0x315C, 'M', 'ᅮ'), + (0x315D, 'M', 'ᅯ'), + (0x315E, 'M', 'ᅰ'), + (0x315F, 'M', 'ᅱ'), + (0x3160, 'M', 'ᅲ'), + (0x3161, 'M', 'ᅳ'), + (0x3162, 'M', 'ᅴ'), + (0x3163, 'M', 'ᅵ'), (0x3164, 'X'), - (0x3165, 'M', u'ᄔ'), - (0x3166, 'M', u'ᄕ'), - (0x3167, 'M', u'ᇇ'), - (0x3168, 'M', u'ᇈ'), - (0x3169, 'M', u'ᇌ'), - (0x316A, 'M', u'ᇎ'), - (0x316B, 'M', u'ᇓ'), - (0x316C, 'M', u'ᇗ'), - (0x316D, 'M', u'ᇙ'), - (0x316E, 'M', u'ᄜ'), - (0x316F, 'M', u'ᇝ'), - (0x3170, 'M', u'ᇟ'), - (0x3171, 'M', u'ᄝ'), - (0x3172, 'M', u'ᄞ'), - (0x3173, 'M', u'ᄠ'), - (0x3174, 'M', u'ᄢ'), - (0x3175, 'M', u'ᄣ'), - (0x3176, 'M', u'ᄧ'), - (0x3177, 'M', u'ᄩ'), - (0x3178, 'M', u'ᄫ'), - (0x3179, 'M', u'ᄬ'), - (0x317A, 'M', u'ᄭ'), - (0x317B, 'M', u'ᄮ'), - (0x317C, 'M', u'ᄯ'), - (0x317D, 'M', u'ᄲ'), - (0x317E, 'M', u'ᄶ'), - (0x317F, 'M', u'ᅀ'), - (0x3180, 'M', u'ᅇ'), + (0x3165, 'M', 'ᄔ'), + (0x3166, 'M', 'ᄕ'), + (0x3167, 'M', 'ᇇ'), + (0x3168, 'M', 'ᇈ'), + (0x3169, 'M', 'ᇌ'), + (0x316A, 'M', 'ᇎ'), + (0x316B, 'M', 'ᇓ'), + (0x316C, 'M', 'ᇗ'), + (0x316D, 'M', 'ᇙ'), + (0x316E, 'M', 'ᄜ'), + (0x316F, 'M', 'ᇝ'), + (0x3170, 'M', 'ᇟ'), + (0x3171, 'M', 'ᄝ'), + (0x3172, 'M', 'ᄞ'), + (0x3173, 'M', 'ᄠ'), + (0x3174, 'M', 'ᄢ'), + (0x3175, 'M', 'ᄣ'), + (0x3176, 'M', 'ᄧ'), + (0x3177, 'M', 'ᄩ'), + (0x3178, 'M', 'ᄫ'), + (0x3179, 'M', 'ᄬ'), + (0x317A, 'M', 'ᄭ'), + (0x317B, 'M', 'ᄮ'), + (0x317C, 'M', 'ᄯ'), + (0x317D, 'M', 'ᄲ'), + (0x317E, 'M', 'ᄶ'), + (0x317F, 'M', 'ᅀ'), + (0x3180, 'M', 'ᅇ'), + (0x3181, 'M', 'ᅌ'), + (0x3182, 'M', 'ᇱ'), + (0x3183, 'M', 'ᇲ'), + (0x3184, 'M', 'ᅗ'), ] -def _seg_30(): +def _seg_30() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0x3181, 'M', u'ᅌ'), - (0x3182, 'M', u'ᇱ'), - (0x3183, 'M', u'ᇲ'), - (0x3184, 'M', u'ᅗ'), - (0x3185, 'M', u'ᅘ'), - (0x3186, 'M', u'ᅙ'), - (0x3187, 'M', u'ᆄ'), - (0x3188, 'M', u'ᆅ'), - (0x3189, 'M', u'ᆈ'), - (0x318A, 'M', u'ᆑ'), - (0x318B, 'M', u'ᆒ'), - (0x318C, 'M', u'ᆔ'), - (0x318D, 'M', u'ᆞ'), - (0x318E, 'M', u'ᆡ'), + (0x3185, 'M', 'ᅘ'), + (0x3186, 'M', 'ᅙ'), + (0x3187, 'M', 'ᆄ'), + (0x3188, 'M', 'ᆅ'), + (0x3189, 'M', 'ᆈ'), + (0x318A, 'M', 'ᆑ'), + (0x318B, 'M', 'ᆒ'), + (0x318C, 'M', 'ᆔ'), + (0x318D, 'M', 'ᆞ'), + (0x318E, 'M', 'ᆡ'), (0x318F, 'X'), (0x3190, 'V'), - (0x3192, 'M', u'一'), - (0x3193, 'M', u'二'), - (0x3194, 'M', u'三'), - (0x3195, 'M', u'四'), - (0x3196, 'M', u'上'), - (0x3197, 'M', u'中'), - (0x3198, 'M', u'下'), - (0x3199, 'M', u'甲'), - (0x319A, 'M', u'乙'), - (0x319B, 'M', u'丙'), - (0x319C, 'M', u'丁'), - (0x319D, 'M', u'天'), - (0x319E, 'M', u'地'), - (0x319F, 'M', u'人'), + (0x3192, 'M', '一'), + (0x3193, 'M', '二'), + (0x3194, 'M', '三'), + (0x3195, 'M', '四'), + (0x3196, 'M', '上'), + (0x3197, 'M', '中'), + (0x3198, 'M', '下'), + (0x3199, 'M', '甲'), + (0x319A, 'M', '乙'), + (0x319B, 'M', '丙'), + (0x319C, 'M', '丁'), + (0x319D, 'M', '天'), + (0x319E, 'M', '地'), + (0x319F, 'M', '人'), (0x31A0, 'V'), (0x31E4, 'X'), (0x31F0, 'V'), - (0x3200, '3', u'(ᄀ)'), - (0x3201, '3', u'(ᄂ)'), - (0x3202, '3', u'(ᄃ)'), - (0x3203, '3', u'(ᄅ)'), - (0x3204, '3', u'(ᄆ)'), - (0x3205, '3', u'(ᄇ)'), - (0x3206, '3', u'(ᄉ)'), - (0x3207, '3', u'(ᄋ)'), - (0x3208, '3', u'(ᄌ)'), - (0x3209, '3', u'(ᄎ)'), - (0x320A, '3', u'(ᄏ)'), - (0x320B, '3', u'(ᄐ)'), - (0x320C, '3', u'(ᄑ)'), - (0x320D, '3', u'(ᄒ)'), - (0x320E, '3', u'(가)'), - (0x320F, '3', u'(나)'), - (0x3210, '3', u'(다)'), - (0x3211, '3', u'(라)'), - (0x3212, '3', u'(마)'), - (0x3213, '3', u'(바)'), - (0x3214, '3', u'(사)'), - (0x3215, '3', u'(아)'), - (0x3216, '3', u'(자)'), - (0x3217, '3', u'(차)'), - (0x3218, '3', u'(카)'), - (0x3219, '3', u'(타)'), - (0x321A, '3', u'(파)'), - (0x321B, '3', u'(하)'), - (0x321C, '3', u'(주)'), - (0x321D, '3', u'(오전)'), - (0x321E, '3', u'(오후)'), + (0x3200, '3', '(ᄀ)'), + (0x3201, '3', '(ᄂ)'), + (0x3202, '3', '(ᄃ)'), + (0x3203, '3', '(ᄅ)'), + (0x3204, '3', '(ᄆ)'), + (0x3205, '3', '(ᄇ)'), + (0x3206, '3', '(ᄉ)'), + (0x3207, '3', '(ᄋ)'), + (0x3208, '3', '(ᄌ)'), + (0x3209, '3', '(ᄎ)'), + (0x320A, '3', '(ᄏ)'), + (0x320B, '3', '(ᄐ)'), + (0x320C, '3', '(ᄑ)'), + (0x320D, '3', '(ᄒ)'), + (0x320E, '3', '(가)'), + (0x320F, '3', '(나)'), + (0x3210, '3', '(다)'), + (0x3211, '3', '(라)'), + (0x3212, '3', '(마)'), + (0x3213, '3', '(바)'), + (0x3214, '3', '(사)'), + (0x3215, '3', '(아)'), + (0x3216, '3', '(자)'), + (0x3217, '3', '(차)'), + (0x3218, '3', '(카)'), + (0x3219, '3', '(타)'), + (0x321A, '3', '(파)'), + (0x321B, '3', '(하)'), + (0x321C, '3', '(주)'), + (0x321D, '3', '(오전)'), + (0x321E, '3', '(오후)'), (0x321F, 'X'), - (0x3220, '3', u'(一)'), - (0x3221, '3', u'(二)'), - (0x3222, '3', u'(三)'), - (0x3223, '3', u'(四)'), - (0x3224, '3', u'(五)'), - (0x3225, '3', u'(六)'), - (0x3226, '3', u'(七)'), - (0x3227, '3', u'(八)'), - (0x3228, '3', u'(九)'), - (0x3229, '3', u'(十)'), - (0x322A, '3', u'(月)'), - (0x322B, '3', u'(火)'), - (0x322C, '3', u'(水)'), - (0x322D, '3', u'(木)'), - (0x322E, '3', u'(金)'), - (0x322F, '3', u'(土)'), - (0x3230, '3', u'(日)'), - (0x3231, '3', u'(株)'), - (0x3232, '3', u'(有)'), - (0x3233, '3', u'(社)'), - (0x3234, '3', u'(名)'), - (0x3235, '3', u'(特)'), - (0x3236, '3', u'(財)'), - (0x3237, '3', u'(祝)'), - (0x3238, '3', u'(労)'), - (0x3239, '3', u'(代)'), - (0x323A, '3', u'(呼)'), - (0x323B, '3', u'(学)'), - (0x323C, '3', u'(監)'), - (0x323D, '3', u'(企)'), - (0x323E, '3', u'(資)'), - (0x323F, '3', u'(協)'), - (0x3240, '3', u'(祭)'), - (0x3241, '3', u'(休)'), - (0x3242, '3', u'(自)'), + (0x3220, '3', '(一)'), + (0x3221, '3', '(二)'), + (0x3222, '3', '(三)'), + (0x3223, '3', '(四)'), + (0x3224, '3', '(五)'), + (0x3225, '3', '(六)'), + (0x3226, '3', '(七)'), + (0x3227, '3', '(八)'), + (0x3228, '3', '(九)'), + (0x3229, '3', '(十)'), + (0x322A, '3', '(月)'), + (0x322B, '3', '(火)'), + (0x322C, '3', '(水)'), + (0x322D, '3', '(木)'), + (0x322E, '3', '(金)'), + (0x322F, '3', '(土)'), + (0x3230, '3', '(日)'), + (0x3231, '3', '(株)'), + (0x3232, '3', '(有)'), + (0x3233, '3', '(社)'), + (0x3234, '3', '(名)'), + (0x3235, '3', '(特)'), + (0x3236, '3', '(財)'), + (0x3237, '3', '(祝)'), + (0x3238, '3', '(労)'), + (0x3239, '3', '(代)'), + (0x323A, '3', '(呼)'), + (0x323B, '3', '(学)'), + (0x323C, '3', '(監)'), + (0x323D, '3', '(企)'), + (0x323E, '3', '(資)'), + (0x323F, '3', '(協)'), + (0x3240, '3', '(祭)'), + (0x3241, '3', '(休)'), + (0x3242, '3', '(自)'), + (0x3243, '3', '(至)'), + (0x3244, 'M', '問'), + (0x3245, 'M', '幼'), + (0x3246, 'M', '文'), ] -def _seg_31(): +def _seg_31() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0x3243, '3', u'(至)'), - (0x3244, 'M', u'問'), - (0x3245, 'M', u'幼'), - (0x3246, 'M', u'文'), - (0x3247, 'M', u'箏'), + (0x3247, 'M', '箏'), (0x3248, 'V'), - (0x3250, 'M', u'pte'), - (0x3251, 'M', u'21'), - (0x3252, 'M', u'22'), - (0x3253, 'M', u'23'), - (0x3254, 'M', u'24'), - (0x3255, 'M', u'25'), - (0x3256, 'M', u'26'), - (0x3257, 'M', u'27'), - (0x3258, 'M', u'28'), - (0x3259, 'M', u'29'), - (0x325A, 'M', u'30'), - (0x325B, 'M', u'31'), - (0x325C, 'M', u'32'), - (0x325D, 'M', u'33'), - (0x325E, 'M', u'34'), - (0x325F, 'M', u'35'), - (0x3260, 'M', u'ᄀ'), - (0x3261, 'M', u'ᄂ'), - (0x3262, 'M', u'ᄃ'), - (0x3263, 'M', u'ᄅ'), - (0x3264, 'M', u'ᄆ'), - (0x3265, 'M', u'ᄇ'), - (0x3266, 'M', u'ᄉ'), - (0x3267, 'M', u'ᄋ'), - (0x3268, 'M', u'ᄌ'), - (0x3269, 'M', u'ᄎ'), - (0x326A, 'M', u'ᄏ'), - (0x326B, 'M', u'ᄐ'), - (0x326C, 'M', u'ᄑ'), - (0x326D, 'M', u'ᄒ'), - (0x326E, 'M', u'가'), - (0x326F, 'M', u'나'), - (0x3270, 'M', u'다'), - (0x3271, 'M', u'라'), - (0x3272, 'M', u'마'), - (0x3273, 'M', u'바'), - (0x3274, 'M', u'사'), - (0x3275, 'M', u'아'), - (0x3276, 'M', u'자'), - (0x3277, 'M', u'차'), - (0x3278, 'M', u'카'), - (0x3279, 'M', u'타'), - (0x327A, 'M', u'파'), - (0x327B, 'M', u'하'), - (0x327C, 'M', u'참고'), - (0x327D, 'M', u'주의'), - (0x327E, 'M', u'우'), + (0x3250, 'M', 'pte'), + (0x3251, 'M', '21'), + (0x3252, 'M', '22'), + (0x3253, 'M', '23'), + (0x3254, 'M', '24'), + (0x3255, 'M', '25'), + (0x3256, 'M', '26'), + (0x3257, 'M', '27'), + (0x3258, 'M', '28'), + (0x3259, 'M', '29'), + (0x325A, 'M', '30'), + (0x325B, 'M', '31'), + (0x325C, 'M', '32'), + (0x325D, 'M', '33'), + (0x325E, 'M', '34'), + (0x325F, 'M', '35'), + (0x3260, 'M', 'ᄀ'), + (0x3261, 'M', 'ᄂ'), + (0x3262, 'M', 'ᄃ'), + (0x3263, 'M', 'ᄅ'), + (0x3264, 'M', 'ᄆ'), + (0x3265, 'M', 'ᄇ'), + (0x3266, 'M', 'ᄉ'), + (0x3267, 'M', 'ᄋ'), + (0x3268, 'M', 'ᄌ'), + (0x3269, 'M', 'ᄎ'), + (0x326A, 'M', 'ᄏ'), + (0x326B, 'M', 'ᄐ'), + (0x326C, 'M', 'ᄑ'), + (0x326D, 'M', 'ᄒ'), + (0x326E, 'M', '가'), + (0x326F, 'M', '나'), + (0x3270, 'M', '다'), + (0x3271, 'M', '라'), + (0x3272, 'M', '마'), + (0x3273, 'M', '바'), + (0x3274, 'M', '사'), + (0x3275, 'M', '아'), + (0x3276, 'M', '자'), + (0x3277, 'M', '차'), + (0x3278, 'M', '카'), + (0x3279, 'M', '타'), + (0x327A, 'M', '파'), + (0x327B, 'M', '하'), + (0x327C, 'M', '참고'), + (0x327D, 'M', '주의'), + (0x327E, 'M', '우'), (0x327F, 'V'), - (0x3280, 'M', u'一'), - (0x3281, 'M', u'二'), - (0x3282, 'M', u'三'), - (0x3283, 'M', u'四'), - (0x3284, 'M', u'五'), - (0x3285, 'M', u'六'), - (0x3286, 'M', u'七'), - (0x3287, 'M', u'八'), - (0x3288, 'M', u'九'), - (0x3289, 'M', u'十'), - (0x328A, 'M', u'月'), - (0x328B, 'M', u'火'), - (0x328C, 'M', u'水'), - (0x328D, 'M', u'木'), - (0x328E, 'M', u'金'), - (0x328F, 'M', u'土'), - (0x3290, 'M', u'日'), - (0x3291, 'M', u'株'), - (0x3292, 'M', u'有'), - (0x3293, 'M', u'社'), - (0x3294, 'M', u'名'), - (0x3295, 'M', u'特'), - (0x3296, 'M', u'財'), - (0x3297, 'M', u'祝'), - (0x3298, 'M', u'労'), - (0x3299, 'M', u'秘'), - (0x329A, 'M', u'男'), - (0x329B, 'M', u'女'), - (0x329C, 'M', u'適'), - (0x329D, 'M', u'優'), - (0x329E, 'M', u'印'), - (0x329F, 'M', u'注'), - (0x32A0, 'M', u'項'), - (0x32A1, 'M', u'休'), - (0x32A2, 'M', u'写'), - (0x32A3, 'M', u'正'), - (0x32A4, 'M', u'上'), - (0x32A5, 'M', u'中'), - (0x32A6, 'M', u'下'), - (0x32A7, 'M', u'左'), - (0x32A8, 'M', u'右'), - (0x32A9, 'M', u'医'), - (0x32AA, 'M', u'宗'), - (0x32AB, 'M', u'学'), - (0x32AC, 'M', u'監'), - (0x32AD, 'M', u'企'), + (0x3280, 'M', '一'), + (0x3281, 'M', '二'), + (0x3282, 'M', '三'), + (0x3283, 'M', '四'), + (0x3284, 'M', '五'), + (0x3285, 'M', '六'), + (0x3286, 'M', '七'), + (0x3287, 'M', '八'), + (0x3288, 'M', '九'), + (0x3289, 'M', '十'), + (0x328A, 'M', '月'), + (0x328B, 'M', '火'), + (0x328C, 'M', '水'), + (0x328D, 'M', '木'), + (0x328E, 'M', '金'), + (0x328F, 'M', '土'), + (0x3290, 'M', '日'), + (0x3291, 'M', '株'), + (0x3292, 'M', '有'), + (0x3293, 'M', '社'), + (0x3294, 'M', '名'), + (0x3295, 'M', '特'), + (0x3296, 'M', '財'), + (0x3297, 'M', '祝'), + (0x3298, 'M', '労'), + (0x3299, 'M', '秘'), + (0x329A, 'M', '男'), + (0x329B, 'M', '女'), + (0x329C, 'M', '適'), + (0x329D, 'M', '優'), + (0x329E, 'M', '印'), + (0x329F, 'M', '注'), + (0x32A0, 'M', '項'), + (0x32A1, 'M', '休'), + (0x32A2, 'M', '写'), + (0x32A3, 'M', '正'), + (0x32A4, 'M', '上'), + (0x32A5, 'M', '中'), + (0x32A6, 'M', '下'), + (0x32A7, 'M', '左'), + (0x32A8, 'M', '右'), + (0x32A9, 'M', '医'), + (0x32AA, 'M', '宗'), + (0x32AB, 'M', '学'), + (0x32AC, 'M', '監'), + (0x32AD, 'M', '企'), + (0x32AE, 'M', '資'), + (0x32AF, 'M', '協'), + (0x32B0, 'M', '夜'), + (0x32B1, 'M', '36'), ] -def _seg_32(): +def _seg_32() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0x32AE, 'M', u'資'), - (0x32AF, 'M', u'協'), - (0x32B0, 'M', u'夜'), - (0x32B1, 'M', u'36'), - (0x32B2, 'M', u'37'), - (0x32B3, 'M', u'38'), - (0x32B4, 'M', u'39'), - (0x32B5, 'M', u'40'), - (0x32B6, 'M', u'41'), - (0x32B7, 'M', u'42'), - (0x32B8, 'M', u'43'), - (0x32B9, 'M', u'44'), - (0x32BA, 'M', u'45'), - (0x32BB, 'M', u'46'), - (0x32BC, 'M', u'47'), - (0x32BD, 'M', u'48'), - (0x32BE, 'M', u'49'), - (0x32BF, 'M', u'50'), - (0x32C0, 'M', u'1月'), - (0x32C1, 'M', u'2月'), - (0x32C2, 'M', u'3月'), - (0x32C3, 'M', u'4月'), - (0x32C4, 'M', u'5月'), - (0x32C5, 'M', u'6月'), - (0x32C6, 'M', u'7月'), - (0x32C7, 'M', u'8月'), - (0x32C8, 'M', u'9月'), - (0x32C9, 'M', u'10月'), - (0x32CA, 'M', u'11月'), - (0x32CB, 'M', u'12月'), - (0x32CC, 'M', u'hg'), - (0x32CD, 'M', u'erg'), - (0x32CE, 'M', u'ev'), - (0x32CF, 'M', u'ltd'), - (0x32D0, 'M', u'ア'), - (0x32D1, 'M', u'イ'), - (0x32D2, 'M', u'ウ'), - (0x32D3, 'M', u'エ'), - (0x32D4, 'M', u'オ'), - (0x32D5, 'M', u'カ'), - (0x32D6, 'M', u'キ'), - (0x32D7, 'M', u'ク'), - (0x32D8, 'M', u'ケ'), - (0x32D9, 'M', u'コ'), - (0x32DA, 'M', u'サ'), - (0x32DB, 'M', u'シ'), - (0x32DC, 'M', u'ス'), - (0x32DD, 'M', u'セ'), - (0x32DE, 'M', u'ソ'), - (0x32DF, 'M', u'タ'), - (0x32E0, 'M', u'チ'), - (0x32E1, 'M', u'ツ'), - (0x32E2, 'M', u'テ'), - (0x32E3, 'M', u'ト'), - (0x32E4, 'M', u'ナ'), - (0x32E5, 'M', u'ニ'), - (0x32E6, 'M', u'ヌ'), - (0x32E7, 'M', u'ネ'), - (0x32E8, 'M', u'ノ'), - (0x32E9, 'M', u'ハ'), - (0x32EA, 'M', u'ヒ'), - (0x32EB, 'M', u'フ'), - (0x32EC, 'M', u'ヘ'), - (0x32ED, 'M', u'ホ'), - (0x32EE, 'M', u'マ'), - (0x32EF, 'M', u'ミ'), - (0x32F0, 'M', u'ム'), - (0x32F1, 'M', u'メ'), - (0x32F2, 'M', u'モ'), - (0x32F3, 'M', u'ヤ'), - (0x32F4, 'M', u'ユ'), - (0x32F5, 'M', u'ヨ'), - (0x32F6, 'M', u'ラ'), - (0x32F7, 'M', u'リ'), - (0x32F8, 'M', u'ル'), - (0x32F9, 'M', u'レ'), - (0x32FA, 'M', u'ロ'), - (0x32FB, 'M', u'ワ'), - (0x32FC, 'M', u'ヰ'), - (0x32FD, 'M', u'ヱ'), - (0x32FE, 'M', u'ヲ'), - (0x32FF, 'M', u'令和'), - (0x3300, 'M', u'アパート'), - (0x3301, 'M', u'アルファ'), - (0x3302, 'M', u'アンペア'), - (0x3303, 'M', u'アール'), - (0x3304, 'M', u'イニング'), - (0x3305, 'M', u'インチ'), - (0x3306, 'M', u'ウォン'), - (0x3307, 'M', u'エスクード'), - (0x3308, 'M', u'エーカー'), - (0x3309, 'M', u'オンス'), - (0x330A, 'M', u'オーム'), - (0x330B, 'M', u'カイリ'), - (0x330C, 'M', u'カラット'), - (0x330D, 'M', u'カロリー'), - (0x330E, 'M', u'ガロン'), - (0x330F, 'M', u'ガンマ'), - (0x3310, 'M', u'ギガ'), - (0x3311, 'M', u'ギニー'), + (0x32B2, 'M', '37'), + (0x32B3, 'M', '38'), + (0x32B4, 'M', '39'), + (0x32B5, 'M', '40'), + (0x32B6, 'M', '41'), + (0x32B7, 'M', '42'), + (0x32B8, 'M', '43'), + (0x32B9, 'M', '44'), + (0x32BA, 'M', '45'), + (0x32BB, 'M', '46'), + (0x32BC, 'M', '47'), + (0x32BD, 'M', '48'), + (0x32BE, 'M', '49'), + (0x32BF, 'M', '50'), + (0x32C0, 'M', '1月'), + (0x32C1, 'M', '2月'), + (0x32C2, 'M', '3月'), + (0x32C3, 'M', '4月'), + (0x32C4, 'M', '5月'), + (0x32C5, 'M', '6月'), + (0x32C6, 'M', '7月'), + (0x32C7, 'M', '8月'), + (0x32C8, 'M', '9月'), + (0x32C9, 'M', '10月'), + (0x32CA, 'M', '11月'), + (0x32CB, 'M', '12月'), + (0x32CC, 'M', 'hg'), + (0x32CD, 'M', 'erg'), + (0x32CE, 'M', 'ev'), + (0x32CF, 'M', 'ltd'), + (0x32D0, 'M', 'ア'), + (0x32D1, 'M', 'イ'), + (0x32D2, 'M', 'ウ'), + (0x32D3, 'M', 'エ'), + (0x32D4, 'M', 'オ'), + (0x32D5, 'M', 'カ'), + (0x32D6, 'M', 'キ'), + (0x32D7, 'M', 'ク'), + (0x32D8, 'M', 'ケ'), + (0x32D9, 'M', 'コ'), + (0x32DA, 'M', 'サ'), + (0x32DB, 'M', 'シ'), + (0x32DC, 'M', 'ス'), + (0x32DD, 'M', 'セ'), + (0x32DE, 'M', 'ソ'), + (0x32DF, 'M', 'タ'), + (0x32E0, 'M', 'チ'), + (0x32E1, 'M', 'ツ'), + (0x32E2, 'M', 'テ'), + (0x32E3, 'M', 'ト'), + (0x32E4, 'M', 'ナ'), + (0x32E5, 'M', 'ニ'), + (0x32E6, 'M', 'ヌ'), + (0x32E7, 'M', 'ネ'), + (0x32E8, 'M', 'ノ'), + (0x32E9, 'M', 'ハ'), + (0x32EA, 'M', 'ヒ'), + (0x32EB, 'M', 'フ'), + (0x32EC, 'M', 'ヘ'), + (0x32ED, 'M', 'ホ'), + (0x32EE, 'M', 'マ'), + (0x32EF, 'M', 'ミ'), + (0x32F0, 'M', 'ム'), + (0x32F1, 'M', 'メ'), + (0x32F2, 'M', 'モ'), + (0x32F3, 'M', 'ヤ'), + (0x32F4, 'M', 'ユ'), + (0x32F5, 'M', 'ヨ'), + (0x32F6, 'M', 'ラ'), + (0x32F7, 'M', 'リ'), + (0x32F8, 'M', 'ル'), + (0x32F9, 'M', 'レ'), + (0x32FA, 'M', 'ロ'), + (0x32FB, 'M', 'ワ'), + (0x32FC, 'M', 'ヰ'), + (0x32FD, 'M', 'ヱ'), + (0x32FE, 'M', 'ヲ'), + (0x32FF, 'M', '令和'), + (0x3300, 'M', 'アパート'), + (0x3301, 'M', 'アルファ'), + (0x3302, 'M', 'アンペア'), + (0x3303, 'M', 'アール'), + (0x3304, 'M', 'イニング'), + (0x3305, 'M', 'インチ'), + (0x3306, 'M', 'ウォン'), + (0x3307, 'M', 'エスクード'), + (0x3308, 'M', 'エーカー'), + (0x3309, 'M', 'オンス'), + (0x330A, 'M', 'オーム'), + (0x330B, 'M', 'カイリ'), + (0x330C, 'M', 'カラット'), + (0x330D, 'M', 'カロリー'), + (0x330E, 'M', 'ガロン'), + (0x330F, 'M', 'ガンマ'), + (0x3310, 'M', 'ギガ'), + (0x3311, 'M', 'ギニー'), + (0x3312, 'M', 'キュリー'), + (0x3313, 'M', 'ギルダー'), + (0x3314, 'M', 'キロ'), + (0x3315, 'M', 'キログラム'), ] -def _seg_33(): +def _seg_33() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0x3312, 'M', u'キュリー'), - (0x3313, 'M', u'ギルダー'), - (0x3314, 'M', u'キロ'), - (0x3315, 'M', u'キログラム'), - (0x3316, 'M', u'キロメートル'), - (0x3317, 'M', u'キロワット'), - (0x3318, 'M', u'グラム'), - (0x3319, 'M', u'グラムトン'), - (0x331A, 'M', u'クルゼイロ'), - (0x331B, 'M', u'クローネ'), - (0x331C, 'M', u'ケース'), - (0x331D, 'M', u'コルナ'), - (0x331E, 'M', u'コーポ'), - (0x331F, 'M', u'サイクル'), - (0x3320, 'M', u'サンチーム'), - (0x3321, 'M', u'シリング'), - (0x3322, 'M', u'センチ'), - (0x3323, 'M', u'セント'), - (0x3324, 'M', u'ダース'), - (0x3325, 'M', u'デシ'), - (0x3326, 'M', u'ドル'), - (0x3327, 'M', u'トン'), - (0x3328, 'M', u'ナノ'), - (0x3329, 'M', u'ノット'), - (0x332A, 'M', u'ハイツ'), - (0x332B, 'M', u'パーセント'), - (0x332C, 'M', u'パーツ'), - (0x332D, 'M', u'バーレル'), - (0x332E, 'M', u'ピアストル'), - (0x332F, 'M', u'ピクル'), - (0x3330, 'M', u'ピコ'), - (0x3331, 'M', u'ビル'), - (0x3332, 'M', u'ファラッド'), - (0x3333, 'M', u'フィート'), - (0x3334, 'M', u'ブッシェル'), - (0x3335, 'M', u'フラン'), - (0x3336, 'M', u'ヘクタール'), - (0x3337, 'M', u'ペソ'), - (0x3338, 'M', u'ペニヒ'), - (0x3339, 'M', u'ヘルツ'), - (0x333A, 'M', u'ペンス'), - (0x333B, 'M', u'ページ'), - (0x333C, 'M', u'ベータ'), - (0x333D, 'M', u'ポイント'), - (0x333E, 'M', u'ボルト'), - (0x333F, 'M', u'ホン'), - (0x3340, 'M', u'ポンド'), - (0x3341, 'M', u'ホール'), - (0x3342, 'M', u'ホーン'), - (0x3343, 'M', u'マイクロ'), - (0x3344, 'M', u'マイル'), - (0x3345, 'M', u'マッハ'), - (0x3346, 'M', u'マルク'), - (0x3347, 'M', u'マンション'), - (0x3348, 'M', u'ミクロン'), - (0x3349, 'M', u'ミリ'), - (0x334A, 'M', u'ミリバール'), - (0x334B, 'M', u'メガ'), - (0x334C, 'M', u'メガトン'), - (0x334D, 'M', u'メートル'), - (0x334E, 'M', u'ヤード'), - (0x334F, 'M', u'ヤール'), - (0x3350, 'M', u'ユアン'), - (0x3351, 'M', u'リットル'), - (0x3352, 'M', u'リラ'), - (0x3353, 'M', u'ルピー'), - (0x3354, 'M', u'ルーブル'), - (0x3355, 'M', u'レム'), - (0x3356, 'M', u'レントゲン'), - (0x3357, 'M', u'ワット'), - (0x3358, 'M', u'0点'), - (0x3359, 'M', u'1点'), - (0x335A, 'M', u'2点'), - (0x335B, 'M', u'3点'), - (0x335C, 'M', u'4点'), - (0x335D, 'M', u'5点'), - (0x335E, 'M', u'6点'), - (0x335F, 'M', u'7点'), - (0x3360, 'M', u'8点'), - (0x3361, 'M', u'9点'), - (0x3362, 'M', u'10点'), - (0x3363, 'M', u'11点'), - (0x3364, 'M', u'12点'), - (0x3365, 'M', u'13点'), - (0x3366, 'M', u'14点'), - (0x3367, 'M', u'15点'), - (0x3368, 'M', u'16点'), - (0x3369, 'M', u'17点'), - (0x336A, 'M', u'18点'), - (0x336B, 'M', u'19点'), - (0x336C, 'M', u'20点'), - (0x336D, 'M', u'21点'), - (0x336E, 'M', u'22点'), - (0x336F, 'M', u'23点'), - (0x3370, 'M', u'24点'), - (0x3371, 'M', u'hpa'), - (0x3372, 'M', u'da'), - (0x3373, 'M', u'au'), - (0x3374, 'M', u'bar'), - (0x3375, 'M', u'ov'), + (0x3316, 'M', 'キロメートル'), + (0x3317, 'M', 'キロワット'), + (0x3318, 'M', 'グラム'), + (0x3319, 'M', 'グラムトン'), + (0x331A, 'M', 'クルゼイロ'), + (0x331B, 'M', 'クローネ'), + (0x331C, 'M', 'ケース'), + (0x331D, 'M', 'コルナ'), + (0x331E, 'M', 'コーポ'), + (0x331F, 'M', 'サイクル'), + (0x3320, 'M', 'サンチーム'), + (0x3321, 'M', 'シリング'), + (0x3322, 'M', 'センチ'), + (0x3323, 'M', 'セント'), + (0x3324, 'M', 'ダース'), + (0x3325, 'M', 'デシ'), + (0x3326, 'M', 'ドル'), + (0x3327, 'M', 'トン'), + (0x3328, 'M', 'ナノ'), + (0x3329, 'M', 'ノット'), + (0x332A, 'M', 'ハイツ'), + (0x332B, 'M', 'パーセント'), + (0x332C, 'M', 'パーツ'), + (0x332D, 'M', 'バーレル'), + (0x332E, 'M', 'ピアストル'), + (0x332F, 'M', 'ピクル'), + (0x3330, 'M', 'ピコ'), + (0x3331, 'M', 'ビル'), + (0x3332, 'M', 'ファラッド'), + (0x3333, 'M', 'フィート'), + (0x3334, 'M', 'ブッシェル'), + (0x3335, 'M', 'フラン'), + (0x3336, 'M', 'ヘクタール'), + (0x3337, 'M', 'ペソ'), + (0x3338, 'M', 'ペニヒ'), + (0x3339, 'M', 'ヘルツ'), + (0x333A, 'M', 'ペンス'), + (0x333B, 'M', 'ページ'), + (0x333C, 'M', 'ベータ'), + (0x333D, 'M', 'ポイント'), + (0x333E, 'M', 'ボルト'), + (0x333F, 'M', 'ホン'), + (0x3340, 'M', 'ポンド'), + (0x3341, 'M', 'ホール'), + (0x3342, 'M', 'ホーン'), + (0x3343, 'M', 'マイクロ'), + (0x3344, 'M', 'マイル'), + (0x3345, 'M', 'マッハ'), + (0x3346, 'M', 'マルク'), + (0x3347, 'M', 'マンション'), + (0x3348, 'M', 'ミクロン'), + (0x3349, 'M', 'ミリ'), + (0x334A, 'M', 'ミリバール'), + (0x334B, 'M', 'メガ'), + (0x334C, 'M', 'メガトン'), + (0x334D, 'M', 'メートル'), + (0x334E, 'M', 'ヤード'), + (0x334F, 'M', 'ヤール'), + (0x3350, 'M', 'ユアン'), + (0x3351, 'M', 'リットル'), + (0x3352, 'M', 'リラ'), + (0x3353, 'M', 'ルピー'), + (0x3354, 'M', 'ルーブル'), + (0x3355, 'M', 'レム'), + (0x3356, 'M', 'レントゲン'), + (0x3357, 'M', 'ワット'), + (0x3358, 'M', '0点'), + (0x3359, 'M', '1点'), + (0x335A, 'M', '2点'), + (0x335B, 'M', '3点'), + (0x335C, 'M', '4点'), + (0x335D, 'M', '5点'), + (0x335E, 'M', '6点'), + (0x335F, 'M', '7点'), + (0x3360, 'M', '8点'), + (0x3361, 'M', '9点'), + (0x3362, 'M', '10点'), + (0x3363, 'M', '11点'), + (0x3364, 'M', '12点'), + (0x3365, 'M', '13点'), + (0x3366, 'M', '14点'), + (0x3367, 'M', '15点'), + (0x3368, 'M', '16点'), + (0x3369, 'M', '17点'), + (0x336A, 'M', '18点'), + (0x336B, 'M', '19点'), + (0x336C, 'M', '20点'), + (0x336D, 'M', '21点'), + (0x336E, 'M', '22点'), + (0x336F, 'M', '23点'), + (0x3370, 'M', '24点'), + (0x3371, 'M', 'hpa'), + (0x3372, 'M', 'da'), + (0x3373, 'M', 'au'), + (0x3374, 'M', 'bar'), + (0x3375, 'M', 'ov'), + (0x3376, 'M', 'pc'), + (0x3377, 'M', 'dm'), + (0x3378, 'M', 'dm2'), + (0x3379, 'M', 'dm3'), ] -def _seg_34(): +def _seg_34() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0x3376, 'M', u'pc'), - (0x3377, 'M', u'dm'), - (0x3378, 'M', u'dm2'), - (0x3379, 'M', u'dm3'), - (0x337A, 'M', u'iu'), - (0x337B, 'M', u'平成'), - (0x337C, 'M', u'昭和'), - (0x337D, 'M', u'大正'), - (0x337E, 'M', u'明治'), - (0x337F, 'M', u'株式会社'), - (0x3380, 'M', u'pa'), - (0x3381, 'M', u'na'), - (0x3382, 'M', u'μa'), - (0x3383, 'M', u'ma'), - (0x3384, 'M', u'ka'), - (0x3385, 'M', u'kb'), - (0x3386, 'M', u'mb'), - (0x3387, 'M', u'gb'), - (0x3388, 'M', u'cal'), - (0x3389, 'M', u'kcal'), - (0x338A, 'M', u'pf'), - (0x338B, 'M', u'nf'), - (0x338C, 'M', u'μf'), - (0x338D, 'M', u'μg'), - (0x338E, 'M', u'mg'), - (0x338F, 'M', u'kg'), - (0x3390, 'M', u'hz'), - (0x3391, 'M', u'khz'), - (0x3392, 'M', u'mhz'), - (0x3393, 'M', u'ghz'), - (0x3394, 'M', u'thz'), - (0x3395, 'M', u'μl'), - (0x3396, 'M', u'ml'), - (0x3397, 'M', u'dl'), - (0x3398, 'M', u'kl'), - (0x3399, 'M', u'fm'), - (0x339A, 'M', u'nm'), - (0x339B, 'M', u'μm'), - (0x339C, 'M', u'mm'), - (0x339D, 'M', u'cm'), - (0x339E, 'M', u'km'), - (0x339F, 'M', u'mm2'), - (0x33A0, 'M', u'cm2'), - (0x33A1, 'M', u'm2'), - (0x33A2, 'M', u'km2'), - (0x33A3, 'M', u'mm3'), - (0x33A4, 'M', u'cm3'), - (0x33A5, 'M', u'm3'), - (0x33A6, 'M', u'km3'), - (0x33A7, 'M', u'm∕s'), - (0x33A8, 'M', u'm∕s2'), - (0x33A9, 'M', u'pa'), - (0x33AA, 'M', u'kpa'), - (0x33AB, 'M', u'mpa'), - (0x33AC, 'M', u'gpa'), - (0x33AD, 'M', u'rad'), - (0x33AE, 'M', u'rad∕s'), - (0x33AF, 'M', u'rad∕s2'), - (0x33B0, 'M', u'ps'), - (0x33B1, 'M', u'ns'), - (0x33B2, 'M', u'μs'), - (0x33B3, 'M', u'ms'), - (0x33B4, 'M', u'pv'), - (0x33B5, 'M', u'nv'), - (0x33B6, 'M', u'μv'), - (0x33B7, 'M', u'mv'), - (0x33B8, 'M', u'kv'), - (0x33B9, 'M', u'mv'), - (0x33BA, 'M', u'pw'), - (0x33BB, 'M', u'nw'), - (0x33BC, 'M', u'μw'), - (0x33BD, 'M', u'mw'), - (0x33BE, 'M', u'kw'), - (0x33BF, 'M', u'mw'), - (0x33C0, 'M', u'kω'), - (0x33C1, 'M', u'mω'), + (0x337A, 'M', 'iu'), + (0x337B, 'M', '平成'), + (0x337C, 'M', '昭和'), + (0x337D, 'M', '大正'), + (0x337E, 'M', '明治'), + (0x337F, 'M', '株式会社'), + (0x3380, 'M', 'pa'), + (0x3381, 'M', 'na'), + (0x3382, 'M', 'μa'), + (0x3383, 'M', 'ma'), + (0x3384, 'M', 'ka'), + (0x3385, 'M', 'kb'), + (0x3386, 'M', 'mb'), + (0x3387, 'M', 'gb'), + (0x3388, 'M', 'cal'), + (0x3389, 'M', 'kcal'), + (0x338A, 'M', 'pf'), + (0x338B, 'M', 'nf'), + (0x338C, 'M', 'μf'), + (0x338D, 'M', 'μg'), + (0x338E, 'M', 'mg'), + (0x338F, 'M', 'kg'), + (0x3390, 'M', 'hz'), + (0x3391, 'M', 'khz'), + (0x3392, 'M', 'mhz'), + (0x3393, 'M', 'ghz'), + (0x3394, 'M', 'thz'), + (0x3395, 'M', 'μl'), + (0x3396, 'M', 'ml'), + (0x3397, 'M', 'dl'), + (0x3398, 'M', 'kl'), + (0x3399, 'M', 'fm'), + (0x339A, 'M', 'nm'), + (0x339B, 'M', 'μm'), + (0x339C, 'M', 'mm'), + (0x339D, 'M', 'cm'), + (0x339E, 'M', 'km'), + (0x339F, 'M', 'mm2'), + (0x33A0, 'M', 'cm2'), + (0x33A1, 'M', 'm2'), + (0x33A2, 'M', 'km2'), + (0x33A3, 'M', 'mm3'), + (0x33A4, 'M', 'cm3'), + (0x33A5, 'M', 'm3'), + (0x33A6, 'M', 'km3'), + (0x33A7, 'M', 'm∕s'), + (0x33A8, 'M', 'm∕s2'), + (0x33A9, 'M', 'pa'), + (0x33AA, 'M', 'kpa'), + (0x33AB, 'M', 'mpa'), + (0x33AC, 'M', 'gpa'), + (0x33AD, 'M', 'rad'), + (0x33AE, 'M', 'rad∕s'), + (0x33AF, 'M', 'rad∕s2'), + (0x33B0, 'M', 'ps'), + (0x33B1, 'M', 'ns'), + (0x33B2, 'M', 'μs'), + (0x33B3, 'M', 'ms'), + (0x33B4, 'M', 'pv'), + (0x33B5, 'M', 'nv'), + (0x33B6, 'M', 'μv'), + (0x33B7, 'M', 'mv'), + (0x33B8, 'M', 'kv'), + (0x33B9, 'M', 'mv'), + (0x33BA, 'M', 'pw'), + (0x33BB, 'M', 'nw'), + (0x33BC, 'M', 'μw'), + (0x33BD, 'M', 'mw'), + (0x33BE, 'M', 'kw'), + (0x33BF, 'M', 'mw'), + (0x33C0, 'M', 'kω'), + (0x33C1, 'M', 'mω'), (0x33C2, 'X'), - (0x33C3, 'M', u'bq'), - (0x33C4, 'M', u'cc'), - (0x33C5, 'M', u'cd'), - (0x33C6, 'M', u'c∕kg'), + (0x33C3, 'M', 'bq'), + (0x33C4, 'M', 'cc'), + (0x33C5, 'M', 'cd'), + (0x33C6, 'M', 'c∕kg'), (0x33C7, 'X'), - (0x33C8, 'M', u'db'), - (0x33C9, 'M', u'gy'), - (0x33CA, 'M', u'ha'), - (0x33CB, 'M', u'hp'), - (0x33CC, 'M', u'in'), - (0x33CD, 'M', u'kk'), - (0x33CE, 'M', u'km'), - (0x33CF, 'M', u'kt'), - (0x33D0, 'M', u'lm'), - (0x33D1, 'M', u'ln'), - (0x33D2, 'M', u'log'), - (0x33D3, 'M', u'lx'), - (0x33D4, 'M', u'mb'), - (0x33D5, 'M', u'mil'), - (0x33D6, 'M', u'mol'), - (0x33D7, 'M', u'ph'), + (0x33C8, 'M', 'db'), + (0x33C9, 'M', 'gy'), + (0x33CA, 'M', 'ha'), + (0x33CB, 'M', 'hp'), + (0x33CC, 'M', 'in'), + (0x33CD, 'M', 'kk'), + (0x33CE, 'M', 'km'), + (0x33CF, 'M', 'kt'), + (0x33D0, 'M', 'lm'), + (0x33D1, 'M', 'ln'), + (0x33D2, 'M', 'log'), + (0x33D3, 'M', 'lx'), + (0x33D4, 'M', 'mb'), + (0x33D5, 'M', 'mil'), + (0x33D6, 'M', 'mol'), + (0x33D7, 'M', 'ph'), (0x33D8, 'X'), - (0x33D9, 'M', u'ppm'), + (0x33D9, 'M', 'ppm'), + (0x33DA, 'M', 'pr'), + (0x33DB, 'M', 'sr'), + (0x33DC, 'M', 'sv'), + (0x33DD, 'M', 'wb'), ] -def _seg_35(): +def _seg_35() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0x33DA, 'M', u'pr'), - (0x33DB, 'M', u'sr'), - (0x33DC, 'M', u'sv'), - (0x33DD, 'M', u'wb'), - (0x33DE, 'M', u'v∕m'), - (0x33DF, 'M', u'a∕m'), - (0x33E0, 'M', u'1日'), - (0x33E1, 'M', u'2日'), - (0x33E2, 'M', u'3日'), - (0x33E3, 'M', u'4日'), - (0x33E4, 'M', u'5日'), - (0x33E5, 'M', u'6日'), - (0x33E6, 'M', u'7日'), - (0x33E7, 'M', u'8日'), - (0x33E8, 'M', u'9日'), - (0x33E9, 'M', u'10日'), - (0x33EA, 'M', u'11日'), - (0x33EB, 'M', u'12日'), - (0x33EC, 'M', u'13日'), - (0x33ED, 'M', u'14日'), - (0x33EE, 'M', u'15日'), - (0x33EF, 'M', u'16日'), - (0x33F0, 'M', u'17日'), - (0x33F1, 'M', u'18日'), - (0x33F2, 'M', u'19日'), - (0x33F3, 'M', u'20日'), - (0x33F4, 'M', u'21日'), - (0x33F5, 'M', u'22日'), - (0x33F6, 'M', u'23日'), - (0x33F7, 'M', u'24日'), - (0x33F8, 'M', u'25日'), - (0x33F9, 'M', u'26日'), - (0x33FA, 'M', u'27日'), - (0x33FB, 'M', u'28日'), - (0x33FC, 'M', u'29日'), - (0x33FD, 'M', u'30日'), - (0x33FE, 'M', u'31日'), - (0x33FF, 'M', u'gal'), + (0x33DE, 'M', 'v∕m'), + (0x33DF, 'M', 'a∕m'), + (0x33E0, 'M', '1日'), + (0x33E1, 'M', '2日'), + (0x33E2, 'M', '3日'), + (0x33E3, 'M', '4日'), + (0x33E4, 'M', '5日'), + (0x33E5, 'M', '6日'), + (0x33E6, 'M', '7日'), + (0x33E7, 'M', '8日'), + (0x33E8, 'M', '9日'), + (0x33E9, 'M', '10日'), + (0x33EA, 'M', '11日'), + (0x33EB, 'M', '12日'), + (0x33EC, 'M', '13日'), + (0x33ED, 'M', '14日'), + (0x33EE, 'M', '15日'), + (0x33EF, 'M', '16日'), + (0x33F0, 'M', '17日'), + (0x33F1, 'M', '18日'), + (0x33F2, 'M', '19日'), + (0x33F3, 'M', '20日'), + (0x33F4, 'M', '21日'), + (0x33F5, 'M', '22日'), + (0x33F6, 'M', '23日'), + (0x33F7, 'M', '24日'), + (0x33F8, 'M', '25日'), + (0x33F9, 'M', '26日'), + (0x33FA, 'M', '27日'), + (0x33FB, 'M', '28日'), + (0x33FC, 'M', '29日'), + (0x33FD, 'M', '30日'), + (0x33FE, 'M', '31日'), + (0x33FF, 'M', 'gal'), (0x3400, 'V'), - (0x9FFD, 'X'), - (0xA000, 'V'), (0xA48D, 'X'), (0xA490, 'V'), (0xA4C7, 'X'), (0xA4D0, 'V'), (0xA62C, 'X'), - (0xA640, 'M', u'ꙁ'), + (0xA640, 'M', 'ꙁ'), (0xA641, 'V'), - (0xA642, 'M', u'ꙃ'), + (0xA642, 'M', 'ꙃ'), (0xA643, 'V'), - (0xA644, 'M', u'ꙅ'), + (0xA644, 'M', 'ꙅ'), (0xA645, 'V'), - (0xA646, 'M', u'ꙇ'), + (0xA646, 'M', 'ꙇ'), (0xA647, 'V'), - (0xA648, 'M', u'ꙉ'), + (0xA648, 'M', 'ꙉ'), (0xA649, 'V'), - (0xA64A, 'M', u'ꙋ'), + (0xA64A, 'M', 'ꙋ'), (0xA64B, 'V'), - (0xA64C, 'M', u'ꙍ'), + (0xA64C, 'M', 'ꙍ'), (0xA64D, 'V'), - (0xA64E, 'M', u'ꙏ'), + (0xA64E, 'M', 'ꙏ'), (0xA64F, 'V'), - (0xA650, 'M', u'ꙑ'), + (0xA650, 'M', 'ꙑ'), (0xA651, 'V'), - (0xA652, 'M', u'ꙓ'), + (0xA652, 'M', 'ꙓ'), (0xA653, 'V'), - (0xA654, 'M', u'ꙕ'), + (0xA654, 'M', 'ꙕ'), (0xA655, 'V'), - (0xA656, 'M', u'ꙗ'), + (0xA656, 'M', 'ꙗ'), (0xA657, 'V'), - (0xA658, 'M', u'ꙙ'), + (0xA658, 'M', 'ꙙ'), (0xA659, 'V'), - (0xA65A, 'M', u'ꙛ'), + (0xA65A, 'M', 'ꙛ'), (0xA65B, 'V'), - (0xA65C, 'M', u'ꙝ'), + (0xA65C, 'M', 'ꙝ'), (0xA65D, 'V'), - (0xA65E, 'M', u'ꙟ'), + (0xA65E, 'M', 'ꙟ'), (0xA65F, 'V'), - (0xA660, 'M', u'ꙡ'), + (0xA660, 'M', 'ꙡ'), (0xA661, 'V'), - (0xA662, 'M', u'ꙣ'), + (0xA662, 'M', 'ꙣ'), (0xA663, 'V'), - (0xA664, 'M', u'ꙥ'), + (0xA664, 'M', 'ꙥ'), (0xA665, 'V'), - (0xA666, 'M', u'ꙧ'), + (0xA666, 'M', 'ꙧ'), (0xA667, 'V'), - (0xA668, 'M', u'ꙩ'), + (0xA668, 'M', 'ꙩ'), (0xA669, 'V'), - (0xA66A, 'M', u'ꙫ'), + (0xA66A, 'M', 'ꙫ'), (0xA66B, 'V'), - (0xA66C, 'M', u'ꙭ'), + (0xA66C, 'M', 'ꙭ'), (0xA66D, 'V'), - (0xA680, 'M', u'ꚁ'), + (0xA680, 'M', 'ꚁ'), (0xA681, 'V'), - (0xA682, 'M', u'ꚃ'), + (0xA682, 'M', 'ꚃ'), (0xA683, 'V'), - (0xA684, 'M', u'ꚅ'), + (0xA684, 'M', 'ꚅ'), (0xA685, 'V'), - (0xA686, 'M', u'ꚇ'), + (0xA686, 'M', 'ꚇ'), (0xA687, 'V'), + (0xA688, 'M', 'ꚉ'), + (0xA689, 'V'), + (0xA68A, 'M', 'ꚋ'), + (0xA68B, 'V'), + (0xA68C, 'M', 'ꚍ'), + (0xA68D, 'V'), ] -def _seg_36(): +def _seg_36() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0xA688, 'M', u'ꚉ'), - (0xA689, 'V'), - (0xA68A, 'M', u'ꚋ'), - (0xA68B, 'V'), - (0xA68C, 'M', u'ꚍ'), - (0xA68D, 'V'), - (0xA68E, 'M', u'ꚏ'), + (0xA68E, 'M', 'ꚏ'), (0xA68F, 'V'), - (0xA690, 'M', u'ꚑ'), + (0xA690, 'M', 'ꚑ'), (0xA691, 'V'), - (0xA692, 'M', u'ꚓ'), + (0xA692, 'M', 'ꚓ'), (0xA693, 'V'), - (0xA694, 'M', u'ꚕ'), + (0xA694, 'M', 'ꚕ'), (0xA695, 'V'), - (0xA696, 'M', u'ꚗ'), + (0xA696, 'M', 'ꚗ'), (0xA697, 'V'), - (0xA698, 'M', u'ꚙ'), + (0xA698, 'M', 'ꚙ'), (0xA699, 'V'), - (0xA69A, 'M', u'ꚛ'), + (0xA69A, 'M', 'ꚛ'), (0xA69B, 'V'), - (0xA69C, 'M', u'ъ'), - (0xA69D, 'M', u'ь'), + (0xA69C, 'M', 'ъ'), + (0xA69D, 'M', 'ь'), (0xA69E, 'V'), (0xA6F8, 'X'), (0xA700, 'V'), - (0xA722, 'M', u'ꜣ'), + (0xA722, 'M', 'ꜣ'), (0xA723, 'V'), - (0xA724, 'M', u'ꜥ'), + (0xA724, 'M', 'ꜥ'), (0xA725, 'V'), - (0xA726, 'M', u'ꜧ'), + (0xA726, 'M', 'ꜧ'), (0xA727, 'V'), - (0xA728, 'M', u'ꜩ'), + (0xA728, 'M', 'ꜩ'), (0xA729, 'V'), - (0xA72A, 'M', u'ꜫ'), + (0xA72A, 'M', 'ꜫ'), (0xA72B, 'V'), - (0xA72C, 'M', u'ꜭ'), + (0xA72C, 'M', 'ꜭ'), (0xA72D, 'V'), - (0xA72E, 'M', u'ꜯ'), + (0xA72E, 'M', 'ꜯ'), (0xA72F, 'V'), - (0xA732, 'M', u'ꜳ'), + (0xA732, 'M', 'ꜳ'), (0xA733, 'V'), - (0xA734, 'M', u'ꜵ'), + (0xA734, 'M', 'ꜵ'), (0xA735, 'V'), - (0xA736, 'M', u'ꜷ'), + (0xA736, 'M', 'ꜷ'), (0xA737, 'V'), - (0xA738, 'M', u'ꜹ'), + (0xA738, 'M', 'ꜹ'), (0xA739, 'V'), - (0xA73A, 'M', u'ꜻ'), + (0xA73A, 'M', 'ꜻ'), (0xA73B, 'V'), - (0xA73C, 'M', u'ꜽ'), + (0xA73C, 'M', 'ꜽ'), (0xA73D, 'V'), - (0xA73E, 'M', u'ꜿ'), + (0xA73E, 'M', 'ꜿ'), (0xA73F, 'V'), - (0xA740, 'M', u'ꝁ'), + (0xA740, 'M', 'ꝁ'), (0xA741, 'V'), - (0xA742, 'M', u'ꝃ'), + (0xA742, 'M', 'ꝃ'), (0xA743, 'V'), - (0xA744, 'M', u'ꝅ'), + (0xA744, 'M', 'ꝅ'), (0xA745, 'V'), - (0xA746, 'M', u'ꝇ'), + (0xA746, 'M', 'ꝇ'), (0xA747, 'V'), - (0xA748, 'M', u'ꝉ'), + (0xA748, 'M', 'ꝉ'), (0xA749, 'V'), - (0xA74A, 'M', u'ꝋ'), + (0xA74A, 'M', 'ꝋ'), (0xA74B, 'V'), - (0xA74C, 'M', u'ꝍ'), + (0xA74C, 'M', 'ꝍ'), (0xA74D, 'V'), - (0xA74E, 'M', u'ꝏ'), + (0xA74E, 'M', 'ꝏ'), (0xA74F, 'V'), - (0xA750, 'M', u'ꝑ'), + (0xA750, 'M', 'ꝑ'), (0xA751, 'V'), - (0xA752, 'M', u'ꝓ'), + (0xA752, 'M', 'ꝓ'), (0xA753, 'V'), - (0xA754, 'M', u'ꝕ'), + (0xA754, 'M', 'ꝕ'), (0xA755, 'V'), - (0xA756, 'M', u'ꝗ'), + (0xA756, 'M', 'ꝗ'), (0xA757, 'V'), - (0xA758, 'M', u'ꝙ'), + (0xA758, 'M', 'ꝙ'), (0xA759, 'V'), - (0xA75A, 'M', u'ꝛ'), + (0xA75A, 'M', 'ꝛ'), (0xA75B, 'V'), - (0xA75C, 'M', u'ꝝ'), + (0xA75C, 'M', 'ꝝ'), (0xA75D, 'V'), - (0xA75E, 'M', u'ꝟ'), + (0xA75E, 'M', 'ꝟ'), (0xA75F, 'V'), - (0xA760, 'M', u'ꝡ'), + (0xA760, 'M', 'ꝡ'), (0xA761, 'V'), - (0xA762, 'M', u'ꝣ'), + (0xA762, 'M', 'ꝣ'), (0xA763, 'V'), - (0xA764, 'M', u'ꝥ'), + (0xA764, 'M', 'ꝥ'), (0xA765, 'V'), - (0xA766, 'M', u'ꝧ'), + (0xA766, 'M', 'ꝧ'), (0xA767, 'V'), - (0xA768, 'M', u'ꝩ'), + (0xA768, 'M', 'ꝩ'), (0xA769, 'V'), - (0xA76A, 'M', u'ꝫ'), + (0xA76A, 'M', 'ꝫ'), (0xA76B, 'V'), - (0xA76C, 'M', u'ꝭ'), + (0xA76C, 'M', 'ꝭ'), (0xA76D, 'V'), - (0xA76E, 'M', u'ꝯ'), + (0xA76E, 'M', 'ꝯ'), + (0xA76F, 'V'), + (0xA770, 'M', 'ꝯ'), + (0xA771, 'V'), + (0xA779, 'M', 'ꝺ'), + (0xA77A, 'V'), + (0xA77B, 'M', 'ꝼ'), ] -def _seg_37(): +def _seg_37() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0xA76F, 'V'), - (0xA770, 'M', u'ꝯ'), - (0xA771, 'V'), - (0xA779, 'M', u'ꝺ'), - (0xA77A, 'V'), - (0xA77B, 'M', u'ꝼ'), (0xA77C, 'V'), - (0xA77D, 'M', u'ᵹ'), - (0xA77E, 'M', u'ꝿ'), + (0xA77D, 'M', 'ᵹ'), + (0xA77E, 'M', 'ꝿ'), (0xA77F, 'V'), - (0xA780, 'M', u'ꞁ'), + (0xA780, 'M', 'ꞁ'), (0xA781, 'V'), - (0xA782, 'M', u'ꞃ'), + (0xA782, 'M', 'ꞃ'), (0xA783, 'V'), - (0xA784, 'M', u'ꞅ'), + (0xA784, 'M', 'ꞅ'), (0xA785, 'V'), - (0xA786, 'M', u'ꞇ'), + (0xA786, 'M', 'ꞇ'), (0xA787, 'V'), - (0xA78B, 'M', u'ꞌ'), + (0xA78B, 'M', 'ꞌ'), (0xA78C, 'V'), - (0xA78D, 'M', u'ɥ'), + (0xA78D, 'M', 'ɥ'), (0xA78E, 'V'), - (0xA790, 'M', u'ꞑ'), + (0xA790, 'M', 'ꞑ'), (0xA791, 'V'), - (0xA792, 'M', u'ꞓ'), + (0xA792, 'M', 'ꞓ'), (0xA793, 'V'), - (0xA796, 'M', u'ꞗ'), + (0xA796, 'M', 'ꞗ'), (0xA797, 'V'), - (0xA798, 'M', u'ꞙ'), + (0xA798, 'M', 'ꞙ'), (0xA799, 'V'), - (0xA79A, 'M', u'ꞛ'), + (0xA79A, 'M', 'ꞛ'), (0xA79B, 'V'), - (0xA79C, 'M', u'ꞝ'), + (0xA79C, 'M', 'ꞝ'), (0xA79D, 'V'), - (0xA79E, 'M', u'ꞟ'), + (0xA79E, 'M', 'ꞟ'), (0xA79F, 'V'), - (0xA7A0, 'M', u'ꞡ'), + (0xA7A0, 'M', 'ꞡ'), (0xA7A1, 'V'), - (0xA7A2, 'M', u'ꞣ'), + (0xA7A2, 'M', 'ꞣ'), (0xA7A3, 'V'), - (0xA7A4, 'M', u'ꞥ'), + (0xA7A4, 'M', 'ꞥ'), (0xA7A5, 'V'), - (0xA7A6, 'M', u'ꞧ'), + (0xA7A6, 'M', 'ꞧ'), (0xA7A7, 'V'), - (0xA7A8, 'M', u'ꞩ'), + (0xA7A8, 'M', 'ꞩ'), (0xA7A9, 'V'), - (0xA7AA, 'M', u'ɦ'), - (0xA7AB, 'M', u'ɜ'), - (0xA7AC, 'M', u'ɡ'), - (0xA7AD, 'M', u'ɬ'), - (0xA7AE, 'M', u'ɪ'), + (0xA7AA, 'M', 'ɦ'), + (0xA7AB, 'M', 'ɜ'), + (0xA7AC, 'M', 'ɡ'), + (0xA7AD, 'M', 'ɬ'), + (0xA7AE, 'M', 'ɪ'), (0xA7AF, 'V'), - (0xA7B0, 'M', u'ʞ'), - (0xA7B1, 'M', u'ʇ'), - (0xA7B2, 'M', u'ʝ'), - (0xA7B3, 'M', u'ꭓ'), - (0xA7B4, 'M', u'ꞵ'), + (0xA7B0, 'M', 'ʞ'), + (0xA7B1, 'M', 'ʇ'), + (0xA7B2, 'M', 'ʝ'), + (0xA7B3, 'M', 'ꭓ'), + (0xA7B4, 'M', 'ꞵ'), (0xA7B5, 'V'), - (0xA7B6, 'M', u'ꞷ'), + (0xA7B6, 'M', 'ꞷ'), (0xA7B7, 'V'), - (0xA7B8, 'M', u'ꞹ'), + (0xA7B8, 'M', 'ꞹ'), (0xA7B9, 'V'), - (0xA7BA, 'M', u'ꞻ'), + (0xA7BA, 'M', 'ꞻ'), (0xA7BB, 'V'), - (0xA7BC, 'M', u'ꞽ'), + (0xA7BC, 'M', 'ꞽ'), (0xA7BD, 'V'), - (0xA7BE, 'M', u'ꞿ'), + (0xA7BE, 'M', 'ꞿ'), (0xA7BF, 'V'), - (0xA7C0, 'X'), - (0xA7C2, 'M', u'ꟃ'), + (0xA7C0, 'M', 'ꟁ'), + (0xA7C1, 'V'), + (0xA7C2, 'M', 'ꟃ'), (0xA7C3, 'V'), - (0xA7C4, 'M', u'ꞔ'), - (0xA7C5, 'M', u'ʂ'), - (0xA7C6, 'M', u'ᶎ'), - (0xA7C7, 'M', u'ꟈ'), + (0xA7C4, 'M', 'ꞔ'), + (0xA7C5, 'M', 'ʂ'), + (0xA7C6, 'M', 'ᶎ'), + (0xA7C7, 'M', 'ꟈ'), (0xA7C8, 'V'), - (0xA7C9, 'M', u'ꟊ'), + (0xA7C9, 'M', 'ꟊ'), (0xA7CA, 'V'), (0xA7CB, 'X'), - (0xA7F5, 'M', u'ꟶ'), + (0xA7D0, 'M', 'ꟑ'), + (0xA7D1, 'V'), + (0xA7D2, 'X'), + (0xA7D3, 'V'), + (0xA7D4, 'X'), + (0xA7D5, 'V'), + (0xA7D6, 'M', 'ꟗ'), + (0xA7D7, 'V'), + (0xA7D8, 'M', 'ꟙ'), + (0xA7D9, 'V'), + (0xA7DA, 'X'), + (0xA7F2, 'M', 'c'), + (0xA7F3, 'M', 'f'), + (0xA7F4, 'M', 'q'), + (0xA7F5, 'M', 'ꟶ'), (0xA7F6, 'V'), - (0xA7F8, 'M', u'ħ'), - (0xA7F9, 'M', u'œ'), + (0xA7F8, 'M', 'ħ'), + (0xA7F9, 'M', 'œ'), (0xA7FA, 'V'), (0xA82D, 'X'), (0xA830, 'V'), @@ -3946,6 +3958,10 @@ def _seg_37(): (0xA878, 'X'), (0xA880, 'V'), (0xA8C6, 'X'), + ] + +def _seg_38() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ (0xA8CE, 'V'), (0xA8DA, 'X'), (0xA8E0, 'V'), @@ -3955,10 +3971,6 @@ def _seg_37(): (0xA980, 'V'), (0xA9CE, 'X'), (0xA9CF, 'V'), - ] - -def _seg_38(): - return [ (0xA9DA, 'X'), (0xA9DE, 'V'), (0xA9FF, 'X'), @@ -3983,98 +3995,98 @@ def _seg_38(): (0xAB28, 'V'), (0xAB2F, 'X'), (0xAB30, 'V'), - (0xAB5C, 'M', u'ꜧ'), - (0xAB5D, 'M', u'ꬷ'), - (0xAB5E, 'M', u'ɫ'), - (0xAB5F, 'M', u'ꭒ'), + (0xAB5C, 'M', 'ꜧ'), + (0xAB5D, 'M', 'ꬷ'), + (0xAB5E, 'M', 'ɫ'), + (0xAB5F, 'M', 'ꭒ'), (0xAB60, 'V'), - (0xAB69, 'M', u'ʍ'), + (0xAB69, 'M', 'ʍ'), (0xAB6A, 'V'), (0xAB6C, 'X'), - (0xAB70, 'M', u'Ꭰ'), - (0xAB71, 'M', u'Ꭱ'), - (0xAB72, 'M', u'Ꭲ'), - (0xAB73, 'M', u'Ꭳ'), - (0xAB74, 'M', u'Ꭴ'), - (0xAB75, 'M', u'Ꭵ'), - (0xAB76, 'M', u'Ꭶ'), - (0xAB77, 'M', u'Ꭷ'), - (0xAB78, 'M', u'Ꭸ'), - (0xAB79, 'M', u'Ꭹ'), - (0xAB7A, 'M', u'Ꭺ'), - (0xAB7B, 'M', u'Ꭻ'), - (0xAB7C, 'M', u'Ꭼ'), - (0xAB7D, 'M', u'Ꭽ'), - (0xAB7E, 'M', u'Ꭾ'), - (0xAB7F, 'M', u'Ꭿ'), - (0xAB80, 'M', u'Ꮀ'), - (0xAB81, 'M', u'Ꮁ'), - (0xAB82, 'M', u'Ꮂ'), - (0xAB83, 'M', u'Ꮃ'), - (0xAB84, 'M', u'Ꮄ'), - (0xAB85, 'M', u'Ꮅ'), - (0xAB86, 'M', u'Ꮆ'), - (0xAB87, 'M', u'Ꮇ'), - (0xAB88, 'M', u'Ꮈ'), - (0xAB89, 'M', u'Ꮉ'), - (0xAB8A, 'M', u'Ꮊ'), - (0xAB8B, 'M', u'Ꮋ'), - (0xAB8C, 'M', u'Ꮌ'), - (0xAB8D, 'M', u'Ꮍ'), - (0xAB8E, 'M', u'Ꮎ'), - (0xAB8F, 'M', u'Ꮏ'), - (0xAB90, 'M', u'Ꮐ'), - (0xAB91, 'M', u'Ꮑ'), - (0xAB92, 'M', u'Ꮒ'), - (0xAB93, 'M', u'Ꮓ'), - (0xAB94, 'M', u'Ꮔ'), - (0xAB95, 'M', u'Ꮕ'), - (0xAB96, 'M', u'Ꮖ'), - (0xAB97, 'M', u'Ꮗ'), - (0xAB98, 'M', u'Ꮘ'), - (0xAB99, 'M', u'Ꮙ'), - (0xAB9A, 'M', u'Ꮚ'), - (0xAB9B, 'M', u'Ꮛ'), - (0xAB9C, 'M', u'Ꮜ'), - (0xAB9D, 'M', u'Ꮝ'), - (0xAB9E, 'M', u'Ꮞ'), - (0xAB9F, 'M', u'Ꮟ'), - (0xABA0, 'M', u'Ꮠ'), - (0xABA1, 'M', u'Ꮡ'), - (0xABA2, 'M', u'Ꮢ'), - (0xABA3, 'M', u'Ꮣ'), - (0xABA4, 'M', u'Ꮤ'), - (0xABA5, 'M', u'Ꮥ'), - (0xABA6, 'M', u'Ꮦ'), - (0xABA7, 'M', u'Ꮧ'), - (0xABA8, 'M', u'Ꮨ'), - (0xABA9, 'M', u'Ꮩ'), - (0xABAA, 'M', u'Ꮪ'), - (0xABAB, 'M', u'Ꮫ'), - (0xABAC, 'M', u'Ꮬ'), - (0xABAD, 'M', u'Ꮭ'), - (0xABAE, 'M', u'Ꮮ'), - (0xABAF, 'M', u'Ꮯ'), - (0xABB0, 'M', u'Ꮰ'), - (0xABB1, 'M', u'Ꮱ'), - (0xABB2, 'M', u'Ꮲ'), - (0xABB3, 'M', u'Ꮳ'), + (0xAB70, 'M', 'Ꭰ'), + (0xAB71, 'M', 'Ꭱ'), + (0xAB72, 'M', 'Ꭲ'), + (0xAB73, 'M', 'Ꭳ'), + (0xAB74, 'M', 'Ꭴ'), + (0xAB75, 'M', 'Ꭵ'), + (0xAB76, 'M', 'Ꭶ'), + (0xAB77, 'M', 'Ꭷ'), + (0xAB78, 'M', 'Ꭸ'), + (0xAB79, 'M', 'Ꭹ'), + (0xAB7A, 'M', 'Ꭺ'), + (0xAB7B, 'M', 'Ꭻ'), + (0xAB7C, 'M', 'Ꭼ'), + (0xAB7D, 'M', 'Ꭽ'), + (0xAB7E, 'M', 'Ꭾ'), + (0xAB7F, 'M', 'Ꭿ'), + (0xAB80, 'M', 'Ꮀ'), + (0xAB81, 'M', 'Ꮁ'), + (0xAB82, 'M', 'Ꮂ'), + (0xAB83, 'M', 'Ꮃ'), + (0xAB84, 'M', 'Ꮄ'), + (0xAB85, 'M', 'Ꮅ'), + (0xAB86, 'M', 'Ꮆ'), + (0xAB87, 'M', 'Ꮇ'), + (0xAB88, 'M', 'Ꮈ'), + (0xAB89, 'M', 'Ꮉ'), + (0xAB8A, 'M', 'Ꮊ'), + (0xAB8B, 'M', 'Ꮋ'), + (0xAB8C, 'M', 'Ꮌ'), + (0xAB8D, 'M', 'Ꮍ'), + (0xAB8E, 'M', 'Ꮎ'), + (0xAB8F, 'M', 'Ꮏ'), + (0xAB90, 'M', 'Ꮐ'), + (0xAB91, 'M', 'Ꮑ'), + (0xAB92, 'M', 'Ꮒ'), + (0xAB93, 'M', 'Ꮓ'), + (0xAB94, 'M', 'Ꮔ'), + (0xAB95, 'M', 'Ꮕ'), + (0xAB96, 'M', 'Ꮖ'), + (0xAB97, 'M', 'Ꮗ'), + (0xAB98, 'M', 'Ꮘ'), + (0xAB99, 'M', 'Ꮙ'), + (0xAB9A, 'M', 'Ꮚ'), + (0xAB9B, 'M', 'Ꮛ'), + (0xAB9C, 'M', 'Ꮜ'), + (0xAB9D, 'M', 'Ꮝ'), + (0xAB9E, 'M', 'Ꮞ'), + (0xAB9F, 'M', 'Ꮟ'), + (0xABA0, 'M', 'Ꮠ'), + (0xABA1, 'M', 'Ꮡ'), + (0xABA2, 'M', 'Ꮢ'), + (0xABA3, 'M', 'Ꮣ'), + (0xABA4, 'M', 'Ꮤ'), + (0xABA5, 'M', 'Ꮥ'), + (0xABA6, 'M', 'Ꮦ'), + (0xABA7, 'M', 'Ꮧ'), + (0xABA8, 'M', 'Ꮨ'), + (0xABA9, 'M', 'Ꮩ'), + (0xABAA, 'M', 'Ꮪ'), ] -def _seg_39(): +def _seg_39() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0xABB4, 'M', u'Ꮴ'), - (0xABB5, 'M', u'Ꮵ'), - (0xABB6, 'M', u'Ꮶ'), - (0xABB7, 'M', u'Ꮷ'), - (0xABB8, 'M', u'Ꮸ'), - (0xABB9, 'M', u'Ꮹ'), - (0xABBA, 'M', u'Ꮺ'), - (0xABBB, 'M', u'Ꮻ'), - (0xABBC, 'M', u'Ꮼ'), - (0xABBD, 'M', u'Ꮽ'), - (0xABBE, 'M', u'Ꮾ'), - (0xABBF, 'M', u'Ꮿ'), + (0xABAB, 'M', 'Ꮫ'), + (0xABAC, 'M', 'Ꮬ'), + (0xABAD, 'M', 'Ꮭ'), + (0xABAE, 'M', 'Ꮮ'), + (0xABAF, 'M', 'Ꮯ'), + (0xABB0, 'M', 'Ꮰ'), + (0xABB1, 'M', 'Ꮱ'), + (0xABB2, 'M', 'Ꮲ'), + (0xABB3, 'M', 'Ꮳ'), + (0xABB4, 'M', 'Ꮴ'), + (0xABB5, 'M', 'Ꮵ'), + (0xABB6, 'M', 'Ꮶ'), + (0xABB7, 'M', 'Ꮷ'), + (0xABB8, 'M', 'Ꮸ'), + (0xABB9, 'M', 'Ꮹ'), + (0xABBA, 'M', 'Ꮺ'), + (0xABBB, 'M', 'Ꮻ'), + (0xABBC, 'M', 'Ꮼ'), + (0xABBD, 'M', 'Ꮽ'), + (0xABBE, 'M', 'Ꮾ'), + (0xABBF, 'M', 'Ꮿ'), (0xABC0, 'V'), (0xABEE, 'X'), (0xABF0, 'V'), @@ -4085,1440 +4097,1440 @@ def _seg_39(): (0xD7C7, 'X'), (0xD7CB, 'V'), (0xD7FC, 'X'), - (0xF900, 'M', u'豈'), - (0xF901, 'M', u'更'), - (0xF902, 'M', u'車'), - (0xF903, 'M', u'賈'), - (0xF904, 'M', u'滑'), - (0xF905, 'M', u'串'), - (0xF906, 'M', u'句'), - (0xF907, 'M', u'龜'), - (0xF909, 'M', u'契'), - (0xF90A, 'M', u'金'), - (0xF90B, 'M', u'喇'), - (0xF90C, 'M', u'奈'), - (0xF90D, 'M', u'懶'), - (0xF90E, 'M', u'癩'), - (0xF90F, 'M', u'羅'), - (0xF910, 'M', u'蘿'), - (0xF911, 'M', u'螺'), - (0xF912, 'M', u'裸'), - (0xF913, 'M', u'邏'), - (0xF914, 'M', u'樂'), - (0xF915, 'M', u'洛'), - (0xF916, 'M', u'烙'), - (0xF917, 'M', u'珞'), - (0xF918, 'M', u'落'), - (0xF919, 'M', u'酪'), - (0xF91A, 'M', u'駱'), - (0xF91B, 'M', u'亂'), - (0xF91C, 'M', u'卵'), - (0xF91D, 'M', u'欄'), - (0xF91E, 'M', u'爛'), - (0xF91F, 'M', u'蘭'), - (0xF920, 'M', u'鸞'), - (0xF921, 'M', u'嵐'), - (0xF922, 'M', u'濫'), - (0xF923, 'M', u'藍'), - (0xF924, 'M', u'襤'), - (0xF925, 'M', u'拉'), - (0xF926, 'M', u'臘'), - (0xF927, 'M', u'蠟'), - (0xF928, 'M', u'廊'), - (0xF929, 'M', u'朗'), - (0xF92A, 'M', u'浪'), - (0xF92B, 'M', u'狼'), - (0xF92C, 'M', u'郎'), - (0xF92D, 'M', u'來'), - (0xF92E, 'M', u'冷'), - (0xF92F, 'M', u'勞'), - (0xF930, 'M', u'擄'), - (0xF931, 'M', u'櫓'), - (0xF932, 'M', u'爐'), - (0xF933, 'M', u'盧'), - (0xF934, 'M', u'老'), - (0xF935, 'M', u'蘆'), - (0xF936, 'M', u'虜'), - (0xF937, 'M', u'路'), - (0xF938, 'M', u'露'), - (0xF939, 'M', u'魯'), - (0xF93A, 'M', u'鷺'), - (0xF93B, 'M', u'碌'), - (0xF93C, 'M', u'祿'), - (0xF93D, 'M', u'綠'), - (0xF93E, 'M', u'菉'), - (0xF93F, 'M', u'錄'), - (0xF940, 'M', u'鹿'), - (0xF941, 'M', u'論'), - (0xF942, 'M', u'壟'), - (0xF943, 'M', u'弄'), - (0xF944, 'M', u'籠'), - (0xF945, 'M', u'聾'), - (0xF946, 'M', u'牢'), - (0xF947, 'M', u'磊'), - (0xF948, 'M', u'賂'), - (0xF949, 'M', u'雷'), - (0xF94A, 'M', u'壘'), - (0xF94B, 'M', u'屢'), - (0xF94C, 'M', u'樓'), - (0xF94D, 'M', u'淚'), - (0xF94E, 'M', u'漏'), + (0xF900, 'M', '豈'), + (0xF901, 'M', '更'), + (0xF902, 'M', '車'), + (0xF903, 'M', '賈'), + (0xF904, 'M', '滑'), + (0xF905, 'M', '串'), + (0xF906, 'M', '句'), + (0xF907, 'M', '龜'), + (0xF909, 'M', '契'), + (0xF90A, 'M', '金'), + (0xF90B, 'M', '喇'), + (0xF90C, 'M', '奈'), + (0xF90D, 'M', '懶'), + (0xF90E, 'M', '癩'), + (0xF90F, 'M', '羅'), + (0xF910, 'M', '蘿'), + (0xF911, 'M', '螺'), + (0xF912, 'M', '裸'), + (0xF913, 'M', '邏'), + (0xF914, 'M', '樂'), + (0xF915, 'M', '洛'), + (0xF916, 'M', '烙'), + (0xF917, 'M', '珞'), + (0xF918, 'M', '落'), + (0xF919, 'M', '酪'), + (0xF91A, 'M', '駱'), + (0xF91B, 'M', '亂'), + (0xF91C, 'M', '卵'), + (0xF91D, 'M', '欄'), + (0xF91E, 'M', '爛'), + (0xF91F, 'M', '蘭'), + (0xF920, 'M', '鸞'), + (0xF921, 'M', '嵐'), + (0xF922, 'M', '濫'), + (0xF923, 'M', '藍'), + (0xF924, 'M', '襤'), + (0xF925, 'M', '拉'), + (0xF926, 'M', '臘'), + (0xF927, 'M', '蠟'), + (0xF928, 'M', '廊'), + (0xF929, 'M', '朗'), + (0xF92A, 'M', '浪'), + (0xF92B, 'M', '狼'), + (0xF92C, 'M', '郎'), + (0xF92D, 'M', '來'), + (0xF92E, 'M', '冷'), + (0xF92F, 'M', '勞'), + (0xF930, 'M', '擄'), + (0xF931, 'M', '櫓'), + (0xF932, 'M', '爐'), + (0xF933, 'M', '盧'), + (0xF934, 'M', '老'), + (0xF935, 'M', '蘆'), + (0xF936, 'M', '虜'), + (0xF937, 'M', '路'), + (0xF938, 'M', '露'), + (0xF939, 'M', '魯'), + (0xF93A, 'M', '鷺'), + (0xF93B, 'M', '碌'), + (0xF93C, 'M', '祿'), + (0xF93D, 'M', '綠'), + (0xF93E, 'M', '菉'), + (0xF93F, 'M', '錄'), + (0xF940, 'M', '鹿'), + (0xF941, 'M', '論'), + (0xF942, 'M', '壟'), + (0xF943, 'M', '弄'), + (0xF944, 'M', '籠'), + (0xF945, 'M', '聾'), ] -def _seg_40(): +def _seg_40() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0xF94F, 'M', u'累'), - (0xF950, 'M', u'縷'), - (0xF951, 'M', u'陋'), - (0xF952, 'M', u'勒'), - (0xF953, 'M', u'肋'), - (0xF954, 'M', u'凜'), - (0xF955, 'M', u'凌'), - (0xF956, 'M', u'稜'), - (0xF957, 'M', u'綾'), - (0xF958, 'M', u'菱'), - (0xF959, 'M', u'陵'), - (0xF95A, 'M', u'讀'), - (0xF95B, 'M', u'拏'), - (0xF95C, 'M', u'樂'), - (0xF95D, 'M', u'諾'), - (0xF95E, 'M', u'丹'), - (0xF95F, 'M', u'寧'), - (0xF960, 'M', u'怒'), - (0xF961, 'M', u'率'), - (0xF962, 'M', u'異'), - (0xF963, 'M', u'北'), - (0xF964, 'M', u'磻'), - (0xF965, 'M', u'便'), - (0xF966, 'M', u'復'), - (0xF967, 'M', u'不'), - (0xF968, 'M', u'泌'), - (0xF969, 'M', u'數'), - (0xF96A, 'M', u'索'), - (0xF96B, 'M', u'參'), - (0xF96C, 'M', u'塞'), - (0xF96D, 'M', u'省'), - (0xF96E, 'M', u'葉'), - (0xF96F, 'M', u'說'), - (0xF970, 'M', u'殺'), - (0xF971, 'M', u'辰'), - (0xF972, 'M', u'沈'), - (0xF973, 'M', u'拾'), - (0xF974, 'M', u'若'), - (0xF975, 'M', u'掠'), - (0xF976, 'M', u'略'), - (0xF977, 'M', u'亮'), - (0xF978, 'M', u'兩'), - (0xF979, 'M', u'凉'), - (0xF97A, 'M', u'梁'), - (0xF97B, 'M', u'糧'), - (0xF97C, 'M', u'良'), - (0xF97D, 'M', u'諒'), - (0xF97E, 'M', u'量'), - (0xF97F, 'M', u'勵'), - (0xF980, 'M', u'呂'), - (0xF981, 'M', u'女'), - (0xF982, 'M', u'廬'), - (0xF983, 'M', u'旅'), - (0xF984, 'M', u'濾'), - (0xF985, 'M', u'礪'), - (0xF986, 'M', u'閭'), - (0xF987, 'M', u'驪'), - (0xF988, 'M', u'麗'), - (0xF989, 'M', u'黎'), - (0xF98A, 'M', u'力'), - (0xF98B, 'M', u'曆'), - (0xF98C, 'M', u'歷'), - (0xF98D, 'M', u'轢'), - (0xF98E, 'M', u'年'), - (0xF98F, 'M', u'憐'), - (0xF990, 'M', u'戀'), - (0xF991, 'M', u'撚'), - (0xF992, 'M', u'漣'), - (0xF993, 'M', u'煉'), - (0xF994, 'M', u'璉'), - (0xF995, 'M', u'秊'), - (0xF996, 'M', u'練'), - (0xF997, 'M', u'聯'), - (0xF998, 'M', u'輦'), - (0xF999, 'M', u'蓮'), - (0xF99A, 'M', u'連'), - (0xF99B, 'M', u'鍊'), - (0xF99C, 'M', u'列'), - (0xF99D, 'M', u'劣'), - (0xF99E, 'M', u'咽'), - (0xF99F, 'M', u'烈'), - (0xF9A0, 'M', u'裂'), - (0xF9A1, 'M', u'說'), - (0xF9A2, 'M', u'廉'), - (0xF9A3, 'M', u'念'), - (0xF9A4, 'M', u'捻'), - (0xF9A5, 'M', u'殮'), - (0xF9A6, 'M', u'簾'), - (0xF9A7, 'M', u'獵'), - (0xF9A8, 'M', u'令'), - (0xF9A9, 'M', u'囹'), - (0xF9AA, 'M', u'寧'), - (0xF9AB, 'M', u'嶺'), - (0xF9AC, 'M', u'怜'), - (0xF9AD, 'M', u'玲'), - (0xF9AE, 'M', u'瑩'), - (0xF9AF, 'M', u'羚'), - (0xF9B0, 'M', u'聆'), - (0xF9B1, 'M', u'鈴'), - (0xF9B2, 'M', u'零'), + (0xF946, 'M', '牢'), + (0xF947, 'M', '磊'), + (0xF948, 'M', '賂'), + (0xF949, 'M', '雷'), + (0xF94A, 'M', '壘'), + (0xF94B, 'M', '屢'), + (0xF94C, 'M', '樓'), + (0xF94D, 'M', '淚'), + (0xF94E, 'M', '漏'), + (0xF94F, 'M', '累'), + (0xF950, 'M', '縷'), + (0xF951, 'M', '陋'), + (0xF952, 'M', '勒'), + (0xF953, 'M', '肋'), + (0xF954, 'M', '凜'), + (0xF955, 'M', '凌'), + (0xF956, 'M', '稜'), + (0xF957, 'M', '綾'), + (0xF958, 'M', '菱'), + (0xF959, 'M', '陵'), + (0xF95A, 'M', '讀'), + (0xF95B, 'M', '拏'), + (0xF95C, 'M', '樂'), + (0xF95D, 'M', '諾'), + (0xF95E, 'M', '丹'), + (0xF95F, 'M', '寧'), + (0xF960, 'M', '怒'), + (0xF961, 'M', '率'), + (0xF962, 'M', '異'), + (0xF963, 'M', '北'), + (0xF964, 'M', '磻'), + (0xF965, 'M', '便'), + (0xF966, 'M', '復'), + (0xF967, 'M', '不'), + (0xF968, 'M', '泌'), + (0xF969, 'M', '數'), + (0xF96A, 'M', '索'), + (0xF96B, 'M', '參'), + (0xF96C, 'M', '塞'), + (0xF96D, 'M', '省'), + (0xF96E, 'M', '葉'), + (0xF96F, 'M', '說'), + (0xF970, 'M', '殺'), + (0xF971, 'M', '辰'), + (0xF972, 'M', '沈'), + (0xF973, 'M', '拾'), + (0xF974, 'M', '若'), + (0xF975, 'M', '掠'), + (0xF976, 'M', '略'), + (0xF977, 'M', '亮'), + (0xF978, 'M', '兩'), + (0xF979, 'M', '凉'), + (0xF97A, 'M', '梁'), + (0xF97B, 'M', '糧'), + (0xF97C, 'M', '良'), + (0xF97D, 'M', '諒'), + (0xF97E, 'M', '量'), + (0xF97F, 'M', '勵'), + (0xF980, 'M', '呂'), + (0xF981, 'M', '女'), + (0xF982, 'M', '廬'), + (0xF983, 'M', '旅'), + (0xF984, 'M', '濾'), + (0xF985, 'M', '礪'), + (0xF986, 'M', '閭'), + (0xF987, 'M', '驪'), + (0xF988, 'M', '麗'), + (0xF989, 'M', '黎'), + (0xF98A, 'M', '力'), + (0xF98B, 'M', '曆'), + (0xF98C, 'M', '歷'), + (0xF98D, 'M', '轢'), + (0xF98E, 'M', '年'), + (0xF98F, 'M', '憐'), + (0xF990, 'M', '戀'), + (0xF991, 'M', '撚'), + (0xF992, 'M', '漣'), + (0xF993, 'M', '煉'), + (0xF994, 'M', '璉'), + (0xF995, 'M', '秊'), + (0xF996, 'M', '練'), + (0xF997, 'M', '聯'), + (0xF998, 'M', '輦'), + (0xF999, 'M', '蓮'), + (0xF99A, 'M', '連'), + (0xF99B, 'M', '鍊'), + (0xF99C, 'M', '列'), + (0xF99D, 'M', '劣'), + (0xF99E, 'M', '咽'), + (0xF99F, 'M', '烈'), + (0xF9A0, 'M', '裂'), + (0xF9A1, 'M', '說'), + (0xF9A2, 'M', '廉'), + (0xF9A3, 'M', '念'), + (0xF9A4, 'M', '捻'), + (0xF9A5, 'M', '殮'), + (0xF9A6, 'M', '簾'), + (0xF9A7, 'M', '獵'), + (0xF9A8, 'M', '令'), + (0xF9A9, 'M', '囹'), ] -def _seg_41(): +def _seg_41() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xF9AA, 'M', '寧'), + (0xF9AB, 'M', '嶺'), + (0xF9AC, 'M', '怜'), + (0xF9AD, 'M', '玲'), + (0xF9AE, 'M', '瑩'), + (0xF9AF, 'M', '羚'), + (0xF9B0, 'M', '聆'), + (0xF9B1, 'M', '鈴'), + (0xF9B2, 'M', '零'), + (0xF9B3, 'M', '靈'), + (0xF9B4, 'M', '領'), + (0xF9B5, 'M', '例'), + (0xF9B6, 'M', '禮'), + (0xF9B7, 'M', '醴'), + (0xF9B8, 'M', '隸'), + (0xF9B9, 'M', '惡'), + (0xF9BA, 'M', '了'), + (0xF9BB, 'M', '僚'), + (0xF9BC, 'M', '寮'), + (0xF9BD, 'M', '尿'), + (0xF9BE, 'M', '料'), + (0xF9BF, 'M', '樂'), + (0xF9C0, 'M', '燎'), + (0xF9C1, 'M', '療'), + (0xF9C2, 'M', '蓼'), + (0xF9C3, 'M', '遼'), + (0xF9C4, 'M', '龍'), + (0xF9C5, 'M', '暈'), + (0xF9C6, 'M', '阮'), + (0xF9C7, 'M', '劉'), + (0xF9C8, 'M', '杻'), + (0xF9C9, 'M', '柳'), + (0xF9CA, 'M', '流'), + (0xF9CB, 'M', '溜'), + (0xF9CC, 'M', '琉'), + (0xF9CD, 'M', '留'), + (0xF9CE, 'M', '硫'), + (0xF9CF, 'M', '紐'), + (0xF9D0, 'M', '類'), + (0xF9D1, 'M', '六'), + (0xF9D2, 'M', '戮'), + (0xF9D3, 'M', '陸'), + (0xF9D4, 'M', '倫'), + (0xF9D5, 'M', '崙'), + (0xF9D6, 'M', '淪'), + (0xF9D7, 'M', '輪'), + (0xF9D8, 'M', '律'), + (0xF9D9, 'M', '慄'), + (0xF9DA, 'M', '栗'), + (0xF9DB, 'M', '率'), + (0xF9DC, 'M', '隆'), + (0xF9DD, 'M', '利'), + (0xF9DE, 'M', '吏'), + (0xF9DF, 'M', '履'), + (0xF9E0, 'M', '易'), + (0xF9E1, 'M', '李'), + (0xF9E2, 'M', '梨'), + (0xF9E3, 'M', '泥'), + (0xF9E4, 'M', '理'), + (0xF9E5, 'M', '痢'), + (0xF9E6, 'M', '罹'), + (0xF9E7, 'M', '裏'), + (0xF9E8, 'M', '裡'), + (0xF9E9, 'M', '里'), + (0xF9EA, 'M', '離'), + (0xF9EB, 'M', '匿'), + (0xF9EC, 'M', '溺'), + (0xF9ED, 'M', '吝'), + (0xF9EE, 'M', '燐'), + (0xF9EF, 'M', '璘'), + (0xF9F0, 'M', '藺'), + (0xF9F1, 'M', '隣'), + (0xF9F2, 'M', '鱗'), + (0xF9F3, 'M', '麟'), + (0xF9F4, 'M', '林'), + (0xF9F5, 'M', '淋'), + (0xF9F6, 'M', '臨'), + (0xF9F7, 'M', '立'), + (0xF9F8, 'M', '笠'), + (0xF9F9, 'M', '粒'), + (0xF9FA, 'M', '狀'), + (0xF9FB, 'M', '炙'), + (0xF9FC, 'M', '識'), + (0xF9FD, 'M', '什'), + (0xF9FE, 'M', '茶'), + (0xF9FF, 'M', '刺'), + (0xFA00, 'M', '切'), + (0xFA01, 'M', '度'), + (0xFA02, 'M', '拓'), + (0xFA03, 'M', '糖'), + (0xFA04, 'M', '宅'), + (0xFA05, 'M', '洞'), + (0xFA06, 'M', '暴'), + (0xFA07, 'M', '輻'), + (0xFA08, 'M', '行'), + (0xFA09, 'M', '降'), + (0xFA0A, 'M', '見'), + (0xFA0B, 'M', '廓'), + (0xFA0C, 'M', '兀'), + (0xFA0D, 'M', '嗀'), + ] + +def _seg_42() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0xF9B3, 'M', u'靈'), - (0xF9B4, 'M', u'領'), - (0xF9B5, 'M', u'例'), - (0xF9B6, 'M', u'禮'), - (0xF9B7, 'M', u'醴'), - (0xF9B8, 'M', u'隸'), - (0xF9B9, 'M', u'惡'), - (0xF9BA, 'M', u'了'), - (0xF9BB, 'M', u'僚'), - (0xF9BC, 'M', u'寮'), - (0xF9BD, 'M', u'尿'), - (0xF9BE, 'M', u'料'), - (0xF9BF, 'M', u'樂'), - (0xF9C0, 'M', u'燎'), - (0xF9C1, 'M', u'療'), - (0xF9C2, 'M', u'蓼'), - (0xF9C3, 'M', u'遼'), - (0xF9C4, 'M', u'龍'), - (0xF9C5, 'M', u'暈'), - (0xF9C6, 'M', u'阮'), - (0xF9C7, 'M', u'劉'), - (0xF9C8, 'M', u'杻'), - (0xF9C9, 'M', u'柳'), - (0xF9CA, 'M', u'流'), - (0xF9CB, 'M', u'溜'), - (0xF9CC, 'M', u'琉'), - (0xF9CD, 'M', u'留'), - (0xF9CE, 'M', u'硫'), - (0xF9CF, 'M', u'紐'), - (0xF9D0, 'M', u'類'), - (0xF9D1, 'M', u'六'), - (0xF9D2, 'M', u'戮'), - (0xF9D3, 'M', u'陸'), - (0xF9D4, 'M', u'倫'), - (0xF9D5, 'M', u'崙'), - (0xF9D6, 'M', u'淪'), - (0xF9D7, 'M', u'輪'), - (0xF9D8, 'M', u'律'), - (0xF9D9, 'M', u'慄'), - (0xF9DA, 'M', u'栗'), - (0xF9DB, 'M', u'率'), - (0xF9DC, 'M', u'隆'), - (0xF9DD, 'M', u'利'), - (0xF9DE, 'M', u'吏'), - (0xF9DF, 'M', u'履'), - (0xF9E0, 'M', u'易'), - (0xF9E1, 'M', u'李'), - (0xF9E2, 'M', u'梨'), - (0xF9E3, 'M', u'泥'), - (0xF9E4, 'M', u'理'), - (0xF9E5, 'M', u'痢'), - (0xF9E6, 'M', u'罹'), - (0xF9E7, 'M', u'裏'), - (0xF9E8, 'M', u'裡'), - (0xF9E9, 'M', u'里'), - (0xF9EA, 'M', u'離'), - (0xF9EB, 'M', u'匿'), - (0xF9EC, 'M', u'溺'), - (0xF9ED, 'M', u'吝'), - (0xF9EE, 'M', u'燐'), - (0xF9EF, 'M', u'璘'), - (0xF9F0, 'M', u'藺'), - (0xF9F1, 'M', u'隣'), - (0xF9F2, 'M', u'鱗'), - (0xF9F3, 'M', u'麟'), - (0xF9F4, 'M', u'林'), - (0xF9F5, 'M', u'淋'), - (0xF9F6, 'M', u'臨'), - (0xF9F7, 'M', u'立'), - (0xF9F8, 'M', u'笠'), - (0xF9F9, 'M', u'粒'), - (0xF9FA, 'M', u'狀'), - (0xF9FB, 'M', u'炙'), - (0xF9FC, 'M', u'識'), - (0xF9FD, 'M', u'什'), - (0xF9FE, 'M', u'茶'), - (0xF9FF, 'M', u'刺'), - (0xFA00, 'M', u'切'), - (0xFA01, 'M', u'度'), - (0xFA02, 'M', u'拓'), - (0xFA03, 'M', u'糖'), - (0xFA04, 'M', u'宅'), - (0xFA05, 'M', u'洞'), - (0xFA06, 'M', u'暴'), - (0xFA07, 'M', u'輻'), - (0xFA08, 'M', u'行'), - (0xFA09, 'M', u'降'), - (0xFA0A, 'M', u'見'), - (0xFA0B, 'M', u'廓'), - (0xFA0C, 'M', u'兀'), - (0xFA0D, 'M', u'嗀'), (0xFA0E, 'V'), - (0xFA10, 'M', u'塚'), + (0xFA10, 'M', '塚'), (0xFA11, 'V'), - (0xFA12, 'M', u'晴'), + (0xFA12, 'M', '晴'), (0xFA13, 'V'), - (0xFA15, 'M', u'凞'), - (0xFA16, 'M', u'猪'), - (0xFA17, 'M', u'益'), - (0xFA18, 'M', u'礼'), - ] - -def _seg_42(): - return [ - (0xFA19, 'M', u'神'), - (0xFA1A, 'M', u'祥'), - (0xFA1B, 'M', u'福'), - (0xFA1C, 'M', u'靖'), - (0xFA1D, 'M', u'精'), - (0xFA1E, 'M', u'羽'), + (0xFA15, 'M', '凞'), + (0xFA16, 'M', '猪'), + (0xFA17, 'M', '益'), + (0xFA18, 'M', '礼'), + (0xFA19, 'M', '神'), + (0xFA1A, 'M', '祥'), + (0xFA1B, 'M', '福'), + (0xFA1C, 'M', '靖'), + (0xFA1D, 'M', '精'), + (0xFA1E, 'M', '羽'), (0xFA1F, 'V'), - (0xFA20, 'M', u'蘒'), + (0xFA20, 'M', '蘒'), (0xFA21, 'V'), - (0xFA22, 'M', u'諸'), + (0xFA22, 'M', '諸'), (0xFA23, 'V'), - (0xFA25, 'M', u'逸'), - (0xFA26, 'M', u'都'), + (0xFA25, 'M', '逸'), + (0xFA26, 'M', '都'), (0xFA27, 'V'), - (0xFA2A, 'M', u'飯'), - (0xFA2B, 'M', u'飼'), - (0xFA2C, 'M', u'館'), - (0xFA2D, 'M', u'鶴'), - (0xFA2E, 'M', u'郞'), - (0xFA2F, 'M', u'隷'), - (0xFA30, 'M', u'侮'), - (0xFA31, 'M', u'僧'), - (0xFA32, 'M', u'免'), - (0xFA33, 'M', u'勉'), - (0xFA34, 'M', u'勤'), - (0xFA35, 'M', u'卑'), - (0xFA36, 'M', u'喝'), - (0xFA37, 'M', u'嘆'), - (0xFA38, 'M', u'器'), - (0xFA39, 'M', u'塀'), - (0xFA3A, 'M', u'墨'), - (0xFA3B, 'M', u'層'), - (0xFA3C, 'M', u'屮'), - (0xFA3D, 'M', u'悔'), - (0xFA3E, 'M', u'慨'), - (0xFA3F, 'M', u'憎'), - (0xFA40, 'M', u'懲'), - (0xFA41, 'M', u'敏'), - (0xFA42, 'M', u'既'), - (0xFA43, 'M', u'暑'), - (0xFA44, 'M', u'梅'), - (0xFA45, 'M', u'海'), - (0xFA46, 'M', u'渚'), - (0xFA47, 'M', u'漢'), - (0xFA48, 'M', u'煮'), - (0xFA49, 'M', u'爫'), - (0xFA4A, 'M', u'琢'), - (0xFA4B, 'M', u'碑'), - (0xFA4C, 'M', u'社'), - (0xFA4D, 'M', u'祉'), - (0xFA4E, 'M', u'祈'), - (0xFA4F, 'M', u'祐'), - (0xFA50, 'M', u'祖'), - (0xFA51, 'M', u'祝'), - (0xFA52, 'M', u'禍'), - (0xFA53, 'M', u'禎'), - (0xFA54, 'M', u'穀'), - (0xFA55, 'M', u'突'), - (0xFA56, 'M', u'節'), - (0xFA57, 'M', u'練'), - (0xFA58, 'M', u'縉'), - (0xFA59, 'M', u'繁'), - (0xFA5A, 'M', u'署'), - (0xFA5B, 'M', u'者'), - (0xFA5C, 'M', u'臭'), - (0xFA5D, 'M', u'艹'), - (0xFA5F, 'M', u'著'), - (0xFA60, 'M', u'褐'), - (0xFA61, 'M', u'視'), - (0xFA62, 'M', u'謁'), - (0xFA63, 'M', u'謹'), - (0xFA64, 'M', u'賓'), - (0xFA65, 'M', u'贈'), - (0xFA66, 'M', u'辶'), - (0xFA67, 'M', u'逸'), - (0xFA68, 'M', u'難'), - (0xFA69, 'M', u'響'), - (0xFA6A, 'M', u'頻'), - (0xFA6B, 'M', u'恵'), - (0xFA6C, 'M', u'𤋮'), - (0xFA6D, 'M', u'舘'), + (0xFA2A, 'M', '飯'), + (0xFA2B, 'M', '飼'), + (0xFA2C, 'M', '館'), + (0xFA2D, 'M', '鶴'), + (0xFA2E, 'M', '郞'), + (0xFA2F, 'M', '隷'), + (0xFA30, 'M', '侮'), + (0xFA31, 'M', '僧'), + (0xFA32, 'M', '免'), + (0xFA33, 'M', '勉'), + (0xFA34, 'M', '勤'), + (0xFA35, 'M', '卑'), + (0xFA36, 'M', '喝'), + (0xFA37, 'M', '嘆'), + (0xFA38, 'M', '器'), + (0xFA39, 'M', '塀'), + (0xFA3A, 'M', '墨'), + (0xFA3B, 'M', '層'), + (0xFA3C, 'M', '屮'), + (0xFA3D, 'M', '悔'), + (0xFA3E, 'M', '慨'), + (0xFA3F, 'M', '憎'), + (0xFA40, 'M', '懲'), + (0xFA41, 'M', '敏'), + (0xFA42, 'M', '既'), + (0xFA43, 'M', '暑'), + (0xFA44, 'M', '梅'), + (0xFA45, 'M', '海'), + (0xFA46, 'M', '渚'), + (0xFA47, 'M', '漢'), + (0xFA48, 'M', '煮'), + (0xFA49, 'M', '爫'), + (0xFA4A, 'M', '琢'), + (0xFA4B, 'M', '碑'), + (0xFA4C, 'M', '社'), + (0xFA4D, 'M', '祉'), + (0xFA4E, 'M', '祈'), + (0xFA4F, 'M', '祐'), + (0xFA50, 'M', '祖'), + (0xFA51, 'M', '祝'), + (0xFA52, 'M', '禍'), + (0xFA53, 'M', '禎'), + (0xFA54, 'M', '穀'), + (0xFA55, 'M', '突'), + (0xFA56, 'M', '節'), + (0xFA57, 'M', '練'), + (0xFA58, 'M', '縉'), + (0xFA59, 'M', '繁'), + (0xFA5A, 'M', '署'), + (0xFA5B, 'M', '者'), + (0xFA5C, 'M', '臭'), + (0xFA5D, 'M', '艹'), + (0xFA5F, 'M', '著'), + (0xFA60, 'M', '褐'), + (0xFA61, 'M', '視'), + (0xFA62, 'M', '謁'), + (0xFA63, 'M', '謹'), + (0xFA64, 'M', '賓'), + (0xFA65, 'M', '贈'), + (0xFA66, 'M', '辶'), + (0xFA67, 'M', '逸'), + (0xFA68, 'M', '難'), + (0xFA69, 'M', '響'), + (0xFA6A, 'M', '頻'), + (0xFA6B, 'M', '恵'), + (0xFA6C, 'M', '𤋮'), + (0xFA6D, 'M', '舘'), (0xFA6E, 'X'), - (0xFA70, 'M', u'並'), - (0xFA71, 'M', u'况'), - (0xFA72, 'M', u'全'), - (0xFA73, 'M', u'侀'), - (0xFA74, 'M', u'充'), - (0xFA75, 'M', u'冀'), - (0xFA76, 'M', u'勇'), - (0xFA77, 'M', u'勺'), - (0xFA78, 'M', u'喝'), - (0xFA79, 'M', u'啕'), - (0xFA7A, 'M', u'喙'), - (0xFA7B, 'M', u'嗢'), - (0xFA7C, 'M', u'塚'), - (0xFA7D, 'M', u'墳'), - (0xFA7E, 'M', u'奄'), - (0xFA7F, 'M', u'奔'), - (0xFA80, 'M', u'婢'), - (0xFA81, 'M', u'嬨'), + (0xFA70, 'M', '並'), + (0xFA71, 'M', '况'), + (0xFA72, 'M', '全'), + (0xFA73, 'M', '侀'), + (0xFA74, 'M', '充'), + (0xFA75, 'M', '冀'), + (0xFA76, 'M', '勇'), + (0xFA77, 'M', '勺'), + (0xFA78, 'M', '喝'), ] -def _seg_43(): +def _seg_43() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0xFA82, 'M', u'廒'), - (0xFA83, 'M', u'廙'), - (0xFA84, 'M', u'彩'), - (0xFA85, 'M', u'徭'), - (0xFA86, 'M', u'惘'), - (0xFA87, 'M', u'慎'), - (0xFA88, 'M', u'愈'), - (0xFA89, 'M', u'憎'), - (0xFA8A, 'M', u'慠'), - (0xFA8B, 'M', u'懲'), - (0xFA8C, 'M', u'戴'), - (0xFA8D, 'M', u'揄'), - (0xFA8E, 'M', u'搜'), - (0xFA8F, 'M', u'摒'), - (0xFA90, 'M', u'敖'), - (0xFA91, 'M', u'晴'), - (0xFA92, 'M', u'朗'), - (0xFA93, 'M', u'望'), - (0xFA94, 'M', u'杖'), - (0xFA95, 'M', u'歹'), - (0xFA96, 'M', u'殺'), - (0xFA97, 'M', u'流'), - (0xFA98, 'M', u'滛'), - (0xFA99, 'M', u'滋'), - (0xFA9A, 'M', u'漢'), - (0xFA9B, 'M', u'瀞'), - (0xFA9C, 'M', u'煮'), - (0xFA9D, 'M', u'瞧'), - (0xFA9E, 'M', u'爵'), - (0xFA9F, 'M', u'犯'), - (0xFAA0, 'M', u'猪'), - (0xFAA1, 'M', u'瑱'), - (0xFAA2, 'M', u'甆'), - (0xFAA3, 'M', u'画'), - (0xFAA4, 'M', u'瘝'), - (0xFAA5, 'M', u'瘟'), - (0xFAA6, 'M', u'益'), - (0xFAA7, 'M', u'盛'), - (0xFAA8, 'M', u'直'), - (0xFAA9, 'M', u'睊'), - (0xFAAA, 'M', u'着'), - (0xFAAB, 'M', u'磌'), - (0xFAAC, 'M', u'窱'), - (0xFAAD, 'M', u'節'), - (0xFAAE, 'M', u'类'), - (0xFAAF, 'M', u'絛'), - (0xFAB0, 'M', u'練'), - (0xFAB1, 'M', u'缾'), - (0xFAB2, 'M', u'者'), - (0xFAB3, 'M', u'荒'), - (0xFAB4, 'M', u'華'), - (0xFAB5, 'M', u'蝹'), - (0xFAB6, 'M', u'襁'), - (0xFAB7, 'M', u'覆'), - (0xFAB8, 'M', u'視'), - (0xFAB9, 'M', u'調'), - (0xFABA, 'M', u'諸'), - (0xFABB, 'M', u'請'), - (0xFABC, 'M', u'謁'), - (0xFABD, 'M', u'諾'), - (0xFABE, 'M', u'諭'), - (0xFABF, 'M', u'謹'), - (0xFAC0, 'M', u'變'), - (0xFAC1, 'M', u'贈'), - (0xFAC2, 'M', u'輸'), - (0xFAC3, 'M', u'遲'), - (0xFAC4, 'M', u'醙'), - (0xFAC5, 'M', u'鉶'), - (0xFAC6, 'M', u'陼'), - (0xFAC7, 'M', u'難'), - (0xFAC8, 'M', u'靖'), - (0xFAC9, 'M', u'韛'), - (0xFACA, 'M', u'響'), - (0xFACB, 'M', u'頋'), - (0xFACC, 'M', u'頻'), - (0xFACD, 'M', u'鬒'), - (0xFACE, 'M', u'龜'), - (0xFACF, 'M', u'𢡊'), - (0xFAD0, 'M', u'𢡄'), - (0xFAD1, 'M', u'𣏕'), - (0xFAD2, 'M', u'㮝'), - (0xFAD3, 'M', u'䀘'), - (0xFAD4, 'M', u'䀹'), - (0xFAD5, 'M', u'𥉉'), - (0xFAD6, 'M', u'𥳐'), - (0xFAD7, 'M', u'𧻓'), - (0xFAD8, 'M', u'齃'), - (0xFAD9, 'M', u'龎'), + (0xFA79, 'M', '啕'), + (0xFA7A, 'M', '喙'), + (0xFA7B, 'M', '嗢'), + (0xFA7C, 'M', '塚'), + (0xFA7D, 'M', '墳'), + (0xFA7E, 'M', '奄'), + (0xFA7F, 'M', '奔'), + (0xFA80, 'M', '婢'), + (0xFA81, 'M', '嬨'), + (0xFA82, 'M', '廒'), + (0xFA83, 'M', '廙'), + (0xFA84, 'M', '彩'), + (0xFA85, 'M', '徭'), + (0xFA86, 'M', '惘'), + (0xFA87, 'M', '慎'), + (0xFA88, 'M', '愈'), + (0xFA89, 'M', '憎'), + (0xFA8A, 'M', '慠'), + (0xFA8B, 'M', '懲'), + (0xFA8C, 'M', '戴'), + (0xFA8D, 'M', '揄'), + (0xFA8E, 'M', '搜'), + (0xFA8F, 'M', '摒'), + (0xFA90, 'M', '敖'), + (0xFA91, 'M', '晴'), + (0xFA92, 'M', '朗'), + (0xFA93, 'M', '望'), + (0xFA94, 'M', '杖'), + (0xFA95, 'M', '歹'), + (0xFA96, 'M', '殺'), + (0xFA97, 'M', '流'), + (0xFA98, 'M', '滛'), + (0xFA99, 'M', '滋'), + (0xFA9A, 'M', '漢'), + (0xFA9B, 'M', '瀞'), + (0xFA9C, 'M', '煮'), + (0xFA9D, 'M', '瞧'), + (0xFA9E, 'M', '爵'), + (0xFA9F, 'M', '犯'), + (0xFAA0, 'M', '猪'), + (0xFAA1, 'M', '瑱'), + (0xFAA2, 'M', '甆'), + (0xFAA3, 'M', '画'), + (0xFAA4, 'M', '瘝'), + (0xFAA5, 'M', '瘟'), + (0xFAA6, 'M', '益'), + (0xFAA7, 'M', '盛'), + (0xFAA8, 'M', '直'), + (0xFAA9, 'M', '睊'), + (0xFAAA, 'M', '着'), + (0xFAAB, 'M', '磌'), + (0xFAAC, 'M', '窱'), + (0xFAAD, 'M', '節'), + (0xFAAE, 'M', '类'), + (0xFAAF, 'M', '絛'), + (0xFAB0, 'M', '練'), + (0xFAB1, 'M', '缾'), + (0xFAB2, 'M', '者'), + (0xFAB3, 'M', '荒'), + (0xFAB4, 'M', '華'), + (0xFAB5, 'M', '蝹'), + (0xFAB6, 'M', '襁'), + (0xFAB7, 'M', '覆'), + (0xFAB8, 'M', '視'), + (0xFAB9, 'M', '調'), + (0xFABA, 'M', '諸'), + (0xFABB, 'M', '請'), + (0xFABC, 'M', '謁'), + (0xFABD, 'M', '諾'), + (0xFABE, 'M', '諭'), + (0xFABF, 'M', '謹'), + (0xFAC0, 'M', '變'), + (0xFAC1, 'M', '贈'), + (0xFAC2, 'M', '輸'), + (0xFAC3, 'M', '遲'), + (0xFAC4, 'M', '醙'), + (0xFAC5, 'M', '鉶'), + (0xFAC6, 'M', '陼'), + (0xFAC7, 'M', '難'), + (0xFAC8, 'M', '靖'), + (0xFAC9, 'M', '韛'), + (0xFACA, 'M', '響'), + (0xFACB, 'M', '頋'), + (0xFACC, 'M', '頻'), + (0xFACD, 'M', '鬒'), + (0xFACE, 'M', '龜'), + (0xFACF, 'M', '𢡊'), + (0xFAD0, 'M', '𢡄'), + (0xFAD1, 'M', '𣏕'), + (0xFAD2, 'M', '㮝'), + (0xFAD3, 'M', '䀘'), + (0xFAD4, 'M', '䀹'), + (0xFAD5, 'M', '𥉉'), + (0xFAD6, 'M', '𥳐'), + (0xFAD7, 'M', '𧻓'), + (0xFAD8, 'M', '齃'), + (0xFAD9, 'M', '龎'), (0xFADA, 'X'), - (0xFB00, 'M', u'ff'), - (0xFB01, 'M', u'fi'), - (0xFB02, 'M', u'fl'), - (0xFB03, 'M', u'ffi'), - (0xFB04, 'M', u'ffl'), - (0xFB05, 'M', u'st'), + (0xFB00, 'M', 'ff'), + (0xFB01, 'M', 'fi'), + ] + +def _seg_44() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xFB02, 'M', 'fl'), + (0xFB03, 'M', 'ffi'), + (0xFB04, 'M', 'ffl'), + (0xFB05, 'M', 'st'), (0xFB07, 'X'), - (0xFB13, 'M', u'մն'), - (0xFB14, 'M', u'մե'), - (0xFB15, 'M', u'մի'), - (0xFB16, 'M', u'վն'), - ] - -def _seg_44(): - return [ - (0xFB17, 'M', u'մխ'), + (0xFB13, 'M', 'մն'), + (0xFB14, 'M', 'մե'), + (0xFB15, 'M', 'մի'), + (0xFB16, 'M', 'վն'), + (0xFB17, 'M', 'մխ'), (0xFB18, 'X'), - (0xFB1D, 'M', u'יִ'), + (0xFB1D, 'M', 'יִ'), (0xFB1E, 'V'), - (0xFB1F, 'M', u'ײַ'), - (0xFB20, 'M', u'ע'), - (0xFB21, 'M', u'א'), - (0xFB22, 'M', u'ד'), - (0xFB23, 'M', u'ה'), - (0xFB24, 'M', u'כ'), - (0xFB25, 'M', u'ל'), - (0xFB26, 'M', u'ם'), - (0xFB27, 'M', u'ר'), - (0xFB28, 'M', u'ת'), - (0xFB29, '3', u'+'), - (0xFB2A, 'M', u'שׁ'), - (0xFB2B, 'M', u'שׂ'), - (0xFB2C, 'M', u'שּׁ'), - (0xFB2D, 'M', u'שּׂ'), - (0xFB2E, 'M', u'אַ'), - (0xFB2F, 'M', u'אָ'), - (0xFB30, 'M', u'אּ'), - (0xFB31, 'M', u'בּ'), - (0xFB32, 'M', u'גּ'), - (0xFB33, 'M', u'דּ'), - (0xFB34, 'M', u'הּ'), - (0xFB35, 'M', u'וּ'), - (0xFB36, 'M', u'זּ'), + (0xFB1F, 'M', 'ײַ'), + (0xFB20, 'M', 'ע'), + (0xFB21, 'M', 'א'), + (0xFB22, 'M', 'ד'), + (0xFB23, 'M', 'ה'), + (0xFB24, 'M', 'כ'), + (0xFB25, 'M', 'ל'), + (0xFB26, 'M', 'ם'), + (0xFB27, 'M', 'ר'), + (0xFB28, 'M', 'ת'), + (0xFB29, '3', '+'), + (0xFB2A, 'M', 'שׁ'), + (0xFB2B, 'M', 'שׂ'), + (0xFB2C, 'M', 'שּׁ'), + (0xFB2D, 'M', 'שּׂ'), + (0xFB2E, 'M', 'אַ'), + (0xFB2F, 'M', 'אָ'), + (0xFB30, 'M', 'אּ'), + (0xFB31, 'M', 'בּ'), + (0xFB32, 'M', 'גּ'), + (0xFB33, 'M', 'דּ'), + (0xFB34, 'M', 'הּ'), + (0xFB35, 'M', 'וּ'), + (0xFB36, 'M', 'זּ'), (0xFB37, 'X'), - (0xFB38, 'M', u'טּ'), - (0xFB39, 'M', u'יּ'), - (0xFB3A, 'M', u'ךּ'), - (0xFB3B, 'M', u'כּ'), - (0xFB3C, 'M', u'לּ'), + (0xFB38, 'M', 'טּ'), + (0xFB39, 'M', 'יּ'), + (0xFB3A, 'M', 'ךּ'), + (0xFB3B, 'M', 'כּ'), + (0xFB3C, 'M', 'לּ'), (0xFB3D, 'X'), - (0xFB3E, 'M', u'מּ'), + (0xFB3E, 'M', 'מּ'), (0xFB3F, 'X'), - (0xFB40, 'M', u'נּ'), - (0xFB41, 'M', u'סּ'), + (0xFB40, 'M', 'נּ'), + (0xFB41, 'M', 'סּ'), (0xFB42, 'X'), - (0xFB43, 'M', u'ףּ'), - (0xFB44, 'M', u'פּ'), + (0xFB43, 'M', 'ףּ'), + (0xFB44, 'M', 'פּ'), (0xFB45, 'X'), - (0xFB46, 'M', u'צּ'), - (0xFB47, 'M', u'קּ'), - (0xFB48, 'M', u'רּ'), - (0xFB49, 'M', u'שּ'), - (0xFB4A, 'M', u'תּ'), - (0xFB4B, 'M', u'וֹ'), - (0xFB4C, 'M', u'בֿ'), - (0xFB4D, 'M', u'כֿ'), - (0xFB4E, 'M', u'פֿ'), - (0xFB4F, 'M', u'אל'), - (0xFB50, 'M', u'ٱ'), - (0xFB52, 'M', u'ٻ'), - (0xFB56, 'M', u'پ'), - (0xFB5A, 'M', u'ڀ'), - (0xFB5E, 'M', u'ٺ'), - (0xFB62, 'M', u'ٿ'), - (0xFB66, 'M', u'ٹ'), - (0xFB6A, 'M', u'ڤ'), - (0xFB6E, 'M', u'ڦ'), - (0xFB72, 'M', u'ڄ'), - (0xFB76, 'M', u'ڃ'), - (0xFB7A, 'M', u'چ'), - (0xFB7E, 'M', u'ڇ'), - (0xFB82, 'M', u'ڍ'), - (0xFB84, 'M', u'ڌ'), - (0xFB86, 'M', u'ڎ'), - (0xFB88, 'M', u'ڈ'), - (0xFB8A, 'M', u'ژ'), - (0xFB8C, 'M', u'ڑ'), - (0xFB8E, 'M', u'ک'), - (0xFB92, 'M', u'گ'), - (0xFB96, 'M', u'ڳ'), - (0xFB9A, 'M', u'ڱ'), - (0xFB9E, 'M', u'ں'), - (0xFBA0, 'M', u'ڻ'), - (0xFBA4, 'M', u'ۀ'), - (0xFBA6, 'M', u'ہ'), - (0xFBAA, 'M', u'ھ'), - (0xFBAE, 'M', u'ے'), - (0xFBB0, 'M', u'ۓ'), + (0xFB46, 'M', 'צּ'), + (0xFB47, 'M', 'קּ'), + (0xFB48, 'M', 'רּ'), + (0xFB49, 'M', 'שּ'), + (0xFB4A, 'M', 'תּ'), + (0xFB4B, 'M', 'וֹ'), + (0xFB4C, 'M', 'בֿ'), + (0xFB4D, 'M', 'כֿ'), + (0xFB4E, 'M', 'פֿ'), + (0xFB4F, 'M', 'אל'), + (0xFB50, 'M', 'ٱ'), + (0xFB52, 'M', 'ٻ'), + (0xFB56, 'M', 'پ'), + (0xFB5A, 'M', 'ڀ'), + (0xFB5E, 'M', 'ٺ'), + (0xFB62, 'M', 'ٿ'), + (0xFB66, 'M', 'ٹ'), + (0xFB6A, 'M', 'ڤ'), + (0xFB6E, 'M', 'ڦ'), + (0xFB72, 'M', 'ڄ'), + (0xFB76, 'M', 'ڃ'), + (0xFB7A, 'M', 'چ'), + (0xFB7E, 'M', 'ڇ'), + (0xFB82, 'M', 'ڍ'), + (0xFB84, 'M', 'ڌ'), + (0xFB86, 'M', 'ڎ'), + (0xFB88, 'M', 'ڈ'), + (0xFB8A, 'M', 'ژ'), + (0xFB8C, 'M', 'ڑ'), + (0xFB8E, 'M', 'ک'), + (0xFB92, 'M', 'گ'), + (0xFB96, 'M', 'ڳ'), + (0xFB9A, 'M', 'ڱ'), + (0xFB9E, 'M', 'ں'), + (0xFBA0, 'M', 'ڻ'), + (0xFBA4, 'M', 'ۀ'), + (0xFBA6, 'M', 'ہ'), + (0xFBAA, 'M', 'ھ'), + (0xFBAE, 'M', 'ے'), + (0xFBB0, 'M', 'ۓ'), (0xFBB2, 'V'), - (0xFBC2, 'X'), - (0xFBD3, 'M', u'ڭ'), - (0xFBD7, 'M', u'ۇ'), - (0xFBD9, 'M', u'ۆ'), - (0xFBDB, 'M', u'ۈ'), - (0xFBDD, 'M', u'ۇٴ'), - (0xFBDE, 'M', u'ۋ'), - (0xFBE0, 'M', u'ۅ'), - (0xFBE2, 'M', u'ۉ'), - (0xFBE4, 'M', u'ې'), - (0xFBE8, 'M', u'ى'), - (0xFBEA, 'M', u'ئا'), - (0xFBEC, 'M', u'ئە'), - (0xFBEE, 'M', u'ئو'), - (0xFBF0, 'M', u'ئۇ'), - (0xFBF2, 'M', u'ئۆ'), + (0xFBC3, 'X'), + (0xFBD3, 'M', 'ڭ'), + (0xFBD7, 'M', 'ۇ'), + (0xFBD9, 'M', 'ۆ'), + (0xFBDB, 'M', 'ۈ'), + (0xFBDD, 'M', 'ۇٴ'), + (0xFBDE, 'M', 'ۋ'), ] -def _seg_45(): +def _seg_45() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0xFBF4, 'M', u'ئۈ'), - (0xFBF6, 'M', u'ئې'), - (0xFBF9, 'M', u'ئى'), - (0xFBFC, 'M', u'ی'), - (0xFC00, 'M', u'ئج'), - (0xFC01, 'M', u'ئح'), - (0xFC02, 'M', u'ئم'), - (0xFC03, 'M', u'ئى'), - (0xFC04, 'M', u'ئي'), - (0xFC05, 'M', u'بج'), - (0xFC06, 'M', u'بح'), - (0xFC07, 'M', u'بخ'), - (0xFC08, 'M', u'بم'), - (0xFC09, 'M', u'بى'), - (0xFC0A, 'M', u'بي'), - (0xFC0B, 'M', u'تج'), - (0xFC0C, 'M', u'تح'), - (0xFC0D, 'M', u'تخ'), - (0xFC0E, 'M', u'تم'), - (0xFC0F, 'M', u'تى'), - (0xFC10, 'M', u'تي'), - (0xFC11, 'M', u'ثج'), - (0xFC12, 'M', u'ثم'), - (0xFC13, 'M', u'ثى'), - (0xFC14, 'M', u'ثي'), - (0xFC15, 'M', u'جح'), - (0xFC16, 'M', u'جم'), - (0xFC17, 'M', u'حج'), - (0xFC18, 'M', u'حم'), - (0xFC19, 'M', u'خج'), - (0xFC1A, 'M', u'خح'), - (0xFC1B, 'M', u'خم'), - (0xFC1C, 'M', u'سج'), - (0xFC1D, 'M', u'سح'), - (0xFC1E, 'M', u'سخ'), - (0xFC1F, 'M', u'سم'), - (0xFC20, 'M', u'صح'), - (0xFC21, 'M', u'صم'), - (0xFC22, 'M', u'ضج'), - (0xFC23, 'M', u'ضح'), - (0xFC24, 'M', u'ضخ'), - (0xFC25, 'M', u'ضم'), - (0xFC26, 'M', u'طح'), - (0xFC27, 'M', u'طم'), - (0xFC28, 'M', u'ظم'), - (0xFC29, 'M', u'عج'), - (0xFC2A, 'M', u'عم'), - (0xFC2B, 'M', u'غج'), - (0xFC2C, 'M', u'غم'), - (0xFC2D, 'M', u'فج'), - (0xFC2E, 'M', u'فح'), - (0xFC2F, 'M', u'فخ'), - (0xFC30, 'M', u'فم'), - (0xFC31, 'M', u'فى'), - (0xFC32, 'M', u'في'), - (0xFC33, 'M', u'قح'), - (0xFC34, 'M', u'قم'), - (0xFC35, 'M', u'قى'), - (0xFC36, 'M', u'قي'), - (0xFC37, 'M', u'كا'), - (0xFC38, 'M', u'كج'), - (0xFC39, 'M', u'كح'), - (0xFC3A, 'M', u'كخ'), - (0xFC3B, 'M', u'كل'), - (0xFC3C, 'M', u'كم'), - (0xFC3D, 'M', u'كى'), - (0xFC3E, 'M', u'كي'), - (0xFC3F, 'M', u'لج'), - (0xFC40, 'M', u'لح'), - (0xFC41, 'M', u'لخ'), - (0xFC42, 'M', u'لم'), - (0xFC43, 'M', u'لى'), - (0xFC44, 'M', u'لي'), - (0xFC45, 'M', u'مج'), - (0xFC46, 'M', u'مح'), - (0xFC47, 'M', u'مخ'), - (0xFC48, 'M', u'مم'), - (0xFC49, 'M', u'مى'), - (0xFC4A, 'M', u'مي'), - (0xFC4B, 'M', u'نج'), - (0xFC4C, 'M', u'نح'), - (0xFC4D, 'M', u'نخ'), - (0xFC4E, 'M', u'نم'), - (0xFC4F, 'M', u'نى'), - (0xFC50, 'M', u'ني'), - (0xFC51, 'M', u'هج'), - (0xFC52, 'M', u'هم'), - (0xFC53, 'M', u'هى'), - (0xFC54, 'M', u'هي'), - (0xFC55, 'M', u'يج'), - (0xFC56, 'M', u'يح'), - (0xFC57, 'M', u'يخ'), - (0xFC58, 'M', u'يم'), - (0xFC59, 'M', u'يى'), - (0xFC5A, 'M', u'يي'), - (0xFC5B, 'M', u'ذٰ'), - (0xFC5C, 'M', u'رٰ'), - (0xFC5D, 'M', u'ىٰ'), - (0xFC5E, '3', u' ٌّ'), - (0xFC5F, '3', u' ٍّ'), + (0xFBE0, 'M', 'ۅ'), + (0xFBE2, 'M', 'ۉ'), + (0xFBE4, 'M', 'ې'), + (0xFBE8, 'M', 'ى'), + (0xFBEA, 'M', 'ئا'), + (0xFBEC, 'M', 'ئە'), + (0xFBEE, 'M', 'ئو'), + (0xFBF0, 'M', 'ئۇ'), + (0xFBF2, 'M', 'ئۆ'), + (0xFBF4, 'M', 'ئۈ'), + (0xFBF6, 'M', 'ئې'), + (0xFBF9, 'M', 'ئى'), + (0xFBFC, 'M', 'ی'), + (0xFC00, 'M', 'ئج'), + (0xFC01, 'M', 'ئح'), + (0xFC02, 'M', 'ئم'), + (0xFC03, 'M', 'ئى'), + (0xFC04, 'M', 'ئي'), + (0xFC05, 'M', 'بج'), + (0xFC06, 'M', 'بح'), + (0xFC07, 'M', 'بخ'), + (0xFC08, 'M', 'بم'), + (0xFC09, 'M', 'بى'), + (0xFC0A, 'M', 'بي'), + (0xFC0B, 'M', 'تج'), + (0xFC0C, 'M', 'تح'), + (0xFC0D, 'M', 'تخ'), + (0xFC0E, 'M', 'تم'), + (0xFC0F, 'M', 'تى'), + (0xFC10, 'M', 'تي'), + (0xFC11, 'M', 'ثج'), + (0xFC12, 'M', 'ثم'), + (0xFC13, 'M', 'ثى'), + (0xFC14, 'M', 'ثي'), + (0xFC15, 'M', 'جح'), + (0xFC16, 'M', 'جم'), + (0xFC17, 'M', 'حج'), + (0xFC18, 'M', 'حم'), + (0xFC19, 'M', 'خج'), + (0xFC1A, 'M', 'خح'), + (0xFC1B, 'M', 'خم'), + (0xFC1C, 'M', 'سج'), + (0xFC1D, 'M', 'سح'), + (0xFC1E, 'M', 'سخ'), + (0xFC1F, 'M', 'سم'), + (0xFC20, 'M', 'صح'), + (0xFC21, 'M', 'صم'), + (0xFC22, 'M', 'ضج'), + (0xFC23, 'M', 'ضح'), + (0xFC24, 'M', 'ضخ'), + (0xFC25, 'M', 'ضم'), + (0xFC26, 'M', 'طح'), + (0xFC27, 'M', 'طم'), + (0xFC28, 'M', 'ظم'), + (0xFC29, 'M', 'عج'), + (0xFC2A, 'M', 'عم'), + (0xFC2B, 'M', 'غج'), + (0xFC2C, 'M', 'غم'), + (0xFC2D, 'M', 'فج'), + (0xFC2E, 'M', 'فح'), + (0xFC2F, 'M', 'فخ'), + (0xFC30, 'M', 'فم'), + (0xFC31, 'M', 'فى'), + (0xFC32, 'M', 'في'), + (0xFC33, 'M', 'قح'), + (0xFC34, 'M', 'قم'), + (0xFC35, 'M', 'قى'), + (0xFC36, 'M', 'قي'), + (0xFC37, 'M', 'كا'), + (0xFC38, 'M', 'كج'), + (0xFC39, 'M', 'كح'), + (0xFC3A, 'M', 'كخ'), + (0xFC3B, 'M', 'كل'), + (0xFC3C, 'M', 'كم'), + (0xFC3D, 'M', 'كى'), + (0xFC3E, 'M', 'كي'), + (0xFC3F, 'M', 'لج'), + (0xFC40, 'M', 'لح'), + (0xFC41, 'M', 'لخ'), + (0xFC42, 'M', 'لم'), + (0xFC43, 'M', 'لى'), + (0xFC44, 'M', 'لي'), + (0xFC45, 'M', 'مج'), + (0xFC46, 'M', 'مح'), + (0xFC47, 'M', 'مخ'), + (0xFC48, 'M', 'مم'), + (0xFC49, 'M', 'مى'), + (0xFC4A, 'M', 'مي'), + (0xFC4B, 'M', 'نج'), + (0xFC4C, 'M', 'نح'), + (0xFC4D, 'M', 'نخ'), + (0xFC4E, 'M', 'نم'), + (0xFC4F, 'M', 'نى'), + (0xFC50, 'M', 'ني'), + (0xFC51, 'M', 'هج'), + (0xFC52, 'M', 'هم'), + (0xFC53, 'M', 'هى'), + (0xFC54, 'M', 'هي'), + (0xFC55, 'M', 'يج'), + (0xFC56, 'M', 'يح'), ] -def _seg_46(): +def _seg_46() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0xFC60, '3', u' َّ'), - (0xFC61, '3', u' ُّ'), - (0xFC62, '3', u' ِّ'), - (0xFC63, '3', u' ّٰ'), - (0xFC64, 'M', u'ئر'), - (0xFC65, 'M', u'ئز'), - (0xFC66, 'M', u'ئم'), - (0xFC67, 'M', u'ئن'), - (0xFC68, 'M', u'ئى'), - (0xFC69, 'M', u'ئي'), - (0xFC6A, 'M', u'بر'), - (0xFC6B, 'M', u'بز'), - (0xFC6C, 'M', u'بم'), - (0xFC6D, 'M', u'بن'), - (0xFC6E, 'M', u'بى'), - (0xFC6F, 'M', u'بي'), - (0xFC70, 'M', u'تر'), - (0xFC71, 'M', u'تز'), - (0xFC72, 'M', u'تم'), - (0xFC73, 'M', u'تن'), - (0xFC74, 'M', u'تى'), - (0xFC75, 'M', u'تي'), - (0xFC76, 'M', u'ثر'), - (0xFC77, 'M', u'ثز'), - (0xFC78, 'M', u'ثم'), - (0xFC79, 'M', u'ثن'), - (0xFC7A, 'M', u'ثى'), - (0xFC7B, 'M', u'ثي'), - (0xFC7C, 'M', u'فى'), - (0xFC7D, 'M', u'في'), - (0xFC7E, 'M', u'قى'), - (0xFC7F, 'M', u'قي'), - (0xFC80, 'M', u'كا'), - (0xFC81, 'M', u'كل'), - (0xFC82, 'M', u'كم'), - (0xFC83, 'M', u'كى'), - (0xFC84, 'M', u'كي'), - (0xFC85, 'M', u'لم'), - (0xFC86, 'M', u'لى'), - (0xFC87, 'M', u'لي'), - (0xFC88, 'M', u'ما'), - (0xFC89, 'M', u'مم'), - (0xFC8A, 'M', u'نر'), - (0xFC8B, 'M', u'نز'), - (0xFC8C, 'M', u'نم'), - (0xFC8D, 'M', u'نن'), - (0xFC8E, 'M', u'نى'), - (0xFC8F, 'M', u'ني'), - (0xFC90, 'M', u'ىٰ'), - (0xFC91, 'M', u'ير'), - (0xFC92, 'M', u'يز'), - (0xFC93, 'M', u'يم'), - (0xFC94, 'M', u'ين'), - (0xFC95, 'M', u'يى'), - (0xFC96, 'M', u'يي'), - (0xFC97, 'M', u'ئج'), - (0xFC98, 'M', u'ئح'), - (0xFC99, 'M', u'ئخ'), - (0xFC9A, 'M', u'ئم'), - (0xFC9B, 'M', u'ئه'), - (0xFC9C, 'M', u'بج'), - (0xFC9D, 'M', u'بح'), - (0xFC9E, 'M', u'بخ'), - (0xFC9F, 'M', u'بم'), - (0xFCA0, 'M', u'به'), - (0xFCA1, 'M', u'تج'), - (0xFCA2, 'M', u'تح'), - (0xFCA3, 'M', u'تخ'), - (0xFCA4, 'M', u'تم'), - (0xFCA5, 'M', u'ته'), - (0xFCA6, 'M', u'ثم'), - (0xFCA7, 'M', u'جح'), - (0xFCA8, 'M', u'جم'), - (0xFCA9, 'M', u'حج'), - (0xFCAA, 'M', u'حم'), - (0xFCAB, 'M', u'خج'), - (0xFCAC, 'M', u'خم'), - (0xFCAD, 'M', u'سج'), - (0xFCAE, 'M', u'سح'), - (0xFCAF, 'M', u'سخ'), - (0xFCB0, 'M', u'سم'), - (0xFCB1, 'M', u'صح'), - (0xFCB2, 'M', u'صخ'), - (0xFCB3, 'M', u'صم'), - (0xFCB4, 'M', u'ضج'), - (0xFCB5, 'M', u'ضح'), - (0xFCB6, 'M', u'ضخ'), - (0xFCB7, 'M', u'ضم'), - (0xFCB8, 'M', u'طح'), - (0xFCB9, 'M', u'ظم'), - (0xFCBA, 'M', u'عج'), - (0xFCBB, 'M', u'عم'), - (0xFCBC, 'M', u'غج'), - (0xFCBD, 'M', u'غم'), - (0xFCBE, 'M', u'فج'), - (0xFCBF, 'M', u'فح'), - (0xFCC0, 'M', u'فخ'), - (0xFCC1, 'M', u'فم'), - (0xFCC2, 'M', u'قح'), - (0xFCC3, 'M', u'قم'), + (0xFC57, 'M', 'يخ'), + (0xFC58, 'M', 'يم'), + (0xFC59, 'M', 'يى'), + (0xFC5A, 'M', 'يي'), + (0xFC5B, 'M', 'ذٰ'), + (0xFC5C, 'M', 'رٰ'), + (0xFC5D, 'M', 'ىٰ'), + (0xFC5E, '3', ' ٌّ'), + (0xFC5F, '3', ' ٍّ'), + (0xFC60, '3', ' َّ'), + (0xFC61, '3', ' ُّ'), + (0xFC62, '3', ' ِّ'), + (0xFC63, '3', ' ّٰ'), + (0xFC64, 'M', 'ئر'), + (0xFC65, 'M', 'ئز'), + (0xFC66, 'M', 'ئم'), + (0xFC67, 'M', 'ئن'), + (0xFC68, 'M', 'ئى'), + (0xFC69, 'M', 'ئي'), + (0xFC6A, 'M', 'بر'), + (0xFC6B, 'M', 'بز'), + (0xFC6C, 'M', 'بم'), + (0xFC6D, 'M', 'بن'), + (0xFC6E, 'M', 'بى'), + (0xFC6F, 'M', 'بي'), + (0xFC70, 'M', 'تر'), + (0xFC71, 'M', 'تز'), + (0xFC72, 'M', 'تم'), + (0xFC73, 'M', 'تن'), + (0xFC74, 'M', 'تى'), + (0xFC75, 'M', 'تي'), + (0xFC76, 'M', 'ثر'), + (0xFC77, 'M', 'ثز'), + (0xFC78, 'M', 'ثم'), + (0xFC79, 'M', 'ثن'), + (0xFC7A, 'M', 'ثى'), + (0xFC7B, 'M', 'ثي'), + (0xFC7C, 'M', 'فى'), + (0xFC7D, 'M', 'في'), + (0xFC7E, 'M', 'قى'), + (0xFC7F, 'M', 'قي'), + (0xFC80, 'M', 'كا'), + (0xFC81, 'M', 'كل'), + (0xFC82, 'M', 'كم'), + (0xFC83, 'M', 'كى'), + (0xFC84, 'M', 'كي'), + (0xFC85, 'M', 'لم'), + (0xFC86, 'M', 'لى'), + (0xFC87, 'M', 'لي'), + (0xFC88, 'M', 'ما'), + (0xFC89, 'M', 'مم'), + (0xFC8A, 'M', 'نر'), + (0xFC8B, 'M', 'نز'), + (0xFC8C, 'M', 'نم'), + (0xFC8D, 'M', 'نن'), + (0xFC8E, 'M', 'نى'), + (0xFC8F, 'M', 'ني'), + (0xFC90, 'M', 'ىٰ'), + (0xFC91, 'M', 'ير'), + (0xFC92, 'M', 'يز'), + (0xFC93, 'M', 'يم'), + (0xFC94, 'M', 'ين'), + (0xFC95, 'M', 'يى'), + (0xFC96, 'M', 'يي'), + (0xFC97, 'M', 'ئج'), + (0xFC98, 'M', 'ئح'), + (0xFC99, 'M', 'ئخ'), + (0xFC9A, 'M', 'ئم'), + (0xFC9B, 'M', 'ئه'), + (0xFC9C, 'M', 'بج'), + (0xFC9D, 'M', 'بح'), + (0xFC9E, 'M', 'بخ'), + (0xFC9F, 'M', 'بم'), + (0xFCA0, 'M', 'به'), + (0xFCA1, 'M', 'تج'), + (0xFCA2, 'M', 'تح'), + (0xFCA3, 'M', 'تخ'), + (0xFCA4, 'M', 'تم'), + (0xFCA5, 'M', 'ته'), + (0xFCA6, 'M', 'ثم'), + (0xFCA7, 'M', 'جح'), + (0xFCA8, 'M', 'جم'), + (0xFCA9, 'M', 'حج'), + (0xFCAA, 'M', 'حم'), + (0xFCAB, 'M', 'خج'), + (0xFCAC, 'M', 'خم'), + (0xFCAD, 'M', 'سج'), + (0xFCAE, 'M', 'سح'), + (0xFCAF, 'M', 'سخ'), + (0xFCB0, 'M', 'سم'), + (0xFCB1, 'M', 'صح'), + (0xFCB2, 'M', 'صخ'), + (0xFCB3, 'M', 'صم'), + (0xFCB4, 'M', 'ضج'), + (0xFCB5, 'M', 'ضح'), + (0xFCB6, 'M', 'ضخ'), + (0xFCB7, 'M', 'ضم'), + (0xFCB8, 'M', 'طح'), + (0xFCB9, 'M', 'ظم'), + (0xFCBA, 'M', 'عج'), ] -def _seg_47(): +def _seg_47() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0xFCC4, 'M', u'كج'), - (0xFCC5, 'M', u'كح'), - (0xFCC6, 'M', u'كخ'), - (0xFCC7, 'M', u'كل'), - (0xFCC8, 'M', u'كم'), - (0xFCC9, 'M', u'لج'), - (0xFCCA, 'M', u'لح'), - (0xFCCB, 'M', u'لخ'), - (0xFCCC, 'M', u'لم'), - (0xFCCD, 'M', u'له'), - (0xFCCE, 'M', u'مج'), - (0xFCCF, 'M', u'مح'), - (0xFCD0, 'M', u'مخ'), - (0xFCD1, 'M', u'مم'), - (0xFCD2, 'M', u'نج'), - (0xFCD3, 'M', u'نح'), - (0xFCD4, 'M', u'نخ'), - (0xFCD5, 'M', u'نم'), - (0xFCD6, 'M', u'نه'), - (0xFCD7, 'M', u'هج'), - (0xFCD8, 'M', u'هم'), - (0xFCD9, 'M', u'هٰ'), - (0xFCDA, 'M', u'يج'), - (0xFCDB, 'M', u'يح'), - (0xFCDC, 'M', u'يخ'), - (0xFCDD, 'M', u'يم'), - (0xFCDE, 'M', u'يه'), - (0xFCDF, 'M', u'ئم'), - (0xFCE0, 'M', u'ئه'), - (0xFCE1, 'M', u'بم'), - (0xFCE2, 'M', u'به'), - (0xFCE3, 'M', u'تم'), - (0xFCE4, 'M', u'ته'), - (0xFCE5, 'M', u'ثم'), - (0xFCE6, 'M', u'ثه'), - (0xFCE7, 'M', u'سم'), - (0xFCE8, 'M', u'سه'), - (0xFCE9, 'M', u'شم'), - (0xFCEA, 'M', u'شه'), - (0xFCEB, 'M', u'كل'), - (0xFCEC, 'M', u'كم'), - (0xFCED, 'M', u'لم'), - (0xFCEE, 'M', u'نم'), - (0xFCEF, 'M', u'نه'), - (0xFCF0, 'M', u'يم'), - (0xFCF1, 'M', u'يه'), - (0xFCF2, 'M', u'ـَّ'), - (0xFCF3, 'M', u'ـُّ'), - (0xFCF4, 'M', u'ـِّ'), - (0xFCF5, 'M', u'طى'), - (0xFCF6, 'M', u'طي'), - (0xFCF7, 'M', u'عى'), - (0xFCF8, 'M', u'عي'), - (0xFCF9, 'M', u'غى'), - (0xFCFA, 'M', u'غي'), - (0xFCFB, 'M', u'سى'), - (0xFCFC, 'M', u'سي'), - (0xFCFD, 'M', u'شى'), - (0xFCFE, 'M', u'شي'), - (0xFCFF, 'M', u'حى'), - (0xFD00, 'M', u'حي'), - (0xFD01, 'M', u'جى'), - (0xFD02, 'M', u'جي'), - (0xFD03, 'M', u'خى'), - (0xFD04, 'M', u'خي'), - (0xFD05, 'M', u'صى'), - (0xFD06, 'M', u'صي'), - (0xFD07, 'M', u'ضى'), - (0xFD08, 'M', u'ضي'), - (0xFD09, 'M', u'شج'), - (0xFD0A, 'M', u'شح'), - (0xFD0B, 'M', u'شخ'), - (0xFD0C, 'M', u'شم'), - (0xFD0D, 'M', u'شر'), - (0xFD0E, 'M', u'سر'), - (0xFD0F, 'M', u'صر'), - (0xFD10, 'M', u'ضر'), - (0xFD11, 'M', u'طى'), - (0xFD12, 'M', u'طي'), - (0xFD13, 'M', u'عى'), - (0xFD14, 'M', u'عي'), - (0xFD15, 'M', u'غى'), - (0xFD16, 'M', u'غي'), - (0xFD17, 'M', u'سى'), - (0xFD18, 'M', u'سي'), - (0xFD19, 'M', u'شى'), - (0xFD1A, 'M', u'شي'), - (0xFD1B, 'M', u'حى'), - (0xFD1C, 'M', u'حي'), - (0xFD1D, 'M', u'جى'), - (0xFD1E, 'M', u'جي'), - (0xFD1F, 'M', u'خى'), - (0xFD20, 'M', u'خي'), - (0xFD21, 'M', u'صى'), - (0xFD22, 'M', u'صي'), - (0xFD23, 'M', u'ضى'), - (0xFD24, 'M', u'ضي'), - (0xFD25, 'M', u'شج'), - (0xFD26, 'M', u'شح'), - (0xFD27, 'M', u'شخ'), + (0xFCBB, 'M', 'عم'), + (0xFCBC, 'M', 'غج'), + (0xFCBD, 'M', 'غم'), + (0xFCBE, 'M', 'فج'), + (0xFCBF, 'M', 'فح'), + (0xFCC0, 'M', 'فخ'), + (0xFCC1, 'M', 'فم'), + (0xFCC2, 'M', 'قح'), + (0xFCC3, 'M', 'قم'), + (0xFCC4, 'M', 'كج'), + (0xFCC5, 'M', 'كح'), + (0xFCC6, 'M', 'كخ'), + (0xFCC7, 'M', 'كل'), + (0xFCC8, 'M', 'كم'), + (0xFCC9, 'M', 'لج'), + (0xFCCA, 'M', 'لح'), + (0xFCCB, 'M', 'لخ'), + (0xFCCC, 'M', 'لم'), + (0xFCCD, 'M', 'له'), + (0xFCCE, 'M', 'مج'), + (0xFCCF, 'M', 'مح'), + (0xFCD0, 'M', 'مخ'), + (0xFCD1, 'M', 'مم'), + (0xFCD2, 'M', 'نج'), + (0xFCD3, 'M', 'نح'), + (0xFCD4, 'M', 'نخ'), + (0xFCD5, 'M', 'نم'), + (0xFCD6, 'M', 'نه'), + (0xFCD7, 'M', 'هج'), + (0xFCD8, 'M', 'هم'), + (0xFCD9, 'M', 'هٰ'), + (0xFCDA, 'M', 'يج'), + (0xFCDB, 'M', 'يح'), + (0xFCDC, 'M', 'يخ'), + (0xFCDD, 'M', 'يم'), + (0xFCDE, 'M', 'يه'), + (0xFCDF, 'M', 'ئم'), + (0xFCE0, 'M', 'ئه'), + (0xFCE1, 'M', 'بم'), + (0xFCE2, 'M', 'به'), + (0xFCE3, 'M', 'تم'), + (0xFCE4, 'M', 'ته'), + (0xFCE5, 'M', 'ثم'), + (0xFCE6, 'M', 'ثه'), + (0xFCE7, 'M', 'سم'), + (0xFCE8, 'M', 'سه'), + (0xFCE9, 'M', 'شم'), + (0xFCEA, 'M', 'شه'), + (0xFCEB, 'M', 'كل'), + (0xFCEC, 'M', 'كم'), + (0xFCED, 'M', 'لم'), + (0xFCEE, 'M', 'نم'), + (0xFCEF, 'M', 'نه'), + (0xFCF0, 'M', 'يم'), + (0xFCF1, 'M', 'يه'), + (0xFCF2, 'M', 'ـَّ'), + (0xFCF3, 'M', 'ـُّ'), + (0xFCF4, 'M', 'ـِّ'), + (0xFCF5, 'M', 'طى'), + (0xFCF6, 'M', 'طي'), + (0xFCF7, 'M', 'عى'), + (0xFCF8, 'M', 'عي'), + (0xFCF9, 'M', 'غى'), + (0xFCFA, 'M', 'غي'), + (0xFCFB, 'M', 'سى'), + (0xFCFC, 'M', 'سي'), + (0xFCFD, 'M', 'شى'), + (0xFCFE, 'M', 'شي'), + (0xFCFF, 'M', 'حى'), + (0xFD00, 'M', 'حي'), + (0xFD01, 'M', 'جى'), + (0xFD02, 'M', 'جي'), + (0xFD03, 'M', 'خى'), + (0xFD04, 'M', 'خي'), + (0xFD05, 'M', 'صى'), + (0xFD06, 'M', 'صي'), + (0xFD07, 'M', 'ضى'), + (0xFD08, 'M', 'ضي'), + (0xFD09, 'M', 'شج'), + (0xFD0A, 'M', 'شح'), + (0xFD0B, 'M', 'شخ'), + (0xFD0C, 'M', 'شم'), + (0xFD0D, 'M', 'شر'), + (0xFD0E, 'M', 'سر'), + (0xFD0F, 'M', 'صر'), + (0xFD10, 'M', 'ضر'), + (0xFD11, 'M', 'طى'), + (0xFD12, 'M', 'طي'), + (0xFD13, 'M', 'عى'), + (0xFD14, 'M', 'عي'), + (0xFD15, 'M', 'غى'), + (0xFD16, 'M', 'غي'), + (0xFD17, 'M', 'سى'), + (0xFD18, 'M', 'سي'), + (0xFD19, 'M', 'شى'), + (0xFD1A, 'M', 'شي'), + (0xFD1B, 'M', 'حى'), + (0xFD1C, 'M', 'حي'), + (0xFD1D, 'M', 'جى'), + (0xFD1E, 'M', 'جي'), ] -def _seg_48(): +def _seg_48() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0xFD28, 'M', u'شم'), - (0xFD29, 'M', u'شر'), - (0xFD2A, 'M', u'سر'), - (0xFD2B, 'M', u'صر'), - (0xFD2C, 'M', u'ضر'), - (0xFD2D, 'M', u'شج'), - (0xFD2E, 'M', u'شح'), - (0xFD2F, 'M', u'شخ'), - (0xFD30, 'M', u'شم'), - (0xFD31, 'M', u'سه'), - (0xFD32, 'M', u'شه'), - (0xFD33, 'M', u'طم'), - (0xFD34, 'M', u'سج'), - (0xFD35, 'M', u'سح'), - (0xFD36, 'M', u'سخ'), - (0xFD37, 'M', u'شج'), - (0xFD38, 'M', u'شح'), - (0xFD39, 'M', u'شخ'), - (0xFD3A, 'M', u'طم'), - (0xFD3B, 'M', u'ظم'), - (0xFD3C, 'M', u'اً'), + (0xFD1F, 'M', 'خى'), + (0xFD20, 'M', 'خي'), + (0xFD21, 'M', 'صى'), + (0xFD22, 'M', 'صي'), + (0xFD23, 'M', 'ضى'), + (0xFD24, 'M', 'ضي'), + (0xFD25, 'M', 'شج'), + (0xFD26, 'M', 'شح'), + (0xFD27, 'M', 'شخ'), + (0xFD28, 'M', 'شم'), + (0xFD29, 'M', 'شر'), + (0xFD2A, 'M', 'سر'), + (0xFD2B, 'M', 'صر'), + (0xFD2C, 'M', 'ضر'), + (0xFD2D, 'M', 'شج'), + (0xFD2E, 'M', 'شح'), + (0xFD2F, 'M', 'شخ'), + (0xFD30, 'M', 'شم'), + (0xFD31, 'M', 'سه'), + (0xFD32, 'M', 'شه'), + (0xFD33, 'M', 'طم'), + (0xFD34, 'M', 'سج'), + (0xFD35, 'M', 'سح'), + (0xFD36, 'M', 'سخ'), + (0xFD37, 'M', 'شج'), + (0xFD38, 'M', 'شح'), + (0xFD39, 'M', 'شخ'), + (0xFD3A, 'M', 'طم'), + (0xFD3B, 'M', 'ظم'), + (0xFD3C, 'M', 'اً'), (0xFD3E, 'V'), - (0xFD40, 'X'), - (0xFD50, 'M', u'تجم'), - (0xFD51, 'M', u'تحج'), - (0xFD53, 'M', u'تحم'), - (0xFD54, 'M', u'تخم'), - (0xFD55, 'M', u'تمج'), - (0xFD56, 'M', u'تمح'), - (0xFD57, 'M', u'تمخ'), - (0xFD58, 'M', u'جمح'), - (0xFD5A, 'M', u'حمي'), - (0xFD5B, 'M', u'حمى'), - (0xFD5C, 'M', u'سحج'), - (0xFD5D, 'M', u'سجح'), - (0xFD5E, 'M', u'سجى'), - (0xFD5F, 'M', u'سمح'), - (0xFD61, 'M', u'سمج'), - (0xFD62, 'M', u'سمم'), - (0xFD64, 'M', u'صحح'), - (0xFD66, 'M', u'صمم'), - (0xFD67, 'M', u'شحم'), - (0xFD69, 'M', u'شجي'), - (0xFD6A, 'M', u'شمخ'), - (0xFD6C, 'M', u'شمم'), - (0xFD6E, 'M', u'ضحى'), - (0xFD6F, 'M', u'ضخم'), - (0xFD71, 'M', u'طمح'), - (0xFD73, 'M', u'طمم'), - (0xFD74, 'M', u'طمي'), - (0xFD75, 'M', u'عجم'), - (0xFD76, 'M', u'عمم'), - (0xFD78, 'M', u'عمى'), - (0xFD79, 'M', u'غمم'), - (0xFD7A, 'M', u'غمي'), - (0xFD7B, 'M', u'غمى'), - (0xFD7C, 'M', u'فخم'), - (0xFD7E, 'M', u'قمح'), - (0xFD7F, 'M', u'قمم'), - (0xFD80, 'M', u'لحم'), - (0xFD81, 'M', u'لحي'), - (0xFD82, 'M', u'لحى'), - (0xFD83, 'M', u'لجج'), - (0xFD85, 'M', u'لخم'), - (0xFD87, 'M', u'لمح'), - (0xFD89, 'M', u'محج'), - (0xFD8A, 'M', u'محم'), - (0xFD8B, 'M', u'محي'), - (0xFD8C, 'M', u'مجح'), - (0xFD8D, 'M', u'مجم'), - (0xFD8E, 'M', u'مخج'), - (0xFD8F, 'M', u'مخم'), + (0xFD50, 'M', 'تجم'), + (0xFD51, 'M', 'تحج'), + (0xFD53, 'M', 'تحم'), + (0xFD54, 'M', 'تخم'), + (0xFD55, 'M', 'تمج'), + (0xFD56, 'M', 'تمح'), + (0xFD57, 'M', 'تمخ'), + (0xFD58, 'M', 'جمح'), + (0xFD5A, 'M', 'حمي'), + (0xFD5B, 'M', 'حمى'), + (0xFD5C, 'M', 'سحج'), + (0xFD5D, 'M', 'سجح'), + (0xFD5E, 'M', 'سجى'), + (0xFD5F, 'M', 'سمح'), + (0xFD61, 'M', 'سمج'), + (0xFD62, 'M', 'سمم'), + (0xFD64, 'M', 'صحح'), + (0xFD66, 'M', 'صمم'), + (0xFD67, 'M', 'شحم'), + (0xFD69, 'M', 'شجي'), + (0xFD6A, 'M', 'شمخ'), + (0xFD6C, 'M', 'شمم'), + (0xFD6E, 'M', 'ضحى'), + (0xFD6F, 'M', 'ضخم'), + (0xFD71, 'M', 'طمح'), + (0xFD73, 'M', 'طمم'), + (0xFD74, 'M', 'طمي'), + (0xFD75, 'M', 'عجم'), + (0xFD76, 'M', 'عمم'), + (0xFD78, 'M', 'عمى'), + (0xFD79, 'M', 'غمم'), + (0xFD7A, 'M', 'غمي'), + (0xFD7B, 'M', 'غمى'), + (0xFD7C, 'M', 'فخم'), + (0xFD7E, 'M', 'قمح'), + (0xFD7F, 'M', 'قمم'), + (0xFD80, 'M', 'لحم'), + (0xFD81, 'M', 'لحي'), + (0xFD82, 'M', 'لحى'), + (0xFD83, 'M', 'لجج'), + (0xFD85, 'M', 'لخم'), + (0xFD87, 'M', 'لمح'), + (0xFD89, 'M', 'محج'), + (0xFD8A, 'M', 'محم'), + (0xFD8B, 'M', 'محي'), + (0xFD8C, 'M', 'مجح'), + (0xFD8D, 'M', 'مجم'), + (0xFD8E, 'M', 'مخج'), + (0xFD8F, 'M', 'مخم'), (0xFD90, 'X'), - (0xFD92, 'M', u'مجخ'), - (0xFD93, 'M', u'همج'), - (0xFD94, 'M', u'همم'), - (0xFD95, 'M', u'نحم'), - (0xFD96, 'M', u'نحى'), - (0xFD97, 'M', u'نجم'), - (0xFD99, 'M', u'نجى'), - (0xFD9A, 'M', u'نمي'), - (0xFD9B, 'M', u'نمى'), - (0xFD9C, 'M', u'يمم'), - (0xFD9E, 'M', u'بخي'), - (0xFD9F, 'M', u'تجي'), - (0xFDA0, 'M', u'تجى'), - (0xFDA1, 'M', u'تخي'), - (0xFDA2, 'M', u'تخى'), - (0xFDA3, 'M', u'تمي'), - (0xFDA4, 'M', u'تمى'), - (0xFDA5, 'M', u'جمي'), - (0xFDA6, 'M', u'جحى'), - (0xFDA7, 'M', u'جمى'), - (0xFDA8, 'M', u'سخى'), - (0xFDA9, 'M', u'صحي'), - (0xFDAA, 'M', u'شحي'), - (0xFDAB, 'M', u'ضحي'), - (0xFDAC, 'M', u'لجي'), - (0xFDAD, 'M', u'لمي'), - (0xFDAE, 'M', u'يحي'), + (0xFD92, 'M', 'مجخ'), + (0xFD93, 'M', 'همج'), + (0xFD94, 'M', 'همم'), + (0xFD95, 'M', 'نحم'), + (0xFD96, 'M', 'نحى'), + (0xFD97, 'M', 'نجم'), + (0xFD99, 'M', 'نجى'), + (0xFD9A, 'M', 'نمي'), + (0xFD9B, 'M', 'نمى'), + (0xFD9C, 'M', 'يمم'), + (0xFD9E, 'M', 'بخي'), + (0xFD9F, 'M', 'تجي'), + (0xFDA0, 'M', 'تجى'), + (0xFDA1, 'M', 'تخي'), + (0xFDA2, 'M', 'تخى'), + (0xFDA3, 'M', 'تمي'), + (0xFDA4, 'M', 'تمى'), + (0xFDA5, 'M', 'جمي'), + (0xFDA6, 'M', 'جحى'), ] -def _seg_49(): +def _seg_49() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0xFDAF, 'M', u'يجي'), - (0xFDB0, 'M', u'يمي'), - (0xFDB1, 'M', u'ممي'), - (0xFDB2, 'M', u'قمي'), - (0xFDB3, 'M', u'نحي'), - (0xFDB4, 'M', u'قمح'), - (0xFDB5, 'M', u'لحم'), - (0xFDB6, 'M', u'عمي'), - (0xFDB7, 'M', u'كمي'), - (0xFDB8, 'M', u'نجح'), - (0xFDB9, 'M', u'مخي'), - (0xFDBA, 'M', u'لجم'), - (0xFDBB, 'M', u'كمم'), - (0xFDBC, 'M', u'لجم'), - (0xFDBD, 'M', u'نجح'), - (0xFDBE, 'M', u'جحي'), - (0xFDBF, 'M', u'حجي'), - (0xFDC0, 'M', u'مجي'), - (0xFDC1, 'M', u'فمي'), - (0xFDC2, 'M', u'بحي'), - (0xFDC3, 'M', u'كمم'), - (0xFDC4, 'M', u'عجم'), - (0xFDC5, 'M', u'صمم'), - (0xFDC6, 'M', u'سخي'), - (0xFDC7, 'M', u'نجي'), + (0xFDA7, 'M', 'جمى'), + (0xFDA8, 'M', 'سخى'), + (0xFDA9, 'M', 'صحي'), + (0xFDAA, 'M', 'شحي'), + (0xFDAB, 'M', 'ضحي'), + (0xFDAC, 'M', 'لجي'), + (0xFDAD, 'M', 'لمي'), + (0xFDAE, 'M', 'يحي'), + (0xFDAF, 'M', 'يجي'), + (0xFDB0, 'M', 'يمي'), + (0xFDB1, 'M', 'ممي'), + (0xFDB2, 'M', 'قمي'), + (0xFDB3, 'M', 'نحي'), + (0xFDB4, 'M', 'قمح'), + (0xFDB5, 'M', 'لحم'), + (0xFDB6, 'M', 'عمي'), + (0xFDB7, 'M', 'كمي'), + (0xFDB8, 'M', 'نجح'), + (0xFDB9, 'M', 'مخي'), + (0xFDBA, 'M', 'لجم'), + (0xFDBB, 'M', 'كمم'), + (0xFDBC, 'M', 'لجم'), + (0xFDBD, 'M', 'نجح'), + (0xFDBE, 'M', 'جحي'), + (0xFDBF, 'M', 'حجي'), + (0xFDC0, 'M', 'مجي'), + (0xFDC1, 'M', 'فمي'), + (0xFDC2, 'M', 'بحي'), + (0xFDC3, 'M', 'كمم'), + (0xFDC4, 'M', 'عجم'), + (0xFDC5, 'M', 'صمم'), + (0xFDC6, 'M', 'سخي'), + (0xFDC7, 'M', 'نجي'), (0xFDC8, 'X'), - (0xFDF0, 'M', u'صلے'), - (0xFDF1, 'M', u'قلے'), - (0xFDF2, 'M', u'الله'), - (0xFDF3, 'M', u'اكبر'), - (0xFDF4, 'M', u'محمد'), - (0xFDF5, 'M', u'صلعم'), - (0xFDF6, 'M', u'رسول'), - (0xFDF7, 'M', u'عليه'), - (0xFDF8, 'M', u'وسلم'), - (0xFDF9, 'M', u'صلى'), - (0xFDFA, '3', u'صلى الله عليه وسلم'), - (0xFDFB, '3', u'جل جلاله'), - (0xFDFC, 'M', u'ریال'), + (0xFDCF, 'V'), + (0xFDD0, 'X'), + (0xFDF0, 'M', 'صلے'), + (0xFDF1, 'M', 'قلے'), + (0xFDF2, 'M', 'الله'), + (0xFDF3, 'M', 'اكبر'), + (0xFDF4, 'M', 'محمد'), + (0xFDF5, 'M', 'صلعم'), + (0xFDF6, 'M', 'رسول'), + (0xFDF7, 'M', 'عليه'), + (0xFDF8, 'M', 'وسلم'), + (0xFDF9, 'M', 'صلى'), + (0xFDFA, '3', 'صلى الله عليه وسلم'), + (0xFDFB, '3', 'جل جلاله'), + (0xFDFC, 'M', 'ریال'), (0xFDFD, 'V'), - (0xFDFE, 'X'), (0xFE00, 'I'), - (0xFE10, '3', u','), - (0xFE11, 'M', u'、'), + (0xFE10, '3', ','), + (0xFE11, 'M', '、'), (0xFE12, 'X'), - (0xFE13, '3', u':'), - (0xFE14, '3', u';'), - (0xFE15, '3', u'!'), - (0xFE16, '3', u'?'), - (0xFE17, 'M', u'〖'), - (0xFE18, 'M', u'〗'), + (0xFE13, '3', ':'), + (0xFE14, '3', ';'), + (0xFE15, '3', '!'), + (0xFE16, '3', '?'), + (0xFE17, 'M', '〖'), + (0xFE18, 'M', '〗'), (0xFE19, 'X'), (0xFE20, 'V'), (0xFE30, 'X'), - (0xFE31, 'M', u'—'), - (0xFE32, 'M', u'–'), - (0xFE33, '3', u'_'), - (0xFE35, '3', u'('), - (0xFE36, '3', u')'), - (0xFE37, '3', u'{'), - (0xFE38, '3', u'}'), - (0xFE39, 'M', u'〔'), - (0xFE3A, 'M', u'〕'), - (0xFE3B, 'M', u'【'), - (0xFE3C, 'M', u'】'), - (0xFE3D, 'M', u'《'), - (0xFE3E, 'M', u'》'), - (0xFE3F, 'M', u'〈'), - (0xFE40, 'M', u'〉'), - (0xFE41, 'M', u'「'), - (0xFE42, 'M', u'」'), - (0xFE43, 'M', u'『'), - (0xFE44, 'M', u'』'), + (0xFE31, 'M', '—'), + (0xFE32, 'M', '–'), + (0xFE33, '3', '_'), + (0xFE35, '3', '('), + (0xFE36, '3', ')'), + (0xFE37, '3', '{'), + (0xFE38, '3', '}'), + (0xFE39, 'M', '〔'), + (0xFE3A, 'M', '〕'), + (0xFE3B, 'M', '【'), + (0xFE3C, 'M', '】'), + (0xFE3D, 'M', '《'), + (0xFE3E, 'M', '》'), + (0xFE3F, 'M', '〈'), + (0xFE40, 'M', '〉'), + (0xFE41, 'M', '「'), + (0xFE42, 'M', '」'), + (0xFE43, 'M', '『'), + (0xFE44, 'M', '』'), (0xFE45, 'V'), - (0xFE47, '3', u'['), - (0xFE48, '3', u']'), - (0xFE49, '3', u' ̅'), - (0xFE4D, '3', u'_'), - (0xFE50, '3', u','), - (0xFE51, 'M', u'、'), + (0xFE47, '3', '['), + (0xFE48, '3', ']'), + (0xFE49, '3', ' ̅'), + (0xFE4D, '3', '_'), + (0xFE50, '3', ','), + (0xFE51, 'M', '、'), (0xFE52, 'X'), - (0xFE54, '3', u';'), - (0xFE55, '3', u':'), - (0xFE56, '3', u'?'), - (0xFE57, '3', u'!'), - (0xFE58, 'M', u'—'), - (0xFE59, '3', u'('), - (0xFE5A, '3', u')'), - (0xFE5B, '3', u'{'), - (0xFE5C, '3', u'}'), - (0xFE5D, 'M', u'〔'), - (0xFE5E, 'M', u'〕'), - (0xFE5F, '3', u'#'), - (0xFE60, '3', u'&'), - (0xFE61, '3', u'*'), - (0xFE62, '3', u'+'), - (0xFE63, 'M', u'-'), - (0xFE64, '3', u'<'), - (0xFE65, '3', u'>'), - (0xFE66, '3', u'='), + (0xFE54, '3', ';'), + (0xFE55, '3', ':'), + (0xFE56, '3', '?'), + (0xFE57, '3', '!'), + (0xFE58, 'M', '—'), + (0xFE59, '3', '('), + (0xFE5A, '3', ')'), + (0xFE5B, '3', '{'), + (0xFE5C, '3', '}'), + (0xFE5D, 'M', '〔'), ] -def _seg_50(): +def _seg_50() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ + (0xFE5E, 'M', '〕'), + (0xFE5F, '3', '#'), + (0xFE60, '3', '&'), + (0xFE61, '3', '*'), + (0xFE62, '3', '+'), + (0xFE63, 'M', '-'), + (0xFE64, '3', '<'), + (0xFE65, '3', '>'), + (0xFE66, '3', '='), (0xFE67, 'X'), - (0xFE68, '3', u'\\'), - (0xFE69, '3', u'$'), - (0xFE6A, '3', u'%'), - (0xFE6B, '3', u'@'), + (0xFE68, '3', '\\'), + (0xFE69, '3', '$'), + (0xFE6A, '3', '%'), + (0xFE6B, '3', '@'), (0xFE6C, 'X'), - (0xFE70, '3', u' ً'), - (0xFE71, 'M', u'ـً'), - (0xFE72, '3', u' ٌ'), + (0xFE70, '3', ' ً'), + (0xFE71, 'M', 'ـً'), + (0xFE72, '3', ' ٌ'), (0xFE73, 'V'), - (0xFE74, '3', u' ٍ'), + (0xFE74, '3', ' ٍ'), (0xFE75, 'X'), - (0xFE76, '3', u' َ'), - (0xFE77, 'M', u'ـَ'), - (0xFE78, '3', u' ُ'), - (0xFE79, 'M', u'ـُ'), - (0xFE7A, '3', u' ِ'), - (0xFE7B, 'M', u'ـِ'), - (0xFE7C, '3', u' ّ'), - (0xFE7D, 'M', u'ـّ'), - (0xFE7E, '3', u' ْ'), - (0xFE7F, 'M', u'ـْ'), - (0xFE80, 'M', u'ء'), - (0xFE81, 'M', u'آ'), - (0xFE83, 'M', u'أ'), - (0xFE85, 'M', u'ؤ'), - (0xFE87, 'M', u'إ'), - (0xFE89, 'M', u'ئ'), - (0xFE8D, 'M', u'ا'), - (0xFE8F, 'M', u'ب'), - (0xFE93, 'M', u'ة'), - (0xFE95, 'M', u'ت'), - (0xFE99, 'M', u'ث'), - (0xFE9D, 'M', u'ج'), - (0xFEA1, 'M', u'ح'), - (0xFEA5, 'M', u'خ'), - (0xFEA9, 'M', u'د'), - (0xFEAB, 'M', u'ذ'), - (0xFEAD, 'M', u'ر'), - (0xFEAF, 'M', u'ز'), - (0xFEB1, 'M', u'س'), - (0xFEB5, 'M', u'ش'), - (0xFEB9, 'M', u'ص'), - (0xFEBD, 'M', u'ض'), - (0xFEC1, 'M', u'ط'), - (0xFEC5, 'M', u'ظ'), - (0xFEC9, 'M', u'ع'), - (0xFECD, 'M', u'غ'), - (0xFED1, 'M', u'ف'), - (0xFED5, 'M', u'ق'), - (0xFED9, 'M', u'ك'), - (0xFEDD, 'M', u'ل'), - (0xFEE1, 'M', u'م'), - (0xFEE5, 'M', u'ن'), - (0xFEE9, 'M', u'ه'), - (0xFEED, 'M', u'و'), - (0xFEEF, 'M', u'ى'), - (0xFEF1, 'M', u'ي'), - (0xFEF5, 'M', u'لآ'), - (0xFEF7, 'M', u'لأ'), - (0xFEF9, 'M', u'لإ'), - (0xFEFB, 'M', u'لا'), + (0xFE76, '3', ' َ'), + (0xFE77, 'M', 'ـَ'), + (0xFE78, '3', ' ُ'), + (0xFE79, 'M', 'ـُ'), + (0xFE7A, '3', ' ِ'), + (0xFE7B, 'M', 'ـِ'), + (0xFE7C, '3', ' ّ'), + (0xFE7D, 'M', 'ـّ'), + (0xFE7E, '3', ' ْ'), + (0xFE7F, 'M', 'ـْ'), + (0xFE80, 'M', 'ء'), + (0xFE81, 'M', 'آ'), + (0xFE83, 'M', 'أ'), + (0xFE85, 'M', 'ؤ'), + (0xFE87, 'M', 'إ'), + (0xFE89, 'M', 'ئ'), + (0xFE8D, 'M', 'ا'), + (0xFE8F, 'M', 'ب'), + (0xFE93, 'M', 'ة'), + (0xFE95, 'M', 'ت'), + (0xFE99, 'M', 'ث'), + (0xFE9D, 'M', 'ج'), + (0xFEA1, 'M', 'ح'), + (0xFEA5, 'M', 'خ'), + (0xFEA9, 'M', 'د'), + (0xFEAB, 'M', 'ذ'), + (0xFEAD, 'M', 'ر'), + (0xFEAF, 'M', 'ز'), + (0xFEB1, 'M', 'س'), + (0xFEB5, 'M', 'ش'), + (0xFEB9, 'M', 'ص'), + (0xFEBD, 'M', 'ض'), + (0xFEC1, 'M', 'ط'), + (0xFEC5, 'M', 'ظ'), + (0xFEC9, 'M', 'ع'), + (0xFECD, 'M', 'غ'), + (0xFED1, 'M', 'ف'), + (0xFED5, 'M', 'ق'), + (0xFED9, 'M', 'ك'), + (0xFEDD, 'M', 'ل'), + (0xFEE1, 'M', 'م'), + (0xFEE5, 'M', 'ن'), + (0xFEE9, 'M', 'ه'), + (0xFEED, 'M', 'و'), + (0xFEEF, 'M', 'ى'), + (0xFEF1, 'M', 'ي'), + (0xFEF5, 'M', 'لآ'), + (0xFEF7, 'M', 'لأ'), + (0xFEF9, 'M', 'لإ'), + (0xFEFB, 'M', 'لا'), (0xFEFD, 'X'), (0xFEFF, 'I'), (0xFF00, 'X'), - (0xFF01, '3', u'!'), - (0xFF02, '3', u'"'), - (0xFF03, '3', u'#'), - (0xFF04, '3', u'$'), - (0xFF05, '3', u'%'), - (0xFF06, '3', u'&'), - (0xFF07, '3', u'\''), - (0xFF08, '3', u'('), - (0xFF09, '3', u')'), - (0xFF0A, '3', u'*'), - (0xFF0B, '3', u'+'), - (0xFF0C, '3', u','), - (0xFF0D, 'M', u'-'), - (0xFF0E, 'M', u'.'), - (0xFF0F, '3', u'/'), - (0xFF10, 'M', u'0'), - (0xFF11, 'M', u'1'), - (0xFF12, 'M', u'2'), - (0xFF13, 'M', u'3'), - (0xFF14, 'M', u'4'), - (0xFF15, 'M', u'5'), - (0xFF16, 'M', u'6'), - (0xFF17, 'M', u'7'), - (0xFF18, 'M', u'8'), - (0xFF19, 'M', u'9'), - (0xFF1A, '3', u':'), - (0xFF1B, '3', u';'), - (0xFF1C, '3', u'<'), - (0xFF1D, '3', u'='), - (0xFF1E, '3', u'>'), - (0xFF1F, '3', u'?'), - (0xFF20, '3', u'@'), - (0xFF21, 'M', u'a'), - (0xFF22, 'M', u'b'), - (0xFF23, 'M', u'c'), + (0xFF01, '3', '!'), + (0xFF02, '3', '"'), + (0xFF03, '3', '#'), + (0xFF04, '3', '$'), + (0xFF05, '3', '%'), + (0xFF06, '3', '&'), + (0xFF07, '3', '\''), + (0xFF08, '3', '('), + (0xFF09, '3', ')'), + (0xFF0A, '3', '*'), + (0xFF0B, '3', '+'), + (0xFF0C, '3', ','), + (0xFF0D, 'M', '-'), + (0xFF0E, 'M', '.'), + (0xFF0F, '3', '/'), + (0xFF10, 'M', '0'), + (0xFF11, 'M', '1'), + (0xFF12, 'M', '2'), + (0xFF13, 'M', '3'), + (0xFF14, 'M', '4'), + (0xFF15, 'M', '5'), + (0xFF16, 'M', '6'), + (0xFF17, 'M', '7'), + (0xFF18, 'M', '8'), + (0xFF19, 'M', '9'), + (0xFF1A, '3', ':'), ] -def _seg_51(): +def _seg_51() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0xFF24, 'M', u'd'), - (0xFF25, 'M', u'e'), - (0xFF26, 'M', u'f'), - (0xFF27, 'M', u'g'), - (0xFF28, 'M', u'h'), - (0xFF29, 'M', u'i'), - (0xFF2A, 'M', u'j'), - (0xFF2B, 'M', u'k'), - (0xFF2C, 'M', u'l'), - (0xFF2D, 'M', u'm'), - (0xFF2E, 'M', u'n'), - (0xFF2F, 'M', u'o'), - (0xFF30, 'M', u'p'), - (0xFF31, 'M', u'q'), - (0xFF32, 'M', u'r'), - (0xFF33, 'M', u's'), - (0xFF34, 'M', u't'), - (0xFF35, 'M', u'u'), - (0xFF36, 'M', u'v'), - (0xFF37, 'M', u'w'), - (0xFF38, 'M', u'x'), - (0xFF39, 'M', u'y'), - (0xFF3A, 'M', u'z'), - (0xFF3B, '3', u'['), - (0xFF3C, '3', u'\\'), - (0xFF3D, '3', u']'), - (0xFF3E, '3', u'^'), - (0xFF3F, '3', u'_'), - (0xFF40, '3', u'`'), - (0xFF41, 'M', u'a'), - (0xFF42, 'M', u'b'), - (0xFF43, 'M', u'c'), - (0xFF44, 'M', u'd'), - (0xFF45, 'M', u'e'), - (0xFF46, 'M', u'f'), - (0xFF47, 'M', u'g'), - (0xFF48, 'M', u'h'), - (0xFF49, 'M', u'i'), - (0xFF4A, 'M', u'j'), - (0xFF4B, 'M', u'k'), - (0xFF4C, 'M', u'l'), - (0xFF4D, 'M', u'm'), - (0xFF4E, 'M', u'n'), - (0xFF4F, 'M', u'o'), - (0xFF50, 'M', u'p'), - (0xFF51, 'M', u'q'), - (0xFF52, 'M', u'r'), - (0xFF53, 'M', u's'), - (0xFF54, 'M', u't'), - (0xFF55, 'M', u'u'), - (0xFF56, 'M', u'v'), - (0xFF57, 'M', u'w'), - (0xFF58, 'M', u'x'), - (0xFF59, 'M', u'y'), - (0xFF5A, 'M', u'z'), - (0xFF5B, '3', u'{'), - (0xFF5C, '3', u'|'), - (0xFF5D, '3', u'}'), - (0xFF5E, '3', u'~'), - (0xFF5F, 'M', u'⦅'), - (0xFF60, 'M', u'⦆'), - (0xFF61, 'M', u'.'), - (0xFF62, 'M', u'「'), - (0xFF63, 'M', u'」'), - (0xFF64, 'M', u'、'), - (0xFF65, 'M', u'・'), - (0xFF66, 'M', u'ヲ'), - (0xFF67, 'M', u'ァ'), - (0xFF68, 'M', u'ィ'), - (0xFF69, 'M', u'ゥ'), - (0xFF6A, 'M', u'ェ'), - (0xFF6B, 'M', u'ォ'), - (0xFF6C, 'M', u'ャ'), - (0xFF6D, 'M', u'ュ'), - (0xFF6E, 'M', u'ョ'), - (0xFF6F, 'M', u'ッ'), - (0xFF70, 'M', u'ー'), - (0xFF71, 'M', u'ア'), - (0xFF72, 'M', u'イ'), - (0xFF73, 'M', u'ウ'), - (0xFF74, 'M', u'エ'), - (0xFF75, 'M', u'オ'), - (0xFF76, 'M', u'カ'), - (0xFF77, 'M', u'キ'), - (0xFF78, 'M', u'ク'), - (0xFF79, 'M', u'ケ'), - (0xFF7A, 'M', u'コ'), - (0xFF7B, 'M', u'サ'), - (0xFF7C, 'M', u'シ'), - (0xFF7D, 'M', u'ス'), - (0xFF7E, 'M', u'セ'), - (0xFF7F, 'M', u'ソ'), - (0xFF80, 'M', u'タ'), - (0xFF81, 'M', u'チ'), - (0xFF82, 'M', u'ツ'), - (0xFF83, 'M', u'テ'), - (0xFF84, 'M', u'ト'), - (0xFF85, 'M', u'ナ'), - (0xFF86, 'M', u'ニ'), - (0xFF87, 'M', u'ヌ'), + (0xFF1B, '3', ';'), + (0xFF1C, '3', '<'), + (0xFF1D, '3', '='), + (0xFF1E, '3', '>'), + (0xFF1F, '3', '?'), + (0xFF20, '3', '@'), + (0xFF21, 'M', 'a'), + (0xFF22, 'M', 'b'), + (0xFF23, 'M', 'c'), + (0xFF24, 'M', 'd'), + (0xFF25, 'M', 'e'), + (0xFF26, 'M', 'f'), + (0xFF27, 'M', 'g'), + (0xFF28, 'M', 'h'), + (0xFF29, 'M', 'i'), + (0xFF2A, 'M', 'j'), + (0xFF2B, 'M', 'k'), + (0xFF2C, 'M', 'l'), + (0xFF2D, 'M', 'm'), + (0xFF2E, 'M', 'n'), + (0xFF2F, 'M', 'o'), + (0xFF30, 'M', 'p'), + (0xFF31, 'M', 'q'), + (0xFF32, 'M', 'r'), + (0xFF33, 'M', 's'), + (0xFF34, 'M', 't'), + (0xFF35, 'M', 'u'), + (0xFF36, 'M', 'v'), + (0xFF37, 'M', 'w'), + (0xFF38, 'M', 'x'), + (0xFF39, 'M', 'y'), + (0xFF3A, 'M', 'z'), + (0xFF3B, '3', '['), + (0xFF3C, '3', '\\'), + (0xFF3D, '3', ']'), + (0xFF3E, '3', '^'), + (0xFF3F, '3', '_'), + (0xFF40, '3', '`'), + (0xFF41, 'M', 'a'), + (0xFF42, 'M', 'b'), + (0xFF43, 'M', 'c'), + (0xFF44, 'M', 'd'), + (0xFF45, 'M', 'e'), + (0xFF46, 'M', 'f'), + (0xFF47, 'M', 'g'), + (0xFF48, 'M', 'h'), + (0xFF49, 'M', 'i'), + (0xFF4A, 'M', 'j'), + (0xFF4B, 'M', 'k'), + (0xFF4C, 'M', 'l'), + (0xFF4D, 'M', 'm'), + (0xFF4E, 'M', 'n'), + (0xFF4F, 'M', 'o'), + (0xFF50, 'M', 'p'), + (0xFF51, 'M', 'q'), + (0xFF52, 'M', 'r'), + (0xFF53, 'M', 's'), + (0xFF54, 'M', 't'), + (0xFF55, 'M', 'u'), + (0xFF56, 'M', 'v'), + (0xFF57, 'M', 'w'), + (0xFF58, 'M', 'x'), + (0xFF59, 'M', 'y'), + (0xFF5A, 'M', 'z'), + (0xFF5B, '3', '{'), + (0xFF5C, '3', '|'), + (0xFF5D, '3', '}'), + (0xFF5E, '3', '~'), + (0xFF5F, 'M', '⦅'), + (0xFF60, 'M', '⦆'), + (0xFF61, 'M', '.'), + (0xFF62, 'M', '「'), + (0xFF63, 'M', '」'), + (0xFF64, 'M', '、'), + (0xFF65, 'M', '・'), + (0xFF66, 'M', 'ヲ'), + (0xFF67, 'M', 'ァ'), + (0xFF68, 'M', 'ィ'), + (0xFF69, 'M', 'ゥ'), + (0xFF6A, 'M', 'ェ'), + (0xFF6B, 'M', 'ォ'), + (0xFF6C, 'M', 'ャ'), + (0xFF6D, 'M', 'ュ'), + (0xFF6E, 'M', 'ョ'), + (0xFF6F, 'M', 'ッ'), + (0xFF70, 'M', 'ー'), + (0xFF71, 'M', 'ア'), + (0xFF72, 'M', 'イ'), + (0xFF73, 'M', 'ウ'), + (0xFF74, 'M', 'エ'), + (0xFF75, 'M', 'オ'), + (0xFF76, 'M', 'カ'), + (0xFF77, 'M', 'キ'), + (0xFF78, 'M', 'ク'), + (0xFF79, 'M', 'ケ'), + (0xFF7A, 'M', 'コ'), + (0xFF7B, 'M', 'サ'), + (0xFF7C, 'M', 'シ'), + (0xFF7D, 'M', 'ス'), + (0xFF7E, 'M', 'セ'), ] -def _seg_52(): +def _seg_52() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0xFF88, 'M', u'ネ'), - (0xFF89, 'M', u'ノ'), - (0xFF8A, 'M', u'ハ'), - (0xFF8B, 'M', u'ヒ'), - (0xFF8C, 'M', u'フ'), - (0xFF8D, 'M', u'ヘ'), - (0xFF8E, 'M', u'ホ'), - (0xFF8F, 'M', u'マ'), - (0xFF90, 'M', u'ミ'), - (0xFF91, 'M', u'ム'), - (0xFF92, 'M', u'メ'), - (0xFF93, 'M', u'モ'), - (0xFF94, 'M', u'ヤ'), - (0xFF95, 'M', u'ユ'), - (0xFF96, 'M', u'ヨ'), - (0xFF97, 'M', u'ラ'), - (0xFF98, 'M', u'リ'), - (0xFF99, 'M', u'ル'), - (0xFF9A, 'M', u'レ'), - (0xFF9B, 'M', u'ロ'), - (0xFF9C, 'M', u'ワ'), - (0xFF9D, 'M', u'ン'), - (0xFF9E, 'M', u'゙'), - (0xFF9F, 'M', u'゚'), + (0xFF7F, 'M', 'ソ'), + (0xFF80, 'M', 'タ'), + (0xFF81, 'M', 'チ'), + (0xFF82, 'M', 'ツ'), + (0xFF83, 'M', 'テ'), + (0xFF84, 'M', 'ト'), + (0xFF85, 'M', 'ナ'), + (0xFF86, 'M', 'ニ'), + (0xFF87, 'M', 'ヌ'), + (0xFF88, 'M', 'ネ'), + (0xFF89, 'M', 'ノ'), + (0xFF8A, 'M', 'ハ'), + (0xFF8B, 'M', 'ヒ'), + (0xFF8C, 'M', 'フ'), + (0xFF8D, 'M', 'ヘ'), + (0xFF8E, 'M', 'ホ'), + (0xFF8F, 'M', 'マ'), + (0xFF90, 'M', 'ミ'), + (0xFF91, 'M', 'ム'), + (0xFF92, 'M', 'メ'), + (0xFF93, 'M', 'モ'), + (0xFF94, 'M', 'ヤ'), + (0xFF95, 'M', 'ユ'), + (0xFF96, 'M', 'ヨ'), + (0xFF97, 'M', 'ラ'), + (0xFF98, 'M', 'リ'), + (0xFF99, 'M', 'ル'), + (0xFF9A, 'M', 'レ'), + (0xFF9B, 'M', 'ロ'), + (0xFF9C, 'M', 'ワ'), + (0xFF9D, 'M', 'ン'), + (0xFF9E, 'M', '゙'), + (0xFF9F, 'M', '゚'), (0xFFA0, 'X'), - (0xFFA1, 'M', u'ᄀ'), - (0xFFA2, 'M', u'ᄁ'), - (0xFFA3, 'M', u'ᆪ'), - (0xFFA4, 'M', u'ᄂ'), - (0xFFA5, 'M', u'ᆬ'), - (0xFFA6, 'M', u'ᆭ'), - (0xFFA7, 'M', u'ᄃ'), - (0xFFA8, 'M', u'ᄄ'), - (0xFFA9, 'M', u'ᄅ'), - (0xFFAA, 'M', u'ᆰ'), - (0xFFAB, 'M', u'ᆱ'), - (0xFFAC, 'M', u'ᆲ'), - (0xFFAD, 'M', u'ᆳ'), - (0xFFAE, 'M', u'ᆴ'), - (0xFFAF, 'M', u'ᆵ'), - (0xFFB0, 'M', u'ᄚ'), - (0xFFB1, 'M', u'ᄆ'), - (0xFFB2, 'M', u'ᄇ'), - (0xFFB3, 'M', u'ᄈ'), - (0xFFB4, 'M', u'ᄡ'), - (0xFFB5, 'M', u'ᄉ'), - (0xFFB6, 'M', u'ᄊ'), - (0xFFB7, 'M', u'ᄋ'), - (0xFFB8, 'M', u'ᄌ'), - (0xFFB9, 'M', u'ᄍ'), - (0xFFBA, 'M', u'ᄎ'), - (0xFFBB, 'M', u'ᄏ'), - (0xFFBC, 'M', u'ᄐ'), - (0xFFBD, 'M', u'ᄑ'), - (0xFFBE, 'M', u'ᄒ'), + (0xFFA1, 'M', 'ᄀ'), + (0xFFA2, 'M', 'ᄁ'), + (0xFFA3, 'M', 'ᆪ'), + (0xFFA4, 'M', 'ᄂ'), + (0xFFA5, 'M', 'ᆬ'), + (0xFFA6, 'M', 'ᆭ'), + (0xFFA7, 'M', 'ᄃ'), + (0xFFA8, 'M', 'ᄄ'), + (0xFFA9, 'M', 'ᄅ'), + (0xFFAA, 'M', 'ᆰ'), + (0xFFAB, 'M', 'ᆱ'), + (0xFFAC, 'M', 'ᆲ'), + (0xFFAD, 'M', 'ᆳ'), + (0xFFAE, 'M', 'ᆴ'), + (0xFFAF, 'M', 'ᆵ'), + (0xFFB0, 'M', 'ᄚ'), + (0xFFB1, 'M', 'ᄆ'), + (0xFFB2, 'M', 'ᄇ'), + (0xFFB3, 'M', 'ᄈ'), + (0xFFB4, 'M', 'ᄡ'), + (0xFFB5, 'M', 'ᄉ'), + (0xFFB6, 'M', 'ᄊ'), + (0xFFB7, 'M', 'ᄋ'), + (0xFFB8, 'M', 'ᄌ'), + (0xFFB9, 'M', 'ᄍ'), + (0xFFBA, 'M', 'ᄎ'), + (0xFFBB, 'M', 'ᄏ'), + (0xFFBC, 'M', 'ᄐ'), + (0xFFBD, 'M', 'ᄑ'), + (0xFFBE, 'M', 'ᄒ'), (0xFFBF, 'X'), - (0xFFC2, 'M', u'ᅡ'), - (0xFFC3, 'M', u'ᅢ'), - (0xFFC4, 'M', u'ᅣ'), - (0xFFC5, 'M', u'ᅤ'), - (0xFFC6, 'M', u'ᅥ'), - (0xFFC7, 'M', u'ᅦ'), + (0xFFC2, 'M', 'ᅡ'), + (0xFFC3, 'M', 'ᅢ'), + (0xFFC4, 'M', 'ᅣ'), + (0xFFC5, 'M', 'ᅤ'), + (0xFFC6, 'M', 'ᅥ'), + (0xFFC7, 'M', 'ᅦ'), (0xFFC8, 'X'), - (0xFFCA, 'M', u'ᅧ'), - (0xFFCB, 'M', u'ᅨ'), - (0xFFCC, 'M', u'ᅩ'), - (0xFFCD, 'M', u'ᅪ'), - (0xFFCE, 'M', u'ᅫ'), - (0xFFCF, 'M', u'ᅬ'), + (0xFFCA, 'M', 'ᅧ'), + (0xFFCB, 'M', 'ᅨ'), + (0xFFCC, 'M', 'ᅩ'), + (0xFFCD, 'M', 'ᅪ'), + (0xFFCE, 'M', 'ᅫ'), + (0xFFCF, 'M', 'ᅬ'), (0xFFD0, 'X'), - (0xFFD2, 'M', u'ᅭ'), - (0xFFD3, 'M', u'ᅮ'), - (0xFFD4, 'M', u'ᅯ'), - (0xFFD5, 'M', u'ᅰ'), - (0xFFD6, 'M', u'ᅱ'), - (0xFFD7, 'M', u'ᅲ'), + (0xFFD2, 'M', 'ᅭ'), + (0xFFD3, 'M', 'ᅮ'), + (0xFFD4, 'M', 'ᅯ'), + (0xFFD5, 'M', 'ᅰ'), + (0xFFD6, 'M', 'ᅱ'), + (0xFFD7, 'M', 'ᅲ'), (0xFFD8, 'X'), - (0xFFDA, 'M', u'ᅳ'), - (0xFFDB, 'M', u'ᅴ'), - (0xFFDC, 'M', u'ᅵ'), + (0xFFDA, 'M', 'ᅳ'), + (0xFFDB, 'M', 'ᅴ'), + (0xFFDC, 'M', 'ᅵ'), (0xFFDD, 'X'), - (0xFFE0, 'M', u'¢'), - (0xFFE1, 'M', u'£'), - (0xFFE2, 'M', u'¬'), - (0xFFE3, '3', u' ̄'), - (0xFFE4, 'M', u'¦'), - (0xFFE5, 'M', u'¥'), - (0xFFE6, 'M', u'₩'), + (0xFFE0, 'M', '¢'), + (0xFFE1, 'M', '£'), + (0xFFE2, 'M', '¬'), + (0xFFE3, '3', ' ̄'), + (0xFFE4, 'M', '¦'), + (0xFFE5, 'M', '¥'), + (0xFFE6, 'M', '₩'), (0xFFE7, 'X'), - (0xFFE8, 'M', u'│'), - (0xFFE9, 'M', u'←'), - (0xFFEA, 'M', u'↑'), - (0xFFEB, 'M', u'→'), - (0xFFEC, 'M', u'↓'), - (0xFFED, 'M', u'■'), - (0xFFEE, 'M', u'○'), + (0xFFE8, 'M', '│'), + (0xFFE9, 'M', '←'), + ] + +def _seg_53() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xFFEA, 'M', '↑'), + (0xFFEB, 'M', '→'), + (0xFFEC, 'M', '↓'), + (0xFFED, 'M', '■'), + (0xFFEE, 'M', '○'), (0xFFEF, 'X'), (0x10000, 'V'), (0x1000C, 'X'), (0x1000D, 'V'), - ] - -def _seg_53(): - return [ (0x10027, 'X'), (0x10028, 'V'), (0x1003B, 'X'), @@ -5560,90 +5572,90 @@ def _seg_53(): (0x103C4, 'X'), (0x103C8, 'V'), (0x103D6, 'X'), - (0x10400, 'M', u'𐐨'), - (0x10401, 'M', u'𐐩'), - (0x10402, 'M', u'𐐪'), - (0x10403, 'M', u'𐐫'), - (0x10404, 'M', u'𐐬'), - (0x10405, 'M', u'𐐭'), - (0x10406, 'M', u'𐐮'), - (0x10407, 'M', u'𐐯'), - (0x10408, 'M', u'𐐰'), - (0x10409, 'M', u'𐐱'), - (0x1040A, 'M', u'𐐲'), - (0x1040B, 'M', u'𐐳'), - (0x1040C, 'M', u'𐐴'), - (0x1040D, 'M', u'𐐵'), - (0x1040E, 'M', u'𐐶'), - (0x1040F, 'M', u'𐐷'), - (0x10410, 'M', u'𐐸'), - (0x10411, 'M', u'𐐹'), - (0x10412, 'M', u'𐐺'), - (0x10413, 'M', u'𐐻'), - (0x10414, 'M', u'𐐼'), - (0x10415, 'M', u'𐐽'), - (0x10416, 'M', u'𐐾'), - (0x10417, 'M', u'𐐿'), - (0x10418, 'M', u'𐑀'), - (0x10419, 'M', u'𐑁'), - (0x1041A, 'M', u'𐑂'), - (0x1041B, 'M', u'𐑃'), - (0x1041C, 'M', u'𐑄'), - (0x1041D, 'M', u'𐑅'), - (0x1041E, 'M', u'𐑆'), - (0x1041F, 'M', u'𐑇'), - (0x10420, 'M', u'𐑈'), - (0x10421, 'M', u'𐑉'), - (0x10422, 'M', u'𐑊'), - (0x10423, 'M', u'𐑋'), - (0x10424, 'M', u'𐑌'), - (0x10425, 'M', u'𐑍'), - (0x10426, 'M', u'𐑎'), - (0x10427, 'M', u'𐑏'), + (0x10400, 'M', '𐐨'), + (0x10401, 'M', '𐐩'), + (0x10402, 'M', '𐐪'), + (0x10403, 'M', '𐐫'), + (0x10404, 'M', '𐐬'), + (0x10405, 'M', '𐐭'), + (0x10406, 'M', '𐐮'), + (0x10407, 'M', '𐐯'), + (0x10408, 'M', '𐐰'), + (0x10409, 'M', '𐐱'), + (0x1040A, 'M', '𐐲'), + (0x1040B, 'M', '𐐳'), + (0x1040C, 'M', '𐐴'), + (0x1040D, 'M', '𐐵'), + (0x1040E, 'M', '𐐶'), + (0x1040F, 'M', '𐐷'), + (0x10410, 'M', '𐐸'), + (0x10411, 'M', '𐐹'), + (0x10412, 'M', '𐐺'), + (0x10413, 'M', '𐐻'), + (0x10414, 'M', '𐐼'), + (0x10415, 'M', '𐐽'), + (0x10416, 'M', '𐐾'), + (0x10417, 'M', '𐐿'), + (0x10418, 'M', '𐑀'), + (0x10419, 'M', '𐑁'), + (0x1041A, 'M', '𐑂'), + (0x1041B, 'M', '𐑃'), + (0x1041C, 'M', '𐑄'), + (0x1041D, 'M', '𐑅'), + (0x1041E, 'M', '𐑆'), + (0x1041F, 'M', '𐑇'), + (0x10420, 'M', '𐑈'), + (0x10421, 'M', '𐑉'), + (0x10422, 'M', '𐑊'), + (0x10423, 'M', '𐑋'), + (0x10424, 'M', '𐑌'), + (0x10425, 'M', '𐑍'), + (0x10426, 'M', '𐑎'), + (0x10427, 'M', '𐑏'), (0x10428, 'V'), (0x1049E, 'X'), (0x104A0, 'V'), (0x104AA, 'X'), - (0x104B0, 'M', u'𐓘'), - (0x104B1, 'M', u'𐓙'), - (0x104B2, 'M', u'𐓚'), - (0x104B3, 'M', u'𐓛'), - (0x104B4, 'M', u'𐓜'), - (0x104B5, 'M', u'𐓝'), - (0x104B6, 'M', u'𐓞'), - (0x104B7, 'M', u'𐓟'), - (0x104B8, 'M', u'𐓠'), - (0x104B9, 'M', u'𐓡'), - (0x104BA, 'M', u'𐓢'), - (0x104BB, 'M', u'𐓣'), - (0x104BC, 'M', u'𐓤'), - (0x104BD, 'M', u'𐓥'), - (0x104BE, 'M', u'𐓦'), + (0x104B0, 'M', '𐓘'), + (0x104B1, 'M', '𐓙'), + (0x104B2, 'M', '𐓚'), + (0x104B3, 'M', '𐓛'), + (0x104B4, 'M', '𐓜'), + (0x104B5, 'M', '𐓝'), ] -def _seg_54(): +def _seg_54() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0x104BF, 'M', u'𐓧'), - (0x104C0, 'M', u'𐓨'), - (0x104C1, 'M', u'𐓩'), - (0x104C2, 'M', u'𐓪'), - (0x104C3, 'M', u'𐓫'), - (0x104C4, 'M', u'𐓬'), - (0x104C5, 'M', u'𐓭'), - (0x104C6, 'M', u'𐓮'), - (0x104C7, 'M', u'𐓯'), - (0x104C8, 'M', u'𐓰'), - (0x104C9, 'M', u'𐓱'), - (0x104CA, 'M', u'𐓲'), - (0x104CB, 'M', u'𐓳'), - (0x104CC, 'M', u'𐓴'), - (0x104CD, 'M', u'𐓵'), - (0x104CE, 'M', u'𐓶'), - (0x104CF, 'M', u'𐓷'), - (0x104D0, 'M', u'𐓸'), - (0x104D1, 'M', u'𐓹'), - (0x104D2, 'M', u'𐓺'), - (0x104D3, 'M', u'𐓻'), + (0x104B6, 'M', '𐓞'), + (0x104B7, 'M', '𐓟'), + (0x104B8, 'M', '𐓠'), + (0x104B9, 'M', '𐓡'), + (0x104BA, 'M', '𐓢'), + (0x104BB, 'M', '𐓣'), + (0x104BC, 'M', '𐓤'), + (0x104BD, 'M', '𐓥'), + (0x104BE, 'M', '𐓦'), + (0x104BF, 'M', '𐓧'), + (0x104C0, 'M', '𐓨'), + (0x104C1, 'M', '𐓩'), + (0x104C2, 'M', '𐓪'), + (0x104C3, 'M', '𐓫'), + (0x104C4, 'M', '𐓬'), + (0x104C5, 'M', '𐓭'), + (0x104C6, 'M', '𐓮'), + (0x104C7, 'M', '𐓯'), + (0x104C8, 'M', '𐓰'), + (0x104C9, 'M', '𐓱'), + (0x104CA, 'M', '𐓲'), + (0x104CB, 'M', '𐓳'), + (0x104CC, 'M', '𐓴'), + (0x104CD, 'M', '𐓵'), + (0x104CE, 'M', '𐓶'), + (0x104CF, 'M', '𐓷'), + (0x104D0, 'M', '𐓸'), + (0x104D1, 'M', '𐓹'), + (0x104D2, 'M', '𐓺'), + (0x104D3, 'M', '𐓻'), (0x104D4, 'X'), (0x104D8, 'V'), (0x104FC, 'X'), @@ -5652,13 +5664,123 @@ def _seg_54(): (0x10530, 'V'), (0x10564, 'X'), (0x1056F, 'V'), - (0x10570, 'X'), + (0x10570, 'M', '𐖗'), + (0x10571, 'M', '𐖘'), + (0x10572, 'M', '𐖙'), + (0x10573, 'M', '𐖚'), + (0x10574, 'M', '𐖛'), + (0x10575, 'M', '𐖜'), + (0x10576, 'M', '𐖝'), + (0x10577, 'M', '𐖞'), + (0x10578, 'M', '𐖟'), + (0x10579, 'M', '𐖠'), + (0x1057A, 'M', '𐖡'), + (0x1057B, 'X'), + (0x1057C, 'M', '𐖣'), + (0x1057D, 'M', '𐖤'), + (0x1057E, 'M', '𐖥'), + (0x1057F, 'M', '𐖦'), + (0x10580, 'M', '𐖧'), + (0x10581, 'M', '𐖨'), + (0x10582, 'M', '𐖩'), + (0x10583, 'M', '𐖪'), + (0x10584, 'M', '𐖫'), + (0x10585, 'M', '𐖬'), + (0x10586, 'M', '𐖭'), + (0x10587, 'M', '𐖮'), + (0x10588, 'M', '𐖯'), + (0x10589, 'M', '𐖰'), + (0x1058A, 'M', '𐖱'), + (0x1058B, 'X'), + (0x1058C, 'M', '𐖳'), + (0x1058D, 'M', '𐖴'), + (0x1058E, 'M', '𐖵'), + (0x1058F, 'M', '𐖶'), + (0x10590, 'M', '𐖷'), + (0x10591, 'M', '𐖸'), + (0x10592, 'M', '𐖹'), + (0x10593, 'X'), + (0x10594, 'M', '𐖻'), + (0x10595, 'M', '𐖼'), + (0x10596, 'X'), + (0x10597, 'V'), + (0x105A2, 'X'), + (0x105A3, 'V'), + (0x105B2, 'X'), + (0x105B3, 'V'), + (0x105BA, 'X'), + (0x105BB, 'V'), + (0x105BD, 'X'), (0x10600, 'V'), (0x10737, 'X'), (0x10740, 'V'), (0x10756, 'X'), (0x10760, 'V'), (0x10768, 'X'), + (0x10780, 'V'), + (0x10781, 'M', 'ː'), + (0x10782, 'M', 'ˑ'), + (0x10783, 'M', 'æ'), + (0x10784, 'M', 'ʙ'), + (0x10785, 'M', 'ɓ'), + (0x10786, 'X'), + (0x10787, 'M', 'ʣ'), + (0x10788, 'M', 'ꭦ'), + ] + +def _seg_55() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x10789, 'M', 'ʥ'), + (0x1078A, 'M', 'ʤ'), + (0x1078B, 'M', 'ɖ'), + (0x1078C, 'M', 'ɗ'), + (0x1078D, 'M', 'ᶑ'), + (0x1078E, 'M', 'ɘ'), + (0x1078F, 'M', 'ɞ'), + (0x10790, 'M', 'ʩ'), + (0x10791, 'M', 'ɤ'), + (0x10792, 'M', 'ɢ'), + (0x10793, 'M', 'ɠ'), + (0x10794, 'M', 'ʛ'), + (0x10795, 'M', 'ħ'), + (0x10796, 'M', 'ʜ'), + (0x10797, 'M', 'ɧ'), + (0x10798, 'M', 'ʄ'), + (0x10799, 'M', 'ʪ'), + (0x1079A, 'M', 'ʫ'), + (0x1079B, 'M', 'ɬ'), + (0x1079C, 'M', '𝼄'), + (0x1079D, 'M', 'ꞎ'), + (0x1079E, 'M', 'ɮ'), + (0x1079F, 'M', '𝼅'), + (0x107A0, 'M', 'ʎ'), + (0x107A1, 'M', '𝼆'), + (0x107A2, 'M', 'ø'), + (0x107A3, 'M', 'ɶ'), + (0x107A4, 'M', 'ɷ'), + (0x107A5, 'M', 'q'), + (0x107A6, 'M', 'ɺ'), + (0x107A7, 'M', '𝼈'), + (0x107A8, 'M', 'ɽ'), + (0x107A9, 'M', 'ɾ'), + (0x107AA, 'M', 'ʀ'), + (0x107AB, 'M', 'ʨ'), + (0x107AC, 'M', 'ʦ'), + (0x107AD, 'M', 'ꭧ'), + (0x107AE, 'M', 'ʧ'), + (0x107AF, 'M', 'ʈ'), + (0x107B0, 'M', 'ⱱ'), + (0x107B1, 'X'), + (0x107B2, 'M', 'ʏ'), + (0x107B3, 'M', 'ʡ'), + (0x107B4, 'M', 'ʢ'), + (0x107B5, 'M', 'ʘ'), + (0x107B6, 'M', 'ǀ'), + (0x107B7, 'M', 'ǁ'), + (0x107B8, 'M', 'ǂ'), + (0x107B9, 'M', '𝼊'), + (0x107BA, 'M', '𝼞'), + (0x107BB, 'X'), (0x10800, 'V'), (0x10806, 'X'), (0x10808, 'V'), @@ -5708,6 +5830,10 @@ def _seg_54(): (0x10A60, 'V'), (0x10AA0, 'X'), (0x10AC0, 'V'), + ] + +def _seg_56() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ (0x10AE7, 'X'), (0x10AEB, 'V'), (0x10AF7, 'X'), @@ -5723,63 +5849,59 @@ def _seg_54(): (0x10B9D, 'X'), (0x10BA9, 'V'), (0x10BB0, 'X'), - ] - -def _seg_55(): - return [ (0x10C00, 'V'), (0x10C49, 'X'), - (0x10C80, 'M', u'𐳀'), - (0x10C81, 'M', u'𐳁'), - (0x10C82, 'M', u'𐳂'), - (0x10C83, 'M', u'𐳃'), - (0x10C84, 'M', u'𐳄'), - (0x10C85, 'M', u'𐳅'), - (0x10C86, 'M', u'𐳆'), - (0x10C87, 'M', u'𐳇'), - (0x10C88, 'M', u'𐳈'), - (0x10C89, 'M', u'𐳉'), - (0x10C8A, 'M', u'𐳊'), - (0x10C8B, 'M', u'𐳋'), - (0x10C8C, 'M', u'𐳌'), - (0x10C8D, 'M', u'𐳍'), - (0x10C8E, 'M', u'𐳎'), - (0x10C8F, 'M', u'𐳏'), - (0x10C90, 'M', u'𐳐'), - (0x10C91, 'M', u'𐳑'), - (0x10C92, 'M', u'𐳒'), - (0x10C93, 'M', u'𐳓'), - (0x10C94, 'M', u'𐳔'), - (0x10C95, 'M', u'𐳕'), - (0x10C96, 'M', u'𐳖'), - (0x10C97, 'M', u'𐳗'), - (0x10C98, 'M', u'𐳘'), - (0x10C99, 'M', u'𐳙'), - (0x10C9A, 'M', u'𐳚'), - (0x10C9B, 'M', u'𐳛'), - (0x10C9C, 'M', u'𐳜'), - (0x10C9D, 'M', u'𐳝'), - (0x10C9E, 'M', u'𐳞'), - (0x10C9F, 'M', u'𐳟'), - (0x10CA0, 'M', u'𐳠'), - (0x10CA1, 'M', u'𐳡'), - (0x10CA2, 'M', u'𐳢'), - (0x10CA3, 'M', u'𐳣'), - (0x10CA4, 'M', u'𐳤'), - (0x10CA5, 'M', u'𐳥'), - (0x10CA6, 'M', u'𐳦'), - (0x10CA7, 'M', u'𐳧'), - (0x10CA8, 'M', u'𐳨'), - (0x10CA9, 'M', u'𐳩'), - (0x10CAA, 'M', u'𐳪'), - (0x10CAB, 'M', u'𐳫'), - (0x10CAC, 'M', u'𐳬'), - (0x10CAD, 'M', u'𐳭'), - (0x10CAE, 'M', u'𐳮'), - (0x10CAF, 'M', u'𐳯'), - (0x10CB0, 'M', u'𐳰'), - (0x10CB1, 'M', u'𐳱'), - (0x10CB2, 'M', u'𐳲'), + (0x10C80, 'M', '𐳀'), + (0x10C81, 'M', '𐳁'), + (0x10C82, 'M', '𐳂'), + (0x10C83, 'M', '𐳃'), + (0x10C84, 'M', '𐳄'), + (0x10C85, 'M', '𐳅'), + (0x10C86, 'M', '𐳆'), + (0x10C87, 'M', '𐳇'), + (0x10C88, 'M', '𐳈'), + (0x10C89, 'M', '𐳉'), + (0x10C8A, 'M', '𐳊'), + (0x10C8B, 'M', '𐳋'), + (0x10C8C, 'M', '𐳌'), + (0x10C8D, 'M', '𐳍'), + (0x10C8E, 'M', '𐳎'), + (0x10C8F, 'M', '𐳏'), + (0x10C90, 'M', '𐳐'), + (0x10C91, 'M', '𐳑'), + (0x10C92, 'M', '𐳒'), + (0x10C93, 'M', '𐳓'), + (0x10C94, 'M', '𐳔'), + (0x10C95, 'M', '𐳕'), + (0x10C96, 'M', '𐳖'), + (0x10C97, 'M', '𐳗'), + (0x10C98, 'M', '𐳘'), + (0x10C99, 'M', '𐳙'), + (0x10C9A, 'M', '𐳚'), + (0x10C9B, 'M', '𐳛'), + (0x10C9C, 'M', '𐳜'), + (0x10C9D, 'M', '𐳝'), + (0x10C9E, 'M', '𐳞'), + (0x10C9F, 'M', '𐳟'), + (0x10CA0, 'M', '𐳠'), + (0x10CA1, 'M', '𐳡'), + (0x10CA2, 'M', '𐳢'), + (0x10CA3, 'M', '𐳣'), + (0x10CA4, 'M', '𐳤'), + (0x10CA5, 'M', '𐳥'), + (0x10CA6, 'M', '𐳦'), + (0x10CA7, 'M', '𐳧'), + (0x10CA8, 'M', '𐳨'), + (0x10CA9, 'M', '𐳩'), + (0x10CAA, 'M', '𐳪'), + (0x10CAB, 'M', '𐳫'), + (0x10CAC, 'M', '𐳬'), + (0x10CAD, 'M', '𐳭'), + (0x10CAE, 'M', '𐳮'), + (0x10CAF, 'M', '𐳯'), + (0x10CB0, 'M', '𐳰'), + (0x10CB1, 'M', '𐳱'), + (0x10CB2, 'M', '𐳲'), (0x10CB3, 'X'), (0x10CC0, 'V'), (0x10CF3, 'X'), @@ -5795,10 +5917,12 @@ def _seg_55(): (0x10EAE, 'X'), (0x10EB0, 'V'), (0x10EB2, 'X'), - (0x10F00, 'V'), + (0x10EFD, 'V'), (0x10F28, 'X'), (0x10F30, 'V'), (0x10F5A, 'X'), + (0x10F70, 'V'), + (0x10F8A, 'X'), (0x10FB0, 'V'), (0x10FCC, 'X'), (0x10FE0, 'V'), @@ -5806,11 +5930,15 @@ def _seg_55(): (0x11000, 'V'), (0x1104E, 'X'), (0x11052, 'V'), - (0x11070, 'X'), + (0x11076, 'X'), (0x1107F, 'V'), (0x110BD, 'X'), (0x110BE, 'V'), - (0x110C2, 'X'), + ] + +def _seg_57() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x110C3, 'X'), (0x110D0, 'V'), (0x110E9, 'X'), (0x110F0, 'V'), @@ -5827,12 +5955,8 @@ def _seg_55(): (0x111F5, 'X'), (0x11200, 'V'), (0x11212, 'X'), - ] - -def _seg_56(): - return [ (0x11213, 'V'), - (0x1123F, 'X'), + (0x11242, 'X'), (0x11280, 'V'), (0x11287, 'X'), (0x11288, 'V'), @@ -5896,7 +6020,7 @@ def _seg_56(): (0x11660, 'V'), (0x1166D, 'X'), (0x11680, 'V'), - (0x116B9, 'X'), + (0x116BA, 'X'), (0x116C0, 'V'), (0x116CA, 'X'), (0x11700, 'V'), @@ -5904,45 +6028,45 @@ def _seg_56(): (0x1171D, 'V'), (0x1172C, 'X'), (0x11730, 'V'), - (0x11740, 'X'), + (0x11747, 'X'), (0x11800, 'V'), (0x1183C, 'X'), - (0x118A0, 'M', u'𑣀'), - (0x118A1, 'M', u'𑣁'), - (0x118A2, 'M', u'𑣂'), - (0x118A3, 'M', u'𑣃'), - (0x118A4, 'M', u'𑣄'), - (0x118A5, 'M', u'𑣅'), - (0x118A6, 'M', u'𑣆'), - (0x118A7, 'M', u'𑣇'), - (0x118A8, 'M', u'𑣈'), - (0x118A9, 'M', u'𑣉'), - (0x118AA, 'M', u'𑣊'), - (0x118AB, 'M', u'𑣋'), - (0x118AC, 'M', u'𑣌'), - (0x118AD, 'M', u'𑣍'), - (0x118AE, 'M', u'𑣎'), - (0x118AF, 'M', u'𑣏'), - (0x118B0, 'M', u'𑣐'), - (0x118B1, 'M', u'𑣑'), - (0x118B2, 'M', u'𑣒'), - (0x118B3, 'M', u'𑣓'), - (0x118B4, 'M', u'𑣔'), - (0x118B5, 'M', u'𑣕'), - (0x118B6, 'M', u'𑣖'), - (0x118B7, 'M', u'𑣗'), + (0x118A0, 'M', '𑣀'), + (0x118A1, 'M', '𑣁'), + (0x118A2, 'M', '𑣂'), + (0x118A3, 'M', '𑣃'), + (0x118A4, 'M', '𑣄'), + (0x118A5, 'M', '𑣅'), + (0x118A6, 'M', '𑣆'), ] -def _seg_57(): +def _seg_58() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0x118B8, 'M', u'𑣘'), - (0x118B9, 'M', u'𑣙'), - (0x118BA, 'M', u'𑣚'), - (0x118BB, 'M', u'𑣛'), - (0x118BC, 'M', u'𑣜'), - (0x118BD, 'M', u'𑣝'), - (0x118BE, 'M', u'𑣞'), - (0x118BF, 'M', u'𑣟'), + (0x118A7, 'M', '𑣇'), + (0x118A8, 'M', '𑣈'), + (0x118A9, 'M', '𑣉'), + (0x118AA, 'M', '𑣊'), + (0x118AB, 'M', '𑣋'), + (0x118AC, 'M', '𑣌'), + (0x118AD, 'M', '𑣍'), + (0x118AE, 'M', '𑣎'), + (0x118AF, 'M', '𑣏'), + (0x118B0, 'M', '𑣐'), + (0x118B1, 'M', '𑣑'), + (0x118B2, 'M', '𑣒'), + (0x118B3, 'M', '𑣓'), + (0x118B4, 'M', '𑣔'), + (0x118B5, 'M', '𑣕'), + (0x118B6, 'M', '𑣖'), + (0x118B7, 'M', '𑣗'), + (0x118B8, 'M', '𑣘'), + (0x118B9, 'M', '𑣙'), + (0x118BA, 'M', '𑣚'), + (0x118BB, 'M', '𑣛'), + (0x118BC, 'M', '𑣜'), + (0x118BD, 'M', '𑣝'), + (0x118BE, 'M', '𑣞'), + (0x118BF, 'M', '𑣟'), (0x118C0, 'V'), (0x118F3, 'X'), (0x118FF, 'V'), @@ -5971,8 +6095,10 @@ def _seg_57(): (0x11A48, 'X'), (0x11A50, 'V'), (0x11AA3, 'X'), - (0x11AC0, 'V'), + (0x11AB0, 'V'), (0x11AF9, 'X'), + (0x11B00, 'V'), + (0x11B0A, 'X'), (0x11C00, 'V'), (0x11C09, 'X'), (0x11C0A, 'V'), @@ -6015,6 +6141,16 @@ def _seg_57(): (0x11DAA, 'X'), (0x11EE0, 'V'), (0x11EF9, 'X'), + (0x11F00, 'V'), + ] + +def _seg_59() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x11F11, 'X'), + (0x11F12, 'V'), + (0x11F3B, 'X'), + (0x11F3E, 'V'), + (0x11F5A, 'X'), (0x11FB0, 'V'), (0x11FB1, 'X'), (0x11FC0, 'V'), @@ -6027,22 +6163,24 @@ def _seg_57(): (0x12475, 'X'), (0x12480, 'V'), (0x12544, 'X'), + (0x12F90, 'V'), + (0x12FF3, 'X'), (0x13000, 'V'), - (0x1342F, 'X'), + (0x13430, 'X'), + (0x13440, 'V'), + (0x13456, 'X'), (0x14400, 'V'), (0x14647, 'X'), (0x16800, 'V'), (0x16A39, 'X'), (0x16A40, 'V'), (0x16A5F, 'X'), - ] - -def _seg_58(): - return [ (0x16A60, 'V'), (0x16A6A, 'X'), (0x16A6E, 'V'), - (0x16A70, 'X'), + (0x16ABF, 'X'), + (0x16AC0, 'V'), + (0x16ACA, 'X'), (0x16AD0, 'V'), (0x16AEE, 'X'), (0x16AF0, 'V'), @@ -6057,38 +6195,38 @@ def _seg_58(): (0x16B78, 'X'), (0x16B7D, 'V'), (0x16B90, 'X'), - (0x16E40, 'M', u'𖹠'), - (0x16E41, 'M', u'𖹡'), - (0x16E42, 'M', u'𖹢'), - (0x16E43, 'M', u'𖹣'), - (0x16E44, 'M', u'𖹤'), - (0x16E45, 'M', u'𖹥'), - (0x16E46, 'M', u'𖹦'), - (0x16E47, 'M', u'𖹧'), - (0x16E48, 'M', u'𖹨'), - (0x16E49, 'M', u'𖹩'), - (0x16E4A, 'M', u'𖹪'), - (0x16E4B, 'M', u'𖹫'), - (0x16E4C, 'M', u'𖹬'), - (0x16E4D, 'M', u'𖹭'), - (0x16E4E, 'M', u'𖹮'), - (0x16E4F, 'M', u'𖹯'), - (0x16E50, 'M', u'𖹰'), - (0x16E51, 'M', u'𖹱'), - (0x16E52, 'M', u'𖹲'), - (0x16E53, 'M', u'𖹳'), - (0x16E54, 'M', u'𖹴'), - (0x16E55, 'M', u'𖹵'), - (0x16E56, 'M', u'𖹶'), - (0x16E57, 'M', u'𖹷'), - (0x16E58, 'M', u'𖹸'), - (0x16E59, 'M', u'𖹹'), - (0x16E5A, 'M', u'𖹺'), - (0x16E5B, 'M', u'𖹻'), - (0x16E5C, 'M', u'𖹼'), - (0x16E5D, 'M', u'𖹽'), - (0x16E5E, 'M', u'𖹾'), - (0x16E5F, 'M', u'𖹿'), + (0x16E40, 'M', '𖹠'), + (0x16E41, 'M', '𖹡'), + (0x16E42, 'M', '𖹢'), + (0x16E43, 'M', '𖹣'), + (0x16E44, 'M', '𖹤'), + (0x16E45, 'M', '𖹥'), + (0x16E46, 'M', '𖹦'), + (0x16E47, 'M', '𖹧'), + (0x16E48, 'M', '𖹨'), + (0x16E49, 'M', '𖹩'), + (0x16E4A, 'M', '𖹪'), + (0x16E4B, 'M', '𖹫'), + (0x16E4C, 'M', '𖹬'), + (0x16E4D, 'M', '𖹭'), + (0x16E4E, 'M', '𖹮'), + (0x16E4F, 'M', '𖹯'), + (0x16E50, 'M', '𖹰'), + (0x16E51, 'M', '𖹱'), + (0x16E52, 'M', '𖹲'), + (0x16E53, 'M', '𖹳'), + (0x16E54, 'M', '𖹴'), + (0x16E55, 'M', '𖹵'), + (0x16E56, 'M', '𖹶'), + (0x16E57, 'M', '𖹷'), + (0x16E58, 'M', '𖹸'), + (0x16E59, 'M', '𖹹'), + (0x16E5A, 'M', '𖹺'), + (0x16E5B, 'M', '𖹻'), + (0x16E5C, 'M', '𖹼'), + (0x16E5D, 'M', '𖹽'), + (0x16E5E, 'M', '𖹾'), + (0x16E5F, 'M', '𖹿'), (0x16E60, 'V'), (0x16E9B, 'X'), (0x16F00, 'V'), @@ -6107,10 +6245,24 @@ def _seg_58(): (0x18CD6, 'X'), (0x18D00, 'V'), (0x18D09, 'X'), + (0x1AFF0, 'V'), + ] + +def _seg_60() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1AFF4, 'X'), + (0x1AFF5, 'V'), + (0x1AFFC, 'X'), + (0x1AFFD, 'V'), + (0x1AFFF, 'X'), (0x1B000, 'V'), - (0x1B11F, 'X'), + (0x1B123, 'X'), + (0x1B132, 'V'), + (0x1B133, 'X'), (0x1B150, 'V'), (0x1B153, 'X'), + (0x1B155, 'V'), + (0x1B156, 'X'), (0x1B164, 'V'), (0x1B168, 'X'), (0x1B170, 'V'), @@ -6126,1097 +6278,1105 @@ def _seg_58(): (0x1BC9C, 'V'), (0x1BCA0, 'I'), (0x1BCA4, 'X'), + (0x1CF00, 'V'), + (0x1CF2E, 'X'), + (0x1CF30, 'V'), + (0x1CF47, 'X'), + (0x1CF50, 'V'), + (0x1CFC4, 'X'), (0x1D000, 'V'), (0x1D0F6, 'X'), (0x1D100, 'V'), (0x1D127, 'X'), (0x1D129, 'V'), - (0x1D15E, 'M', u'𝅗𝅥'), - (0x1D15F, 'M', u'𝅘𝅥'), - (0x1D160, 'M', u'𝅘𝅥𝅮'), - (0x1D161, 'M', u'𝅘𝅥𝅯'), - (0x1D162, 'M', u'𝅘𝅥𝅰'), - (0x1D163, 'M', u'𝅘𝅥𝅱'), - (0x1D164, 'M', u'𝅘𝅥𝅲'), + (0x1D15E, 'M', '𝅗𝅥'), + (0x1D15F, 'M', '𝅘𝅥'), + (0x1D160, 'M', '𝅘𝅥𝅮'), + (0x1D161, 'M', '𝅘𝅥𝅯'), + (0x1D162, 'M', '𝅘𝅥𝅰'), + (0x1D163, 'M', '𝅘𝅥𝅱'), + (0x1D164, 'M', '𝅘𝅥𝅲'), (0x1D165, 'V'), - ] - -def _seg_59(): - return [ (0x1D173, 'X'), (0x1D17B, 'V'), - (0x1D1BB, 'M', u'𝆹𝅥'), - (0x1D1BC, 'M', u'𝆺𝅥'), - (0x1D1BD, 'M', u'𝆹𝅥𝅮'), - (0x1D1BE, 'M', u'𝆺𝅥𝅮'), - (0x1D1BF, 'M', u'𝆹𝅥𝅯'), - (0x1D1C0, 'M', u'𝆺𝅥𝅯'), + (0x1D1BB, 'M', '𝆹𝅥'), + (0x1D1BC, 'M', '𝆺𝅥'), + (0x1D1BD, 'M', '𝆹𝅥𝅮'), + (0x1D1BE, 'M', '𝆺𝅥𝅮'), + (0x1D1BF, 'M', '𝆹𝅥𝅯'), + (0x1D1C0, 'M', '𝆺𝅥𝅯'), (0x1D1C1, 'V'), - (0x1D1E9, 'X'), + (0x1D1EB, 'X'), (0x1D200, 'V'), (0x1D246, 'X'), + (0x1D2C0, 'V'), + (0x1D2D4, 'X'), (0x1D2E0, 'V'), (0x1D2F4, 'X'), (0x1D300, 'V'), (0x1D357, 'X'), (0x1D360, 'V'), (0x1D379, 'X'), - (0x1D400, 'M', u'a'), - (0x1D401, 'M', u'b'), - (0x1D402, 'M', u'c'), - (0x1D403, 'M', u'd'), - (0x1D404, 'M', u'e'), - (0x1D405, 'M', u'f'), - (0x1D406, 'M', u'g'), - (0x1D407, 'M', u'h'), - (0x1D408, 'M', u'i'), - (0x1D409, 'M', u'j'), - (0x1D40A, 'M', u'k'), - (0x1D40B, 'M', u'l'), - (0x1D40C, 'M', u'm'), - (0x1D40D, 'M', u'n'), - (0x1D40E, 'M', u'o'), - (0x1D40F, 'M', u'p'), - (0x1D410, 'M', u'q'), - (0x1D411, 'M', u'r'), - (0x1D412, 'M', u's'), - (0x1D413, 'M', u't'), - (0x1D414, 'M', u'u'), - (0x1D415, 'M', u'v'), - (0x1D416, 'M', u'w'), - (0x1D417, 'M', u'x'), - (0x1D418, 'M', u'y'), - (0x1D419, 'M', u'z'), - (0x1D41A, 'M', u'a'), - (0x1D41B, 'M', u'b'), - (0x1D41C, 'M', u'c'), - (0x1D41D, 'M', u'd'), - (0x1D41E, 'M', u'e'), - (0x1D41F, 'M', u'f'), - (0x1D420, 'M', u'g'), - (0x1D421, 'M', u'h'), - (0x1D422, 'M', u'i'), - (0x1D423, 'M', u'j'), - (0x1D424, 'M', u'k'), - (0x1D425, 'M', u'l'), - (0x1D426, 'M', u'm'), - (0x1D427, 'M', u'n'), - (0x1D428, 'M', u'o'), - (0x1D429, 'M', u'p'), - (0x1D42A, 'M', u'q'), - (0x1D42B, 'M', u'r'), - (0x1D42C, 'M', u's'), - (0x1D42D, 'M', u't'), - (0x1D42E, 'M', u'u'), - (0x1D42F, 'M', u'v'), - (0x1D430, 'M', u'w'), - (0x1D431, 'M', u'x'), - (0x1D432, 'M', u'y'), - (0x1D433, 'M', u'z'), - (0x1D434, 'M', u'a'), - (0x1D435, 'M', u'b'), - (0x1D436, 'M', u'c'), - (0x1D437, 'M', u'd'), - (0x1D438, 'M', u'e'), - (0x1D439, 'M', u'f'), - (0x1D43A, 'M', u'g'), - (0x1D43B, 'M', u'h'), - (0x1D43C, 'M', u'i'), - (0x1D43D, 'M', u'j'), - (0x1D43E, 'M', u'k'), - (0x1D43F, 'M', u'l'), - (0x1D440, 'M', u'm'), - (0x1D441, 'M', u'n'), - (0x1D442, 'M', u'o'), - (0x1D443, 'M', u'p'), - (0x1D444, 'M', u'q'), - (0x1D445, 'M', u'r'), - (0x1D446, 'M', u's'), - (0x1D447, 'M', u't'), - (0x1D448, 'M', u'u'), - (0x1D449, 'M', u'v'), - (0x1D44A, 'M', u'w'), - (0x1D44B, 'M', u'x'), - (0x1D44C, 'M', u'y'), - (0x1D44D, 'M', u'z'), - (0x1D44E, 'M', u'a'), - (0x1D44F, 'M', u'b'), - (0x1D450, 'M', u'c'), - (0x1D451, 'M', u'd'), + (0x1D400, 'M', 'a'), + (0x1D401, 'M', 'b'), + (0x1D402, 'M', 'c'), + (0x1D403, 'M', 'd'), + (0x1D404, 'M', 'e'), + (0x1D405, 'M', 'f'), + (0x1D406, 'M', 'g'), + (0x1D407, 'M', 'h'), + (0x1D408, 'M', 'i'), + (0x1D409, 'M', 'j'), + (0x1D40A, 'M', 'k'), + (0x1D40B, 'M', 'l'), + (0x1D40C, 'M', 'm'), + (0x1D40D, 'M', 'n'), + (0x1D40E, 'M', 'o'), + (0x1D40F, 'M', 'p'), + (0x1D410, 'M', 'q'), + (0x1D411, 'M', 'r'), + (0x1D412, 'M', 's'), + (0x1D413, 'M', 't'), + (0x1D414, 'M', 'u'), + (0x1D415, 'M', 'v'), + (0x1D416, 'M', 'w'), + (0x1D417, 'M', 'x'), + (0x1D418, 'M', 'y'), + (0x1D419, 'M', 'z'), + (0x1D41A, 'M', 'a'), + (0x1D41B, 'M', 'b'), + (0x1D41C, 'M', 'c'), + (0x1D41D, 'M', 'd'), + (0x1D41E, 'M', 'e'), + (0x1D41F, 'M', 'f'), + (0x1D420, 'M', 'g'), ] -def _seg_60(): +def _seg_61() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0x1D452, 'M', u'e'), - (0x1D453, 'M', u'f'), - (0x1D454, 'M', u'g'), + (0x1D421, 'M', 'h'), + (0x1D422, 'M', 'i'), + (0x1D423, 'M', 'j'), + (0x1D424, 'M', 'k'), + (0x1D425, 'M', 'l'), + (0x1D426, 'M', 'm'), + (0x1D427, 'M', 'n'), + (0x1D428, 'M', 'o'), + (0x1D429, 'M', 'p'), + (0x1D42A, 'M', 'q'), + (0x1D42B, 'M', 'r'), + (0x1D42C, 'M', 's'), + (0x1D42D, 'M', 't'), + (0x1D42E, 'M', 'u'), + (0x1D42F, 'M', 'v'), + (0x1D430, 'M', 'w'), + (0x1D431, 'M', 'x'), + (0x1D432, 'M', 'y'), + (0x1D433, 'M', 'z'), + (0x1D434, 'M', 'a'), + (0x1D435, 'M', 'b'), + (0x1D436, 'M', 'c'), + (0x1D437, 'M', 'd'), + (0x1D438, 'M', 'e'), + (0x1D439, 'M', 'f'), + (0x1D43A, 'M', 'g'), + (0x1D43B, 'M', 'h'), + (0x1D43C, 'M', 'i'), + (0x1D43D, 'M', 'j'), + (0x1D43E, 'M', 'k'), + (0x1D43F, 'M', 'l'), + (0x1D440, 'M', 'm'), + (0x1D441, 'M', 'n'), + (0x1D442, 'M', 'o'), + (0x1D443, 'M', 'p'), + (0x1D444, 'M', 'q'), + (0x1D445, 'M', 'r'), + (0x1D446, 'M', 's'), + (0x1D447, 'M', 't'), + (0x1D448, 'M', 'u'), + (0x1D449, 'M', 'v'), + (0x1D44A, 'M', 'w'), + (0x1D44B, 'M', 'x'), + (0x1D44C, 'M', 'y'), + (0x1D44D, 'M', 'z'), + (0x1D44E, 'M', 'a'), + (0x1D44F, 'M', 'b'), + (0x1D450, 'M', 'c'), + (0x1D451, 'M', 'd'), + (0x1D452, 'M', 'e'), + (0x1D453, 'M', 'f'), + (0x1D454, 'M', 'g'), (0x1D455, 'X'), - (0x1D456, 'M', u'i'), - (0x1D457, 'M', u'j'), - (0x1D458, 'M', u'k'), - (0x1D459, 'M', u'l'), - (0x1D45A, 'M', u'm'), - (0x1D45B, 'M', u'n'), - (0x1D45C, 'M', u'o'), - (0x1D45D, 'M', u'p'), - (0x1D45E, 'M', u'q'), - (0x1D45F, 'M', u'r'), - (0x1D460, 'M', u's'), - (0x1D461, 'M', u't'), - (0x1D462, 'M', u'u'), - (0x1D463, 'M', u'v'), - (0x1D464, 'M', u'w'), - (0x1D465, 'M', u'x'), - (0x1D466, 'M', u'y'), - (0x1D467, 'M', u'z'), - (0x1D468, 'M', u'a'), - (0x1D469, 'M', u'b'), - (0x1D46A, 'M', u'c'), - (0x1D46B, 'M', u'd'), - (0x1D46C, 'M', u'e'), - (0x1D46D, 'M', u'f'), - (0x1D46E, 'M', u'g'), - (0x1D46F, 'M', u'h'), - (0x1D470, 'M', u'i'), - (0x1D471, 'M', u'j'), - (0x1D472, 'M', u'k'), - (0x1D473, 'M', u'l'), - (0x1D474, 'M', u'm'), - (0x1D475, 'M', u'n'), - (0x1D476, 'M', u'o'), - (0x1D477, 'M', u'p'), - (0x1D478, 'M', u'q'), - (0x1D479, 'M', u'r'), - (0x1D47A, 'M', u's'), - (0x1D47B, 'M', u't'), - (0x1D47C, 'M', u'u'), - (0x1D47D, 'M', u'v'), - (0x1D47E, 'M', u'w'), - (0x1D47F, 'M', u'x'), - (0x1D480, 'M', u'y'), - (0x1D481, 'M', u'z'), - (0x1D482, 'M', u'a'), - (0x1D483, 'M', u'b'), - (0x1D484, 'M', u'c'), - (0x1D485, 'M', u'd'), - (0x1D486, 'M', u'e'), - (0x1D487, 'M', u'f'), - (0x1D488, 'M', u'g'), - (0x1D489, 'M', u'h'), - (0x1D48A, 'M', u'i'), - (0x1D48B, 'M', u'j'), - (0x1D48C, 'M', u'k'), - (0x1D48D, 'M', u'l'), - (0x1D48E, 'M', u'm'), - (0x1D48F, 'M', u'n'), - (0x1D490, 'M', u'o'), - (0x1D491, 'M', u'p'), - (0x1D492, 'M', u'q'), - (0x1D493, 'M', u'r'), - (0x1D494, 'M', u's'), - (0x1D495, 'M', u't'), - (0x1D496, 'M', u'u'), - (0x1D497, 'M', u'v'), - (0x1D498, 'M', u'w'), - (0x1D499, 'M', u'x'), - (0x1D49A, 'M', u'y'), - (0x1D49B, 'M', u'z'), - (0x1D49C, 'M', u'a'), + (0x1D456, 'M', 'i'), + (0x1D457, 'M', 'j'), + (0x1D458, 'M', 'k'), + (0x1D459, 'M', 'l'), + (0x1D45A, 'M', 'm'), + (0x1D45B, 'M', 'n'), + (0x1D45C, 'M', 'o'), + (0x1D45D, 'M', 'p'), + (0x1D45E, 'M', 'q'), + (0x1D45F, 'M', 'r'), + (0x1D460, 'M', 's'), + (0x1D461, 'M', 't'), + (0x1D462, 'M', 'u'), + (0x1D463, 'M', 'v'), + (0x1D464, 'M', 'w'), + (0x1D465, 'M', 'x'), + (0x1D466, 'M', 'y'), + (0x1D467, 'M', 'z'), + (0x1D468, 'M', 'a'), + (0x1D469, 'M', 'b'), + (0x1D46A, 'M', 'c'), + (0x1D46B, 'M', 'd'), + (0x1D46C, 'M', 'e'), + (0x1D46D, 'M', 'f'), + (0x1D46E, 'M', 'g'), + (0x1D46F, 'M', 'h'), + (0x1D470, 'M', 'i'), + (0x1D471, 'M', 'j'), + (0x1D472, 'M', 'k'), + (0x1D473, 'M', 'l'), + (0x1D474, 'M', 'm'), + (0x1D475, 'M', 'n'), + (0x1D476, 'M', 'o'), + (0x1D477, 'M', 'p'), + (0x1D478, 'M', 'q'), + (0x1D479, 'M', 'r'), + (0x1D47A, 'M', 's'), + (0x1D47B, 'M', 't'), + (0x1D47C, 'M', 'u'), + (0x1D47D, 'M', 'v'), + (0x1D47E, 'M', 'w'), + (0x1D47F, 'M', 'x'), + (0x1D480, 'M', 'y'), + (0x1D481, 'M', 'z'), + (0x1D482, 'M', 'a'), + (0x1D483, 'M', 'b'), + (0x1D484, 'M', 'c'), + ] + +def _seg_62() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1D485, 'M', 'd'), + (0x1D486, 'M', 'e'), + (0x1D487, 'M', 'f'), + (0x1D488, 'M', 'g'), + (0x1D489, 'M', 'h'), + (0x1D48A, 'M', 'i'), + (0x1D48B, 'M', 'j'), + (0x1D48C, 'M', 'k'), + (0x1D48D, 'M', 'l'), + (0x1D48E, 'M', 'm'), + (0x1D48F, 'M', 'n'), + (0x1D490, 'M', 'o'), + (0x1D491, 'M', 'p'), + (0x1D492, 'M', 'q'), + (0x1D493, 'M', 'r'), + (0x1D494, 'M', 's'), + (0x1D495, 'M', 't'), + (0x1D496, 'M', 'u'), + (0x1D497, 'M', 'v'), + (0x1D498, 'M', 'w'), + (0x1D499, 'M', 'x'), + (0x1D49A, 'M', 'y'), + (0x1D49B, 'M', 'z'), + (0x1D49C, 'M', 'a'), (0x1D49D, 'X'), - (0x1D49E, 'M', u'c'), - (0x1D49F, 'M', u'd'), + (0x1D49E, 'M', 'c'), + (0x1D49F, 'M', 'd'), (0x1D4A0, 'X'), - (0x1D4A2, 'M', u'g'), + (0x1D4A2, 'M', 'g'), (0x1D4A3, 'X'), - (0x1D4A5, 'M', u'j'), - (0x1D4A6, 'M', u'k'), + (0x1D4A5, 'M', 'j'), + (0x1D4A6, 'M', 'k'), (0x1D4A7, 'X'), - (0x1D4A9, 'M', u'n'), - (0x1D4AA, 'M', u'o'), - (0x1D4AB, 'M', u'p'), - (0x1D4AC, 'M', u'q'), + (0x1D4A9, 'M', 'n'), + (0x1D4AA, 'M', 'o'), + (0x1D4AB, 'M', 'p'), + (0x1D4AC, 'M', 'q'), (0x1D4AD, 'X'), - (0x1D4AE, 'M', u's'), - (0x1D4AF, 'M', u't'), - (0x1D4B0, 'M', u'u'), - (0x1D4B1, 'M', u'v'), - (0x1D4B2, 'M', u'w'), - (0x1D4B3, 'M', u'x'), - (0x1D4B4, 'M', u'y'), - (0x1D4B5, 'M', u'z'), - (0x1D4B6, 'M', u'a'), - (0x1D4B7, 'M', u'b'), - (0x1D4B8, 'M', u'c'), - ] - -def _seg_61(): - return [ - (0x1D4B9, 'M', u'd'), + (0x1D4AE, 'M', 's'), + (0x1D4AF, 'M', 't'), + (0x1D4B0, 'M', 'u'), + (0x1D4B1, 'M', 'v'), + (0x1D4B2, 'M', 'w'), + (0x1D4B3, 'M', 'x'), + (0x1D4B4, 'M', 'y'), + (0x1D4B5, 'M', 'z'), + (0x1D4B6, 'M', 'a'), + (0x1D4B7, 'M', 'b'), + (0x1D4B8, 'M', 'c'), + (0x1D4B9, 'M', 'd'), (0x1D4BA, 'X'), - (0x1D4BB, 'M', u'f'), + (0x1D4BB, 'M', 'f'), (0x1D4BC, 'X'), - (0x1D4BD, 'M', u'h'), - (0x1D4BE, 'M', u'i'), - (0x1D4BF, 'M', u'j'), - (0x1D4C0, 'M', u'k'), - (0x1D4C1, 'M', u'l'), - (0x1D4C2, 'M', u'm'), - (0x1D4C3, 'M', u'n'), + (0x1D4BD, 'M', 'h'), + (0x1D4BE, 'M', 'i'), + (0x1D4BF, 'M', 'j'), + (0x1D4C0, 'M', 'k'), + (0x1D4C1, 'M', 'l'), + (0x1D4C2, 'M', 'm'), + (0x1D4C3, 'M', 'n'), (0x1D4C4, 'X'), - (0x1D4C5, 'M', u'p'), - (0x1D4C6, 'M', u'q'), - (0x1D4C7, 'M', u'r'), - (0x1D4C8, 'M', u's'), - (0x1D4C9, 'M', u't'), - (0x1D4CA, 'M', u'u'), - (0x1D4CB, 'M', u'v'), - (0x1D4CC, 'M', u'w'), - (0x1D4CD, 'M', u'x'), - (0x1D4CE, 'M', u'y'), - (0x1D4CF, 'M', u'z'), - (0x1D4D0, 'M', u'a'), - (0x1D4D1, 'M', u'b'), - (0x1D4D2, 'M', u'c'), - (0x1D4D3, 'M', u'd'), - (0x1D4D4, 'M', u'e'), - (0x1D4D5, 'M', u'f'), - (0x1D4D6, 'M', u'g'), - (0x1D4D7, 'M', u'h'), - (0x1D4D8, 'M', u'i'), - (0x1D4D9, 'M', u'j'), - (0x1D4DA, 'M', u'k'), - (0x1D4DB, 'M', u'l'), - (0x1D4DC, 'M', u'm'), - (0x1D4DD, 'M', u'n'), - (0x1D4DE, 'M', u'o'), - (0x1D4DF, 'M', u'p'), - (0x1D4E0, 'M', u'q'), - (0x1D4E1, 'M', u'r'), - (0x1D4E2, 'M', u's'), - (0x1D4E3, 'M', u't'), - (0x1D4E4, 'M', u'u'), - (0x1D4E5, 'M', u'v'), - (0x1D4E6, 'M', u'w'), - (0x1D4E7, 'M', u'x'), - (0x1D4E8, 'M', u'y'), - (0x1D4E9, 'M', u'z'), - (0x1D4EA, 'M', u'a'), - (0x1D4EB, 'M', u'b'), - (0x1D4EC, 'M', u'c'), - (0x1D4ED, 'M', u'd'), - (0x1D4EE, 'M', u'e'), - (0x1D4EF, 'M', u'f'), - (0x1D4F0, 'M', u'g'), - (0x1D4F1, 'M', u'h'), - (0x1D4F2, 'M', u'i'), - (0x1D4F3, 'M', u'j'), - (0x1D4F4, 'M', u'k'), - (0x1D4F5, 'M', u'l'), - (0x1D4F6, 'M', u'm'), - (0x1D4F7, 'M', u'n'), - (0x1D4F8, 'M', u'o'), - (0x1D4F9, 'M', u'p'), - (0x1D4FA, 'M', u'q'), - (0x1D4FB, 'M', u'r'), - (0x1D4FC, 'M', u's'), - (0x1D4FD, 'M', u't'), - (0x1D4FE, 'M', u'u'), - (0x1D4FF, 'M', u'v'), - (0x1D500, 'M', u'w'), - (0x1D501, 'M', u'x'), - (0x1D502, 'M', u'y'), - (0x1D503, 'M', u'z'), - (0x1D504, 'M', u'a'), - (0x1D505, 'M', u'b'), + (0x1D4C5, 'M', 'p'), + (0x1D4C6, 'M', 'q'), + (0x1D4C7, 'M', 'r'), + (0x1D4C8, 'M', 's'), + (0x1D4C9, 'M', 't'), + (0x1D4CA, 'M', 'u'), + (0x1D4CB, 'M', 'v'), + (0x1D4CC, 'M', 'w'), + (0x1D4CD, 'M', 'x'), + (0x1D4CE, 'M', 'y'), + (0x1D4CF, 'M', 'z'), + (0x1D4D0, 'M', 'a'), + (0x1D4D1, 'M', 'b'), + (0x1D4D2, 'M', 'c'), + (0x1D4D3, 'M', 'd'), + (0x1D4D4, 'M', 'e'), + (0x1D4D5, 'M', 'f'), + (0x1D4D6, 'M', 'g'), + (0x1D4D7, 'M', 'h'), + (0x1D4D8, 'M', 'i'), + (0x1D4D9, 'M', 'j'), + (0x1D4DA, 'M', 'k'), + (0x1D4DB, 'M', 'l'), + (0x1D4DC, 'M', 'm'), + (0x1D4DD, 'M', 'n'), + (0x1D4DE, 'M', 'o'), + (0x1D4DF, 'M', 'p'), + (0x1D4E0, 'M', 'q'), + (0x1D4E1, 'M', 'r'), + (0x1D4E2, 'M', 's'), + (0x1D4E3, 'M', 't'), + (0x1D4E4, 'M', 'u'), + (0x1D4E5, 'M', 'v'), + (0x1D4E6, 'M', 'w'), + (0x1D4E7, 'M', 'x'), + (0x1D4E8, 'M', 'y'), + (0x1D4E9, 'M', 'z'), + (0x1D4EA, 'M', 'a'), + (0x1D4EB, 'M', 'b'), + ] + +def _seg_63() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1D4EC, 'M', 'c'), + (0x1D4ED, 'M', 'd'), + (0x1D4EE, 'M', 'e'), + (0x1D4EF, 'M', 'f'), + (0x1D4F0, 'M', 'g'), + (0x1D4F1, 'M', 'h'), + (0x1D4F2, 'M', 'i'), + (0x1D4F3, 'M', 'j'), + (0x1D4F4, 'M', 'k'), + (0x1D4F5, 'M', 'l'), + (0x1D4F6, 'M', 'm'), + (0x1D4F7, 'M', 'n'), + (0x1D4F8, 'M', 'o'), + (0x1D4F9, 'M', 'p'), + (0x1D4FA, 'M', 'q'), + (0x1D4FB, 'M', 'r'), + (0x1D4FC, 'M', 's'), + (0x1D4FD, 'M', 't'), + (0x1D4FE, 'M', 'u'), + (0x1D4FF, 'M', 'v'), + (0x1D500, 'M', 'w'), + (0x1D501, 'M', 'x'), + (0x1D502, 'M', 'y'), + (0x1D503, 'M', 'z'), + (0x1D504, 'M', 'a'), + (0x1D505, 'M', 'b'), (0x1D506, 'X'), - (0x1D507, 'M', u'd'), - (0x1D508, 'M', u'e'), - (0x1D509, 'M', u'f'), - (0x1D50A, 'M', u'g'), + (0x1D507, 'M', 'd'), + (0x1D508, 'M', 'e'), + (0x1D509, 'M', 'f'), + (0x1D50A, 'M', 'g'), (0x1D50B, 'X'), - (0x1D50D, 'M', u'j'), - (0x1D50E, 'M', u'k'), - (0x1D50F, 'M', u'l'), - (0x1D510, 'M', u'm'), - (0x1D511, 'M', u'n'), - (0x1D512, 'M', u'o'), - (0x1D513, 'M', u'p'), - (0x1D514, 'M', u'q'), + (0x1D50D, 'M', 'j'), + (0x1D50E, 'M', 'k'), + (0x1D50F, 'M', 'l'), + (0x1D510, 'M', 'm'), + (0x1D511, 'M', 'n'), + (0x1D512, 'M', 'o'), + (0x1D513, 'M', 'p'), + (0x1D514, 'M', 'q'), (0x1D515, 'X'), - (0x1D516, 'M', u's'), - (0x1D517, 'M', u't'), - (0x1D518, 'M', u'u'), - (0x1D519, 'M', u'v'), - (0x1D51A, 'M', u'w'), - (0x1D51B, 'M', u'x'), - (0x1D51C, 'M', u'y'), + (0x1D516, 'M', 's'), + (0x1D517, 'M', 't'), + (0x1D518, 'M', 'u'), + (0x1D519, 'M', 'v'), + (0x1D51A, 'M', 'w'), + (0x1D51B, 'M', 'x'), + (0x1D51C, 'M', 'y'), (0x1D51D, 'X'), - ] - -def _seg_62(): - return [ - (0x1D51E, 'M', u'a'), - (0x1D51F, 'M', u'b'), - (0x1D520, 'M', u'c'), - (0x1D521, 'M', u'd'), - (0x1D522, 'M', u'e'), - (0x1D523, 'M', u'f'), - (0x1D524, 'M', u'g'), - (0x1D525, 'M', u'h'), - (0x1D526, 'M', u'i'), - (0x1D527, 'M', u'j'), - (0x1D528, 'M', u'k'), - (0x1D529, 'M', u'l'), - (0x1D52A, 'M', u'm'), - (0x1D52B, 'M', u'n'), - (0x1D52C, 'M', u'o'), - (0x1D52D, 'M', u'p'), - (0x1D52E, 'M', u'q'), - (0x1D52F, 'M', u'r'), - (0x1D530, 'M', u's'), - (0x1D531, 'M', u't'), - (0x1D532, 'M', u'u'), - (0x1D533, 'M', u'v'), - (0x1D534, 'M', u'w'), - (0x1D535, 'M', u'x'), - (0x1D536, 'M', u'y'), - (0x1D537, 'M', u'z'), - (0x1D538, 'M', u'a'), - (0x1D539, 'M', u'b'), + (0x1D51E, 'M', 'a'), + (0x1D51F, 'M', 'b'), + (0x1D520, 'M', 'c'), + (0x1D521, 'M', 'd'), + (0x1D522, 'M', 'e'), + (0x1D523, 'M', 'f'), + (0x1D524, 'M', 'g'), + (0x1D525, 'M', 'h'), + (0x1D526, 'M', 'i'), + (0x1D527, 'M', 'j'), + (0x1D528, 'M', 'k'), + (0x1D529, 'M', 'l'), + (0x1D52A, 'M', 'm'), + (0x1D52B, 'M', 'n'), + (0x1D52C, 'M', 'o'), + (0x1D52D, 'M', 'p'), + (0x1D52E, 'M', 'q'), + (0x1D52F, 'M', 'r'), + (0x1D530, 'M', 's'), + (0x1D531, 'M', 't'), + (0x1D532, 'M', 'u'), + (0x1D533, 'M', 'v'), + (0x1D534, 'M', 'w'), + (0x1D535, 'M', 'x'), + (0x1D536, 'M', 'y'), + (0x1D537, 'M', 'z'), + (0x1D538, 'M', 'a'), + (0x1D539, 'M', 'b'), (0x1D53A, 'X'), - (0x1D53B, 'M', u'd'), - (0x1D53C, 'M', u'e'), - (0x1D53D, 'M', u'f'), - (0x1D53E, 'M', u'g'), + (0x1D53B, 'M', 'd'), + (0x1D53C, 'M', 'e'), + (0x1D53D, 'M', 'f'), + (0x1D53E, 'M', 'g'), (0x1D53F, 'X'), - (0x1D540, 'M', u'i'), - (0x1D541, 'M', u'j'), - (0x1D542, 'M', u'k'), - (0x1D543, 'M', u'l'), - (0x1D544, 'M', u'm'), + (0x1D540, 'M', 'i'), + (0x1D541, 'M', 'j'), + (0x1D542, 'M', 'k'), + (0x1D543, 'M', 'l'), + (0x1D544, 'M', 'm'), (0x1D545, 'X'), - (0x1D546, 'M', u'o'), + (0x1D546, 'M', 'o'), (0x1D547, 'X'), - (0x1D54A, 'M', u's'), - (0x1D54B, 'M', u't'), - (0x1D54C, 'M', u'u'), - (0x1D54D, 'M', u'v'), - (0x1D54E, 'M', u'w'), - (0x1D54F, 'M', u'x'), - (0x1D550, 'M', u'y'), + (0x1D54A, 'M', 's'), + (0x1D54B, 'M', 't'), + (0x1D54C, 'M', 'u'), + (0x1D54D, 'M', 'v'), + (0x1D54E, 'M', 'w'), + (0x1D54F, 'M', 'x'), + (0x1D550, 'M', 'y'), (0x1D551, 'X'), - (0x1D552, 'M', u'a'), - (0x1D553, 'M', u'b'), - (0x1D554, 'M', u'c'), - (0x1D555, 'M', u'd'), - (0x1D556, 'M', u'e'), - (0x1D557, 'M', u'f'), - (0x1D558, 'M', u'g'), - (0x1D559, 'M', u'h'), - (0x1D55A, 'M', u'i'), - (0x1D55B, 'M', u'j'), - (0x1D55C, 'M', u'k'), - (0x1D55D, 'M', u'l'), - (0x1D55E, 'M', u'm'), - (0x1D55F, 'M', u'n'), - (0x1D560, 'M', u'o'), - (0x1D561, 'M', u'p'), - (0x1D562, 'M', u'q'), - (0x1D563, 'M', u'r'), - (0x1D564, 'M', u's'), - (0x1D565, 'M', u't'), - (0x1D566, 'M', u'u'), - (0x1D567, 'M', u'v'), - (0x1D568, 'M', u'w'), - (0x1D569, 'M', u'x'), - (0x1D56A, 'M', u'y'), - (0x1D56B, 'M', u'z'), - (0x1D56C, 'M', u'a'), - (0x1D56D, 'M', u'b'), - (0x1D56E, 'M', u'c'), - (0x1D56F, 'M', u'd'), - (0x1D570, 'M', u'e'), - (0x1D571, 'M', u'f'), - (0x1D572, 'M', u'g'), - (0x1D573, 'M', u'h'), - (0x1D574, 'M', u'i'), - (0x1D575, 'M', u'j'), - (0x1D576, 'M', u'k'), - (0x1D577, 'M', u'l'), - (0x1D578, 'M', u'm'), - (0x1D579, 'M', u'n'), - (0x1D57A, 'M', u'o'), - (0x1D57B, 'M', u'p'), - (0x1D57C, 'M', u'q'), - (0x1D57D, 'M', u'r'), - (0x1D57E, 'M', u's'), - (0x1D57F, 'M', u't'), - (0x1D580, 'M', u'u'), - (0x1D581, 'M', u'v'), - (0x1D582, 'M', u'w'), - (0x1D583, 'M', u'x'), + (0x1D552, 'M', 'a'), ] -def _seg_63(): +def _seg_64() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0x1D584, 'M', u'y'), - (0x1D585, 'M', u'z'), - (0x1D586, 'M', u'a'), - (0x1D587, 'M', u'b'), - (0x1D588, 'M', u'c'), - (0x1D589, 'M', u'd'), - (0x1D58A, 'M', u'e'), - (0x1D58B, 'M', u'f'), - (0x1D58C, 'M', u'g'), - (0x1D58D, 'M', u'h'), - (0x1D58E, 'M', u'i'), - (0x1D58F, 'M', u'j'), - (0x1D590, 'M', u'k'), - (0x1D591, 'M', u'l'), - (0x1D592, 'M', u'm'), - (0x1D593, 'M', u'n'), - (0x1D594, 'M', u'o'), - (0x1D595, 'M', u'p'), - (0x1D596, 'M', u'q'), - (0x1D597, 'M', u'r'), - (0x1D598, 'M', u's'), - (0x1D599, 'M', u't'), - (0x1D59A, 'M', u'u'), - (0x1D59B, 'M', u'v'), - (0x1D59C, 'M', u'w'), - (0x1D59D, 'M', u'x'), - (0x1D59E, 'M', u'y'), - (0x1D59F, 'M', u'z'), - (0x1D5A0, 'M', u'a'), - (0x1D5A1, 'M', u'b'), - (0x1D5A2, 'M', u'c'), - (0x1D5A3, 'M', u'd'), - (0x1D5A4, 'M', u'e'), - (0x1D5A5, 'M', u'f'), - (0x1D5A6, 'M', u'g'), - (0x1D5A7, 'M', u'h'), - (0x1D5A8, 'M', u'i'), - (0x1D5A9, 'M', u'j'), - (0x1D5AA, 'M', u'k'), - (0x1D5AB, 'M', u'l'), - (0x1D5AC, 'M', u'm'), - (0x1D5AD, 'M', u'n'), - (0x1D5AE, 'M', u'o'), - (0x1D5AF, 'M', u'p'), - (0x1D5B0, 'M', u'q'), - (0x1D5B1, 'M', u'r'), - (0x1D5B2, 'M', u's'), - (0x1D5B3, 'M', u't'), - (0x1D5B4, 'M', u'u'), - (0x1D5B5, 'M', u'v'), - (0x1D5B6, 'M', u'w'), - (0x1D5B7, 'M', u'x'), - (0x1D5B8, 'M', u'y'), - (0x1D5B9, 'M', u'z'), - (0x1D5BA, 'M', u'a'), - (0x1D5BB, 'M', u'b'), - (0x1D5BC, 'M', u'c'), - (0x1D5BD, 'M', u'd'), - (0x1D5BE, 'M', u'e'), - (0x1D5BF, 'M', u'f'), - (0x1D5C0, 'M', u'g'), - (0x1D5C1, 'M', u'h'), - (0x1D5C2, 'M', u'i'), - (0x1D5C3, 'M', u'j'), - (0x1D5C4, 'M', u'k'), - (0x1D5C5, 'M', u'l'), - (0x1D5C6, 'M', u'm'), - (0x1D5C7, 'M', u'n'), - (0x1D5C8, 'M', u'o'), - (0x1D5C9, 'M', u'p'), - (0x1D5CA, 'M', u'q'), - (0x1D5CB, 'M', u'r'), - (0x1D5CC, 'M', u's'), - (0x1D5CD, 'M', u't'), - (0x1D5CE, 'M', u'u'), - (0x1D5CF, 'M', u'v'), - (0x1D5D0, 'M', u'w'), - (0x1D5D1, 'M', u'x'), - (0x1D5D2, 'M', u'y'), - (0x1D5D3, 'M', u'z'), - (0x1D5D4, 'M', u'a'), - (0x1D5D5, 'M', u'b'), - (0x1D5D6, 'M', u'c'), - (0x1D5D7, 'M', u'd'), - (0x1D5D8, 'M', u'e'), - (0x1D5D9, 'M', u'f'), - (0x1D5DA, 'M', u'g'), - (0x1D5DB, 'M', u'h'), - (0x1D5DC, 'M', u'i'), - (0x1D5DD, 'M', u'j'), - (0x1D5DE, 'M', u'k'), - (0x1D5DF, 'M', u'l'), - (0x1D5E0, 'M', u'm'), - (0x1D5E1, 'M', u'n'), - (0x1D5E2, 'M', u'o'), - (0x1D5E3, 'M', u'p'), - (0x1D5E4, 'M', u'q'), - (0x1D5E5, 'M', u'r'), - (0x1D5E6, 'M', u's'), - (0x1D5E7, 'M', u't'), + (0x1D553, 'M', 'b'), + (0x1D554, 'M', 'c'), + (0x1D555, 'M', 'd'), + (0x1D556, 'M', 'e'), + (0x1D557, 'M', 'f'), + (0x1D558, 'M', 'g'), + (0x1D559, 'M', 'h'), + (0x1D55A, 'M', 'i'), + (0x1D55B, 'M', 'j'), + (0x1D55C, 'M', 'k'), + (0x1D55D, 'M', 'l'), + (0x1D55E, 'M', 'm'), + (0x1D55F, 'M', 'n'), + (0x1D560, 'M', 'o'), + (0x1D561, 'M', 'p'), + (0x1D562, 'M', 'q'), + (0x1D563, 'M', 'r'), + (0x1D564, 'M', 's'), + (0x1D565, 'M', 't'), + (0x1D566, 'M', 'u'), + (0x1D567, 'M', 'v'), + (0x1D568, 'M', 'w'), + (0x1D569, 'M', 'x'), + (0x1D56A, 'M', 'y'), + (0x1D56B, 'M', 'z'), + (0x1D56C, 'M', 'a'), + (0x1D56D, 'M', 'b'), + (0x1D56E, 'M', 'c'), + (0x1D56F, 'M', 'd'), + (0x1D570, 'M', 'e'), + (0x1D571, 'M', 'f'), + (0x1D572, 'M', 'g'), + (0x1D573, 'M', 'h'), + (0x1D574, 'M', 'i'), + (0x1D575, 'M', 'j'), + (0x1D576, 'M', 'k'), + (0x1D577, 'M', 'l'), + (0x1D578, 'M', 'm'), + (0x1D579, 'M', 'n'), + (0x1D57A, 'M', 'o'), + (0x1D57B, 'M', 'p'), + (0x1D57C, 'M', 'q'), + (0x1D57D, 'M', 'r'), + (0x1D57E, 'M', 's'), + (0x1D57F, 'M', 't'), + (0x1D580, 'M', 'u'), + (0x1D581, 'M', 'v'), + (0x1D582, 'M', 'w'), + (0x1D583, 'M', 'x'), + (0x1D584, 'M', 'y'), + (0x1D585, 'M', 'z'), + (0x1D586, 'M', 'a'), + (0x1D587, 'M', 'b'), + (0x1D588, 'M', 'c'), + (0x1D589, 'M', 'd'), + (0x1D58A, 'M', 'e'), + (0x1D58B, 'M', 'f'), + (0x1D58C, 'M', 'g'), + (0x1D58D, 'M', 'h'), + (0x1D58E, 'M', 'i'), + (0x1D58F, 'M', 'j'), + (0x1D590, 'M', 'k'), + (0x1D591, 'M', 'l'), + (0x1D592, 'M', 'm'), + (0x1D593, 'M', 'n'), + (0x1D594, 'M', 'o'), + (0x1D595, 'M', 'p'), + (0x1D596, 'M', 'q'), + (0x1D597, 'M', 'r'), + (0x1D598, 'M', 's'), + (0x1D599, 'M', 't'), + (0x1D59A, 'M', 'u'), + (0x1D59B, 'M', 'v'), + (0x1D59C, 'M', 'w'), + (0x1D59D, 'M', 'x'), + (0x1D59E, 'M', 'y'), + (0x1D59F, 'M', 'z'), + (0x1D5A0, 'M', 'a'), + (0x1D5A1, 'M', 'b'), + (0x1D5A2, 'M', 'c'), + (0x1D5A3, 'M', 'd'), + (0x1D5A4, 'M', 'e'), + (0x1D5A5, 'M', 'f'), + (0x1D5A6, 'M', 'g'), + (0x1D5A7, 'M', 'h'), + (0x1D5A8, 'M', 'i'), + (0x1D5A9, 'M', 'j'), + (0x1D5AA, 'M', 'k'), + (0x1D5AB, 'M', 'l'), + (0x1D5AC, 'M', 'm'), + (0x1D5AD, 'M', 'n'), + (0x1D5AE, 'M', 'o'), + (0x1D5AF, 'M', 'p'), + (0x1D5B0, 'M', 'q'), + (0x1D5B1, 'M', 'r'), + (0x1D5B2, 'M', 's'), + (0x1D5B3, 'M', 't'), + (0x1D5B4, 'M', 'u'), + (0x1D5B5, 'M', 'v'), + (0x1D5B6, 'M', 'w'), ] -def _seg_64(): +def _seg_65() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0x1D5E8, 'M', u'u'), - (0x1D5E9, 'M', u'v'), - (0x1D5EA, 'M', u'w'), - (0x1D5EB, 'M', u'x'), - (0x1D5EC, 'M', u'y'), - (0x1D5ED, 'M', u'z'), - (0x1D5EE, 'M', u'a'), - (0x1D5EF, 'M', u'b'), - (0x1D5F0, 'M', u'c'), - (0x1D5F1, 'M', u'd'), - (0x1D5F2, 'M', u'e'), - (0x1D5F3, 'M', u'f'), - (0x1D5F4, 'M', u'g'), - (0x1D5F5, 'M', u'h'), - (0x1D5F6, 'M', u'i'), - (0x1D5F7, 'M', u'j'), - (0x1D5F8, 'M', u'k'), - (0x1D5F9, 'M', u'l'), - (0x1D5FA, 'M', u'm'), - (0x1D5FB, 'M', u'n'), - (0x1D5FC, 'M', u'o'), - (0x1D5FD, 'M', u'p'), - (0x1D5FE, 'M', u'q'), - (0x1D5FF, 'M', u'r'), - (0x1D600, 'M', u's'), - (0x1D601, 'M', u't'), - (0x1D602, 'M', u'u'), - (0x1D603, 'M', u'v'), - (0x1D604, 'M', u'w'), - (0x1D605, 'M', u'x'), - (0x1D606, 'M', u'y'), - (0x1D607, 'M', u'z'), - (0x1D608, 'M', u'a'), - (0x1D609, 'M', u'b'), - (0x1D60A, 'M', u'c'), - (0x1D60B, 'M', u'd'), - (0x1D60C, 'M', u'e'), - (0x1D60D, 'M', u'f'), - (0x1D60E, 'M', u'g'), - (0x1D60F, 'M', u'h'), - (0x1D610, 'M', u'i'), - (0x1D611, 'M', u'j'), - (0x1D612, 'M', u'k'), - (0x1D613, 'M', u'l'), - (0x1D614, 'M', u'm'), - (0x1D615, 'M', u'n'), - (0x1D616, 'M', u'o'), - (0x1D617, 'M', u'p'), - (0x1D618, 'M', u'q'), - (0x1D619, 'M', u'r'), - (0x1D61A, 'M', u's'), - (0x1D61B, 'M', u't'), - (0x1D61C, 'M', u'u'), - (0x1D61D, 'M', u'v'), - (0x1D61E, 'M', u'w'), - (0x1D61F, 'M', u'x'), - (0x1D620, 'M', u'y'), - (0x1D621, 'M', u'z'), - (0x1D622, 'M', u'a'), - (0x1D623, 'M', u'b'), - (0x1D624, 'M', u'c'), - (0x1D625, 'M', u'd'), - (0x1D626, 'M', u'e'), - (0x1D627, 'M', u'f'), - (0x1D628, 'M', u'g'), - (0x1D629, 'M', u'h'), - (0x1D62A, 'M', u'i'), - (0x1D62B, 'M', u'j'), - (0x1D62C, 'M', u'k'), - (0x1D62D, 'M', u'l'), - (0x1D62E, 'M', u'm'), - (0x1D62F, 'M', u'n'), - (0x1D630, 'M', u'o'), - (0x1D631, 'M', u'p'), - (0x1D632, 'M', u'q'), - (0x1D633, 'M', u'r'), - (0x1D634, 'M', u's'), - (0x1D635, 'M', u't'), - (0x1D636, 'M', u'u'), - (0x1D637, 'M', u'v'), - (0x1D638, 'M', u'w'), - (0x1D639, 'M', u'x'), - (0x1D63A, 'M', u'y'), - (0x1D63B, 'M', u'z'), - (0x1D63C, 'M', u'a'), - (0x1D63D, 'M', u'b'), - (0x1D63E, 'M', u'c'), - (0x1D63F, 'M', u'd'), - (0x1D640, 'M', u'e'), - (0x1D641, 'M', u'f'), - (0x1D642, 'M', u'g'), - (0x1D643, 'M', u'h'), - (0x1D644, 'M', u'i'), - (0x1D645, 'M', u'j'), - (0x1D646, 'M', u'k'), - (0x1D647, 'M', u'l'), - (0x1D648, 'M', u'm'), - (0x1D649, 'M', u'n'), - (0x1D64A, 'M', u'o'), - (0x1D64B, 'M', u'p'), + (0x1D5B7, 'M', 'x'), + (0x1D5B8, 'M', 'y'), + (0x1D5B9, 'M', 'z'), + (0x1D5BA, 'M', 'a'), + (0x1D5BB, 'M', 'b'), + (0x1D5BC, 'M', 'c'), + (0x1D5BD, 'M', 'd'), + (0x1D5BE, 'M', 'e'), + (0x1D5BF, 'M', 'f'), + (0x1D5C0, 'M', 'g'), + (0x1D5C1, 'M', 'h'), + (0x1D5C2, 'M', 'i'), + (0x1D5C3, 'M', 'j'), + (0x1D5C4, 'M', 'k'), + (0x1D5C5, 'M', 'l'), + (0x1D5C6, 'M', 'm'), + (0x1D5C7, 'M', 'n'), + (0x1D5C8, 'M', 'o'), + (0x1D5C9, 'M', 'p'), + (0x1D5CA, 'M', 'q'), + (0x1D5CB, 'M', 'r'), + (0x1D5CC, 'M', 's'), + (0x1D5CD, 'M', 't'), + (0x1D5CE, 'M', 'u'), + (0x1D5CF, 'M', 'v'), + (0x1D5D0, 'M', 'w'), + (0x1D5D1, 'M', 'x'), + (0x1D5D2, 'M', 'y'), + (0x1D5D3, 'M', 'z'), + (0x1D5D4, 'M', 'a'), + (0x1D5D5, 'M', 'b'), + (0x1D5D6, 'M', 'c'), + (0x1D5D7, 'M', 'd'), + (0x1D5D8, 'M', 'e'), + (0x1D5D9, 'M', 'f'), + (0x1D5DA, 'M', 'g'), + (0x1D5DB, 'M', 'h'), + (0x1D5DC, 'M', 'i'), + (0x1D5DD, 'M', 'j'), + (0x1D5DE, 'M', 'k'), + (0x1D5DF, 'M', 'l'), + (0x1D5E0, 'M', 'm'), + (0x1D5E1, 'M', 'n'), + (0x1D5E2, 'M', 'o'), + (0x1D5E3, 'M', 'p'), + (0x1D5E4, 'M', 'q'), + (0x1D5E5, 'M', 'r'), + (0x1D5E6, 'M', 's'), + (0x1D5E7, 'M', 't'), + (0x1D5E8, 'M', 'u'), + (0x1D5E9, 'M', 'v'), + (0x1D5EA, 'M', 'w'), + (0x1D5EB, 'M', 'x'), + (0x1D5EC, 'M', 'y'), + (0x1D5ED, 'M', 'z'), + (0x1D5EE, 'M', 'a'), + (0x1D5EF, 'M', 'b'), + (0x1D5F0, 'M', 'c'), + (0x1D5F1, 'M', 'd'), + (0x1D5F2, 'M', 'e'), + (0x1D5F3, 'M', 'f'), + (0x1D5F4, 'M', 'g'), + (0x1D5F5, 'M', 'h'), + (0x1D5F6, 'M', 'i'), + (0x1D5F7, 'M', 'j'), + (0x1D5F8, 'M', 'k'), + (0x1D5F9, 'M', 'l'), + (0x1D5FA, 'M', 'm'), + (0x1D5FB, 'M', 'n'), + (0x1D5FC, 'M', 'o'), + (0x1D5FD, 'M', 'p'), + (0x1D5FE, 'M', 'q'), + (0x1D5FF, 'M', 'r'), + (0x1D600, 'M', 's'), + (0x1D601, 'M', 't'), + (0x1D602, 'M', 'u'), + (0x1D603, 'M', 'v'), + (0x1D604, 'M', 'w'), + (0x1D605, 'M', 'x'), + (0x1D606, 'M', 'y'), + (0x1D607, 'M', 'z'), + (0x1D608, 'M', 'a'), + (0x1D609, 'M', 'b'), + (0x1D60A, 'M', 'c'), + (0x1D60B, 'M', 'd'), + (0x1D60C, 'M', 'e'), + (0x1D60D, 'M', 'f'), + (0x1D60E, 'M', 'g'), + (0x1D60F, 'M', 'h'), + (0x1D610, 'M', 'i'), + (0x1D611, 'M', 'j'), + (0x1D612, 'M', 'k'), + (0x1D613, 'M', 'l'), + (0x1D614, 'M', 'm'), + (0x1D615, 'M', 'n'), + (0x1D616, 'M', 'o'), + (0x1D617, 'M', 'p'), + (0x1D618, 'M', 'q'), + (0x1D619, 'M', 'r'), + (0x1D61A, 'M', 's'), ] -def _seg_65(): +def _seg_66() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0x1D64C, 'M', u'q'), - (0x1D64D, 'M', u'r'), - (0x1D64E, 'M', u's'), - (0x1D64F, 'M', u't'), - (0x1D650, 'M', u'u'), - (0x1D651, 'M', u'v'), - (0x1D652, 'M', u'w'), - (0x1D653, 'M', u'x'), - (0x1D654, 'M', u'y'), - (0x1D655, 'M', u'z'), - (0x1D656, 'M', u'a'), - (0x1D657, 'M', u'b'), - (0x1D658, 'M', u'c'), - (0x1D659, 'M', u'd'), - (0x1D65A, 'M', u'e'), - (0x1D65B, 'M', u'f'), - (0x1D65C, 'M', u'g'), - (0x1D65D, 'M', u'h'), - (0x1D65E, 'M', u'i'), - (0x1D65F, 'M', u'j'), - (0x1D660, 'M', u'k'), - (0x1D661, 'M', u'l'), - (0x1D662, 'M', u'm'), - (0x1D663, 'M', u'n'), - (0x1D664, 'M', u'o'), - (0x1D665, 'M', u'p'), - (0x1D666, 'M', u'q'), - (0x1D667, 'M', u'r'), - (0x1D668, 'M', u's'), - (0x1D669, 'M', u't'), - (0x1D66A, 'M', u'u'), - (0x1D66B, 'M', u'v'), - (0x1D66C, 'M', u'w'), - (0x1D66D, 'M', u'x'), - (0x1D66E, 'M', u'y'), - (0x1D66F, 'M', u'z'), - (0x1D670, 'M', u'a'), - (0x1D671, 'M', u'b'), - (0x1D672, 'M', u'c'), - (0x1D673, 'M', u'd'), - (0x1D674, 'M', u'e'), - (0x1D675, 'M', u'f'), - (0x1D676, 'M', u'g'), - (0x1D677, 'M', u'h'), - (0x1D678, 'M', u'i'), - (0x1D679, 'M', u'j'), - (0x1D67A, 'M', u'k'), - (0x1D67B, 'M', u'l'), - (0x1D67C, 'M', u'm'), - (0x1D67D, 'M', u'n'), - (0x1D67E, 'M', u'o'), - (0x1D67F, 'M', u'p'), - (0x1D680, 'M', u'q'), - (0x1D681, 'M', u'r'), - (0x1D682, 'M', u's'), - (0x1D683, 'M', u't'), - (0x1D684, 'M', u'u'), - (0x1D685, 'M', u'v'), - (0x1D686, 'M', u'w'), - (0x1D687, 'M', u'x'), - (0x1D688, 'M', u'y'), - (0x1D689, 'M', u'z'), - (0x1D68A, 'M', u'a'), - (0x1D68B, 'M', u'b'), - (0x1D68C, 'M', u'c'), - (0x1D68D, 'M', u'd'), - (0x1D68E, 'M', u'e'), - (0x1D68F, 'M', u'f'), - (0x1D690, 'M', u'g'), - (0x1D691, 'M', u'h'), - (0x1D692, 'M', u'i'), - (0x1D693, 'M', u'j'), - (0x1D694, 'M', u'k'), - (0x1D695, 'M', u'l'), - (0x1D696, 'M', u'm'), - (0x1D697, 'M', u'n'), - (0x1D698, 'M', u'o'), - (0x1D699, 'M', u'p'), - (0x1D69A, 'M', u'q'), - (0x1D69B, 'M', u'r'), - (0x1D69C, 'M', u's'), - (0x1D69D, 'M', u't'), - (0x1D69E, 'M', u'u'), - (0x1D69F, 'M', u'v'), - (0x1D6A0, 'M', u'w'), - (0x1D6A1, 'M', u'x'), - (0x1D6A2, 'M', u'y'), - (0x1D6A3, 'M', u'z'), - (0x1D6A4, 'M', u'ı'), - (0x1D6A5, 'M', u'ȷ'), + (0x1D61B, 'M', 't'), + (0x1D61C, 'M', 'u'), + (0x1D61D, 'M', 'v'), + (0x1D61E, 'M', 'w'), + (0x1D61F, 'M', 'x'), + (0x1D620, 'M', 'y'), + (0x1D621, 'M', 'z'), + (0x1D622, 'M', 'a'), + (0x1D623, 'M', 'b'), + (0x1D624, 'M', 'c'), + (0x1D625, 'M', 'd'), + (0x1D626, 'M', 'e'), + (0x1D627, 'M', 'f'), + (0x1D628, 'M', 'g'), + (0x1D629, 'M', 'h'), + (0x1D62A, 'M', 'i'), + (0x1D62B, 'M', 'j'), + (0x1D62C, 'M', 'k'), + (0x1D62D, 'M', 'l'), + (0x1D62E, 'M', 'm'), + (0x1D62F, 'M', 'n'), + (0x1D630, 'M', 'o'), + (0x1D631, 'M', 'p'), + (0x1D632, 'M', 'q'), + (0x1D633, 'M', 'r'), + (0x1D634, 'M', 's'), + (0x1D635, 'M', 't'), + (0x1D636, 'M', 'u'), + (0x1D637, 'M', 'v'), + (0x1D638, 'M', 'w'), + (0x1D639, 'M', 'x'), + (0x1D63A, 'M', 'y'), + (0x1D63B, 'M', 'z'), + (0x1D63C, 'M', 'a'), + (0x1D63D, 'M', 'b'), + (0x1D63E, 'M', 'c'), + (0x1D63F, 'M', 'd'), + (0x1D640, 'M', 'e'), + (0x1D641, 'M', 'f'), + (0x1D642, 'M', 'g'), + (0x1D643, 'M', 'h'), + (0x1D644, 'M', 'i'), + (0x1D645, 'M', 'j'), + (0x1D646, 'M', 'k'), + (0x1D647, 'M', 'l'), + (0x1D648, 'M', 'm'), + (0x1D649, 'M', 'n'), + (0x1D64A, 'M', 'o'), + (0x1D64B, 'M', 'p'), + (0x1D64C, 'M', 'q'), + (0x1D64D, 'M', 'r'), + (0x1D64E, 'M', 's'), + (0x1D64F, 'M', 't'), + (0x1D650, 'M', 'u'), + (0x1D651, 'M', 'v'), + (0x1D652, 'M', 'w'), + (0x1D653, 'M', 'x'), + (0x1D654, 'M', 'y'), + (0x1D655, 'M', 'z'), + (0x1D656, 'M', 'a'), + (0x1D657, 'M', 'b'), + (0x1D658, 'M', 'c'), + (0x1D659, 'M', 'd'), + (0x1D65A, 'M', 'e'), + (0x1D65B, 'M', 'f'), + (0x1D65C, 'M', 'g'), + (0x1D65D, 'M', 'h'), + (0x1D65E, 'M', 'i'), + (0x1D65F, 'M', 'j'), + (0x1D660, 'M', 'k'), + (0x1D661, 'M', 'l'), + (0x1D662, 'M', 'm'), + (0x1D663, 'M', 'n'), + (0x1D664, 'M', 'o'), + (0x1D665, 'M', 'p'), + (0x1D666, 'M', 'q'), + (0x1D667, 'M', 'r'), + (0x1D668, 'M', 's'), + (0x1D669, 'M', 't'), + (0x1D66A, 'M', 'u'), + (0x1D66B, 'M', 'v'), + (0x1D66C, 'M', 'w'), + (0x1D66D, 'M', 'x'), + (0x1D66E, 'M', 'y'), + (0x1D66F, 'M', 'z'), + (0x1D670, 'M', 'a'), + (0x1D671, 'M', 'b'), + (0x1D672, 'M', 'c'), + (0x1D673, 'M', 'd'), + (0x1D674, 'M', 'e'), + (0x1D675, 'M', 'f'), + (0x1D676, 'M', 'g'), + (0x1D677, 'M', 'h'), + (0x1D678, 'M', 'i'), + (0x1D679, 'M', 'j'), + (0x1D67A, 'M', 'k'), + (0x1D67B, 'M', 'l'), + (0x1D67C, 'M', 'm'), + (0x1D67D, 'M', 'n'), + (0x1D67E, 'M', 'o'), + ] + +def _seg_67() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1D67F, 'M', 'p'), + (0x1D680, 'M', 'q'), + (0x1D681, 'M', 'r'), + (0x1D682, 'M', 's'), + (0x1D683, 'M', 't'), + (0x1D684, 'M', 'u'), + (0x1D685, 'M', 'v'), + (0x1D686, 'M', 'w'), + (0x1D687, 'M', 'x'), + (0x1D688, 'M', 'y'), + (0x1D689, 'M', 'z'), + (0x1D68A, 'M', 'a'), + (0x1D68B, 'M', 'b'), + (0x1D68C, 'M', 'c'), + (0x1D68D, 'M', 'd'), + (0x1D68E, 'M', 'e'), + (0x1D68F, 'M', 'f'), + (0x1D690, 'M', 'g'), + (0x1D691, 'M', 'h'), + (0x1D692, 'M', 'i'), + (0x1D693, 'M', 'j'), + (0x1D694, 'M', 'k'), + (0x1D695, 'M', 'l'), + (0x1D696, 'M', 'm'), + (0x1D697, 'M', 'n'), + (0x1D698, 'M', 'o'), + (0x1D699, 'M', 'p'), + (0x1D69A, 'M', 'q'), + (0x1D69B, 'M', 'r'), + (0x1D69C, 'M', 's'), + (0x1D69D, 'M', 't'), + (0x1D69E, 'M', 'u'), + (0x1D69F, 'M', 'v'), + (0x1D6A0, 'M', 'w'), + (0x1D6A1, 'M', 'x'), + (0x1D6A2, 'M', 'y'), + (0x1D6A3, 'M', 'z'), + (0x1D6A4, 'M', 'ı'), + (0x1D6A5, 'M', 'ȷ'), (0x1D6A6, 'X'), - (0x1D6A8, 'M', u'α'), - (0x1D6A9, 'M', u'β'), - (0x1D6AA, 'M', u'γ'), - (0x1D6AB, 'M', u'δ'), - (0x1D6AC, 'M', u'ε'), - (0x1D6AD, 'M', u'ζ'), - (0x1D6AE, 'M', u'η'), - (0x1D6AF, 'M', u'θ'), - (0x1D6B0, 'M', u'ι'), + (0x1D6A8, 'M', 'α'), + (0x1D6A9, 'M', 'β'), + (0x1D6AA, 'M', 'γ'), + (0x1D6AB, 'M', 'δ'), + (0x1D6AC, 'M', 'ε'), + (0x1D6AD, 'M', 'ζ'), + (0x1D6AE, 'M', 'η'), + (0x1D6AF, 'M', 'θ'), + (0x1D6B0, 'M', 'ι'), + (0x1D6B1, 'M', 'κ'), + (0x1D6B2, 'M', 'λ'), + (0x1D6B3, 'M', 'μ'), + (0x1D6B4, 'M', 'ν'), + (0x1D6B5, 'M', 'ξ'), + (0x1D6B6, 'M', 'ο'), + (0x1D6B7, 'M', 'π'), + (0x1D6B8, 'M', 'ρ'), + (0x1D6B9, 'M', 'θ'), + (0x1D6BA, 'M', 'σ'), + (0x1D6BB, 'M', 'τ'), + (0x1D6BC, 'M', 'υ'), + (0x1D6BD, 'M', 'φ'), + (0x1D6BE, 'M', 'χ'), + (0x1D6BF, 'M', 'ψ'), + (0x1D6C0, 'M', 'ω'), + (0x1D6C1, 'M', '∇'), + (0x1D6C2, 'M', 'α'), + (0x1D6C3, 'M', 'β'), + (0x1D6C4, 'M', 'γ'), + (0x1D6C5, 'M', 'δ'), + (0x1D6C6, 'M', 'ε'), + (0x1D6C7, 'M', 'ζ'), + (0x1D6C8, 'M', 'η'), + (0x1D6C9, 'M', 'θ'), + (0x1D6CA, 'M', 'ι'), + (0x1D6CB, 'M', 'κ'), + (0x1D6CC, 'M', 'λ'), + (0x1D6CD, 'M', 'μ'), + (0x1D6CE, 'M', 'ν'), + (0x1D6CF, 'M', 'ξ'), + (0x1D6D0, 'M', 'ο'), + (0x1D6D1, 'M', 'π'), + (0x1D6D2, 'M', 'ρ'), + (0x1D6D3, 'M', 'σ'), + (0x1D6D5, 'M', 'τ'), + (0x1D6D6, 'M', 'υ'), + (0x1D6D7, 'M', 'φ'), + (0x1D6D8, 'M', 'χ'), + (0x1D6D9, 'M', 'ψ'), + (0x1D6DA, 'M', 'ω'), + (0x1D6DB, 'M', '∂'), + (0x1D6DC, 'M', 'ε'), + (0x1D6DD, 'M', 'θ'), + (0x1D6DE, 'M', 'κ'), + (0x1D6DF, 'M', 'φ'), + (0x1D6E0, 'M', 'ρ'), + (0x1D6E1, 'M', 'π'), + (0x1D6E2, 'M', 'α'), + (0x1D6E3, 'M', 'β'), + (0x1D6E4, 'M', 'γ'), ] -def _seg_66(): +def _seg_68() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0x1D6B1, 'M', u'κ'), - (0x1D6B2, 'M', u'λ'), - (0x1D6B3, 'M', u'μ'), - (0x1D6B4, 'M', u'ν'), - (0x1D6B5, 'M', u'ξ'), - (0x1D6B6, 'M', u'ο'), - (0x1D6B7, 'M', u'π'), - (0x1D6B8, 'M', u'ρ'), - (0x1D6B9, 'M', u'θ'), - (0x1D6BA, 'M', u'σ'), - (0x1D6BB, 'M', u'τ'), - (0x1D6BC, 'M', u'υ'), - (0x1D6BD, 'M', u'φ'), - (0x1D6BE, 'M', u'χ'), - (0x1D6BF, 'M', u'ψ'), - (0x1D6C0, 'M', u'ω'), - (0x1D6C1, 'M', u'∇'), - (0x1D6C2, 'M', u'α'), - (0x1D6C3, 'M', u'β'), - (0x1D6C4, 'M', u'γ'), - (0x1D6C5, 'M', u'δ'), - (0x1D6C6, 'M', u'ε'), - (0x1D6C7, 'M', u'ζ'), - (0x1D6C8, 'M', u'η'), - (0x1D6C9, 'M', u'θ'), - (0x1D6CA, 'M', u'ι'), - (0x1D6CB, 'M', u'κ'), - (0x1D6CC, 'M', u'λ'), - (0x1D6CD, 'M', u'μ'), - (0x1D6CE, 'M', u'ν'), - (0x1D6CF, 'M', u'ξ'), - (0x1D6D0, 'M', u'ο'), - (0x1D6D1, 'M', u'π'), - (0x1D6D2, 'M', u'ρ'), - (0x1D6D3, 'M', u'σ'), - (0x1D6D5, 'M', u'τ'), - (0x1D6D6, 'M', u'υ'), - (0x1D6D7, 'M', u'φ'), - (0x1D6D8, 'M', u'χ'), - (0x1D6D9, 'M', u'ψ'), - (0x1D6DA, 'M', u'ω'), - (0x1D6DB, 'M', u'∂'), - (0x1D6DC, 'M', u'ε'), - (0x1D6DD, 'M', u'θ'), - (0x1D6DE, 'M', u'κ'), - (0x1D6DF, 'M', u'φ'), - (0x1D6E0, 'M', u'ρ'), - (0x1D6E1, 'M', u'π'), - (0x1D6E2, 'M', u'α'), - (0x1D6E3, 'M', u'β'), - (0x1D6E4, 'M', u'γ'), - (0x1D6E5, 'M', u'δ'), - (0x1D6E6, 'M', u'ε'), - (0x1D6E7, 'M', u'ζ'), - (0x1D6E8, 'M', u'η'), - (0x1D6E9, 'M', u'θ'), - (0x1D6EA, 'M', u'ι'), - (0x1D6EB, 'M', u'κ'), - (0x1D6EC, 'M', u'λ'), - (0x1D6ED, 'M', u'μ'), - (0x1D6EE, 'M', u'ν'), - (0x1D6EF, 'M', u'ξ'), - (0x1D6F0, 'M', u'ο'), - (0x1D6F1, 'M', u'π'), - (0x1D6F2, 'M', u'ρ'), - (0x1D6F3, 'M', u'θ'), - (0x1D6F4, 'M', u'σ'), - (0x1D6F5, 'M', u'τ'), - (0x1D6F6, 'M', u'υ'), - (0x1D6F7, 'M', u'φ'), - (0x1D6F8, 'M', u'χ'), - (0x1D6F9, 'M', u'ψ'), - (0x1D6FA, 'M', u'ω'), - (0x1D6FB, 'M', u'∇'), - (0x1D6FC, 'M', u'α'), - (0x1D6FD, 'M', u'β'), - (0x1D6FE, 'M', u'γ'), - (0x1D6FF, 'M', u'δ'), - (0x1D700, 'M', u'ε'), - (0x1D701, 'M', u'ζ'), - (0x1D702, 'M', u'η'), - (0x1D703, 'M', u'θ'), - (0x1D704, 'M', u'ι'), - (0x1D705, 'M', u'κ'), - (0x1D706, 'M', u'λ'), - (0x1D707, 'M', u'μ'), - (0x1D708, 'M', u'ν'), - (0x1D709, 'M', u'ξ'), - (0x1D70A, 'M', u'ο'), - (0x1D70B, 'M', u'π'), - (0x1D70C, 'M', u'ρ'), - (0x1D70D, 'M', u'σ'), - (0x1D70F, 'M', u'τ'), - (0x1D710, 'M', u'υ'), - (0x1D711, 'M', u'φ'), - (0x1D712, 'M', u'χ'), - (0x1D713, 'M', u'ψ'), - (0x1D714, 'M', u'ω'), - (0x1D715, 'M', u'∂'), - (0x1D716, 'M', u'ε'), + (0x1D6E5, 'M', 'δ'), + (0x1D6E6, 'M', 'ε'), + (0x1D6E7, 'M', 'ζ'), + (0x1D6E8, 'M', 'η'), + (0x1D6E9, 'M', 'θ'), + (0x1D6EA, 'M', 'ι'), + (0x1D6EB, 'M', 'κ'), + (0x1D6EC, 'M', 'λ'), + (0x1D6ED, 'M', 'μ'), + (0x1D6EE, 'M', 'ν'), + (0x1D6EF, 'M', 'ξ'), + (0x1D6F0, 'M', 'ο'), + (0x1D6F1, 'M', 'π'), + (0x1D6F2, 'M', 'ρ'), + (0x1D6F3, 'M', 'θ'), + (0x1D6F4, 'M', 'σ'), + (0x1D6F5, 'M', 'τ'), + (0x1D6F6, 'M', 'υ'), + (0x1D6F7, 'M', 'φ'), + (0x1D6F8, 'M', 'χ'), + (0x1D6F9, 'M', 'ψ'), + (0x1D6FA, 'M', 'ω'), + (0x1D6FB, 'M', '∇'), + (0x1D6FC, 'M', 'α'), + (0x1D6FD, 'M', 'β'), + (0x1D6FE, 'M', 'γ'), + (0x1D6FF, 'M', 'δ'), + (0x1D700, 'M', 'ε'), + (0x1D701, 'M', 'ζ'), + (0x1D702, 'M', 'η'), + (0x1D703, 'M', 'θ'), + (0x1D704, 'M', 'ι'), + (0x1D705, 'M', 'κ'), + (0x1D706, 'M', 'λ'), + (0x1D707, 'M', 'μ'), + (0x1D708, 'M', 'ν'), + (0x1D709, 'M', 'ξ'), + (0x1D70A, 'M', 'ο'), + (0x1D70B, 'M', 'π'), + (0x1D70C, 'M', 'ρ'), + (0x1D70D, 'M', 'σ'), + (0x1D70F, 'M', 'τ'), + (0x1D710, 'M', 'υ'), + (0x1D711, 'M', 'φ'), + (0x1D712, 'M', 'χ'), + (0x1D713, 'M', 'ψ'), + (0x1D714, 'M', 'ω'), + (0x1D715, 'M', '∂'), + (0x1D716, 'M', 'ε'), + (0x1D717, 'M', 'θ'), + (0x1D718, 'M', 'κ'), + (0x1D719, 'M', 'φ'), + (0x1D71A, 'M', 'ρ'), + (0x1D71B, 'M', 'π'), + (0x1D71C, 'M', 'α'), + (0x1D71D, 'M', 'β'), + (0x1D71E, 'M', 'γ'), + (0x1D71F, 'M', 'δ'), + (0x1D720, 'M', 'ε'), + (0x1D721, 'M', 'ζ'), + (0x1D722, 'M', 'η'), + (0x1D723, 'M', 'θ'), + (0x1D724, 'M', 'ι'), + (0x1D725, 'M', 'κ'), + (0x1D726, 'M', 'λ'), + (0x1D727, 'M', 'μ'), + (0x1D728, 'M', 'ν'), + (0x1D729, 'M', 'ξ'), + (0x1D72A, 'M', 'ο'), + (0x1D72B, 'M', 'π'), + (0x1D72C, 'M', 'ρ'), + (0x1D72D, 'M', 'θ'), + (0x1D72E, 'M', 'σ'), + (0x1D72F, 'M', 'τ'), + (0x1D730, 'M', 'υ'), + (0x1D731, 'M', 'φ'), + (0x1D732, 'M', 'χ'), + (0x1D733, 'M', 'ψ'), + (0x1D734, 'M', 'ω'), + (0x1D735, 'M', '∇'), + (0x1D736, 'M', 'α'), + (0x1D737, 'M', 'β'), + (0x1D738, 'M', 'γ'), + (0x1D739, 'M', 'δ'), + (0x1D73A, 'M', 'ε'), + (0x1D73B, 'M', 'ζ'), + (0x1D73C, 'M', 'η'), + (0x1D73D, 'M', 'θ'), + (0x1D73E, 'M', 'ι'), + (0x1D73F, 'M', 'κ'), + (0x1D740, 'M', 'λ'), + (0x1D741, 'M', 'μ'), + (0x1D742, 'M', 'ν'), + (0x1D743, 'M', 'ξ'), + (0x1D744, 'M', 'ο'), + (0x1D745, 'M', 'π'), + (0x1D746, 'M', 'ρ'), + (0x1D747, 'M', 'σ'), + (0x1D749, 'M', 'τ'), + (0x1D74A, 'M', 'υ'), ] -def _seg_67(): +def _seg_69() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0x1D717, 'M', u'θ'), - (0x1D718, 'M', u'κ'), - (0x1D719, 'M', u'φ'), - (0x1D71A, 'M', u'ρ'), - (0x1D71B, 'M', u'π'), - (0x1D71C, 'M', u'α'), - (0x1D71D, 'M', u'β'), - (0x1D71E, 'M', u'γ'), - (0x1D71F, 'M', u'δ'), - (0x1D720, 'M', u'ε'), - (0x1D721, 'M', u'ζ'), - (0x1D722, 'M', u'η'), - (0x1D723, 'M', u'θ'), - (0x1D724, 'M', u'ι'), - (0x1D725, 'M', u'κ'), - (0x1D726, 'M', u'λ'), - (0x1D727, 'M', u'μ'), - (0x1D728, 'M', u'ν'), - (0x1D729, 'M', u'ξ'), - (0x1D72A, 'M', u'ο'), - (0x1D72B, 'M', u'π'), - (0x1D72C, 'M', u'ρ'), - (0x1D72D, 'M', u'θ'), - (0x1D72E, 'M', u'σ'), - (0x1D72F, 'M', u'τ'), - (0x1D730, 'M', u'υ'), - (0x1D731, 'M', u'φ'), - (0x1D732, 'M', u'χ'), - (0x1D733, 'M', u'ψ'), - (0x1D734, 'M', u'ω'), - (0x1D735, 'M', u'∇'), - (0x1D736, 'M', u'α'), - (0x1D737, 'M', u'β'), - (0x1D738, 'M', u'γ'), - (0x1D739, 'M', u'δ'), - (0x1D73A, 'M', u'ε'), - (0x1D73B, 'M', u'ζ'), - (0x1D73C, 'M', u'η'), - (0x1D73D, 'M', u'θ'), - (0x1D73E, 'M', u'ι'), - (0x1D73F, 'M', u'κ'), - (0x1D740, 'M', u'λ'), - (0x1D741, 'M', u'μ'), - (0x1D742, 'M', u'ν'), - (0x1D743, 'M', u'ξ'), - (0x1D744, 'M', u'ο'), - (0x1D745, 'M', u'π'), - (0x1D746, 'M', u'ρ'), - (0x1D747, 'M', u'σ'), - (0x1D749, 'M', u'τ'), - (0x1D74A, 'M', u'υ'), - (0x1D74B, 'M', u'φ'), - (0x1D74C, 'M', u'χ'), - (0x1D74D, 'M', u'ψ'), - (0x1D74E, 'M', u'ω'), - (0x1D74F, 'M', u'∂'), - (0x1D750, 'M', u'ε'), - (0x1D751, 'M', u'θ'), - (0x1D752, 'M', u'κ'), - (0x1D753, 'M', u'φ'), - (0x1D754, 'M', u'ρ'), - (0x1D755, 'M', u'π'), - (0x1D756, 'M', u'α'), - (0x1D757, 'M', u'β'), - (0x1D758, 'M', u'γ'), - (0x1D759, 'M', u'δ'), - (0x1D75A, 'M', u'ε'), - (0x1D75B, 'M', u'ζ'), - (0x1D75C, 'M', u'η'), - (0x1D75D, 'M', u'θ'), - (0x1D75E, 'M', u'ι'), - (0x1D75F, 'M', u'κ'), - (0x1D760, 'M', u'λ'), - (0x1D761, 'M', u'μ'), - (0x1D762, 'M', u'ν'), - (0x1D763, 'M', u'ξ'), - (0x1D764, 'M', u'ο'), - (0x1D765, 'M', u'π'), - (0x1D766, 'M', u'ρ'), - (0x1D767, 'M', u'θ'), - (0x1D768, 'M', u'σ'), - (0x1D769, 'M', u'τ'), - (0x1D76A, 'M', u'υ'), - (0x1D76B, 'M', u'φ'), - (0x1D76C, 'M', u'χ'), - (0x1D76D, 'M', u'ψ'), - (0x1D76E, 'M', u'ω'), - (0x1D76F, 'M', u'∇'), - (0x1D770, 'M', u'α'), - (0x1D771, 'M', u'β'), - (0x1D772, 'M', u'γ'), - (0x1D773, 'M', u'δ'), - (0x1D774, 'M', u'ε'), - (0x1D775, 'M', u'ζ'), - (0x1D776, 'M', u'η'), - (0x1D777, 'M', u'θ'), - (0x1D778, 'M', u'ι'), - (0x1D779, 'M', u'κ'), - (0x1D77A, 'M', u'λ'), - (0x1D77B, 'M', u'μ'), + (0x1D74B, 'M', 'φ'), + (0x1D74C, 'M', 'χ'), + (0x1D74D, 'M', 'ψ'), + (0x1D74E, 'M', 'ω'), + (0x1D74F, 'M', '∂'), + (0x1D750, 'M', 'ε'), + (0x1D751, 'M', 'θ'), + (0x1D752, 'M', 'κ'), + (0x1D753, 'M', 'φ'), + (0x1D754, 'M', 'ρ'), + (0x1D755, 'M', 'π'), + (0x1D756, 'M', 'α'), + (0x1D757, 'M', 'β'), + (0x1D758, 'M', 'γ'), + (0x1D759, 'M', 'δ'), + (0x1D75A, 'M', 'ε'), + (0x1D75B, 'M', 'ζ'), + (0x1D75C, 'M', 'η'), + (0x1D75D, 'M', 'θ'), + (0x1D75E, 'M', 'ι'), + (0x1D75F, 'M', 'κ'), + (0x1D760, 'M', 'λ'), + (0x1D761, 'M', 'μ'), + (0x1D762, 'M', 'ν'), + (0x1D763, 'M', 'ξ'), + (0x1D764, 'M', 'ο'), + (0x1D765, 'M', 'π'), + (0x1D766, 'M', 'ρ'), + (0x1D767, 'M', 'θ'), + (0x1D768, 'M', 'σ'), + (0x1D769, 'M', 'τ'), + (0x1D76A, 'M', 'υ'), + (0x1D76B, 'M', 'φ'), + (0x1D76C, 'M', 'χ'), + (0x1D76D, 'M', 'ψ'), + (0x1D76E, 'M', 'ω'), + (0x1D76F, 'M', '∇'), + (0x1D770, 'M', 'α'), + (0x1D771, 'M', 'β'), + (0x1D772, 'M', 'γ'), + (0x1D773, 'M', 'δ'), + (0x1D774, 'M', 'ε'), + (0x1D775, 'M', 'ζ'), + (0x1D776, 'M', 'η'), + (0x1D777, 'M', 'θ'), + (0x1D778, 'M', 'ι'), + (0x1D779, 'M', 'κ'), + (0x1D77A, 'M', 'λ'), + (0x1D77B, 'M', 'μ'), + (0x1D77C, 'M', 'ν'), + (0x1D77D, 'M', 'ξ'), + (0x1D77E, 'M', 'ο'), + (0x1D77F, 'M', 'π'), + (0x1D780, 'M', 'ρ'), + (0x1D781, 'M', 'σ'), + (0x1D783, 'M', 'τ'), + (0x1D784, 'M', 'υ'), + (0x1D785, 'M', 'φ'), + (0x1D786, 'M', 'χ'), + (0x1D787, 'M', 'ψ'), + (0x1D788, 'M', 'ω'), + (0x1D789, 'M', '∂'), + (0x1D78A, 'M', 'ε'), + (0x1D78B, 'M', 'θ'), + (0x1D78C, 'M', 'κ'), + (0x1D78D, 'M', 'φ'), + (0x1D78E, 'M', 'ρ'), + (0x1D78F, 'M', 'π'), + (0x1D790, 'M', 'α'), + (0x1D791, 'M', 'β'), + (0x1D792, 'M', 'γ'), + (0x1D793, 'M', 'δ'), + (0x1D794, 'M', 'ε'), + (0x1D795, 'M', 'ζ'), + (0x1D796, 'M', 'η'), + (0x1D797, 'M', 'θ'), + (0x1D798, 'M', 'ι'), + (0x1D799, 'M', 'κ'), + (0x1D79A, 'M', 'λ'), + (0x1D79B, 'M', 'μ'), + (0x1D79C, 'M', 'ν'), + (0x1D79D, 'M', 'ξ'), + (0x1D79E, 'M', 'ο'), + (0x1D79F, 'M', 'π'), + (0x1D7A0, 'M', 'ρ'), + (0x1D7A1, 'M', 'θ'), + (0x1D7A2, 'M', 'σ'), + (0x1D7A3, 'M', 'τ'), + (0x1D7A4, 'M', 'υ'), + (0x1D7A5, 'M', 'φ'), + (0x1D7A6, 'M', 'χ'), + (0x1D7A7, 'M', 'ψ'), + (0x1D7A8, 'M', 'ω'), + (0x1D7A9, 'M', '∇'), + (0x1D7AA, 'M', 'α'), + (0x1D7AB, 'M', 'β'), + (0x1D7AC, 'M', 'γ'), + (0x1D7AD, 'M', 'δ'), + (0x1D7AE, 'M', 'ε'), + (0x1D7AF, 'M', 'ζ'), ] -def _seg_68(): +def _seg_70() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0x1D77C, 'M', u'ν'), - (0x1D77D, 'M', u'ξ'), - (0x1D77E, 'M', u'ο'), - (0x1D77F, 'M', u'π'), - (0x1D780, 'M', u'ρ'), - (0x1D781, 'M', u'σ'), - (0x1D783, 'M', u'τ'), - (0x1D784, 'M', u'υ'), - (0x1D785, 'M', u'φ'), - (0x1D786, 'M', u'χ'), - (0x1D787, 'M', u'ψ'), - (0x1D788, 'M', u'ω'), - (0x1D789, 'M', u'∂'), - (0x1D78A, 'M', u'ε'), - (0x1D78B, 'M', u'θ'), - (0x1D78C, 'M', u'κ'), - (0x1D78D, 'M', u'φ'), - (0x1D78E, 'M', u'ρ'), - (0x1D78F, 'M', u'π'), - (0x1D790, 'M', u'α'), - (0x1D791, 'M', u'β'), - (0x1D792, 'M', u'γ'), - (0x1D793, 'M', u'δ'), - (0x1D794, 'M', u'ε'), - (0x1D795, 'M', u'ζ'), - (0x1D796, 'M', u'η'), - (0x1D797, 'M', u'θ'), - (0x1D798, 'M', u'ι'), - (0x1D799, 'M', u'κ'), - (0x1D79A, 'M', u'λ'), - (0x1D79B, 'M', u'μ'), - (0x1D79C, 'M', u'ν'), - (0x1D79D, 'M', u'ξ'), - (0x1D79E, 'M', u'ο'), - (0x1D79F, 'M', u'π'), - (0x1D7A0, 'M', u'ρ'), - (0x1D7A1, 'M', u'θ'), - (0x1D7A2, 'M', u'σ'), - (0x1D7A3, 'M', u'τ'), - (0x1D7A4, 'M', u'υ'), - (0x1D7A5, 'M', u'φ'), - (0x1D7A6, 'M', u'χ'), - (0x1D7A7, 'M', u'ψ'), - (0x1D7A8, 'M', u'ω'), - (0x1D7A9, 'M', u'∇'), - (0x1D7AA, 'M', u'α'), - (0x1D7AB, 'M', u'β'), - (0x1D7AC, 'M', u'γ'), - (0x1D7AD, 'M', u'δ'), - (0x1D7AE, 'M', u'ε'), - (0x1D7AF, 'M', u'ζ'), - (0x1D7B0, 'M', u'η'), - (0x1D7B1, 'M', u'θ'), - (0x1D7B2, 'M', u'ι'), - (0x1D7B3, 'M', u'κ'), - (0x1D7B4, 'M', u'λ'), - (0x1D7B5, 'M', u'μ'), - (0x1D7B6, 'M', u'ν'), - (0x1D7B7, 'M', u'ξ'), - (0x1D7B8, 'M', u'ο'), - (0x1D7B9, 'M', u'π'), - (0x1D7BA, 'M', u'ρ'), - (0x1D7BB, 'M', u'σ'), - (0x1D7BD, 'M', u'τ'), - (0x1D7BE, 'M', u'υ'), - (0x1D7BF, 'M', u'φ'), - (0x1D7C0, 'M', u'χ'), - (0x1D7C1, 'M', u'ψ'), - (0x1D7C2, 'M', u'ω'), - (0x1D7C3, 'M', u'∂'), - (0x1D7C4, 'M', u'ε'), - (0x1D7C5, 'M', u'θ'), - (0x1D7C6, 'M', u'κ'), - (0x1D7C7, 'M', u'φ'), - (0x1D7C8, 'M', u'ρ'), - (0x1D7C9, 'M', u'π'), - (0x1D7CA, 'M', u'ϝ'), + (0x1D7B0, 'M', 'η'), + (0x1D7B1, 'M', 'θ'), + (0x1D7B2, 'M', 'ι'), + (0x1D7B3, 'M', 'κ'), + (0x1D7B4, 'M', 'λ'), + (0x1D7B5, 'M', 'μ'), + (0x1D7B6, 'M', 'ν'), + (0x1D7B7, 'M', 'ξ'), + (0x1D7B8, 'M', 'ο'), + (0x1D7B9, 'M', 'π'), + (0x1D7BA, 'M', 'ρ'), + (0x1D7BB, 'M', 'σ'), + (0x1D7BD, 'M', 'τ'), + (0x1D7BE, 'M', 'υ'), + (0x1D7BF, 'M', 'φ'), + (0x1D7C0, 'M', 'χ'), + (0x1D7C1, 'M', 'ψ'), + (0x1D7C2, 'M', 'ω'), + (0x1D7C3, 'M', '∂'), + (0x1D7C4, 'M', 'ε'), + (0x1D7C5, 'M', 'θ'), + (0x1D7C6, 'M', 'κ'), + (0x1D7C7, 'M', 'φ'), + (0x1D7C8, 'M', 'ρ'), + (0x1D7C9, 'M', 'π'), + (0x1D7CA, 'M', 'ϝ'), (0x1D7CC, 'X'), - (0x1D7CE, 'M', u'0'), - (0x1D7CF, 'M', u'1'), - (0x1D7D0, 'M', u'2'), - (0x1D7D1, 'M', u'3'), - (0x1D7D2, 'M', u'4'), - (0x1D7D3, 'M', u'5'), - (0x1D7D4, 'M', u'6'), - (0x1D7D5, 'M', u'7'), - (0x1D7D6, 'M', u'8'), - (0x1D7D7, 'M', u'9'), - (0x1D7D8, 'M', u'0'), - (0x1D7D9, 'M', u'1'), - (0x1D7DA, 'M', u'2'), - (0x1D7DB, 'M', u'3'), - (0x1D7DC, 'M', u'4'), - (0x1D7DD, 'M', u'5'), - (0x1D7DE, 'M', u'6'), - (0x1D7DF, 'M', u'7'), - (0x1D7E0, 'M', u'8'), - (0x1D7E1, 'M', u'9'), - (0x1D7E2, 'M', u'0'), - (0x1D7E3, 'M', u'1'), - ] - -def _seg_69(): - return [ - (0x1D7E4, 'M', u'2'), - (0x1D7E5, 'M', u'3'), - (0x1D7E6, 'M', u'4'), - (0x1D7E7, 'M', u'5'), - (0x1D7E8, 'M', u'6'), - (0x1D7E9, 'M', u'7'), - (0x1D7EA, 'M', u'8'), - (0x1D7EB, 'M', u'9'), - (0x1D7EC, 'M', u'0'), - (0x1D7ED, 'M', u'1'), - (0x1D7EE, 'M', u'2'), - (0x1D7EF, 'M', u'3'), - (0x1D7F0, 'M', u'4'), - (0x1D7F1, 'M', u'5'), - (0x1D7F2, 'M', u'6'), - (0x1D7F3, 'M', u'7'), - (0x1D7F4, 'M', u'8'), - (0x1D7F5, 'M', u'9'), - (0x1D7F6, 'M', u'0'), - (0x1D7F7, 'M', u'1'), - (0x1D7F8, 'M', u'2'), - (0x1D7F9, 'M', u'3'), - (0x1D7FA, 'M', u'4'), - (0x1D7FB, 'M', u'5'), - (0x1D7FC, 'M', u'6'), - (0x1D7FD, 'M', u'7'), - (0x1D7FE, 'M', u'8'), - (0x1D7FF, 'M', u'9'), + (0x1D7CE, 'M', '0'), + (0x1D7CF, 'M', '1'), + (0x1D7D0, 'M', '2'), + (0x1D7D1, 'M', '3'), + (0x1D7D2, 'M', '4'), + (0x1D7D3, 'M', '5'), + (0x1D7D4, 'M', '6'), + (0x1D7D5, 'M', '7'), + (0x1D7D6, 'M', '8'), + (0x1D7D7, 'M', '9'), + (0x1D7D8, 'M', '0'), + (0x1D7D9, 'M', '1'), + (0x1D7DA, 'M', '2'), + (0x1D7DB, 'M', '3'), + (0x1D7DC, 'M', '4'), + (0x1D7DD, 'M', '5'), + (0x1D7DE, 'M', '6'), + (0x1D7DF, 'M', '7'), + (0x1D7E0, 'M', '8'), + (0x1D7E1, 'M', '9'), + (0x1D7E2, 'M', '0'), + (0x1D7E3, 'M', '1'), + (0x1D7E4, 'M', '2'), + (0x1D7E5, 'M', '3'), + (0x1D7E6, 'M', '4'), + (0x1D7E7, 'M', '5'), + (0x1D7E8, 'M', '6'), + (0x1D7E9, 'M', '7'), + (0x1D7EA, 'M', '8'), + (0x1D7EB, 'M', '9'), + (0x1D7EC, 'M', '0'), + (0x1D7ED, 'M', '1'), + (0x1D7EE, 'M', '2'), + (0x1D7EF, 'M', '3'), + (0x1D7F0, 'M', '4'), + (0x1D7F1, 'M', '5'), + (0x1D7F2, 'M', '6'), + (0x1D7F3, 'M', '7'), + (0x1D7F4, 'M', '8'), + (0x1D7F5, 'M', '9'), + (0x1D7F6, 'M', '0'), + (0x1D7F7, 'M', '1'), + (0x1D7F8, 'M', '2'), + (0x1D7F9, 'M', '3'), + (0x1D7FA, 'M', '4'), + (0x1D7FB, 'M', '5'), + (0x1D7FC, 'M', '6'), + (0x1D7FD, 'M', '7'), + (0x1D7FE, 'M', '8'), + (0x1D7FF, 'M', '9'), (0x1D800, 'V'), (0x1DA8C, 'X'), (0x1DA9B, 'V'), (0x1DAA0, 'X'), (0x1DAA1, 'V'), (0x1DAB0, 'X'), + (0x1DF00, 'V'), + (0x1DF1F, 'X'), + (0x1DF25, 'V'), + (0x1DF2B, 'X'), (0x1E000, 'V'), (0x1E007, 'X'), (0x1E008, 'V'), @@ -7227,6 +7387,75 @@ def _seg_69(): (0x1E025, 'X'), (0x1E026, 'V'), (0x1E02B, 'X'), + (0x1E030, 'M', 'а'), + (0x1E031, 'M', 'б'), + (0x1E032, 'M', 'в'), + ] + +def _seg_71() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1E033, 'M', 'г'), + (0x1E034, 'M', 'д'), + (0x1E035, 'M', 'е'), + (0x1E036, 'M', 'ж'), + (0x1E037, 'M', 'з'), + (0x1E038, 'M', 'и'), + (0x1E039, 'M', 'к'), + (0x1E03A, 'M', 'л'), + (0x1E03B, 'M', 'м'), + (0x1E03C, 'M', 'о'), + (0x1E03D, 'M', 'п'), + (0x1E03E, 'M', 'р'), + (0x1E03F, 'M', 'с'), + (0x1E040, 'M', 'т'), + (0x1E041, 'M', 'у'), + (0x1E042, 'M', 'ф'), + (0x1E043, 'M', 'х'), + (0x1E044, 'M', 'ц'), + (0x1E045, 'M', 'ч'), + (0x1E046, 'M', 'ш'), + (0x1E047, 'M', 'ы'), + (0x1E048, 'M', 'э'), + (0x1E049, 'M', 'ю'), + (0x1E04A, 'M', 'ꚉ'), + (0x1E04B, 'M', 'ә'), + (0x1E04C, 'M', 'і'), + (0x1E04D, 'M', 'ј'), + (0x1E04E, 'M', 'ө'), + (0x1E04F, 'M', 'ү'), + (0x1E050, 'M', 'ӏ'), + (0x1E051, 'M', 'а'), + (0x1E052, 'M', 'б'), + (0x1E053, 'M', 'в'), + (0x1E054, 'M', 'г'), + (0x1E055, 'M', 'д'), + (0x1E056, 'M', 'е'), + (0x1E057, 'M', 'ж'), + (0x1E058, 'M', 'з'), + (0x1E059, 'M', 'и'), + (0x1E05A, 'M', 'к'), + (0x1E05B, 'M', 'л'), + (0x1E05C, 'M', 'о'), + (0x1E05D, 'M', 'п'), + (0x1E05E, 'M', 'с'), + (0x1E05F, 'M', 'у'), + (0x1E060, 'M', 'ф'), + (0x1E061, 'M', 'х'), + (0x1E062, 'M', 'ц'), + (0x1E063, 'M', 'ч'), + (0x1E064, 'M', 'ш'), + (0x1E065, 'M', 'ъ'), + (0x1E066, 'M', 'ы'), + (0x1E067, 'M', 'ґ'), + (0x1E068, 'M', 'і'), + (0x1E069, 'M', 'ѕ'), + (0x1E06A, 'M', 'џ'), + (0x1E06B, 'M', 'ҫ'), + (0x1E06C, 'M', 'ꙑ'), + (0x1E06D, 'M', 'ұ'), + (0x1E06E, 'X'), + (0x1E08F, 'V'), + (0x1E090, 'X'), (0x1E100, 'V'), (0x1E12D, 'X'), (0x1E130, 'V'), @@ -7235,239 +7464,255 @@ def _seg_69(): (0x1E14A, 'X'), (0x1E14E, 'V'), (0x1E150, 'X'), + (0x1E290, 'V'), + (0x1E2AF, 'X'), (0x1E2C0, 'V'), (0x1E2FA, 'X'), (0x1E2FF, 'V'), (0x1E300, 'X'), + (0x1E4D0, 'V'), + (0x1E4FA, 'X'), + (0x1E7E0, 'V'), + (0x1E7E7, 'X'), + (0x1E7E8, 'V'), + (0x1E7EC, 'X'), + (0x1E7ED, 'V'), + (0x1E7EF, 'X'), + (0x1E7F0, 'V'), + (0x1E7FF, 'X'), (0x1E800, 'V'), (0x1E8C5, 'X'), (0x1E8C7, 'V'), (0x1E8D7, 'X'), - (0x1E900, 'M', u'𞤢'), - (0x1E901, 'M', u'𞤣'), - (0x1E902, 'M', u'𞤤'), - (0x1E903, 'M', u'𞤥'), - (0x1E904, 'M', u'𞤦'), - (0x1E905, 'M', u'𞤧'), - (0x1E906, 'M', u'𞤨'), - (0x1E907, 'M', u'𞤩'), - (0x1E908, 'M', u'𞤪'), - (0x1E909, 'M', u'𞤫'), - (0x1E90A, 'M', u'𞤬'), - (0x1E90B, 'M', u'𞤭'), - (0x1E90C, 'M', u'𞤮'), - (0x1E90D, 'M', u'𞤯'), - (0x1E90E, 'M', u'𞤰'), - (0x1E90F, 'M', u'𞤱'), - (0x1E910, 'M', u'𞤲'), - (0x1E911, 'M', u'𞤳'), - (0x1E912, 'M', u'𞤴'), - (0x1E913, 'M', u'𞤵'), - (0x1E914, 'M', u'𞤶'), - (0x1E915, 'M', u'𞤷'), - (0x1E916, 'M', u'𞤸'), - (0x1E917, 'M', u'𞤹'), - (0x1E918, 'M', u'𞤺'), - (0x1E919, 'M', u'𞤻'), - (0x1E91A, 'M', u'𞤼'), - (0x1E91B, 'M', u'𞤽'), - (0x1E91C, 'M', u'𞤾'), - (0x1E91D, 'M', u'𞤿'), - (0x1E91E, 'M', u'𞥀'), - (0x1E91F, 'M', u'𞥁'), - (0x1E920, 'M', u'𞥂'), - (0x1E921, 'M', u'𞥃'), + (0x1E900, 'M', '𞤢'), + (0x1E901, 'M', '𞤣'), + (0x1E902, 'M', '𞤤'), + (0x1E903, 'M', '𞤥'), + (0x1E904, 'M', '𞤦'), + (0x1E905, 'M', '𞤧'), + (0x1E906, 'M', '𞤨'), + (0x1E907, 'M', '𞤩'), + (0x1E908, 'M', '𞤪'), + (0x1E909, 'M', '𞤫'), + ] + +def _seg_72() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1E90A, 'M', '𞤬'), + (0x1E90B, 'M', '𞤭'), + (0x1E90C, 'M', '𞤮'), + (0x1E90D, 'M', '𞤯'), + (0x1E90E, 'M', '𞤰'), + (0x1E90F, 'M', '𞤱'), + (0x1E910, 'M', '𞤲'), + (0x1E911, 'M', '𞤳'), + (0x1E912, 'M', '𞤴'), + (0x1E913, 'M', '𞤵'), + (0x1E914, 'M', '𞤶'), + (0x1E915, 'M', '𞤷'), + (0x1E916, 'M', '𞤸'), + (0x1E917, 'M', '𞤹'), + (0x1E918, 'M', '𞤺'), + (0x1E919, 'M', '𞤻'), + (0x1E91A, 'M', '𞤼'), + (0x1E91B, 'M', '𞤽'), + (0x1E91C, 'M', '𞤾'), + (0x1E91D, 'M', '𞤿'), + (0x1E91E, 'M', '𞥀'), + (0x1E91F, 'M', '𞥁'), + (0x1E920, 'M', '𞥂'), + (0x1E921, 'M', '𞥃'), (0x1E922, 'V'), (0x1E94C, 'X'), (0x1E950, 'V'), (0x1E95A, 'X'), (0x1E95E, 'V'), (0x1E960, 'X'), - ] - -def _seg_70(): - return [ (0x1EC71, 'V'), (0x1ECB5, 'X'), (0x1ED01, 'V'), (0x1ED3E, 'X'), - (0x1EE00, 'M', u'ا'), - (0x1EE01, 'M', u'ب'), - (0x1EE02, 'M', u'ج'), - (0x1EE03, 'M', u'د'), + (0x1EE00, 'M', 'ا'), + (0x1EE01, 'M', 'ب'), + (0x1EE02, 'M', 'ج'), + (0x1EE03, 'M', 'د'), (0x1EE04, 'X'), - (0x1EE05, 'M', u'و'), - (0x1EE06, 'M', u'ز'), - (0x1EE07, 'M', u'ح'), - (0x1EE08, 'M', u'ط'), - (0x1EE09, 'M', u'ي'), - (0x1EE0A, 'M', u'ك'), - (0x1EE0B, 'M', u'ل'), - (0x1EE0C, 'M', u'م'), - (0x1EE0D, 'M', u'ن'), - (0x1EE0E, 'M', u'س'), - (0x1EE0F, 'M', u'ع'), - (0x1EE10, 'M', u'ف'), - (0x1EE11, 'M', u'ص'), - (0x1EE12, 'M', u'ق'), - (0x1EE13, 'M', u'ر'), - (0x1EE14, 'M', u'ش'), - (0x1EE15, 'M', u'ت'), - (0x1EE16, 'M', u'ث'), - (0x1EE17, 'M', u'خ'), - (0x1EE18, 'M', u'ذ'), - (0x1EE19, 'M', u'ض'), - (0x1EE1A, 'M', u'ظ'), - (0x1EE1B, 'M', u'غ'), - (0x1EE1C, 'M', u'ٮ'), - (0x1EE1D, 'M', u'ں'), - (0x1EE1E, 'M', u'ڡ'), - (0x1EE1F, 'M', u'ٯ'), + (0x1EE05, 'M', 'و'), + (0x1EE06, 'M', 'ز'), + (0x1EE07, 'M', 'ح'), + (0x1EE08, 'M', 'ط'), + (0x1EE09, 'M', 'ي'), + (0x1EE0A, 'M', 'ك'), + (0x1EE0B, 'M', 'ل'), + (0x1EE0C, 'M', 'م'), + (0x1EE0D, 'M', 'ن'), + (0x1EE0E, 'M', 'س'), + (0x1EE0F, 'M', 'ع'), + (0x1EE10, 'M', 'ف'), + (0x1EE11, 'M', 'ص'), + (0x1EE12, 'M', 'ق'), + (0x1EE13, 'M', 'ر'), + (0x1EE14, 'M', 'ش'), + (0x1EE15, 'M', 'ت'), + (0x1EE16, 'M', 'ث'), + (0x1EE17, 'M', 'خ'), + (0x1EE18, 'M', 'ذ'), + (0x1EE19, 'M', 'ض'), + (0x1EE1A, 'M', 'ظ'), + (0x1EE1B, 'M', 'غ'), + (0x1EE1C, 'M', 'ٮ'), + (0x1EE1D, 'M', 'ں'), + (0x1EE1E, 'M', 'ڡ'), + (0x1EE1F, 'M', 'ٯ'), (0x1EE20, 'X'), - (0x1EE21, 'M', u'ب'), - (0x1EE22, 'M', u'ج'), + (0x1EE21, 'M', 'ب'), + (0x1EE22, 'M', 'ج'), (0x1EE23, 'X'), - (0x1EE24, 'M', u'ه'), + (0x1EE24, 'M', 'ه'), (0x1EE25, 'X'), - (0x1EE27, 'M', u'ح'), + (0x1EE27, 'M', 'ح'), (0x1EE28, 'X'), - (0x1EE29, 'M', u'ي'), - (0x1EE2A, 'M', u'ك'), - (0x1EE2B, 'M', u'ل'), - (0x1EE2C, 'M', u'م'), - (0x1EE2D, 'M', u'ن'), - (0x1EE2E, 'M', u'س'), - (0x1EE2F, 'M', u'ع'), - (0x1EE30, 'M', u'ف'), - (0x1EE31, 'M', u'ص'), - (0x1EE32, 'M', u'ق'), + (0x1EE29, 'M', 'ي'), + (0x1EE2A, 'M', 'ك'), + (0x1EE2B, 'M', 'ل'), + (0x1EE2C, 'M', 'م'), + (0x1EE2D, 'M', 'ن'), + (0x1EE2E, 'M', 'س'), + (0x1EE2F, 'M', 'ع'), + (0x1EE30, 'M', 'ف'), + (0x1EE31, 'M', 'ص'), + (0x1EE32, 'M', 'ق'), (0x1EE33, 'X'), - (0x1EE34, 'M', u'ش'), - (0x1EE35, 'M', u'ت'), - (0x1EE36, 'M', u'ث'), - (0x1EE37, 'M', u'خ'), + (0x1EE34, 'M', 'ش'), + (0x1EE35, 'M', 'ت'), + (0x1EE36, 'M', 'ث'), + (0x1EE37, 'M', 'خ'), (0x1EE38, 'X'), - (0x1EE39, 'M', u'ض'), + (0x1EE39, 'M', 'ض'), (0x1EE3A, 'X'), - (0x1EE3B, 'M', u'غ'), + (0x1EE3B, 'M', 'غ'), (0x1EE3C, 'X'), - (0x1EE42, 'M', u'ج'), + (0x1EE42, 'M', 'ج'), (0x1EE43, 'X'), - (0x1EE47, 'M', u'ح'), + (0x1EE47, 'M', 'ح'), (0x1EE48, 'X'), - (0x1EE49, 'M', u'ي'), + (0x1EE49, 'M', 'ي'), (0x1EE4A, 'X'), - (0x1EE4B, 'M', u'ل'), - (0x1EE4C, 'X'), - (0x1EE4D, 'M', u'ن'), - (0x1EE4E, 'M', u'س'), - (0x1EE4F, 'M', u'ع'), - (0x1EE50, 'X'), - (0x1EE51, 'M', u'ص'), - (0x1EE52, 'M', u'ق'), - (0x1EE53, 'X'), - (0x1EE54, 'M', u'ش'), - (0x1EE55, 'X'), - (0x1EE57, 'M', u'خ'), - (0x1EE58, 'X'), - (0x1EE59, 'M', u'ض'), - (0x1EE5A, 'X'), - (0x1EE5B, 'M', u'غ'), - (0x1EE5C, 'X'), - (0x1EE5D, 'M', u'ں'), - (0x1EE5E, 'X'), - (0x1EE5F, 'M', u'ٯ'), - (0x1EE60, 'X'), - (0x1EE61, 'M', u'ب'), - (0x1EE62, 'M', u'ج'), - (0x1EE63, 'X'), - (0x1EE64, 'M', u'ه'), - (0x1EE65, 'X'), - (0x1EE67, 'M', u'ح'), - (0x1EE68, 'M', u'ط'), - (0x1EE69, 'M', u'ي'), - (0x1EE6A, 'M', u'ك'), ] -def _seg_71(): +def _seg_73() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ + (0x1EE4B, 'M', 'ل'), + (0x1EE4C, 'X'), + (0x1EE4D, 'M', 'ن'), + (0x1EE4E, 'M', 'س'), + (0x1EE4F, 'M', 'ع'), + (0x1EE50, 'X'), + (0x1EE51, 'M', 'ص'), + (0x1EE52, 'M', 'ق'), + (0x1EE53, 'X'), + (0x1EE54, 'M', 'ش'), + (0x1EE55, 'X'), + (0x1EE57, 'M', 'خ'), + (0x1EE58, 'X'), + (0x1EE59, 'M', 'ض'), + (0x1EE5A, 'X'), + (0x1EE5B, 'M', 'غ'), + (0x1EE5C, 'X'), + (0x1EE5D, 'M', 'ں'), + (0x1EE5E, 'X'), + (0x1EE5F, 'M', 'ٯ'), + (0x1EE60, 'X'), + (0x1EE61, 'M', 'ب'), + (0x1EE62, 'M', 'ج'), + (0x1EE63, 'X'), + (0x1EE64, 'M', 'ه'), + (0x1EE65, 'X'), + (0x1EE67, 'M', 'ح'), + (0x1EE68, 'M', 'ط'), + (0x1EE69, 'M', 'ي'), + (0x1EE6A, 'M', 'ك'), (0x1EE6B, 'X'), - (0x1EE6C, 'M', u'م'), - (0x1EE6D, 'M', u'ن'), - (0x1EE6E, 'M', u'س'), - (0x1EE6F, 'M', u'ع'), - (0x1EE70, 'M', u'ف'), - (0x1EE71, 'M', u'ص'), - (0x1EE72, 'M', u'ق'), + (0x1EE6C, 'M', 'م'), + (0x1EE6D, 'M', 'ن'), + (0x1EE6E, 'M', 'س'), + (0x1EE6F, 'M', 'ع'), + (0x1EE70, 'M', 'ف'), + (0x1EE71, 'M', 'ص'), + (0x1EE72, 'M', 'ق'), (0x1EE73, 'X'), - (0x1EE74, 'M', u'ش'), - (0x1EE75, 'M', u'ت'), - (0x1EE76, 'M', u'ث'), - (0x1EE77, 'M', u'خ'), + (0x1EE74, 'M', 'ش'), + (0x1EE75, 'M', 'ت'), + (0x1EE76, 'M', 'ث'), + (0x1EE77, 'M', 'خ'), (0x1EE78, 'X'), - (0x1EE79, 'M', u'ض'), - (0x1EE7A, 'M', u'ظ'), - (0x1EE7B, 'M', u'غ'), - (0x1EE7C, 'M', u'ٮ'), + (0x1EE79, 'M', 'ض'), + (0x1EE7A, 'M', 'ظ'), + (0x1EE7B, 'M', 'غ'), + (0x1EE7C, 'M', 'ٮ'), (0x1EE7D, 'X'), - (0x1EE7E, 'M', u'ڡ'), + (0x1EE7E, 'M', 'ڡ'), (0x1EE7F, 'X'), - (0x1EE80, 'M', u'ا'), - (0x1EE81, 'M', u'ب'), - (0x1EE82, 'M', u'ج'), - (0x1EE83, 'M', u'د'), - (0x1EE84, 'M', u'ه'), - (0x1EE85, 'M', u'و'), - (0x1EE86, 'M', u'ز'), - (0x1EE87, 'M', u'ح'), - (0x1EE88, 'M', u'ط'), - (0x1EE89, 'M', u'ي'), + (0x1EE80, 'M', 'ا'), + (0x1EE81, 'M', 'ب'), + (0x1EE82, 'M', 'ج'), + (0x1EE83, 'M', 'د'), + (0x1EE84, 'M', 'ه'), + (0x1EE85, 'M', 'و'), + (0x1EE86, 'M', 'ز'), + (0x1EE87, 'M', 'ح'), + (0x1EE88, 'M', 'ط'), + (0x1EE89, 'M', 'ي'), (0x1EE8A, 'X'), - (0x1EE8B, 'M', u'ل'), - (0x1EE8C, 'M', u'م'), - (0x1EE8D, 'M', u'ن'), - (0x1EE8E, 'M', u'س'), - (0x1EE8F, 'M', u'ع'), - (0x1EE90, 'M', u'ف'), - (0x1EE91, 'M', u'ص'), - (0x1EE92, 'M', u'ق'), - (0x1EE93, 'M', u'ر'), - (0x1EE94, 'M', u'ش'), - (0x1EE95, 'M', u'ت'), - (0x1EE96, 'M', u'ث'), - (0x1EE97, 'M', u'خ'), - (0x1EE98, 'M', u'ذ'), - (0x1EE99, 'M', u'ض'), - (0x1EE9A, 'M', u'ظ'), - (0x1EE9B, 'M', u'غ'), + (0x1EE8B, 'M', 'ل'), + (0x1EE8C, 'M', 'م'), + (0x1EE8D, 'M', 'ن'), + (0x1EE8E, 'M', 'س'), + (0x1EE8F, 'M', 'ع'), + (0x1EE90, 'M', 'ف'), + (0x1EE91, 'M', 'ص'), + (0x1EE92, 'M', 'ق'), + (0x1EE93, 'M', 'ر'), + (0x1EE94, 'M', 'ش'), + (0x1EE95, 'M', 'ت'), + (0x1EE96, 'M', 'ث'), + (0x1EE97, 'M', 'خ'), + (0x1EE98, 'M', 'ذ'), + (0x1EE99, 'M', 'ض'), + (0x1EE9A, 'M', 'ظ'), + (0x1EE9B, 'M', 'غ'), (0x1EE9C, 'X'), - (0x1EEA1, 'M', u'ب'), - (0x1EEA2, 'M', u'ج'), - (0x1EEA3, 'M', u'د'), + (0x1EEA1, 'M', 'ب'), + (0x1EEA2, 'M', 'ج'), + (0x1EEA3, 'M', 'د'), (0x1EEA4, 'X'), - (0x1EEA5, 'M', u'و'), - (0x1EEA6, 'M', u'ز'), - (0x1EEA7, 'M', u'ح'), - (0x1EEA8, 'M', u'ط'), - (0x1EEA9, 'M', u'ي'), + (0x1EEA5, 'M', 'و'), + (0x1EEA6, 'M', 'ز'), + (0x1EEA7, 'M', 'ح'), + (0x1EEA8, 'M', 'ط'), + (0x1EEA9, 'M', 'ي'), (0x1EEAA, 'X'), - (0x1EEAB, 'M', u'ل'), - (0x1EEAC, 'M', u'م'), - (0x1EEAD, 'M', u'ن'), - (0x1EEAE, 'M', u'س'), - (0x1EEAF, 'M', u'ع'), - (0x1EEB0, 'M', u'ف'), - (0x1EEB1, 'M', u'ص'), - (0x1EEB2, 'M', u'ق'), - (0x1EEB3, 'M', u'ر'), - (0x1EEB4, 'M', u'ش'), - (0x1EEB5, 'M', u'ت'), - (0x1EEB6, 'M', u'ث'), - (0x1EEB7, 'M', u'خ'), - (0x1EEB8, 'M', u'ذ'), - (0x1EEB9, 'M', u'ض'), - (0x1EEBA, 'M', u'ظ'), - (0x1EEBB, 'M', u'غ'), + (0x1EEAB, 'M', 'ل'), + (0x1EEAC, 'M', 'م'), + (0x1EEAD, 'M', 'ن'), + (0x1EEAE, 'M', 'س'), + (0x1EEAF, 'M', 'ع'), + (0x1EEB0, 'M', 'ف'), + (0x1EEB1, 'M', 'ص'), + (0x1EEB2, 'M', 'ق'), + (0x1EEB3, 'M', 'ر'), + (0x1EEB4, 'M', 'ش'), + ] + +def _seg_74() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1EEB5, 'M', 'ت'), + (0x1EEB6, 'M', 'ث'), + (0x1EEB7, 'M', 'خ'), + (0x1EEB8, 'M', 'ذ'), + (0x1EEB9, 'M', 'ض'), + (0x1EEBA, 'M', 'ظ'), + (0x1EEBB, 'M', 'غ'), (0x1EEBC, 'X'), (0x1EEF0, 'V'), (0x1EEF2, 'X'), @@ -7483,174 +7728,172 @@ def _seg_71(): (0x1F0D0, 'X'), (0x1F0D1, 'V'), (0x1F0F6, 'X'), - (0x1F101, '3', u'0,'), - (0x1F102, '3', u'1,'), - (0x1F103, '3', u'2,'), - (0x1F104, '3', u'3,'), - (0x1F105, '3', u'4,'), - (0x1F106, '3', u'5,'), - (0x1F107, '3', u'6,'), - (0x1F108, '3', u'7,'), + (0x1F101, '3', '0,'), + (0x1F102, '3', '1,'), + (0x1F103, '3', '2,'), + (0x1F104, '3', '3,'), + (0x1F105, '3', '4,'), + (0x1F106, '3', '5,'), + (0x1F107, '3', '6,'), + (0x1F108, '3', '7,'), + (0x1F109, '3', '8,'), + (0x1F10A, '3', '9,'), + (0x1F10B, 'V'), + (0x1F110, '3', '(a)'), + (0x1F111, '3', '(b)'), + (0x1F112, '3', '(c)'), + (0x1F113, '3', '(d)'), + (0x1F114, '3', '(e)'), + (0x1F115, '3', '(f)'), + (0x1F116, '3', '(g)'), + (0x1F117, '3', '(h)'), + (0x1F118, '3', '(i)'), + (0x1F119, '3', '(j)'), + (0x1F11A, '3', '(k)'), + (0x1F11B, '3', '(l)'), + (0x1F11C, '3', '(m)'), + (0x1F11D, '3', '(n)'), + (0x1F11E, '3', '(o)'), + (0x1F11F, '3', '(p)'), + (0x1F120, '3', '(q)'), + (0x1F121, '3', '(r)'), + (0x1F122, '3', '(s)'), + (0x1F123, '3', '(t)'), + (0x1F124, '3', '(u)'), + (0x1F125, '3', '(v)'), + (0x1F126, '3', '(w)'), + (0x1F127, '3', '(x)'), + (0x1F128, '3', '(y)'), + (0x1F129, '3', '(z)'), + (0x1F12A, 'M', '〔s〕'), + (0x1F12B, 'M', 'c'), + (0x1F12C, 'M', 'r'), + (0x1F12D, 'M', 'cd'), + (0x1F12E, 'M', 'wz'), + (0x1F12F, 'V'), + (0x1F130, 'M', 'a'), + (0x1F131, 'M', 'b'), + (0x1F132, 'M', 'c'), + (0x1F133, 'M', 'd'), + (0x1F134, 'M', 'e'), + (0x1F135, 'M', 'f'), + (0x1F136, 'M', 'g'), + (0x1F137, 'M', 'h'), + (0x1F138, 'M', 'i'), + (0x1F139, 'M', 'j'), + (0x1F13A, 'M', 'k'), + (0x1F13B, 'M', 'l'), + (0x1F13C, 'M', 'm'), + (0x1F13D, 'M', 'n'), + (0x1F13E, 'M', 'o'), + (0x1F13F, 'M', 'p'), + (0x1F140, 'M', 'q'), + (0x1F141, 'M', 'r'), + (0x1F142, 'M', 's'), + (0x1F143, 'M', 't'), + (0x1F144, 'M', 'u'), + (0x1F145, 'M', 'v'), + (0x1F146, 'M', 'w'), + (0x1F147, 'M', 'x'), + (0x1F148, 'M', 'y'), + (0x1F149, 'M', 'z'), + (0x1F14A, 'M', 'hv'), + (0x1F14B, 'M', 'mv'), + (0x1F14C, 'M', 'sd'), + (0x1F14D, 'M', 'ss'), + (0x1F14E, 'M', 'ppv'), + (0x1F14F, 'M', 'wc'), + (0x1F150, 'V'), + (0x1F16A, 'M', 'mc'), + (0x1F16B, 'M', 'md'), ] -def _seg_72(): +def _seg_75() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0x1F109, '3', u'8,'), - (0x1F10A, '3', u'9,'), - (0x1F10B, 'V'), - (0x1F110, '3', u'(a)'), - (0x1F111, '3', u'(b)'), - (0x1F112, '3', u'(c)'), - (0x1F113, '3', u'(d)'), - (0x1F114, '3', u'(e)'), - (0x1F115, '3', u'(f)'), - (0x1F116, '3', u'(g)'), - (0x1F117, '3', u'(h)'), - (0x1F118, '3', u'(i)'), - (0x1F119, '3', u'(j)'), - (0x1F11A, '3', u'(k)'), - (0x1F11B, '3', u'(l)'), - (0x1F11C, '3', u'(m)'), - (0x1F11D, '3', u'(n)'), - (0x1F11E, '3', u'(o)'), - (0x1F11F, '3', u'(p)'), - (0x1F120, '3', u'(q)'), - (0x1F121, '3', u'(r)'), - (0x1F122, '3', u'(s)'), - (0x1F123, '3', u'(t)'), - (0x1F124, '3', u'(u)'), - (0x1F125, '3', u'(v)'), - (0x1F126, '3', u'(w)'), - (0x1F127, '3', u'(x)'), - (0x1F128, '3', u'(y)'), - (0x1F129, '3', u'(z)'), - (0x1F12A, 'M', u'〔s〕'), - (0x1F12B, 'M', u'c'), - (0x1F12C, 'M', u'r'), - (0x1F12D, 'M', u'cd'), - (0x1F12E, 'M', u'wz'), - (0x1F12F, 'V'), - (0x1F130, 'M', u'a'), - (0x1F131, 'M', u'b'), - (0x1F132, 'M', u'c'), - (0x1F133, 'M', u'd'), - (0x1F134, 'M', u'e'), - (0x1F135, 'M', u'f'), - (0x1F136, 'M', u'g'), - (0x1F137, 'M', u'h'), - (0x1F138, 'M', u'i'), - (0x1F139, 'M', u'j'), - (0x1F13A, 'M', u'k'), - (0x1F13B, 'M', u'l'), - (0x1F13C, 'M', u'm'), - (0x1F13D, 'M', u'n'), - (0x1F13E, 'M', u'o'), - (0x1F13F, 'M', u'p'), - (0x1F140, 'M', u'q'), - (0x1F141, 'M', u'r'), - (0x1F142, 'M', u's'), - (0x1F143, 'M', u't'), - (0x1F144, 'M', u'u'), - (0x1F145, 'M', u'v'), - (0x1F146, 'M', u'w'), - (0x1F147, 'M', u'x'), - (0x1F148, 'M', u'y'), - (0x1F149, 'M', u'z'), - (0x1F14A, 'M', u'hv'), - (0x1F14B, 'M', u'mv'), - (0x1F14C, 'M', u'sd'), - (0x1F14D, 'M', u'ss'), - (0x1F14E, 'M', u'ppv'), - (0x1F14F, 'M', u'wc'), - (0x1F150, 'V'), - (0x1F16A, 'M', u'mc'), - (0x1F16B, 'M', u'md'), - (0x1F16C, 'M', u'mr'), + (0x1F16C, 'M', 'mr'), (0x1F16D, 'V'), - (0x1F190, 'M', u'dj'), + (0x1F190, 'M', 'dj'), (0x1F191, 'V'), (0x1F1AE, 'X'), (0x1F1E6, 'V'), - (0x1F200, 'M', u'ほか'), - (0x1F201, 'M', u'ココ'), - (0x1F202, 'M', u'サ'), + (0x1F200, 'M', 'ほか'), + (0x1F201, 'M', 'ココ'), + (0x1F202, 'M', 'サ'), (0x1F203, 'X'), - (0x1F210, 'M', u'手'), - (0x1F211, 'M', u'字'), - (0x1F212, 'M', u'双'), - (0x1F213, 'M', u'デ'), - (0x1F214, 'M', u'二'), - (0x1F215, 'M', u'多'), - (0x1F216, 'M', u'解'), - (0x1F217, 'M', u'天'), - (0x1F218, 'M', u'交'), - (0x1F219, 'M', u'映'), - (0x1F21A, 'M', u'無'), - (0x1F21B, 'M', u'料'), - (0x1F21C, 'M', u'前'), - (0x1F21D, 'M', u'後'), - (0x1F21E, 'M', u'再'), - (0x1F21F, 'M', u'新'), - (0x1F220, 'M', u'初'), - (0x1F221, 'M', u'終'), - (0x1F222, 'M', u'生'), - (0x1F223, 'M', u'販'), - ] - -def _seg_73(): - return [ - (0x1F224, 'M', u'声'), - (0x1F225, 'M', u'吹'), - (0x1F226, 'M', u'演'), - (0x1F227, 'M', u'投'), - (0x1F228, 'M', u'捕'), - (0x1F229, 'M', u'一'), - (0x1F22A, 'M', u'三'), - (0x1F22B, 'M', u'遊'), - (0x1F22C, 'M', u'左'), - (0x1F22D, 'M', u'中'), - (0x1F22E, 'M', u'右'), - (0x1F22F, 'M', u'指'), - (0x1F230, 'M', u'走'), - (0x1F231, 'M', u'打'), - (0x1F232, 'M', u'禁'), - (0x1F233, 'M', u'空'), - (0x1F234, 'M', u'合'), - (0x1F235, 'M', u'満'), - (0x1F236, 'M', u'有'), - (0x1F237, 'M', u'月'), - (0x1F238, 'M', u'申'), - (0x1F239, 'M', u'割'), - (0x1F23A, 'M', u'営'), - (0x1F23B, 'M', u'配'), + (0x1F210, 'M', '手'), + (0x1F211, 'M', '字'), + (0x1F212, 'M', '双'), + (0x1F213, 'M', 'デ'), + (0x1F214, 'M', '二'), + (0x1F215, 'M', '多'), + (0x1F216, 'M', '解'), + (0x1F217, 'M', '天'), + (0x1F218, 'M', '交'), + (0x1F219, 'M', '映'), + (0x1F21A, 'M', '無'), + (0x1F21B, 'M', '料'), + (0x1F21C, 'M', '前'), + (0x1F21D, 'M', '後'), + (0x1F21E, 'M', '再'), + (0x1F21F, 'M', '新'), + (0x1F220, 'M', '初'), + (0x1F221, 'M', '終'), + (0x1F222, 'M', '生'), + (0x1F223, 'M', '販'), + (0x1F224, 'M', '声'), + (0x1F225, 'M', '吹'), + (0x1F226, 'M', '演'), + (0x1F227, 'M', '投'), + (0x1F228, 'M', '捕'), + (0x1F229, 'M', '一'), + (0x1F22A, 'M', '三'), + (0x1F22B, 'M', '遊'), + (0x1F22C, 'M', '左'), + (0x1F22D, 'M', '中'), + (0x1F22E, 'M', '右'), + (0x1F22F, 'M', '指'), + (0x1F230, 'M', '走'), + (0x1F231, 'M', '打'), + (0x1F232, 'M', '禁'), + (0x1F233, 'M', '空'), + (0x1F234, 'M', '合'), + (0x1F235, 'M', '満'), + (0x1F236, 'M', '有'), + (0x1F237, 'M', '月'), + (0x1F238, 'M', '申'), + (0x1F239, 'M', '割'), + (0x1F23A, 'M', '営'), + (0x1F23B, 'M', '配'), (0x1F23C, 'X'), - (0x1F240, 'M', u'〔本〕'), - (0x1F241, 'M', u'〔三〕'), - (0x1F242, 'M', u'〔二〕'), - (0x1F243, 'M', u'〔安〕'), - (0x1F244, 'M', u'〔点〕'), - (0x1F245, 'M', u'〔打〕'), - (0x1F246, 'M', u'〔盗〕'), - (0x1F247, 'M', u'〔勝〕'), - (0x1F248, 'M', u'〔敗〕'), + (0x1F240, 'M', '〔本〕'), + (0x1F241, 'M', '〔三〕'), + (0x1F242, 'M', '〔二〕'), + (0x1F243, 'M', '〔安〕'), + (0x1F244, 'M', '〔点〕'), + (0x1F245, 'M', '〔打〕'), + (0x1F246, 'M', '〔盗〕'), + (0x1F247, 'M', '〔勝〕'), + (0x1F248, 'M', '〔敗〕'), (0x1F249, 'X'), - (0x1F250, 'M', u'得'), - (0x1F251, 'M', u'可'), + (0x1F250, 'M', '得'), + (0x1F251, 'M', '可'), (0x1F252, 'X'), (0x1F260, 'V'), (0x1F266, 'X'), (0x1F300, 'V'), (0x1F6D8, 'X'), - (0x1F6E0, 'V'), + (0x1F6DC, 'V'), (0x1F6ED, 'X'), (0x1F6F0, 'V'), (0x1F6FD, 'X'), (0x1F700, 'V'), - (0x1F774, 'X'), - (0x1F780, 'V'), - (0x1F7D9, 'X'), + (0x1F777, 'X'), + (0x1F77B, 'V'), + (0x1F7DA, 'X'), (0x1F7E0, 'V'), (0x1F7EC, 'X'), + (0x1F7F0, 'V'), + (0x1F7F1, 'X'), (0x1F800, 'V'), (0x1F80C, 'X'), (0x1F810, 'V'), @@ -7664,611 +7907,609 @@ def _seg_73(): (0x1F8B0, 'V'), (0x1F8B2, 'X'), (0x1F900, 'V'), - (0x1F979, 'X'), - (0x1F97A, 'V'), - (0x1F9CC, 'X'), - (0x1F9CD, 'V'), (0x1FA54, 'X'), (0x1FA60, 'V'), (0x1FA6E, 'X'), + ] + +def _seg_76() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ (0x1FA70, 'V'), - (0x1FA75, 'X'), - (0x1FA78, 'V'), - (0x1FA7B, 'X'), + (0x1FA7D, 'X'), (0x1FA80, 'V'), - (0x1FA87, 'X'), + (0x1FA89, 'X'), (0x1FA90, 'V'), - (0x1FAA9, 'X'), - (0x1FAB0, 'V'), - (0x1FAB7, 'X'), - (0x1FAC0, 'V'), - (0x1FAC3, 'X'), - (0x1FAD0, 'V'), - (0x1FAD7, 'X'), + (0x1FABE, 'X'), + (0x1FABF, 'V'), + (0x1FAC6, 'X'), + (0x1FACE, 'V'), + (0x1FADC, 'X'), + (0x1FAE0, 'V'), + (0x1FAE9, 'X'), + (0x1FAF0, 'V'), + (0x1FAF9, 'X'), (0x1FB00, 'V'), (0x1FB93, 'X'), (0x1FB94, 'V'), (0x1FBCB, 'X'), - (0x1FBF0, 'M', u'0'), - (0x1FBF1, 'M', u'1'), - (0x1FBF2, 'M', u'2'), - (0x1FBF3, 'M', u'3'), - (0x1FBF4, 'M', u'4'), - (0x1FBF5, 'M', u'5'), - (0x1FBF6, 'M', u'6'), - (0x1FBF7, 'M', u'7'), - (0x1FBF8, 'M', u'8'), - (0x1FBF9, 'M', u'9'), - ] - -def _seg_74(): - return [ + (0x1FBF0, 'M', '0'), + (0x1FBF1, 'M', '1'), + (0x1FBF2, 'M', '2'), + (0x1FBF3, 'M', '3'), + (0x1FBF4, 'M', '4'), + (0x1FBF5, 'M', '5'), + (0x1FBF6, 'M', '6'), + (0x1FBF7, 'M', '7'), + (0x1FBF8, 'M', '8'), + (0x1FBF9, 'M', '9'), (0x1FBFA, 'X'), (0x20000, 'V'), - (0x2A6DE, 'X'), + (0x2A6E0, 'X'), (0x2A700, 'V'), - (0x2B735, 'X'), + (0x2B73A, 'X'), (0x2B740, 'V'), (0x2B81E, 'X'), (0x2B820, 'V'), (0x2CEA2, 'X'), (0x2CEB0, 'V'), (0x2EBE1, 'X'), - (0x2F800, 'M', u'丽'), - (0x2F801, 'M', u'丸'), - (0x2F802, 'M', u'乁'), - (0x2F803, 'M', u'𠄢'), - (0x2F804, 'M', u'你'), - (0x2F805, 'M', u'侮'), - (0x2F806, 'M', u'侻'), - (0x2F807, 'M', u'倂'), - (0x2F808, 'M', u'偺'), - (0x2F809, 'M', u'備'), - (0x2F80A, 'M', u'僧'), - (0x2F80B, 'M', u'像'), - (0x2F80C, 'M', u'㒞'), - (0x2F80D, 'M', u'𠘺'), - (0x2F80E, 'M', u'免'), - (0x2F80F, 'M', u'兔'), - (0x2F810, 'M', u'兤'), - (0x2F811, 'M', u'具'), - (0x2F812, 'M', u'𠔜'), - (0x2F813, 'M', u'㒹'), - (0x2F814, 'M', u'內'), - (0x2F815, 'M', u'再'), - (0x2F816, 'M', u'𠕋'), - (0x2F817, 'M', u'冗'), - (0x2F818, 'M', u'冤'), - (0x2F819, 'M', u'仌'), - (0x2F81A, 'M', u'冬'), - (0x2F81B, 'M', u'况'), - (0x2F81C, 'M', u'𩇟'), - (0x2F81D, 'M', u'凵'), - (0x2F81E, 'M', u'刃'), - (0x2F81F, 'M', u'㓟'), - (0x2F820, 'M', u'刻'), - (0x2F821, 'M', u'剆'), - (0x2F822, 'M', u'割'), - (0x2F823, 'M', u'剷'), - (0x2F824, 'M', u'㔕'), - (0x2F825, 'M', u'勇'), - (0x2F826, 'M', u'勉'), - (0x2F827, 'M', u'勤'), - (0x2F828, 'M', u'勺'), - (0x2F829, 'M', u'包'), - (0x2F82A, 'M', u'匆'), - (0x2F82B, 'M', u'北'), - (0x2F82C, 'M', u'卉'), - (0x2F82D, 'M', u'卑'), - (0x2F82E, 'M', u'博'), - (0x2F82F, 'M', u'即'), - (0x2F830, 'M', u'卽'), - (0x2F831, 'M', u'卿'), - (0x2F834, 'M', u'𠨬'), - (0x2F835, 'M', u'灰'), - (0x2F836, 'M', u'及'), - (0x2F837, 'M', u'叟'), - (0x2F838, 'M', u'𠭣'), - (0x2F839, 'M', u'叫'), - (0x2F83A, 'M', u'叱'), - (0x2F83B, 'M', u'吆'), - (0x2F83C, 'M', u'咞'), - (0x2F83D, 'M', u'吸'), - (0x2F83E, 'M', u'呈'), - (0x2F83F, 'M', u'周'), - (0x2F840, 'M', u'咢'), - (0x2F841, 'M', u'哶'), - (0x2F842, 'M', u'唐'), - (0x2F843, 'M', u'啓'), - (0x2F844, 'M', u'啣'), - (0x2F845, 'M', u'善'), - (0x2F847, 'M', u'喙'), - (0x2F848, 'M', u'喫'), - (0x2F849, 'M', u'喳'), - (0x2F84A, 'M', u'嗂'), - (0x2F84B, 'M', u'圖'), - (0x2F84C, 'M', u'嘆'), - (0x2F84D, 'M', u'圗'), - (0x2F84E, 'M', u'噑'), - (0x2F84F, 'M', u'噴'), - (0x2F850, 'M', u'切'), - (0x2F851, 'M', u'壮'), - (0x2F852, 'M', u'城'), - (0x2F853, 'M', u'埴'), - (0x2F854, 'M', u'堍'), - (0x2F855, 'M', u'型'), - (0x2F856, 'M', u'堲'), - (0x2F857, 'M', u'報'), - (0x2F858, 'M', u'墬'), - (0x2F859, 'M', u'𡓤'), - (0x2F85A, 'M', u'売'), - (0x2F85B, 'M', u'壷'), + (0x2F800, 'M', '丽'), + (0x2F801, 'M', '丸'), + (0x2F802, 'M', '乁'), + (0x2F803, 'M', '𠄢'), + (0x2F804, 'M', '你'), + (0x2F805, 'M', '侮'), + (0x2F806, 'M', '侻'), + (0x2F807, 'M', '倂'), + (0x2F808, 'M', '偺'), + (0x2F809, 'M', '備'), + (0x2F80A, 'M', '僧'), + (0x2F80B, 'M', '像'), + (0x2F80C, 'M', '㒞'), + (0x2F80D, 'M', '𠘺'), + (0x2F80E, 'M', '免'), + (0x2F80F, 'M', '兔'), + (0x2F810, 'M', '兤'), + (0x2F811, 'M', '具'), + (0x2F812, 'M', '𠔜'), + (0x2F813, 'M', '㒹'), + (0x2F814, 'M', '內'), + (0x2F815, 'M', '再'), + (0x2F816, 'M', '𠕋'), + (0x2F817, 'M', '冗'), + (0x2F818, 'M', '冤'), + (0x2F819, 'M', '仌'), + (0x2F81A, 'M', '冬'), + (0x2F81B, 'M', '况'), + (0x2F81C, 'M', '𩇟'), + (0x2F81D, 'M', '凵'), + (0x2F81E, 'M', '刃'), + (0x2F81F, 'M', '㓟'), + (0x2F820, 'M', '刻'), + (0x2F821, 'M', '剆'), + (0x2F822, 'M', '割'), + (0x2F823, 'M', '剷'), + (0x2F824, 'M', '㔕'), + (0x2F825, 'M', '勇'), + (0x2F826, 'M', '勉'), + (0x2F827, 'M', '勤'), + (0x2F828, 'M', '勺'), + (0x2F829, 'M', '包'), + (0x2F82A, 'M', '匆'), + (0x2F82B, 'M', '北'), + (0x2F82C, 'M', '卉'), + (0x2F82D, 'M', '卑'), + (0x2F82E, 'M', '博'), + (0x2F82F, 'M', '即'), + (0x2F830, 'M', '卽'), + (0x2F831, 'M', '卿'), + (0x2F834, 'M', '𠨬'), + (0x2F835, 'M', '灰'), + (0x2F836, 'M', '及'), + (0x2F837, 'M', '叟'), + (0x2F838, 'M', '𠭣'), + (0x2F839, 'M', '叫'), + (0x2F83A, 'M', '叱'), + (0x2F83B, 'M', '吆'), + (0x2F83C, 'M', '咞'), + (0x2F83D, 'M', '吸'), + (0x2F83E, 'M', '呈'), ] -def _seg_75(): +def _seg_77() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0x2F85C, 'M', u'夆'), - (0x2F85D, 'M', u'多'), - (0x2F85E, 'M', u'夢'), - (0x2F85F, 'M', u'奢'), - (0x2F860, 'M', u'𡚨'), - (0x2F861, 'M', u'𡛪'), - (0x2F862, 'M', u'姬'), - (0x2F863, 'M', u'娛'), - (0x2F864, 'M', u'娧'), - (0x2F865, 'M', u'姘'), - (0x2F866, 'M', u'婦'), - (0x2F867, 'M', u'㛮'), + (0x2F83F, 'M', '周'), + (0x2F840, 'M', '咢'), + (0x2F841, 'M', '哶'), + (0x2F842, 'M', '唐'), + (0x2F843, 'M', '啓'), + (0x2F844, 'M', '啣'), + (0x2F845, 'M', '善'), + (0x2F847, 'M', '喙'), + (0x2F848, 'M', '喫'), + (0x2F849, 'M', '喳'), + (0x2F84A, 'M', '嗂'), + (0x2F84B, 'M', '圖'), + (0x2F84C, 'M', '嘆'), + (0x2F84D, 'M', '圗'), + (0x2F84E, 'M', '噑'), + (0x2F84F, 'M', '噴'), + (0x2F850, 'M', '切'), + (0x2F851, 'M', '壮'), + (0x2F852, 'M', '城'), + (0x2F853, 'M', '埴'), + (0x2F854, 'M', '堍'), + (0x2F855, 'M', '型'), + (0x2F856, 'M', '堲'), + (0x2F857, 'M', '報'), + (0x2F858, 'M', '墬'), + (0x2F859, 'M', '𡓤'), + (0x2F85A, 'M', '売'), + (0x2F85B, 'M', '壷'), + (0x2F85C, 'M', '夆'), + (0x2F85D, 'M', '多'), + (0x2F85E, 'M', '夢'), + (0x2F85F, 'M', '奢'), + (0x2F860, 'M', '𡚨'), + (0x2F861, 'M', '𡛪'), + (0x2F862, 'M', '姬'), + (0x2F863, 'M', '娛'), + (0x2F864, 'M', '娧'), + (0x2F865, 'M', '姘'), + (0x2F866, 'M', '婦'), + (0x2F867, 'M', '㛮'), (0x2F868, 'X'), - (0x2F869, 'M', u'嬈'), - (0x2F86A, 'M', u'嬾'), - (0x2F86C, 'M', u'𡧈'), - (0x2F86D, 'M', u'寃'), - (0x2F86E, 'M', u'寘'), - (0x2F86F, 'M', u'寧'), - (0x2F870, 'M', u'寳'), - (0x2F871, 'M', u'𡬘'), - (0x2F872, 'M', u'寿'), - (0x2F873, 'M', u'将'), + (0x2F869, 'M', '嬈'), + (0x2F86A, 'M', '嬾'), + (0x2F86C, 'M', '𡧈'), + (0x2F86D, 'M', '寃'), + (0x2F86E, 'M', '寘'), + (0x2F86F, 'M', '寧'), + (0x2F870, 'M', '寳'), + (0x2F871, 'M', '𡬘'), + (0x2F872, 'M', '寿'), + (0x2F873, 'M', '将'), (0x2F874, 'X'), - (0x2F875, 'M', u'尢'), - (0x2F876, 'M', u'㞁'), - (0x2F877, 'M', u'屠'), - (0x2F878, 'M', u'屮'), - (0x2F879, 'M', u'峀'), - (0x2F87A, 'M', u'岍'), - (0x2F87B, 'M', u'𡷤'), - (0x2F87C, 'M', u'嵃'), - (0x2F87D, 'M', u'𡷦'), - (0x2F87E, 'M', u'嵮'), - (0x2F87F, 'M', u'嵫'), - (0x2F880, 'M', u'嵼'), - (0x2F881, 'M', u'巡'), - (0x2F882, 'M', u'巢'), - (0x2F883, 'M', u'㠯'), - (0x2F884, 'M', u'巽'), - (0x2F885, 'M', u'帨'), - (0x2F886, 'M', u'帽'), - (0x2F887, 'M', u'幩'), - (0x2F888, 'M', u'㡢'), - (0x2F889, 'M', u'𢆃'), - (0x2F88A, 'M', u'㡼'), - (0x2F88B, 'M', u'庰'), - (0x2F88C, 'M', u'庳'), - (0x2F88D, 'M', u'庶'), - (0x2F88E, 'M', u'廊'), - (0x2F88F, 'M', u'𪎒'), - (0x2F890, 'M', u'廾'), - (0x2F891, 'M', u'𢌱'), - (0x2F893, 'M', u'舁'), - (0x2F894, 'M', u'弢'), - (0x2F896, 'M', u'㣇'), - (0x2F897, 'M', u'𣊸'), - (0x2F898, 'M', u'𦇚'), - (0x2F899, 'M', u'形'), - (0x2F89A, 'M', u'彫'), - (0x2F89B, 'M', u'㣣'), - (0x2F89C, 'M', u'徚'), - (0x2F89D, 'M', u'忍'), - (0x2F89E, 'M', u'志'), - (0x2F89F, 'M', u'忹'), - (0x2F8A0, 'M', u'悁'), - (0x2F8A1, 'M', u'㤺'), - (0x2F8A2, 'M', u'㤜'), - (0x2F8A3, 'M', u'悔'), - (0x2F8A4, 'M', u'𢛔'), - (0x2F8A5, 'M', u'惇'), - (0x2F8A6, 'M', u'慈'), - (0x2F8A7, 'M', u'慌'), - (0x2F8A8, 'M', u'慎'), - (0x2F8A9, 'M', u'慌'), - (0x2F8AA, 'M', u'慺'), - (0x2F8AB, 'M', u'憎'), - (0x2F8AC, 'M', u'憲'), - (0x2F8AD, 'M', u'憤'), - (0x2F8AE, 'M', u'憯'), - (0x2F8AF, 'M', u'懞'), - (0x2F8B0, 'M', u'懲'), - (0x2F8B1, 'M', u'懶'), - (0x2F8B2, 'M', u'成'), - (0x2F8B3, 'M', u'戛'), - (0x2F8B4, 'M', u'扝'), - (0x2F8B5, 'M', u'抱'), - (0x2F8B6, 'M', u'拔'), - (0x2F8B7, 'M', u'捐'), - (0x2F8B8, 'M', u'𢬌'), - (0x2F8B9, 'M', u'挽'), - (0x2F8BA, 'M', u'拼'), - (0x2F8BB, 'M', u'捨'), - (0x2F8BC, 'M', u'掃'), - (0x2F8BD, 'M', u'揤'), - (0x2F8BE, 'M', u'𢯱'), - (0x2F8BF, 'M', u'搢'), - (0x2F8C0, 'M', u'揅'), - (0x2F8C1, 'M', u'掩'), - (0x2F8C2, 'M', u'㨮'), + (0x2F875, 'M', '尢'), + (0x2F876, 'M', '㞁'), + (0x2F877, 'M', '屠'), + (0x2F878, 'M', '屮'), + (0x2F879, 'M', '峀'), + (0x2F87A, 'M', '岍'), + (0x2F87B, 'M', '𡷤'), + (0x2F87C, 'M', '嵃'), + (0x2F87D, 'M', '𡷦'), + (0x2F87E, 'M', '嵮'), + (0x2F87F, 'M', '嵫'), + (0x2F880, 'M', '嵼'), + (0x2F881, 'M', '巡'), + (0x2F882, 'M', '巢'), + (0x2F883, 'M', '㠯'), + (0x2F884, 'M', '巽'), + (0x2F885, 'M', '帨'), + (0x2F886, 'M', '帽'), + (0x2F887, 'M', '幩'), + (0x2F888, 'M', '㡢'), + (0x2F889, 'M', '𢆃'), + (0x2F88A, 'M', '㡼'), + (0x2F88B, 'M', '庰'), + (0x2F88C, 'M', '庳'), + (0x2F88D, 'M', '庶'), + (0x2F88E, 'M', '廊'), + (0x2F88F, 'M', '𪎒'), + (0x2F890, 'M', '廾'), + (0x2F891, 'M', '𢌱'), + (0x2F893, 'M', '舁'), + (0x2F894, 'M', '弢'), + (0x2F896, 'M', '㣇'), + (0x2F897, 'M', '𣊸'), + (0x2F898, 'M', '𦇚'), + (0x2F899, 'M', '形'), + (0x2F89A, 'M', '彫'), + (0x2F89B, 'M', '㣣'), + (0x2F89C, 'M', '徚'), + (0x2F89D, 'M', '忍'), + (0x2F89E, 'M', '志'), + (0x2F89F, 'M', '忹'), + (0x2F8A0, 'M', '悁'), + (0x2F8A1, 'M', '㤺'), + (0x2F8A2, 'M', '㤜'), + (0x2F8A3, 'M', '悔'), + (0x2F8A4, 'M', '𢛔'), + (0x2F8A5, 'M', '惇'), + (0x2F8A6, 'M', '慈'), ] -def _seg_76(): +def _seg_78() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0x2F8C3, 'M', u'摩'), - (0x2F8C4, 'M', u'摾'), - (0x2F8C5, 'M', u'撝'), - (0x2F8C6, 'M', u'摷'), - (0x2F8C7, 'M', u'㩬'), - (0x2F8C8, 'M', u'敏'), - (0x2F8C9, 'M', u'敬'), - (0x2F8CA, 'M', u'𣀊'), - (0x2F8CB, 'M', u'旣'), - (0x2F8CC, 'M', u'書'), - (0x2F8CD, 'M', u'晉'), - (0x2F8CE, 'M', u'㬙'), - (0x2F8CF, 'M', u'暑'), - (0x2F8D0, 'M', u'㬈'), - (0x2F8D1, 'M', u'㫤'), - (0x2F8D2, 'M', u'冒'), - (0x2F8D3, 'M', u'冕'), - (0x2F8D4, 'M', u'最'), - (0x2F8D5, 'M', u'暜'), - (0x2F8D6, 'M', u'肭'), - (0x2F8D7, 'M', u'䏙'), - (0x2F8D8, 'M', u'朗'), - (0x2F8D9, 'M', u'望'), - (0x2F8DA, 'M', u'朡'), - (0x2F8DB, 'M', u'杞'), - (0x2F8DC, 'M', u'杓'), - (0x2F8DD, 'M', u'𣏃'), - (0x2F8DE, 'M', u'㭉'), - (0x2F8DF, 'M', u'柺'), - (0x2F8E0, 'M', u'枅'), - (0x2F8E1, 'M', u'桒'), - (0x2F8E2, 'M', u'梅'), - (0x2F8E3, 'M', u'𣑭'), - (0x2F8E4, 'M', u'梎'), - (0x2F8E5, 'M', u'栟'), - (0x2F8E6, 'M', u'椔'), - (0x2F8E7, 'M', u'㮝'), - (0x2F8E8, 'M', u'楂'), - (0x2F8E9, 'M', u'榣'), - (0x2F8EA, 'M', u'槪'), - (0x2F8EB, 'M', u'檨'), - (0x2F8EC, 'M', u'𣚣'), - (0x2F8ED, 'M', u'櫛'), - (0x2F8EE, 'M', u'㰘'), - (0x2F8EF, 'M', u'次'), - (0x2F8F0, 'M', u'𣢧'), - (0x2F8F1, 'M', u'歔'), - (0x2F8F2, 'M', u'㱎'), - (0x2F8F3, 'M', u'歲'), - (0x2F8F4, 'M', u'殟'), - (0x2F8F5, 'M', u'殺'), - (0x2F8F6, 'M', u'殻'), - (0x2F8F7, 'M', u'𣪍'), - (0x2F8F8, 'M', u'𡴋'), - (0x2F8F9, 'M', u'𣫺'), - (0x2F8FA, 'M', u'汎'), - (0x2F8FB, 'M', u'𣲼'), - (0x2F8FC, 'M', u'沿'), - (0x2F8FD, 'M', u'泍'), - (0x2F8FE, 'M', u'汧'), - (0x2F8FF, 'M', u'洖'), - (0x2F900, 'M', u'派'), - (0x2F901, 'M', u'海'), - (0x2F902, 'M', u'流'), - (0x2F903, 'M', u'浩'), - (0x2F904, 'M', u'浸'), - (0x2F905, 'M', u'涅'), - (0x2F906, 'M', u'𣴞'), - (0x2F907, 'M', u'洴'), - (0x2F908, 'M', u'港'), - (0x2F909, 'M', u'湮'), - (0x2F90A, 'M', u'㴳'), - (0x2F90B, 'M', u'滋'), - (0x2F90C, 'M', u'滇'), - (0x2F90D, 'M', u'𣻑'), - (0x2F90E, 'M', u'淹'), - (0x2F90F, 'M', u'潮'), - (0x2F910, 'M', u'𣽞'), - (0x2F911, 'M', u'𣾎'), - (0x2F912, 'M', u'濆'), - (0x2F913, 'M', u'瀹'), - (0x2F914, 'M', u'瀞'), - (0x2F915, 'M', u'瀛'), - (0x2F916, 'M', u'㶖'), - (0x2F917, 'M', u'灊'), - (0x2F918, 'M', u'災'), - (0x2F919, 'M', u'灷'), - (0x2F91A, 'M', u'炭'), - (0x2F91B, 'M', u'𠔥'), - (0x2F91C, 'M', u'煅'), - (0x2F91D, 'M', u'𤉣'), - (0x2F91E, 'M', u'熜'), + (0x2F8A7, 'M', '慌'), + (0x2F8A8, 'M', '慎'), + (0x2F8A9, 'M', '慌'), + (0x2F8AA, 'M', '慺'), + (0x2F8AB, 'M', '憎'), + (0x2F8AC, 'M', '憲'), + (0x2F8AD, 'M', '憤'), + (0x2F8AE, 'M', '憯'), + (0x2F8AF, 'M', '懞'), + (0x2F8B0, 'M', '懲'), + (0x2F8B1, 'M', '懶'), + (0x2F8B2, 'M', '成'), + (0x2F8B3, 'M', '戛'), + (0x2F8B4, 'M', '扝'), + (0x2F8B5, 'M', '抱'), + (0x2F8B6, 'M', '拔'), + (0x2F8B7, 'M', '捐'), + (0x2F8B8, 'M', '𢬌'), + (0x2F8B9, 'M', '挽'), + (0x2F8BA, 'M', '拼'), + (0x2F8BB, 'M', '捨'), + (0x2F8BC, 'M', '掃'), + (0x2F8BD, 'M', '揤'), + (0x2F8BE, 'M', '𢯱'), + (0x2F8BF, 'M', '搢'), + (0x2F8C0, 'M', '揅'), + (0x2F8C1, 'M', '掩'), + (0x2F8C2, 'M', '㨮'), + (0x2F8C3, 'M', '摩'), + (0x2F8C4, 'M', '摾'), + (0x2F8C5, 'M', '撝'), + (0x2F8C6, 'M', '摷'), + (0x2F8C7, 'M', '㩬'), + (0x2F8C8, 'M', '敏'), + (0x2F8C9, 'M', '敬'), + (0x2F8CA, 'M', '𣀊'), + (0x2F8CB, 'M', '旣'), + (0x2F8CC, 'M', '書'), + (0x2F8CD, 'M', '晉'), + (0x2F8CE, 'M', '㬙'), + (0x2F8CF, 'M', '暑'), + (0x2F8D0, 'M', '㬈'), + (0x2F8D1, 'M', '㫤'), + (0x2F8D2, 'M', '冒'), + (0x2F8D3, 'M', '冕'), + (0x2F8D4, 'M', '最'), + (0x2F8D5, 'M', '暜'), + (0x2F8D6, 'M', '肭'), + (0x2F8D7, 'M', '䏙'), + (0x2F8D8, 'M', '朗'), + (0x2F8D9, 'M', '望'), + (0x2F8DA, 'M', '朡'), + (0x2F8DB, 'M', '杞'), + (0x2F8DC, 'M', '杓'), + (0x2F8DD, 'M', '𣏃'), + (0x2F8DE, 'M', '㭉'), + (0x2F8DF, 'M', '柺'), + (0x2F8E0, 'M', '枅'), + (0x2F8E1, 'M', '桒'), + (0x2F8E2, 'M', '梅'), + (0x2F8E3, 'M', '𣑭'), + (0x2F8E4, 'M', '梎'), + (0x2F8E5, 'M', '栟'), + (0x2F8E6, 'M', '椔'), + (0x2F8E7, 'M', '㮝'), + (0x2F8E8, 'M', '楂'), + (0x2F8E9, 'M', '榣'), + (0x2F8EA, 'M', '槪'), + (0x2F8EB, 'M', '檨'), + (0x2F8EC, 'M', '𣚣'), + (0x2F8ED, 'M', '櫛'), + (0x2F8EE, 'M', '㰘'), + (0x2F8EF, 'M', '次'), + (0x2F8F0, 'M', '𣢧'), + (0x2F8F1, 'M', '歔'), + (0x2F8F2, 'M', '㱎'), + (0x2F8F3, 'M', '歲'), + (0x2F8F4, 'M', '殟'), + (0x2F8F5, 'M', '殺'), + (0x2F8F6, 'M', '殻'), + (0x2F8F7, 'M', '𣪍'), + (0x2F8F8, 'M', '𡴋'), + (0x2F8F9, 'M', '𣫺'), + (0x2F8FA, 'M', '汎'), + (0x2F8FB, 'M', '𣲼'), + (0x2F8FC, 'M', '沿'), + (0x2F8FD, 'M', '泍'), + (0x2F8FE, 'M', '汧'), + (0x2F8FF, 'M', '洖'), + (0x2F900, 'M', '派'), + (0x2F901, 'M', '海'), + (0x2F902, 'M', '流'), + (0x2F903, 'M', '浩'), + (0x2F904, 'M', '浸'), + (0x2F905, 'M', '涅'), + (0x2F906, 'M', '𣴞'), + (0x2F907, 'M', '洴'), + (0x2F908, 'M', '港'), + (0x2F909, 'M', '湮'), + (0x2F90A, 'M', '㴳'), + ] + +def _seg_79() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x2F90B, 'M', '滋'), + (0x2F90C, 'M', '滇'), + (0x2F90D, 'M', '𣻑'), + (0x2F90E, 'M', '淹'), + (0x2F90F, 'M', '潮'), + (0x2F910, 'M', '𣽞'), + (0x2F911, 'M', '𣾎'), + (0x2F912, 'M', '濆'), + (0x2F913, 'M', '瀹'), + (0x2F914, 'M', '瀞'), + (0x2F915, 'M', '瀛'), + (0x2F916, 'M', '㶖'), + (0x2F917, 'M', '灊'), + (0x2F918, 'M', '災'), + (0x2F919, 'M', '灷'), + (0x2F91A, 'M', '炭'), + (0x2F91B, 'M', '𠔥'), + (0x2F91C, 'M', '煅'), + (0x2F91D, 'M', '𤉣'), + (0x2F91E, 'M', '熜'), (0x2F91F, 'X'), - (0x2F920, 'M', u'爨'), - (0x2F921, 'M', u'爵'), - (0x2F922, 'M', u'牐'), - (0x2F923, 'M', u'𤘈'), - (0x2F924, 'M', u'犀'), - (0x2F925, 'M', u'犕'), - (0x2F926, 'M', u'𤜵'), - ] - -def _seg_77(): - return [ - (0x2F927, 'M', u'𤠔'), - (0x2F928, 'M', u'獺'), - (0x2F929, 'M', u'王'), - (0x2F92A, 'M', u'㺬'), - (0x2F92B, 'M', u'玥'), - (0x2F92C, 'M', u'㺸'), - (0x2F92E, 'M', u'瑇'), - (0x2F92F, 'M', u'瑜'), - (0x2F930, 'M', u'瑱'), - (0x2F931, 'M', u'璅'), - (0x2F932, 'M', u'瓊'), - (0x2F933, 'M', u'㼛'), - (0x2F934, 'M', u'甤'), - (0x2F935, 'M', u'𤰶'), - (0x2F936, 'M', u'甾'), - (0x2F937, 'M', u'𤲒'), - (0x2F938, 'M', u'異'), - (0x2F939, 'M', u'𢆟'), - (0x2F93A, 'M', u'瘐'), - (0x2F93B, 'M', u'𤾡'), - (0x2F93C, 'M', u'𤾸'), - (0x2F93D, 'M', u'𥁄'), - (0x2F93E, 'M', u'㿼'), - (0x2F93F, 'M', u'䀈'), - (0x2F940, 'M', u'直'), - (0x2F941, 'M', u'𥃳'), - (0x2F942, 'M', u'𥃲'), - (0x2F943, 'M', u'𥄙'), - (0x2F944, 'M', u'𥄳'), - (0x2F945, 'M', u'眞'), - (0x2F946, 'M', u'真'), - (0x2F948, 'M', u'睊'), - (0x2F949, 'M', u'䀹'), - (0x2F94A, 'M', u'瞋'), - (0x2F94B, 'M', u'䁆'), - (0x2F94C, 'M', u'䂖'), - (0x2F94D, 'M', u'𥐝'), - (0x2F94E, 'M', u'硎'), - (0x2F94F, 'M', u'碌'), - (0x2F950, 'M', u'磌'), - (0x2F951, 'M', u'䃣'), - (0x2F952, 'M', u'𥘦'), - (0x2F953, 'M', u'祖'), - (0x2F954, 'M', u'𥚚'), - (0x2F955, 'M', u'𥛅'), - (0x2F956, 'M', u'福'), - (0x2F957, 'M', u'秫'), - (0x2F958, 'M', u'䄯'), - (0x2F959, 'M', u'穀'), - (0x2F95A, 'M', u'穊'), - (0x2F95B, 'M', u'穏'), - (0x2F95C, 'M', u'𥥼'), - (0x2F95D, 'M', u'𥪧'), + (0x2F920, 'M', '爨'), + (0x2F921, 'M', '爵'), + (0x2F922, 'M', '牐'), + (0x2F923, 'M', '𤘈'), + (0x2F924, 'M', '犀'), + (0x2F925, 'M', '犕'), + (0x2F926, 'M', '𤜵'), + (0x2F927, 'M', '𤠔'), + (0x2F928, 'M', '獺'), + (0x2F929, 'M', '王'), + (0x2F92A, 'M', '㺬'), + (0x2F92B, 'M', '玥'), + (0x2F92C, 'M', '㺸'), + (0x2F92E, 'M', '瑇'), + (0x2F92F, 'M', '瑜'), + (0x2F930, 'M', '瑱'), + (0x2F931, 'M', '璅'), + (0x2F932, 'M', '瓊'), + (0x2F933, 'M', '㼛'), + (0x2F934, 'M', '甤'), + (0x2F935, 'M', '𤰶'), + (0x2F936, 'M', '甾'), + (0x2F937, 'M', '𤲒'), + (0x2F938, 'M', '異'), + (0x2F939, 'M', '𢆟'), + (0x2F93A, 'M', '瘐'), + (0x2F93B, 'M', '𤾡'), + (0x2F93C, 'M', '𤾸'), + (0x2F93D, 'M', '𥁄'), + (0x2F93E, 'M', '㿼'), + (0x2F93F, 'M', '䀈'), + (0x2F940, 'M', '直'), + (0x2F941, 'M', '𥃳'), + (0x2F942, 'M', '𥃲'), + (0x2F943, 'M', '𥄙'), + (0x2F944, 'M', '𥄳'), + (0x2F945, 'M', '眞'), + (0x2F946, 'M', '真'), + (0x2F948, 'M', '睊'), + (0x2F949, 'M', '䀹'), + (0x2F94A, 'M', '瞋'), + (0x2F94B, 'M', '䁆'), + (0x2F94C, 'M', '䂖'), + (0x2F94D, 'M', '𥐝'), + (0x2F94E, 'M', '硎'), + (0x2F94F, 'M', '碌'), + (0x2F950, 'M', '磌'), + (0x2F951, 'M', '䃣'), + (0x2F952, 'M', '𥘦'), + (0x2F953, 'M', '祖'), + (0x2F954, 'M', '𥚚'), + (0x2F955, 'M', '𥛅'), + (0x2F956, 'M', '福'), + (0x2F957, 'M', '秫'), + (0x2F958, 'M', '䄯'), + (0x2F959, 'M', '穀'), + (0x2F95A, 'M', '穊'), + (0x2F95B, 'M', '穏'), + (0x2F95C, 'M', '𥥼'), + (0x2F95D, 'M', '𥪧'), (0x2F95F, 'X'), - (0x2F960, 'M', u'䈂'), - (0x2F961, 'M', u'𥮫'), - (0x2F962, 'M', u'篆'), - (0x2F963, 'M', u'築'), - (0x2F964, 'M', u'䈧'), - (0x2F965, 'M', u'𥲀'), - (0x2F966, 'M', u'糒'), - (0x2F967, 'M', u'䊠'), - (0x2F968, 'M', u'糨'), - (0x2F969, 'M', u'糣'), - (0x2F96A, 'M', u'紀'), - (0x2F96B, 'M', u'𥾆'), - (0x2F96C, 'M', u'絣'), - (0x2F96D, 'M', u'䌁'), - (0x2F96E, 'M', u'緇'), - (0x2F96F, 'M', u'縂'), - (0x2F970, 'M', u'繅'), - (0x2F971, 'M', u'䌴'), - (0x2F972, 'M', u'𦈨'), - (0x2F973, 'M', u'𦉇'), - (0x2F974, 'M', u'䍙'), - (0x2F975, 'M', u'𦋙'), - (0x2F976, 'M', u'罺'), - (0x2F977, 'M', u'𦌾'), - (0x2F978, 'M', u'羕'), - (0x2F979, 'M', u'翺'), - (0x2F97A, 'M', u'者'), - (0x2F97B, 'M', u'𦓚'), - (0x2F97C, 'M', u'𦔣'), - (0x2F97D, 'M', u'聠'), - (0x2F97E, 'M', u'𦖨'), - (0x2F97F, 'M', u'聰'), - (0x2F980, 'M', u'𣍟'), - (0x2F981, 'M', u'䏕'), - (0x2F982, 'M', u'育'), - (0x2F983, 'M', u'脃'), - (0x2F984, 'M', u'䐋'), - (0x2F985, 'M', u'脾'), - (0x2F986, 'M', u'媵'), - (0x2F987, 'M', u'𦞧'), - (0x2F988, 'M', u'𦞵'), - (0x2F989, 'M', u'𣎓'), - (0x2F98A, 'M', u'𣎜'), - (0x2F98B, 'M', u'舁'), - (0x2F98C, 'M', u'舄'), - (0x2F98D, 'M', u'辞'), + (0x2F960, 'M', '䈂'), + (0x2F961, 'M', '𥮫'), + (0x2F962, 'M', '篆'), + (0x2F963, 'M', '築'), + (0x2F964, 'M', '䈧'), + (0x2F965, 'M', '𥲀'), + (0x2F966, 'M', '糒'), + (0x2F967, 'M', '䊠'), + (0x2F968, 'M', '糨'), + (0x2F969, 'M', '糣'), + (0x2F96A, 'M', '紀'), + (0x2F96B, 'M', '𥾆'), + (0x2F96C, 'M', '絣'), + (0x2F96D, 'M', '䌁'), + (0x2F96E, 'M', '緇'), + (0x2F96F, 'M', '縂'), + (0x2F970, 'M', '繅'), + (0x2F971, 'M', '䌴'), ] -def _seg_78(): +def _seg_80() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0x2F98E, 'M', u'䑫'), - (0x2F98F, 'M', u'芑'), - (0x2F990, 'M', u'芋'), - (0x2F991, 'M', u'芝'), - (0x2F992, 'M', u'劳'), - (0x2F993, 'M', u'花'), - (0x2F994, 'M', u'芳'), - (0x2F995, 'M', u'芽'), - (0x2F996, 'M', u'苦'), - (0x2F997, 'M', u'𦬼'), - (0x2F998, 'M', u'若'), - (0x2F999, 'M', u'茝'), - (0x2F99A, 'M', u'荣'), - (0x2F99B, 'M', u'莭'), - (0x2F99C, 'M', u'茣'), - (0x2F99D, 'M', u'莽'), - (0x2F99E, 'M', u'菧'), - (0x2F99F, 'M', u'著'), - (0x2F9A0, 'M', u'荓'), - (0x2F9A1, 'M', u'菊'), - (0x2F9A2, 'M', u'菌'), - (0x2F9A3, 'M', u'菜'), - (0x2F9A4, 'M', u'𦰶'), - (0x2F9A5, 'M', u'𦵫'), - (0x2F9A6, 'M', u'𦳕'), - (0x2F9A7, 'M', u'䔫'), - (0x2F9A8, 'M', u'蓱'), - (0x2F9A9, 'M', u'蓳'), - (0x2F9AA, 'M', u'蔖'), - (0x2F9AB, 'M', u'𧏊'), - (0x2F9AC, 'M', u'蕤'), - (0x2F9AD, 'M', u'𦼬'), - (0x2F9AE, 'M', u'䕝'), - (0x2F9AF, 'M', u'䕡'), - (0x2F9B0, 'M', u'𦾱'), - (0x2F9B1, 'M', u'𧃒'), - (0x2F9B2, 'M', u'䕫'), - (0x2F9B3, 'M', u'虐'), - (0x2F9B4, 'M', u'虜'), - (0x2F9B5, 'M', u'虧'), - (0x2F9B6, 'M', u'虩'), - (0x2F9B7, 'M', u'蚩'), - (0x2F9B8, 'M', u'蚈'), - (0x2F9B9, 'M', u'蜎'), - (0x2F9BA, 'M', u'蛢'), - (0x2F9BB, 'M', u'蝹'), - (0x2F9BC, 'M', u'蜨'), - (0x2F9BD, 'M', u'蝫'), - (0x2F9BE, 'M', u'螆'), + (0x2F972, 'M', '𦈨'), + (0x2F973, 'M', '𦉇'), + (0x2F974, 'M', '䍙'), + (0x2F975, 'M', '𦋙'), + (0x2F976, 'M', '罺'), + (0x2F977, 'M', '𦌾'), + (0x2F978, 'M', '羕'), + (0x2F979, 'M', '翺'), + (0x2F97A, 'M', '者'), + (0x2F97B, 'M', '𦓚'), + (0x2F97C, 'M', '𦔣'), + (0x2F97D, 'M', '聠'), + (0x2F97E, 'M', '𦖨'), + (0x2F97F, 'M', '聰'), + (0x2F980, 'M', '𣍟'), + (0x2F981, 'M', '䏕'), + (0x2F982, 'M', '育'), + (0x2F983, 'M', '脃'), + (0x2F984, 'M', '䐋'), + (0x2F985, 'M', '脾'), + (0x2F986, 'M', '媵'), + (0x2F987, 'M', '𦞧'), + (0x2F988, 'M', '𦞵'), + (0x2F989, 'M', '𣎓'), + (0x2F98A, 'M', '𣎜'), + (0x2F98B, 'M', '舁'), + (0x2F98C, 'M', '舄'), + (0x2F98D, 'M', '辞'), + (0x2F98E, 'M', '䑫'), + (0x2F98F, 'M', '芑'), + (0x2F990, 'M', '芋'), + (0x2F991, 'M', '芝'), + (0x2F992, 'M', '劳'), + (0x2F993, 'M', '花'), + (0x2F994, 'M', '芳'), + (0x2F995, 'M', '芽'), + (0x2F996, 'M', '苦'), + (0x2F997, 'M', '𦬼'), + (0x2F998, 'M', '若'), + (0x2F999, 'M', '茝'), + (0x2F99A, 'M', '荣'), + (0x2F99B, 'M', '莭'), + (0x2F99C, 'M', '茣'), + (0x2F99D, 'M', '莽'), + (0x2F99E, 'M', '菧'), + (0x2F99F, 'M', '著'), + (0x2F9A0, 'M', '荓'), + (0x2F9A1, 'M', '菊'), + (0x2F9A2, 'M', '菌'), + (0x2F9A3, 'M', '菜'), + (0x2F9A4, 'M', '𦰶'), + (0x2F9A5, 'M', '𦵫'), + (0x2F9A6, 'M', '𦳕'), + (0x2F9A7, 'M', '䔫'), + (0x2F9A8, 'M', '蓱'), + (0x2F9A9, 'M', '蓳'), + (0x2F9AA, 'M', '蔖'), + (0x2F9AB, 'M', '𧏊'), + (0x2F9AC, 'M', '蕤'), + (0x2F9AD, 'M', '𦼬'), + (0x2F9AE, 'M', '䕝'), + (0x2F9AF, 'M', '䕡'), + (0x2F9B0, 'M', '𦾱'), + (0x2F9B1, 'M', '𧃒'), + (0x2F9B2, 'M', '䕫'), + (0x2F9B3, 'M', '虐'), + (0x2F9B4, 'M', '虜'), + (0x2F9B5, 'M', '虧'), + (0x2F9B6, 'M', '虩'), + (0x2F9B7, 'M', '蚩'), + (0x2F9B8, 'M', '蚈'), + (0x2F9B9, 'M', '蜎'), + (0x2F9BA, 'M', '蛢'), + (0x2F9BB, 'M', '蝹'), + (0x2F9BC, 'M', '蜨'), + (0x2F9BD, 'M', '蝫'), + (0x2F9BE, 'M', '螆'), (0x2F9BF, 'X'), - (0x2F9C0, 'M', u'蟡'), - (0x2F9C1, 'M', u'蠁'), - (0x2F9C2, 'M', u'䗹'), - (0x2F9C3, 'M', u'衠'), - (0x2F9C4, 'M', u'衣'), - (0x2F9C5, 'M', u'𧙧'), - (0x2F9C6, 'M', u'裗'), - (0x2F9C7, 'M', u'裞'), - (0x2F9C8, 'M', u'䘵'), - (0x2F9C9, 'M', u'裺'), - (0x2F9CA, 'M', u'㒻'), - (0x2F9CB, 'M', u'𧢮'), - (0x2F9CC, 'M', u'𧥦'), - (0x2F9CD, 'M', u'䚾'), - (0x2F9CE, 'M', u'䛇'), - (0x2F9CF, 'M', u'誠'), - (0x2F9D0, 'M', u'諭'), - (0x2F9D1, 'M', u'變'), - (0x2F9D2, 'M', u'豕'), - (0x2F9D3, 'M', u'𧲨'), - (0x2F9D4, 'M', u'貫'), - (0x2F9D5, 'M', u'賁'), - (0x2F9D6, 'M', u'贛'), - (0x2F9D7, 'M', u'起'), - (0x2F9D8, 'M', u'𧼯'), - (0x2F9D9, 'M', u'𠠄'), - (0x2F9DA, 'M', u'跋'), - (0x2F9DB, 'M', u'趼'), - (0x2F9DC, 'M', u'跰'), - (0x2F9DD, 'M', u'𠣞'), - (0x2F9DE, 'M', u'軔'), - (0x2F9DF, 'M', u'輸'), - (0x2F9E0, 'M', u'𨗒'), - (0x2F9E1, 'M', u'𨗭'), - (0x2F9E2, 'M', u'邔'), - (0x2F9E3, 'M', u'郱'), - (0x2F9E4, 'M', u'鄑'), - (0x2F9E5, 'M', u'𨜮'), - (0x2F9E6, 'M', u'鄛'), - (0x2F9E7, 'M', u'鈸'), - (0x2F9E8, 'M', u'鋗'), - (0x2F9E9, 'M', u'鋘'), - (0x2F9EA, 'M', u'鉼'), - (0x2F9EB, 'M', u'鏹'), - (0x2F9EC, 'M', u'鐕'), - (0x2F9ED, 'M', u'𨯺'), - (0x2F9EE, 'M', u'開'), - (0x2F9EF, 'M', u'䦕'), - (0x2F9F0, 'M', u'閷'), - (0x2F9F1, 'M', u'𨵷'), + (0x2F9C0, 'M', '蟡'), + (0x2F9C1, 'M', '蠁'), + (0x2F9C2, 'M', '䗹'), + (0x2F9C3, 'M', '衠'), + (0x2F9C4, 'M', '衣'), + (0x2F9C5, 'M', '𧙧'), + (0x2F9C6, 'M', '裗'), + (0x2F9C7, 'M', '裞'), + (0x2F9C8, 'M', '䘵'), + (0x2F9C9, 'M', '裺'), + (0x2F9CA, 'M', '㒻'), + (0x2F9CB, 'M', '𧢮'), + (0x2F9CC, 'M', '𧥦'), + (0x2F9CD, 'M', '䚾'), + (0x2F9CE, 'M', '䛇'), + (0x2F9CF, 'M', '誠'), + (0x2F9D0, 'M', '諭'), + (0x2F9D1, 'M', '變'), + (0x2F9D2, 'M', '豕'), + (0x2F9D3, 'M', '𧲨'), + (0x2F9D4, 'M', '貫'), + (0x2F9D5, 'M', '賁'), ] -def _seg_79(): +def _seg_81() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ - (0x2F9F2, 'M', u'䧦'), - (0x2F9F3, 'M', u'雃'), - (0x2F9F4, 'M', u'嶲'), - (0x2F9F5, 'M', u'霣'), - (0x2F9F6, 'M', u'𩅅'), - (0x2F9F7, 'M', u'𩈚'), - (0x2F9F8, 'M', u'䩮'), - (0x2F9F9, 'M', u'䩶'), - (0x2F9FA, 'M', u'韠'), - (0x2F9FB, 'M', u'𩐊'), - (0x2F9FC, 'M', u'䪲'), - (0x2F9FD, 'M', u'𩒖'), - (0x2F9FE, 'M', u'頋'), - (0x2FA00, 'M', u'頩'), - (0x2FA01, 'M', u'𩖶'), - (0x2FA02, 'M', u'飢'), - (0x2FA03, 'M', u'䬳'), - (0x2FA04, 'M', u'餩'), - (0x2FA05, 'M', u'馧'), - (0x2FA06, 'M', u'駂'), - (0x2FA07, 'M', u'駾'), - (0x2FA08, 'M', u'䯎'), - (0x2FA09, 'M', u'𩬰'), - (0x2FA0A, 'M', u'鬒'), - (0x2FA0B, 'M', u'鱀'), - (0x2FA0C, 'M', u'鳽'), - (0x2FA0D, 'M', u'䳎'), - (0x2FA0E, 'M', u'䳭'), - (0x2FA0F, 'M', u'鵧'), - (0x2FA10, 'M', u'𪃎'), - (0x2FA11, 'M', u'䳸'), - (0x2FA12, 'M', u'𪄅'), - (0x2FA13, 'M', u'𪈎'), - (0x2FA14, 'M', u'𪊑'), - (0x2FA15, 'M', u'麻'), - (0x2FA16, 'M', u'䵖'), - (0x2FA17, 'M', u'黹'), - (0x2FA18, 'M', u'黾'), - (0x2FA19, 'M', u'鼅'), - (0x2FA1A, 'M', u'鼏'), - (0x2FA1B, 'M', u'鼖'), - (0x2FA1C, 'M', u'鼻'), - (0x2FA1D, 'M', u'𪘀'), + (0x2F9D6, 'M', '贛'), + (0x2F9D7, 'M', '起'), + (0x2F9D8, 'M', '𧼯'), + (0x2F9D9, 'M', '𠠄'), + (0x2F9DA, 'M', '跋'), + (0x2F9DB, 'M', '趼'), + (0x2F9DC, 'M', '跰'), + (0x2F9DD, 'M', '𠣞'), + (0x2F9DE, 'M', '軔'), + (0x2F9DF, 'M', '輸'), + (0x2F9E0, 'M', '𨗒'), + (0x2F9E1, 'M', '𨗭'), + (0x2F9E2, 'M', '邔'), + (0x2F9E3, 'M', '郱'), + (0x2F9E4, 'M', '鄑'), + (0x2F9E5, 'M', '𨜮'), + (0x2F9E6, 'M', '鄛'), + (0x2F9E7, 'M', '鈸'), + (0x2F9E8, 'M', '鋗'), + (0x2F9E9, 'M', '鋘'), + (0x2F9EA, 'M', '鉼'), + (0x2F9EB, 'M', '鏹'), + (0x2F9EC, 'M', '鐕'), + (0x2F9ED, 'M', '𨯺'), + (0x2F9EE, 'M', '開'), + (0x2F9EF, 'M', '䦕'), + (0x2F9F0, 'M', '閷'), + (0x2F9F1, 'M', '𨵷'), + (0x2F9F2, 'M', '䧦'), + (0x2F9F3, 'M', '雃'), + (0x2F9F4, 'M', '嶲'), + (0x2F9F5, 'M', '霣'), + (0x2F9F6, 'M', '𩅅'), + (0x2F9F7, 'M', '𩈚'), + (0x2F9F8, 'M', '䩮'), + (0x2F9F9, 'M', '䩶'), + (0x2F9FA, 'M', '韠'), + (0x2F9FB, 'M', '𩐊'), + (0x2F9FC, 'M', '䪲'), + (0x2F9FD, 'M', '𩒖'), + (0x2F9FE, 'M', '頋'), + (0x2FA00, 'M', '頩'), + (0x2FA01, 'M', '𩖶'), + (0x2FA02, 'M', '飢'), + (0x2FA03, 'M', '䬳'), + (0x2FA04, 'M', '餩'), + (0x2FA05, 'M', '馧'), + (0x2FA06, 'M', '駂'), + (0x2FA07, 'M', '駾'), + (0x2FA08, 'M', '䯎'), + (0x2FA09, 'M', '𩬰'), + (0x2FA0A, 'M', '鬒'), + (0x2FA0B, 'M', '鱀'), + (0x2FA0C, 'M', '鳽'), + (0x2FA0D, 'M', '䳎'), + (0x2FA0E, 'M', '䳭'), + (0x2FA0F, 'M', '鵧'), + (0x2FA10, 'M', '𪃎'), + (0x2FA11, 'M', '䳸'), + (0x2FA12, 'M', '𪄅'), + (0x2FA13, 'M', '𪈎'), + (0x2FA14, 'M', '𪊑'), + (0x2FA15, 'M', '麻'), + (0x2FA16, 'M', '䵖'), + (0x2FA17, 'M', '黹'), + (0x2FA18, 'M', '黾'), + (0x2FA19, 'M', '鼅'), + (0x2FA1A, 'M', '鼏'), + (0x2FA1B, 'M', '鼖'), + (0x2FA1C, 'M', '鼻'), + (0x2FA1D, 'M', '𪘀'), (0x2FA1E, 'X'), (0x30000, 'V'), (0x3134B, 'X'), + (0x31350, 'V'), + (0x323B0, 'X'), (0xE0100, 'I'), (0xE01F0, 'X'), ] @@ -8354,4 +8595,6 @@ uts46data = tuple( + _seg_77() + _seg_78() + _seg_79() -) + + _seg_80() + + _seg_81() +) # type: Tuple[Union[Tuple[int, str], Tuple[int, str, str]], ...] diff --git a/libs/common/importlib_metadata/__init__.py b/libs/common/importlib_metadata/__init__.py index 4f3d5ebd..26a1388c 100644 --- a/libs/common/importlib_metadata/__init__.py +++ b/libs/common/importlib_metadata/__init__.py @@ -1,85 +1,179 @@ -from __future__ import unicode_literals, absolute_import - -import io import os import re import abc import csv import sys import zipp +import email +import pathlib import operator +import textwrap +import warnings import functools import itertools import posixpath import collections +from . import _adapters, _meta, _py39compat +from ._collections import FreezableDefaultDict, Pair from ._compat import ( - install, NullFinder, - ConfigParser, - suppress, - map, - FileNotFoundError, - IsADirectoryError, - NotADirectoryError, - PermissionError, - pathlib, - ModuleNotFoundError, - MetaPathFinder, - email_message_from_string, - PyPy_repr, - unique_ordered, - str, - ) + install, + pypy_partial, +) +from ._functools import method_cache, pass_none +from ._itertools import always_iterable, unique_everseen +from ._meta import PackageMetadata, SimplePath + +from contextlib import suppress from importlib import import_module +from importlib.abc import MetaPathFinder from itertools import starmap - - -__metaclass__ = type +from typing import List, Mapping, Optional __all__ = [ 'Distribution', 'DistributionFinder', + 'PackageMetadata', 'PackageNotFoundError', 'distribution', 'distributions', 'entry_points', 'files', 'metadata', + 'packages_distributions', 'requires', 'version', - ] +] class PackageNotFoundError(ModuleNotFoundError): """The package was not found.""" def __str__(self): - tmpl = "No package metadata was found for {self.name}" - return tmpl.format(**locals()) + return f"No package metadata was found for {self.name}" @property def name(self): - name, = self.args + (name,) = self.args return name -class EntryPoint( - PyPy_repr, - collections.namedtuple('EntryPointBase', 'name value group')): +class Sectioned: + """ + A simple entry point config parser for performance + + >>> for item in Sectioned.read(Sectioned._sample): + ... print(item) + Pair(name='sec1', value='# comments ignored') + Pair(name='sec1', value='a = 1') + Pair(name='sec1', value='b = 2') + Pair(name='sec2', value='a = 2') + + >>> res = Sectioned.section_pairs(Sectioned._sample) + >>> item = next(res) + >>> item.name + 'sec1' + >>> item.value + Pair(name='a', value='1') + >>> item = next(res) + >>> item.value + Pair(name='b', value='2') + >>> item = next(res) + >>> item.name + 'sec2' + >>> item.value + Pair(name='a', value='2') + >>> list(res) + [] + """ + + _sample = textwrap.dedent( + """ + [sec1] + # comments ignored + a = 1 + b = 2 + + [sec2] + a = 2 + """ + ).lstrip() + + @classmethod + def section_pairs(cls, text): + return ( + section._replace(value=Pair.parse(section.value)) + for section in cls.read(text, filter_=cls.valid) + if section.name is not None + ) + + @staticmethod + def read(text, filter_=None): + lines = filter(filter_, map(str.strip, text.splitlines())) + name = None + for value in lines: + section_match = value.startswith('[') and value.endswith(']') + if section_match: + name = value.strip('[]') + continue + yield Pair(name, value) + + @staticmethod + def valid(line): + return line and not line.startswith('#') + + +class DeprecatedTuple: + """ + Provide subscript item access for backward compatibility. + + >>> recwarn = getfixture('recwarn') + >>> ep = EntryPoint(name='name', value='value', group='group') + >>> ep[:] + ('name', 'value', 'group') + >>> ep[0] + 'name' + >>> len(recwarn) + 1 + """ + + # Do not remove prior to 2023-05-01 or Python 3.13 + _warn = functools.partial( + warnings.warn, + "EntryPoint tuple interface is deprecated. Access members by name.", + DeprecationWarning, + stacklevel=pypy_partial(2), + ) + + def __getitem__(self, item): + self._warn() + return self._key()[item] + + +class EntryPoint(DeprecatedTuple): """An entry point as defined by Python packaging conventions. See `the packaging docs on entry points `_ for more information. + + >>> ep = EntryPoint( + ... name=None, group=None, value='package.module:attr [extra1, extra2]') + >>> ep.module + 'package.module' + >>> ep.attr + 'attr' + >>> ep.extras + ['extra1', 'extra2'] """ pattern = re.compile( r'(?P[\w.]+)\s*' r'(:\s*(?P[\w.]+)\s*)?' r'((?P\[.*\])\s*)?$' - ) + ) """ A regular expression describing the syntax for an entry point, which might look like: @@ -96,6 +190,15 @@ class EntryPoint( following the attr, and following any extras. """ + name: str + value: str + group: str + + dist: Optional['Distribution'] = None + + def __init__(self, name, value, group): + vars(self).update(name=name, value=value, group=group) + def load(self): """Load the entry point from its definition. If only a module is indicated by the value, return that module. Otherwise, @@ -119,39 +222,104 @@ class EntryPoint( @property def extras(self): match = self.pattern.match(self.value) - return list(re.finditer(r'\w+', match.group('extras') or '')) + return re.findall(r'\w+', match.group('extras') or '') - @classmethod - def _from_config(cls, config): - return [ - cls(name, value, group) - for group in config.sections() - for name, value in config.items(group) - ] + def _for(self, dist): + vars(self).update(dist=dist) + return self - @classmethod - def _from_text(cls, text): - config = ConfigParser(delimiters='=') - # case sensitive: https://stackoverflow.com/q/1611799/812183 - config.optionxform = str - try: - config.read_string(text) - except AttributeError: # pragma: nocover - # Python 2 has no read_string - config.readfp(io.StringIO(text)) - return EntryPoint._from_config(config) - - def __iter__(self): + def matches(self, **params): """ - Supply iter so one may construct dicts of EntryPoints easily. - """ - return iter((self.name, self)) + EntryPoint matches the given parameters. - def __reduce__(self): + >>> ep = EntryPoint(group='foo', name='bar', value='bing:bong [extra1, extra2]') + >>> ep.matches(group='foo') + True + >>> ep.matches(name='bar', value='bing:bong [extra1, extra2]') + True + >>> ep.matches(group='foo', name='other') + False + >>> ep.matches() + True + >>> ep.matches(extras=['extra1', 'extra2']) + True + >>> ep.matches(module='bing') + True + >>> ep.matches(attr='bong') + True + """ + attrs = (getattr(self, param) for param in params) + return all(map(operator.eq, params.values(), attrs)) + + def _key(self): + return self.name, self.value, self.group + + def __lt__(self, other): + return self._key() < other._key() + + def __eq__(self, other): + return self._key() == other._key() + + def __setattr__(self, name, value): + raise AttributeError("EntryPoint objects are immutable.") + + def __repr__(self): return ( - self.__class__, - (self.name, self.value, self.group), - ) + f'EntryPoint(name={self.name!r}, value={self.value!r}, ' + f'group={self.group!r})' + ) + + def __hash__(self): + return hash(self._key()) + + +class EntryPoints(tuple): + """ + An immutable collection of selectable EntryPoint objects. + """ + + __slots__ = () + + def __getitem__(self, name): # -> EntryPoint: + """ + Get the EntryPoint in self matching name. + """ + try: + return next(iter(self.select(name=name))) + except StopIteration: + raise KeyError(name) + + def select(self, **params): + """ + Select entry points from self that match the + given parameters (typically group and/or name). + """ + return EntryPoints(ep for ep in self if _py39compat.ep_matches(ep, **params)) + + @property + def names(self): + """ + Return the set of all names of all entry points. + """ + return {ep.name for ep in self} + + @property + def groups(self): + """ + Return the set of all groups of all entry points. + """ + return {ep.group for ep in self} + + @classmethod + def _from_text_for(cls, text, dist): + return cls(ep._for(dist) for ep in cls._from_text(text)) + + @staticmethod + def _from_text(text): + return ( + EntryPoint(name=item.value.name, value=item.value.value, group=item.name) + for item in Sectioned.section_pairs(text or '') + ) class PackagePath(pathlib.PurePosixPath): @@ -175,7 +343,7 @@ class FileHash: self.mode, _, self.value = spec.partition('=') def __repr__(self): - return ''.format(self.mode, self.value) + return f'' class Distribution: @@ -197,7 +365,7 @@ class Distribution: """ @classmethod - def from_name(cls, name): + def from_name(cls, name: str): """Return the Distribution for the given package name. :param name: The name of the distribution package to search for. @@ -205,13 +373,13 @@ class Distribution: package, if found. :raises PackageNotFoundError: When the named package's distribution metadata cannot be found. + :raises ValueError: When an invalid value is supplied for name. """ - for resolver in cls._discover_resolvers(): - dists = resolver(DistributionFinder.Context(name=name)) - dist = next(iter(dists), None) - if dist is not None: - return dist - else: + if not name: + raise ValueError("A distribution name is required.") + try: + return next(cls.discover(name=name)) + except StopIteration: raise PackageNotFoundError(name) @classmethod @@ -229,9 +397,8 @@ class Distribution: raise ValueError("cannot accept context and kwargs") context = context or DistributionFinder.Context(**kwargs) return itertools.chain.from_iterable( - resolver(context) - for resolver in cls._discover_resolvers() - ) + resolver(context) for resolver in cls._discover_resolvers() + ) @staticmethod def at(path): @@ -246,24 +413,12 @@ class Distribution: def _discover_resolvers(): """Search the meta_path for resolvers.""" declared = ( - getattr(finder, 'find_distributions', None) - for finder in sys.meta_path - ) + getattr(finder, 'find_distributions', None) for finder in sys.meta_path + ) return filter(None, declared) - @classmethod - def _local(cls, root='.'): - from pep517 import build, meta - system = build.compat_system(root) - builder = functools.partial( - meta.build, - source_dir=root, - system=system, - ) - return PathDistribution(zipp.Path(meta.build_as_zip(builder))) - @property - def metadata(self): + def metadata(self) -> _meta.PackageMetadata: """Return the parsed metadata for this Distribution. The returned object will have keys that name the various bits of @@ -276,8 +431,18 @@ class Distribution: # effect is to just end up using the PathDistribution's self._path # (which points to the egg-info file) attribute unchanged. or self.read_text('') - ) - return email_message_from_string(text) + ) + return _adapters.Message(email.message_from_string(text)) + + @property + def name(self): + """Return the 'Name' metadata for the distribution package.""" + return self.metadata['Name'] + + @property + def _normalized_name(self): + """Return a normalized version of the name.""" + return Prepared.normalize(self.name) @property def version(self): @@ -286,7 +451,7 @@ class Distribution: @property def entry_points(self): - return EntryPoint._from_text(self.read_text('entry_points.txt')) + return EntryPoints._from_text_for(self.read_text('entry_points.txt'), self) @property def files(self): @@ -299,7 +464,6 @@ class Distribution: missing. Result may be empty if the metadata exists but is empty. """ - file_lines = self._read_files_distinfo() or self._read_files_egginfo() def make_file(name, hash=None, size_str=None): result = PackagePath(name) @@ -308,7 +472,11 @@ class Distribution: result.dist = self return result - return file_lines and list(starmap(make_file, csv.reader(file_lines))) + @pass_none + def make_files(lines): + return list(starmap(make_file, csv.reader(lines))) + + return make_files(self._read_files_distinfo() or self._read_files_egginfo()) def _read_files_distinfo(self): """ @@ -336,27 +504,11 @@ class Distribution: def _read_egg_info_reqs(self): source = self.read_text('requires.txt') - return source and self._deps_from_requires_text(source) + return pass_none(self._deps_from_requires_text)(source) @classmethod def _deps_from_requires_text(cls, source): - section_pairs = cls._read_sections(source.splitlines()) - sections = { - section: list(map(operator.itemgetter('line'), results)) - for section, results in - itertools.groupby(section_pairs, operator.itemgetter('section')) - } - return cls._convert_egg_info_reqs_to_simple_reqs(sections) - - @staticmethod - def _read_sections(lines): - section = None - for line in filter(None, lines): - section_match = re.match(r'\[(.*)\]$', line) - if section_match: - section = section_match.group(1) - continue - yield locals() + return cls._convert_egg_info_reqs_to_simple_reqs(Sectioned.read(source)) @staticmethod def _convert_egg_info_reqs_to_simple_reqs(sections): @@ -369,20 +521,29 @@ class Distribution: requirement. This method converts the former to the latter. See _test_deps_from_requires_text for an example. """ - def make_condition(name): - return name and 'extra == "{name}"'.format(name=name) - def parse_condition(section): + def make_condition(name): + return name and f'extra == "{name}"' + + def quoted_marker(section): section = section or '' extra, sep, markers = section.partition(':') if extra and markers: - markers = '({markers})'.format(markers=markers) + markers = f'({markers})' conditions = list(filter(None, [markers, make_condition(extra)])) return '; ' + ' and '.join(conditions) if conditions else '' - for section, deps in sections.items(): - for dep in deps: - yield dep + parse_condition(section) + def url_req_space(req): + """ + PEP 508 requires a space between the url_spec and the quoted_marker. + Ref python/importlib_metadata#357. + """ + # '@' is uniquely indicative of a url_req. + return ' ' * ('@' in req) + + for section in sections: + space = url_req_space(section.value) + yield section.value + space + quoted_marker(section.name) class DistributionFinder(MetaPathFinder): @@ -414,10 +575,11 @@ class DistributionFinder(MetaPathFinder): @property def path(self): """ - The path that a distribution finder should search. + The sequence of directory path that a distribution finder + should search. - Typically refers to Python package paths and defaults - to ``sys.path``. + Typically refers to Python installed package paths such as + "site-packages" directories and defaults to ``sys.path``. """ return vars(self).get('path', sys.path) @@ -436,11 +598,17 @@ class FastPath: """ Micro-optimized class for searching a path for children. + + >>> FastPath('').children() + ['...'] """ + @functools.lru_cache() # type: ignore + def __new__(cls, root): + return super().__new__(cls) + def __init__(self, root): - self.root = str(root) - self.base = os.path.basename(self.root).lower() + self.root = root def joinpath(self, child): return pathlib.Path(self.root, child) @@ -457,34 +625,72 @@ class FastPath: names = zip_path.root.namelist() self.joinpath = zip_path.joinpath - return unique_ordered( - child.split(posixpath.sep, 1)[0] - for child in names - ) + return dict.fromkeys(child.split(posixpath.sep, 1)[0] for child in names) def search(self, name): - return ( - self.joinpath(child) - for child in self.children() - if name.matches(child, self.base) - ) + return self.lookup(self.mtime).search(name) + + @property + def mtime(self): + with suppress(OSError): + return os.stat(self.root).st_mtime + self.lookup.cache_clear() + + @method_cache + def lookup(self, mtime): + return Lookup(self) + + +class Lookup: + def __init__(self, path: FastPath): + base = os.path.basename(path.root).lower() + base_is_egg = base.endswith(".egg") + self.infos = FreezableDefaultDict(list) + self.eggs = FreezableDefaultDict(list) + + for child in path.children(): + low = child.lower() + if low.endswith((".dist-info", ".egg-info")): + # rpartition is faster than splitext and suitable for this purpose. + name = low.rpartition(".")[0].partition("-")[0] + normalized = Prepared.normalize(name) + self.infos[normalized].append(path.joinpath(child)) + elif base_is_egg and low == "egg-info": + name = base.rpartition(".")[0].partition("-")[0] + legacy_normalized = Prepared.legacy_normalize(name) + self.eggs[legacy_normalized].append(path.joinpath(child)) + + self.infos.freeze() + self.eggs.freeze() + + def search(self, prepared): + infos = ( + self.infos[prepared.normalized] + if prepared + else itertools.chain.from_iterable(self.infos.values()) + ) + eggs = ( + self.eggs[prepared.legacy_normalized] + if prepared + else itertools.chain.from_iterable(self.eggs.values()) + ) + return itertools.chain(infos, eggs) class Prepared: """ A prepared search for metadata on a possibly-named package. """ + normalized = None - suffixes = '.dist-info', '.egg-info' - exact_matches = [''][:0] + legacy_normalized = None def __init__(self, name): self.name = name if name is None: return self.normalized = self.normalize(name) - self.exact_matches = [ - self.normalized + suffix for suffix in self.suffixes] + self.legacy_normalized = self.legacy_normalize(name) @staticmethod def normalize(name): @@ -501,28 +707,8 @@ class Prepared: """ return name.lower().replace('-', '_') - def matches(self, cand, base): - low = cand.lower() - pre, ext = os.path.splitext(low) - name, sep, rest = pre.partition('-') - return ( - low in self.exact_matches - or ext in self.suffixes and ( - not self.normalized or - name.replace('.', '_') == self.normalized - ) - # legacy case: - or self.is_egg(base) and low == 'egg-info' - ) - - def is_egg(self, base): - normalized = self.legacy_normalize(self.name or '') - prefix = normalized + '-' if normalized else '' - versionless_egg_name = normalized + '.egg' if self.name else '' - return ( - base == versionless_egg_name - or base.startswith(prefix) - and base.endswith('.egg')) + def __bool__(self): + return bool(self.name) @install @@ -548,30 +734,67 @@ class MetadataPathFinder(NullFinder, DistributionFinder): @classmethod def _search_paths(cls, name, paths): """Find metadata directories in paths heuristically.""" + prepared = Prepared(name) return itertools.chain.from_iterable( - path.search(Prepared(name)) - for path in map(FastPath, paths) - ) + path.search(prepared) for path in map(FastPath, paths) + ) + + def invalidate_caches(cls): + FastPath.__new__.cache_clear() class PathDistribution(Distribution): - def __init__(self, path): - """Construct a distribution from a path to the metadata directory. + def __init__(self, path: SimplePath): + """Construct a distribution. - :param path: A pathlib.Path or similar object supporting - .joinpath(), __div__, .parent, and .read_text(). + :param path: SimplePath indicating the metadata directory. """ self._path = path def read_text(self, filename): - with suppress(FileNotFoundError, IsADirectoryError, KeyError, - NotADirectoryError, PermissionError): + with suppress( + FileNotFoundError, + IsADirectoryError, + KeyError, + NotADirectoryError, + PermissionError, + ): return self._path.joinpath(filename).read_text(encoding='utf-8') + read_text.__doc__ = Distribution.read_text.__doc__ def locate_file(self, path): return self._path.parent / path + @property + def _normalized_name(self): + """ + Performance optimization: where possible, resolve the + normalized name from the file system path. + """ + stem = os.path.basename(str(self._path)) + return ( + pass_none(Prepared.normalize)(self._name_from_stem(stem)) + or super()._normalized_name + ) + + @staticmethod + def _name_from_stem(stem): + """ + >>> PathDistribution._name_from_stem('foo-3.0.egg-info') + 'foo' + >>> PathDistribution._name_from_stem('CherryPy-3.0.dist-info') + 'CherryPy' + >>> PathDistribution._name_from_stem('face.egg-info') + 'face' + >>> PathDistribution._name_from_stem('foo.bar') + """ + filename, ext = os.path.splitext(stem) + if ext not in ('.dist-info', '.egg-info'): + return + name, sep, rest = filename.partition('-') + return name + def distribution(distribution_name): """Get the ``Distribution`` instance for the named package. @@ -590,11 +813,11 @@ def distributions(**kwargs): return Distribution.discover(**kwargs) -def metadata(distribution_name): +def metadata(distribution_name) -> _meta.PackageMetadata: """Get the metadata for the named package. :param distribution_name: The name of the distribution package to query. - :return: An email.Message containing the parsed metadata. + :return: A PackageMetadata containing the parsed metadata. """ return Distribution.from_name(distribution_name).metadata @@ -609,20 +832,28 @@ def version(distribution_name): return distribution(distribution_name).version -def entry_points(): +_unique = functools.partial( + unique_everseen, + key=_py39compat.normalized_name, +) +""" +Wrapper for ``distributions`` to return unique distributions by name. +""" + + +def entry_points(**params) -> EntryPoints: """Return EntryPoint objects for all installed packages. - :return: EntryPoint objects for all installed packages. + Pass selection parameters (group or name) to filter the + result to entry points matching those properties (see + EntryPoints.select()). + + :return: EntryPoints for all installed packages. """ eps = itertools.chain.from_iterable( - dist.entry_points for dist in distributions()) - by_group = operator.attrgetter('group') - ordered = sorted(eps, key=by_group) - grouped = itertools.groupby(ordered, by_group) - return { - group: tuple(eps) - for group, eps in grouped - } + dist.entry_points for dist in _unique(distributions()) + ) + return EntryPoints(eps).select(**params) def files(distribution_name): @@ -639,6 +870,35 @@ def requires(distribution_name): Return a list of requirements for the named package. :return: An iterator of requirements, suitable for - packaging.requirement.Requirement. + packaging.requirement.Requirement. """ return distribution(distribution_name).requires + + +def packages_distributions() -> Mapping[str, List[str]]: + """ + Return a mapping of top-level packages to their + distributions. + + >>> import collections.abc + >>> pkgs = packages_distributions() + >>> all(isinstance(dist, collections.abc.Sequence) for dist in pkgs.values()) + True + """ + pkg_to_dist = collections.defaultdict(list) + for dist in distributions(): + for pkg in _top_level_declared(dist) or _top_level_inferred(dist): + pkg_to_dist[pkg].append(dist.metadata['Name']) + return dict(pkg_to_dist) + + +def _top_level_declared(dist): + return (dist.read_text('top_level.txt') or '').split() + + +def _top_level_inferred(dist): + return { + f.parts[0] if len(f.parts) > 1 else f.with_suffix('').name + for f in always_iterable(dist.files) + if f.suffix == ".py" + } diff --git a/libs/common/importlib_metadata/_adapters.py b/libs/common/importlib_metadata/_adapters.py new file mode 100644 index 00000000..aa460d3e --- /dev/null +++ b/libs/common/importlib_metadata/_adapters.py @@ -0,0 +1,68 @@ +import re +import textwrap +import email.message + +from ._text import FoldedCase + + +class Message(email.message.Message): + multiple_use_keys = set( + map( + FoldedCase, + [ + 'Classifier', + 'Obsoletes-Dist', + 'Platform', + 'Project-URL', + 'Provides-Dist', + 'Provides-Extra', + 'Requires-Dist', + 'Requires-External', + 'Supported-Platform', + 'Dynamic', + ], + ) + ) + """ + Keys that may be indicated multiple times per PEP 566. + """ + + def __new__(cls, orig: email.message.Message): + res = super().__new__(cls) + vars(res).update(vars(orig)) + return res + + def __init__(self, *args, **kwargs): + self._headers = self._repair_headers() + + # suppress spurious error from mypy + def __iter__(self): + return super().__iter__() + + def _repair_headers(self): + def redent(value): + "Correct for RFC822 indentation" + if not value or '\n' not in value: + return value + return textwrap.dedent(' ' * 8 + value) + + headers = [(key, redent(value)) for key, value in vars(self)['_headers']] + if self._payload: + headers.append(('Description', self.get_payload())) + return headers + + @property + def json(self): + """ + Convert PackageMetadata to a JSON-compatible format + per PEP 0566. + """ + + def transform(key): + value = self.get_all(key) if key in self.multiple_use_keys else self[key] + if key == 'Keywords': + value = re.split(r'\s+', value) + tk = key.lower().replace('-', '_') + return tk, value + + return dict(map(transform, map(FoldedCase, self))) diff --git a/libs/common/importlib_metadata/_collections.py b/libs/common/importlib_metadata/_collections.py new file mode 100644 index 00000000..cf0954e1 --- /dev/null +++ b/libs/common/importlib_metadata/_collections.py @@ -0,0 +1,30 @@ +import collections + + +# from jaraco.collections 3.3 +class FreezableDefaultDict(collections.defaultdict): + """ + Often it is desirable to prevent the mutation of + a default dict after its initial construction, such + as to prevent mutation during iteration. + + >>> dd = FreezableDefaultDict(list) + >>> dd[0].append('1') + >>> dd.freeze() + >>> dd[1] + [] + >>> len(dd) + 1 + """ + + def __missing__(self, key): + return getattr(self, '_frozen', super().__missing__)(key) + + def freeze(self): + self._frozen = lambda key: self.default_factory() + + +class Pair(collections.namedtuple('Pair', 'name value')): + @classmethod + def parse(cls, text): + return cls(*map(str.strip, text.split("=", 1))) diff --git a/libs/common/importlib_metadata/_compat.py b/libs/common/importlib_metadata/_compat.py index 303d4a22..3d78566e 100644 --- a/libs/common/importlib_metadata/_compat.py +++ b/libs/common/importlib_metadata/_compat.py @@ -1,59 +1,15 @@ -from __future__ import absolute_import, unicode_literals - -import io -import abc import sys -import email +import platform -if sys.version_info > (3,): # pragma: nocover - import builtins - from configparser import ConfigParser - import contextlib - FileNotFoundError = builtins.FileNotFoundError - IsADirectoryError = builtins.IsADirectoryError - NotADirectoryError = builtins.NotADirectoryError - PermissionError = builtins.PermissionError - map = builtins.map - from itertools import filterfalse -else: # pragma: nocover - from backports.configparser import ConfigParser - from itertools import imap as map # type: ignore - from itertools import ifilterfalse as filterfalse - import contextlib2 as contextlib - FileNotFoundError = IOError, OSError - IsADirectoryError = IOError, OSError - NotADirectoryError = IOError, OSError - PermissionError = IOError, OSError +__all__ = ['install', 'NullFinder', 'Protocol'] -str = type('') - -suppress = contextlib.suppress - -if sys.version_info > (3, 5): # pragma: nocover - import pathlib -else: # pragma: nocover - import pathlib2 as pathlib try: - ModuleNotFoundError = builtins.FileNotFoundError -except (NameError, AttributeError): # pragma: nocover - ModuleNotFoundError = ImportError # type: ignore - - -if sys.version_info >= (3,): # pragma: nocover - from importlib.abc import MetaPathFinder -else: # pragma: nocover - class MetaPathFinder(object): - __metaclass__ = abc.ABCMeta - - -__metaclass__ = type -__all__ = [ - 'install', 'NullFinder', 'MetaPathFinder', 'ModuleNotFoundError', - 'pathlib', 'ConfigParser', 'map', 'suppress', 'FileNotFoundError', - 'NotADirectoryError', 'email_message_from_string', - ] + from typing import Protocol +except ImportError: # pragma: no cover + # Python 3.7 compatibility + from typing_extensions import Protocol # type: ignore def install(cls): @@ -77,11 +33,12 @@ def disable_stdlib_finder(): See #91 for more background for rationale on this sketchy behavior. """ + def matches(finder): - return ( - getattr(finder, '__module__', None) == '_frozen_importlib_external' - and hasattr(finder, 'find_distributions') - ) + return getattr( + finder, '__module__', None + ) == '_frozen_importlib_external' and hasattr(finder, 'find_distributions') + for finder in filter(matches, sys.meta_path): # pragma: nocover del finder.find_distributions @@ -91,6 +48,7 @@ class NullFinder: A "Finder" (aka "MetaClassFinder") that never finds any modules, but may find distributions. """ + @staticmethod def find_spec(*args, **kwargs): return None @@ -104,49 +62,11 @@ class NullFinder: find_module = find_spec -def py2_message_from_string(text): # nocoverpy3 - # Work around https://bugs.python.org/issue25545 where - # email.message_from_string cannot handle Unicode on Python 2. - io_buffer = io.StringIO(text) - return email.message_from_file(io_buffer) - - -email_message_from_string = ( - py2_message_from_string - if sys.version_info < (3,) else - email.message_from_string - ) - - -class PyPy_repr: +def pypy_partial(val): """ - Override repr for EntryPoint objects on PyPy to avoid __iter__ access. - Ref #97, #102. + Adjust for variable stacklevel on partial under PyPy. + + Workaround for #327. """ - affected = hasattr(sys, 'pypy_version_info') - - def __compat_repr__(self): # pragma: nocover - def make_param(name): - value = getattr(self, name) - return '{name}={value!r}'.format(**locals()) - params = ', '.join(map(make_param, self._fields)) - return 'EntryPoint({params})'.format(**locals()) - - if affected: # pragma: nocover - __repr__ = __compat_repr__ - del affected - - -# from itertools recipes -def unique_everseen(iterable): # pragma: nocover - "List unique elements, preserving order. Remember all elements ever seen." - seen = set() - seen_add = seen.add - - for element in filterfalse(seen.__contains__, iterable): - seen_add(element) - yield element - - -unique_ordered = ( - unique_everseen if sys.version_info < (3, 7) else dict.fromkeys) + is_pypy = platform.python_implementation() == 'PyPy' + return val + is_pypy diff --git a/libs/common/importlib_metadata/_functools.py b/libs/common/importlib_metadata/_functools.py new file mode 100644 index 00000000..71f66bd0 --- /dev/null +++ b/libs/common/importlib_metadata/_functools.py @@ -0,0 +1,104 @@ +import types +import functools + + +# from jaraco.functools 3.3 +def method_cache(method, cache_wrapper=None): + """ + Wrap lru_cache to support storing the cache data in the object instances. + + Abstracts the common paradigm where the method explicitly saves an + underscore-prefixed protected property on first call and returns that + subsequently. + + >>> class MyClass: + ... calls = 0 + ... + ... @method_cache + ... def method(self, value): + ... self.calls += 1 + ... return value + + >>> a = MyClass() + >>> a.method(3) + 3 + >>> for x in range(75): + ... res = a.method(x) + >>> a.calls + 75 + + Note that the apparent behavior will be exactly like that of lru_cache + except that the cache is stored on each instance, so values in one + instance will not flush values from another, and when an instance is + deleted, so are the cached values for that instance. + + >>> b = MyClass() + >>> for x in range(35): + ... res = b.method(x) + >>> b.calls + 35 + >>> a.method(0) + 0 + >>> a.calls + 75 + + Note that if method had been decorated with ``functools.lru_cache()``, + a.calls would have been 76 (due to the cached value of 0 having been + flushed by the 'b' instance). + + Clear the cache with ``.cache_clear()`` + + >>> a.method.cache_clear() + + Same for a method that hasn't yet been called. + + >>> c = MyClass() + >>> c.method.cache_clear() + + Another cache wrapper may be supplied: + + >>> cache = functools.lru_cache(maxsize=2) + >>> MyClass.method2 = method_cache(lambda self: 3, cache_wrapper=cache) + >>> a = MyClass() + >>> a.method2() + 3 + + Caution - do not subsequently wrap the method with another decorator, such + as ``@property``, which changes the semantics of the function. + + See also + http://code.activestate.com/recipes/577452-a-memoize-decorator-for-instance-methods/ + for another implementation and additional justification. + """ + cache_wrapper = cache_wrapper or functools.lru_cache() + + def wrapper(self, *args, **kwargs): + # it's the first call, replace the method with a cached, bound method + bound_method = types.MethodType(method, self) + cached_method = cache_wrapper(bound_method) + setattr(self, method.__name__, cached_method) + return cached_method(*args, **kwargs) + + # Support cache clear even before cache has been created. + wrapper.cache_clear = lambda: None + + return wrapper + + +# From jaraco.functools 3.3 +def pass_none(func): + """ + Wrap func so it's not called if its first param is None + + >>> print_text = pass_none(print) + >>> print_text('text') + text + >>> print_text(None) + """ + + @functools.wraps(func) + def wrapper(param, *args, **kwargs): + if param is not None: + return func(param, *args, **kwargs) + + return wrapper diff --git a/libs/common/importlib_metadata/_itertools.py b/libs/common/importlib_metadata/_itertools.py new file mode 100644 index 00000000..d4ca9b91 --- /dev/null +++ b/libs/common/importlib_metadata/_itertools.py @@ -0,0 +1,73 @@ +from itertools import filterfalse + + +def unique_everseen(iterable, key=None): + "List unique elements, preserving order. Remember all elements ever seen." + # unique_everseen('AAAABBBCCDAABBB') --> A B C D + # unique_everseen('ABBCcAD', str.lower) --> A B C D + seen = set() + seen_add = seen.add + if key is None: + for element in filterfalse(seen.__contains__, iterable): + seen_add(element) + yield element + else: + for element in iterable: + k = key(element) + if k not in seen: + seen_add(k) + yield element + + +# copied from more_itertools 8.8 +def always_iterable(obj, base_type=(str, bytes)): + """If *obj* is iterable, return an iterator over its items:: + + >>> obj = (1, 2, 3) + >>> list(always_iterable(obj)) + [1, 2, 3] + + If *obj* is not iterable, return a one-item iterable containing *obj*:: + + >>> obj = 1 + >>> list(always_iterable(obj)) + [1] + + If *obj* is ``None``, return an empty iterable: + + >>> obj = None + >>> list(always_iterable(None)) + [] + + By default, binary and text strings are not considered iterable:: + + >>> obj = 'foo' + >>> list(always_iterable(obj)) + ['foo'] + + If *base_type* is set, objects for which ``isinstance(obj, base_type)`` + returns ``True`` won't be considered iterable. + + >>> obj = {'a': 1} + >>> list(always_iterable(obj)) # Iterate over the dict's keys + ['a'] + >>> list(always_iterable(obj, base_type=dict)) # Treat dicts as a unit + [{'a': 1}] + + Set *base_type* to ``None`` to avoid any special handling and treat objects + Python considers iterable as iterable: + + >>> obj = 'foo' + >>> list(always_iterable(obj, base_type=None)) + ['f', 'o', 'o'] + """ + if obj is None: + return iter(()) + + if (base_type is not None) and isinstance(obj, base_type): + return iter((obj,)) + + try: + return iter(obj) + except TypeError: + return iter((obj,)) diff --git a/libs/common/importlib_metadata/_meta.py b/libs/common/importlib_metadata/_meta.py new file mode 100644 index 00000000..259b15ba --- /dev/null +++ b/libs/common/importlib_metadata/_meta.py @@ -0,0 +1,49 @@ +from ._compat import Protocol +from typing import Any, Dict, Iterator, List, TypeVar, Union + + +_T = TypeVar("_T") + + +class PackageMetadata(Protocol): + def __len__(self) -> int: + ... # pragma: no cover + + def __contains__(self, item: str) -> bool: + ... # pragma: no cover + + def __getitem__(self, key: str) -> str: + ... # pragma: no cover + + def __iter__(self) -> Iterator[str]: + ... # pragma: no cover + + def get_all(self, name: str, failobj: _T = ...) -> Union[List[Any], _T]: + """ + Return all values associated with a possibly multi-valued key. + """ + + @property + def json(self) -> Dict[str, Union[str, List[str]]]: + """ + A JSON-compatible form of the metadata. + """ + + +class SimplePath(Protocol[_T]): + """ + A minimal subset of pathlib.Path required by PathDistribution. + """ + + def joinpath(self) -> _T: + ... # pragma: no cover + + def __truediv__(self, other: Union[str, _T]) -> _T: + ... # pragma: no cover + + @property + def parent(self) -> _T: + ... # pragma: no cover + + def read_text(self) -> str: + ... # pragma: no cover diff --git a/libs/common/importlib_metadata/_py39compat.py b/libs/common/importlib_metadata/_py39compat.py new file mode 100644 index 00000000..cde4558f --- /dev/null +++ b/libs/common/importlib_metadata/_py39compat.py @@ -0,0 +1,35 @@ +""" +Compatibility layer with Python 3.8/3.9 +""" +from typing import TYPE_CHECKING, Any, Optional + +if TYPE_CHECKING: # pragma: no cover + # Prevent circular imports on runtime. + from . import Distribution, EntryPoint +else: + Distribution = EntryPoint = Any + + +def normalized_name(dist: Distribution) -> Optional[str]: + """ + Honor name normalization for distributions that don't provide ``_normalized_name``. + """ + try: + return dist._normalized_name + except AttributeError: + from . import Prepared # -> delay to prevent circular imports. + + return Prepared.normalize(getattr(dist, "name", None) or dist.metadata['Name']) + + +def ep_matches(ep: EntryPoint, **params) -> bool: + """ + Workaround for ``EntryPoint`` objects without the ``matches`` method. + """ + try: + return ep.matches(**params) + except AttributeError: + from . import EntryPoint # -> delay to prevent circular imports. + + # Reconstruct the EntryPoint object to make sure it is compatible. + return EntryPoint(ep.name, ep.value, ep.group).matches(**params) diff --git a/libs/common/importlib_metadata/_text.py b/libs/common/importlib_metadata/_text.py new file mode 100644 index 00000000..c88cfbb2 --- /dev/null +++ b/libs/common/importlib_metadata/_text.py @@ -0,0 +1,99 @@ +import re + +from ._functools import method_cache + + +# from jaraco.text 3.5 +class FoldedCase(str): + """ + A case insensitive string class; behaves just like str + except compares equal when the only variation is case. + + >>> s = FoldedCase('hello world') + + >>> s == 'Hello World' + True + + >>> 'Hello World' == s + True + + >>> s != 'Hello World' + False + + >>> s.index('O') + 4 + + >>> s.split('O') + ['hell', ' w', 'rld'] + + >>> sorted(map(FoldedCase, ['GAMMA', 'alpha', 'Beta'])) + ['alpha', 'Beta', 'GAMMA'] + + Sequence membership is straightforward. + + >>> "Hello World" in [s] + True + >>> s in ["Hello World"] + True + + You may test for set inclusion, but candidate and elements + must both be folded. + + >>> FoldedCase("Hello World") in {s} + True + >>> s in {FoldedCase("Hello World")} + True + + String inclusion works as long as the FoldedCase object + is on the right. + + >>> "hello" in FoldedCase("Hello World") + True + + But not if the FoldedCase object is on the left: + + >>> FoldedCase('hello') in 'Hello World' + False + + In that case, use in_: + + >>> FoldedCase('hello').in_('Hello World') + True + + >>> FoldedCase('hello') > FoldedCase('Hello') + False + """ + + def __lt__(self, other): + return self.lower() < other.lower() + + def __gt__(self, other): + return self.lower() > other.lower() + + def __eq__(self, other): + return self.lower() == other.lower() + + def __ne__(self, other): + return self.lower() != other.lower() + + def __hash__(self): + return hash(self.lower()) + + def __contains__(self, other): + return super().lower().__contains__(other.lower()) + + def in_(self, other): + "Does self appear in other?" + return self in FoldedCase(other) + + # cache lower since it's likely to be called frequently. + @method_cache + def lower(self): + return super().lower() + + def index(self, sub): + return self.lower().index(sub.lower()) + + def split(self, splitter=' ', maxsplit=0): + pattern = re.compile(re.escape(splitter), re.I) + return pattern.split(self, maxsplit) diff --git a/libs/common/importlib_metadata/py.typed b/libs/common/importlib_metadata/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/libs/common/importlib_resources/__init__.py b/libs/common/importlib_resources/__init__.py new file mode 100644 index 00000000..34e3a995 --- /dev/null +++ b/libs/common/importlib_resources/__init__.py @@ -0,0 +1,36 @@ +"""Read resources contained within a package.""" + +from ._common import ( + as_file, + files, + Package, +) + +from ._legacy import ( + contents, + open_binary, + read_binary, + open_text, + read_text, + is_resource, + path, + Resource, +) + +from .abc import ResourceReader + + +__all__ = [ + 'Package', + 'Resource', + 'ResourceReader', + 'as_file', + 'contents', + 'files', + 'is_resource', + 'open_binary', + 'open_text', + 'path', + 'read_binary', + 'read_text', +] diff --git a/libs/common/importlib_resources/_adapters.py b/libs/common/importlib_resources/_adapters.py new file mode 100644 index 00000000..ea363d86 --- /dev/null +++ b/libs/common/importlib_resources/_adapters.py @@ -0,0 +1,170 @@ +from contextlib import suppress +from io import TextIOWrapper + +from . import abc + + +class SpecLoaderAdapter: + """ + Adapt a package spec to adapt the underlying loader. + """ + + def __init__(self, spec, adapter=lambda spec: spec.loader): + self.spec = spec + self.loader = adapter(spec) + + def __getattr__(self, name): + return getattr(self.spec, name) + + +class TraversableResourcesLoader: + """ + Adapt a loader to provide TraversableResources. + """ + + def __init__(self, spec): + self.spec = spec + + def get_resource_reader(self, name): + return CompatibilityFiles(self.spec)._native() + + +def _io_wrapper(file, mode='r', *args, **kwargs): + if mode == 'r': + return TextIOWrapper(file, *args, **kwargs) + elif mode == 'rb': + return file + raise ValueError( + "Invalid mode value '{}', only 'r' and 'rb' are supported".format(mode) + ) + + +class CompatibilityFiles: + """ + Adapter for an existing or non-existent resource reader + to provide a compatibility .files(). + """ + + class SpecPath(abc.Traversable): + """ + Path tied to a module spec. + Can be read and exposes the resource reader children. + """ + + def __init__(self, spec, reader): + self._spec = spec + self._reader = reader + + def iterdir(self): + if not self._reader: + return iter(()) + return iter( + CompatibilityFiles.ChildPath(self._reader, path) + for path in self._reader.contents() + ) + + def is_file(self): + return False + + is_dir = is_file + + def joinpath(self, other): + if not self._reader: + return CompatibilityFiles.OrphanPath(other) + return CompatibilityFiles.ChildPath(self._reader, other) + + @property + def name(self): + return self._spec.name + + def open(self, mode='r', *args, **kwargs): + return _io_wrapper(self._reader.open_resource(None), mode, *args, **kwargs) + + class ChildPath(abc.Traversable): + """ + Path tied to a resource reader child. + Can be read but doesn't expose any meaningful children. + """ + + def __init__(self, reader, name): + self._reader = reader + self._name = name + + def iterdir(self): + return iter(()) + + def is_file(self): + return self._reader.is_resource(self.name) + + def is_dir(self): + return not self.is_file() + + def joinpath(self, other): + return CompatibilityFiles.OrphanPath(self.name, other) + + @property + def name(self): + return self._name + + def open(self, mode='r', *args, **kwargs): + return _io_wrapper( + self._reader.open_resource(self.name), mode, *args, **kwargs + ) + + class OrphanPath(abc.Traversable): + """ + Orphan path, not tied to a module spec or resource reader. + Can't be read and doesn't expose any meaningful children. + """ + + def __init__(self, *path_parts): + if len(path_parts) < 1: + raise ValueError('Need at least one path part to construct a path') + self._path = path_parts + + def iterdir(self): + return iter(()) + + def is_file(self): + return False + + is_dir = is_file + + def joinpath(self, other): + return CompatibilityFiles.OrphanPath(*self._path, other) + + @property + def name(self): + return self._path[-1] + + def open(self, mode='r', *args, **kwargs): + raise FileNotFoundError("Can't open orphan path") + + def __init__(self, spec): + self.spec = spec + + @property + def _reader(self): + with suppress(AttributeError): + return self.spec.loader.get_resource_reader(self.spec.name) + + def _native(self): + """ + Return the native reader if it supports files(). + """ + reader = self._reader + return reader if hasattr(reader, 'files') else self + + def __getattr__(self, attr): + return getattr(self._reader, attr) + + def files(self): + return CompatibilityFiles.SpecPath(self.spec, self._reader) + + +def wrap_spec(package): + """ + Construct a package spec with traversable compatibility + on the spec/loader/reader. + """ + return SpecLoaderAdapter(package.__spec__, TraversableResourcesLoader) diff --git a/libs/common/importlib_resources/_common.py b/libs/common/importlib_resources/_common.py new file mode 100644 index 00000000..9f19784d --- /dev/null +++ b/libs/common/importlib_resources/_common.py @@ -0,0 +1,207 @@ +import os +import pathlib +import tempfile +import functools +import contextlib +import types +import importlib +import inspect +import warnings +import itertools + +from typing import Union, Optional, cast +from .abc import ResourceReader, Traversable + +from ._compat import wrap_spec + +Package = Union[types.ModuleType, str] +Anchor = Package + + +def package_to_anchor(func): + """ + Replace 'package' parameter as 'anchor' and warn about the change. + + Other errors should fall through. + + >>> files('a', 'b') + Traceback (most recent call last): + TypeError: files() takes from 0 to 1 positional arguments but 2 were given + """ + undefined = object() + + @functools.wraps(func) + def wrapper(anchor=undefined, package=undefined): + if package is not undefined: + if anchor is not undefined: + return func(anchor, package) + warnings.warn( + "First parameter to files is renamed to 'anchor'", + DeprecationWarning, + stacklevel=2, + ) + return func(package) + elif anchor is undefined: + return func() + return func(anchor) + + return wrapper + + +@package_to_anchor +def files(anchor: Optional[Anchor] = None) -> Traversable: + """ + Get a Traversable resource for an anchor. + """ + return from_package(resolve(anchor)) + + +def get_resource_reader(package: types.ModuleType) -> Optional[ResourceReader]: + """ + Return the package's loader if it's a ResourceReader. + """ + # We can't use + # a issubclass() check here because apparently abc.'s __subclasscheck__() + # hook wants to create a weak reference to the object, but + # zipimport.zipimporter does not support weak references, resulting in a + # TypeError. That seems terrible. + spec = package.__spec__ + reader = getattr(spec.loader, 'get_resource_reader', None) # type: ignore + if reader is None: + return None + return reader(spec.name) # type: ignore + + +@functools.singledispatch +def resolve(cand: Optional[Anchor]) -> types.ModuleType: + return cast(types.ModuleType, cand) + + +@resolve.register +def _(cand: str) -> types.ModuleType: + return importlib.import_module(cand) + + +@resolve.register +def _(cand: None) -> types.ModuleType: + return resolve(_infer_caller().f_globals['__name__']) + + +def _infer_caller(): + """ + Walk the stack and find the frame of the first caller not in this module. + """ + + def is_this_file(frame_info): + return frame_info.filename == __file__ + + def is_wrapper(frame_info): + return frame_info.function == 'wrapper' + + not_this_file = itertools.filterfalse(is_this_file, inspect.stack()) + # also exclude 'wrapper' due to singledispatch in the call stack + callers = itertools.filterfalse(is_wrapper, not_this_file) + return next(callers).frame + + +def from_package(package: types.ModuleType): + """ + Return a Traversable object for the given package. + + """ + spec = wrap_spec(package) + reader = spec.loader.get_resource_reader(spec.name) + return reader.files() + + +@contextlib.contextmanager +def _tempfile( + reader, + suffix='', + # gh-93353: Keep a reference to call os.remove() in late Python + # finalization. + *, + _os_remove=os.remove, +): + # Not using tempfile.NamedTemporaryFile as it leads to deeper 'try' + # blocks due to the need to close the temporary file to work on Windows + # properly. + fd, raw_path = tempfile.mkstemp(suffix=suffix) + try: + try: + os.write(fd, reader()) + finally: + os.close(fd) + del reader + yield pathlib.Path(raw_path) + finally: + try: + _os_remove(raw_path) + except FileNotFoundError: + pass + + +def _temp_file(path): + return _tempfile(path.read_bytes, suffix=path.name) + + +def _is_present_dir(path: Traversable) -> bool: + """ + Some Traversables implement ``is_dir()`` to raise an + exception (i.e. ``FileNotFoundError``) when the + directory doesn't exist. This function wraps that call + to always return a boolean and only return True + if there's a dir and it exists. + """ + with contextlib.suppress(FileNotFoundError): + return path.is_dir() + return False + + +@functools.singledispatch +def as_file(path): + """ + Given a Traversable object, return that object as a + path on the local file system in a context manager. + """ + return _temp_dir(path) if _is_present_dir(path) else _temp_file(path) + + +@as_file.register(pathlib.Path) +@contextlib.contextmanager +def _(path): + """ + Degenerate behavior for pathlib.Path objects. + """ + yield path + + +@contextlib.contextmanager +def _temp_path(dir: tempfile.TemporaryDirectory): + """ + Wrap tempfile.TemporyDirectory to return a pathlib object. + """ + with dir as result: + yield pathlib.Path(result) + + +@contextlib.contextmanager +def _temp_dir(path): + """ + Given a traversable dir, recursively replicate the whole tree + to the file system in a context manager. + """ + assert path.is_dir() + with _temp_path(tempfile.TemporaryDirectory()) as temp_dir: + yield _write_contents(temp_dir, path) + + +def _write_contents(target, source): + child = target.joinpath(source.name) + if source.is_dir(): + child.mkdir() + for item in source.iterdir(): + _write_contents(child, item) + else: + child.open('wb').write(source.read_bytes()) + return child diff --git a/libs/common/importlib_resources/_compat.py b/libs/common/importlib_resources/_compat.py new file mode 100644 index 00000000..8d7ade08 --- /dev/null +++ b/libs/common/importlib_resources/_compat.py @@ -0,0 +1,108 @@ +# flake8: noqa + +import abc +import os +import sys +import pathlib +from contextlib import suppress +from typing import Union + + +if sys.version_info >= (3, 10): + from zipfile import Path as ZipPath # type: ignore +else: + from zipp import Path as ZipPath # type: ignore + + +try: + from typing import runtime_checkable # type: ignore +except ImportError: + + def runtime_checkable(cls): # type: ignore + return cls + + +try: + from typing import Protocol # type: ignore +except ImportError: + Protocol = abc.ABC # type: ignore + + +class TraversableResourcesLoader: + """ + Adapt loaders to provide TraversableResources and other + compatibility. + + Used primarily for Python 3.9 and earlier where the native + loaders do not yet implement TraversableResources. + """ + + def __init__(self, spec): + self.spec = spec + + @property + def path(self): + return self.spec.origin + + def get_resource_reader(self, name): + from . import readers, _adapters + + def _zip_reader(spec): + with suppress(AttributeError): + return readers.ZipReader(spec.loader, spec.name) + + def _namespace_reader(spec): + with suppress(AttributeError, ValueError): + return readers.NamespaceReader(spec.submodule_search_locations) + + def _available_reader(spec): + with suppress(AttributeError): + return spec.loader.get_resource_reader(spec.name) + + def _native_reader(spec): + reader = _available_reader(spec) + return reader if hasattr(reader, 'files') else None + + def _file_reader(spec): + try: + path = pathlib.Path(self.path) + except TypeError: + return None + if path.exists(): + return readers.FileReader(self) + + return ( + # native reader if it supplies 'files' + _native_reader(self.spec) + or + # local ZipReader if a zip module + _zip_reader(self.spec) + or + # local NamespaceReader if a namespace module + _namespace_reader(self.spec) + or + # local FileReader + _file_reader(self.spec) + # fallback - adapt the spec ResourceReader to TraversableReader + or _adapters.CompatibilityFiles(self.spec) + ) + + +def wrap_spec(package): + """ + Construct a package spec with traversable compatibility + on the spec/loader/reader. + + Supersedes _adapters.wrap_spec to use TraversableResourcesLoader + from above for older Python compatibility (<3.10). + """ + from . import _adapters + + return _adapters.SpecLoaderAdapter(package.__spec__, TraversableResourcesLoader) + + +if sys.version_info >= (3, 9): + StrPath = Union[str, os.PathLike[str]] +else: + # PathLike is only subscriptable at runtime in 3.9+ + StrPath = Union[str, "os.PathLike[str]"] diff --git a/libs/common/importlib_resources/_itertools.py b/libs/common/importlib_resources/_itertools.py new file mode 100644 index 00000000..cce05582 --- /dev/null +++ b/libs/common/importlib_resources/_itertools.py @@ -0,0 +1,35 @@ +from itertools import filterfalse + +from typing import ( + Callable, + Iterable, + Iterator, + Optional, + Set, + TypeVar, + Union, +) + +# Type and type variable definitions +_T = TypeVar('_T') +_U = TypeVar('_U') + + +def unique_everseen( + iterable: Iterable[_T], key: Optional[Callable[[_T], _U]] = None +) -> Iterator[_T]: + "List unique elements, preserving order. Remember all elements ever seen." + # unique_everseen('AAAABBBCCDAABBB') --> A B C D + # unique_everseen('ABBCcAD', str.lower) --> A B C D + seen: Set[Union[_T, _U]] = set() + seen_add = seen.add + if key is None: + for element in filterfalse(seen.__contains__, iterable): + seen_add(element) + yield element + else: + for element in iterable: + k = key(element) + if k not in seen: + seen_add(k) + yield element diff --git a/libs/common/importlib_resources/_legacy.py b/libs/common/importlib_resources/_legacy.py new file mode 100644 index 00000000..b1ea8105 --- /dev/null +++ b/libs/common/importlib_resources/_legacy.py @@ -0,0 +1,120 @@ +import functools +import os +import pathlib +import types +import warnings + +from typing import Union, Iterable, ContextManager, BinaryIO, TextIO, Any + +from . import _common + +Package = Union[types.ModuleType, str] +Resource = str + + +def deprecated(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + warnings.warn( + f"{func.__name__} is deprecated. Use files() instead. " + "Refer to https://importlib-resources.readthedocs.io" + "/en/latest/using.html#migrating-from-legacy for migration advice.", + DeprecationWarning, + stacklevel=2, + ) + return func(*args, **kwargs) + + return wrapper + + +def normalize_path(path: Any) -> str: + """Normalize a path by ensuring it is a string. + + If the resulting string contains path separators, an exception is raised. + """ + str_path = str(path) + parent, file_name = os.path.split(str_path) + if parent: + raise ValueError(f'{path!r} must be only a file name') + return file_name + + +@deprecated +def open_binary(package: Package, resource: Resource) -> BinaryIO: + """Return a file-like object opened for binary reading of the resource.""" + return (_common.files(package) / normalize_path(resource)).open('rb') + + +@deprecated +def read_binary(package: Package, resource: Resource) -> bytes: + """Return the binary contents of the resource.""" + return (_common.files(package) / normalize_path(resource)).read_bytes() + + +@deprecated +def open_text( + package: Package, + resource: Resource, + encoding: str = 'utf-8', + errors: str = 'strict', +) -> TextIO: + """Return a file-like object opened for text reading of the resource.""" + return (_common.files(package) / normalize_path(resource)).open( + 'r', encoding=encoding, errors=errors + ) + + +@deprecated +def read_text( + package: Package, + resource: Resource, + encoding: str = 'utf-8', + errors: str = 'strict', +) -> str: + """Return the decoded string of the resource. + + The decoding-related arguments have the same semantics as those of + bytes.decode(). + """ + with open_text(package, resource, encoding, errors) as fp: + return fp.read() + + +@deprecated +def contents(package: Package) -> Iterable[str]: + """Return an iterable of entries in `package`. + + Note that not all entries are resources. Specifically, directories are + not considered resources. Use `is_resource()` on each entry returned here + to check if it is a resource or not. + """ + return [path.name for path in _common.files(package).iterdir()] + + +@deprecated +def is_resource(package: Package, name: str) -> bool: + """True if `name` is a resource inside `package`. + + Directories are *not* resources. + """ + resource = normalize_path(name) + return any( + traversable.name == resource and traversable.is_file() + for traversable in _common.files(package).iterdir() + ) + + +@deprecated +def path( + package: Package, + resource: Resource, +) -> ContextManager[pathlib.Path]: + """A context manager providing a file path object to the resource. + + If the resource does not already exist on its own on the file system, + a temporary file will be created. If the file was created, the file + will be deleted upon exiting the context manager (no exception is + raised if the file was deleted prior to the context manager + exiting). + """ + return _common.as_file(_common.files(package) / normalize_path(resource)) diff --git a/libs/common/importlib_resources/abc.py b/libs/common/importlib_resources/abc.py new file mode 100644 index 00000000..23b6aeaf --- /dev/null +++ b/libs/common/importlib_resources/abc.py @@ -0,0 +1,170 @@ +import abc +import io +import itertools +import pathlib +from typing import Any, BinaryIO, Iterable, Iterator, NoReturn, Text, Optional + +from ._compat import runtime_checkable, Protocol, StrPath + + +__all__ = ["ResourceReader", "Traversable", "TraversableResources"] + + +class ResourceReader(metaclass=abc.ABCMeta): + """Abstract base class for loaders to provide resource reading support.""" + + @abc.abstractmethod + def open_resource(self, resource: Text) -> BinaryIO: + """Return an opened, file-like object for binary reading. + + The 'resource' argument is expected to represent only a file name. + If the resource cannot be found, FileNotFoundError is raised. + """ + # This deliberately raises FileNotFoundError instead of + # NotImplementedError so that if this method is accidentally called, + # it'll still do the right thing. + raise FileNotFoundError + + @abc.abstractmethod + def resource_path(self, resource: Text) -> Text: + """Return the file system path to the specified resource. + + The 'resource' argument is expected to represent only a file name. + If the resource does not exist on the file system, raise + FileNotFoundError. + """ + # This deliberately raises FileNotFoundError instead of + # NotImplementedError so that if this method is accidentally called, + # it'll still do the right thing. + raise FileNotFoundError + + @abc.abstractmethod + def is_resource(self, path: Text) -> bool: + """Return True if the named 'path' is a resource. + + Files are resources, directories are not. + """ + raise FileNotFoundError + + @abc.abstractmethod + def contents(self) -> Iterable[str]: + """Return an iterable of entries in `package`.""" + raise FileNotFoundError + + +class TraversalError(Exception): + pass + + +@runtime_checkable +class Traversable(Protocol): + """ + An object with a subset of pathlib.Path methods suitable for + traversing directories and opening files. + + Any exceptions that occur when accessing the backing resource + may propagate unaltered. + """ + + @abc.abstractmethod + def iterdir(self) -> Iterator["Traversable"]: + """ + Yield Traversable objects in self + """ + + def read_bytes(self) -> bytes: + """ + Read contents of self as bytes + """ + with self.open('rb') as strm: + return strm.read() + + def read_text(self, encoding: Optional[str] = None) -> str: + """ + Read contents of self as text + """ + with self.open(encoding=encoding) as strm: + return strm.read() + + @abc.abstractmethod + def is_dir(self) -> bool: + """ + Return True if self is a directory + """ + + @abc.abstractmethod + def is_file(self) -> bool: + """ + Return True if self is a file + """ + + def joinpath(self, *descendants: StrPath) -> "Traversable": + """ + Return Traversable resolved with any descendants applied. + + Each descendant should be a path segment relative to self + and each may contain multiple levels separated by + ``posixpath.sep`` (``/``). + """ + if not descendants: + return self + names = itertools.chain.from_iterable( + path.parts for path in map(pathlib.PurePosixPath, descendants) + ) + target = next(names) + matches = ( + traversable for traversable in self.iterdir() if traversable.name == target + ) + try: + match = next(matches) + except StopIteration: + raise TraversalError( + "Target not found during traversal.", target, list(names) + ) + return match.joinpath(*names) + + def __truediv__(self, child: StrPath) -> "Traversable": + """ + Return Traversable child in self + """ + return self.joinpath(child) + + @abc.abstractmethod + def open(self, mode='r', *args, **kwargs): + """ + mode may be 'r' or 'rb' to open as text or binary. Return a handle + suitable for reading (same as pathlib.Path.open). + + When opening as text, accepts encoding parameters such as those + accepted by io.TextIOWrapper. + """ + + @property + @abc.abstractmethod + def name(self) -> str: + """ + The base name of this object without any parent references. + """ + + +class TraversableResources(ResourceReader): + """ + The required interface for providing traversable + resources. + """ + + @abc.abstractmethod + def files(self) -> "Traversable": + """Return a Traversable object for the loaded package.""" + + def open_resource(self, resource: StrPath) -> io.BufferedReader: + return self.files().joinpath(resource).open('rb') + + def resource_path(self, resource: Any) -> NoReturn: + raise FileNotFoundError(resource) + + def is_resource(self, path: StrPath) -> bool: + return self.files().joinpath(path).is_file() + + def contents(self) -> Iterator[str]: + return (item.name for item in self.files().iterdir()) diff --git a/libs/common/importlib_resources/py.typed b/libs/common/importlib_resources/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/libs/common/importlib_resources/readers.py b/libs/common/importlib_resources/readers.py new file mode 100644 index 00000000..ab34db74 --- /dev/null +++ b/libs/common/importlib_resources/readers.py @@ -0,0 +1,120 @@ +import collections +import pathlib +import operator + +from . import abc + +from ._itertools import unique_everseen +from ._compat import ZipPath + + +def remove_duplicates(items): + return iter(collections.OrderedDict.fromkeys(items)) + + +class FileReader(abc.TraversableResources): + def __init__(self, loader): + self.path = pathlib.Path(loader.path).parent + + def resource_path(self, resource): + """ + Return the file system path to prevent + `resources.path()` from creating a temporary + copy. + """ + return str(self.path.joinpath(resource)) + + def files(self): + return self.path + + +class ZipReader(abc.TraversableResources): + def __init__(self, loader, module): + _, _, name = module.rpartition('.') + self.prefix = loader.prefix.replace('\\', '/') + name + '/' + self.archive = loader.archive + + def open_resource(self, resource): + try: + return super().open_resource(resource) + except KeyError as exc: + raise FileNotFoundError(exc.args[0]) + + def is_resource(self, path): + # workaround for `zipfile.Path.is_file` returning true + # for non-existent paths. + target = self.files().joinpath(path) + return target.is_file() and target.exists() + + def files(self): + return ZipPath(self.archive, self.prefix) + + +class MultiplexedPath(abc.Traversable): + """ + Given a series of Traversable objects, implement a merged + version of the interface across all objects. Useful for + namespace packages which may be multihomed at a single + name. + """ + + def __init__(self, *paths): + self._paths = list(map(pathlib.Path, remove_duplicates(paths))) + if not self._paths: + message = 'MultiplexedPath must contain at least one path' + raise FileNotFoundError(message) + if not all(path.is_dir() for path in self._paths): + raise NotADirectoryError('MultiplexedPath only supports directories') + + def iterdir(self): + files = (file for path in self._paths for file in path.iterdir()) + return unique_everseen(files, key=operator.attrgetter('name')) + + def read_bytes(self): + raise FileNotFoundError(f'{self} is not a file') + + def read_text(self, *args, **kwargs): + raise FileNotFoundError(f'{self} is not a file') + + def is_dir(self): + return True + + def is_file(self): + return False + + def joinpath(self, *descendants): + try: + return super().joinpath(*descendants) + except abc.TraversalError: + # One of the paths did not resolve (a directory does not exist). + # Just return something that will not exist. + return self._paths[0].joinpath(*descendants) + + def open(self, *args, **kwargs): + raise FileNotFoundError(f'{self} is not a file') + + @property + def name(self): + return self._paths[0].name + + def __repr__(self): + paths = ', '.join(f"'{path}'" for path in self._paths) + return f'MultiplexedPath({paths})' + + +class NamespaceReader(abc.TraversableResources): + def __init__(self, namespace_path): + if 'NamespacePath' not in str(namespace_path): + raise ValueError('Invalid path') + self.path = MultiplexedPath(*list(namespace_path)) + + def resource_path(self, resource): + """ + Return the file system path to prevent + `resources.path()` from creating a temporary + copy. + """ + return str(self.path.joinpath(resource)) + + def files(self): + return self.path diff --git a/libs/common/importlib_resources/simple.py b/libs/common/importlib_resources/simple.py new file mode 100644 index 00000000..7770c922 --- /dev/null +++ b/libs/common/importlib_resources/simple.py @@ -0,0 +1,106 @@ +""" +Interface adapters for low-level readers. +""" + +import abc +import io +import itertools +from typing import BinaryIO, List + +from .abc import Traversable, TraversableResources + + +class SimpleReader(abc.ABC): + """ + The minimum, low-level interface required from a resource + provider. + """ + + @property + @abc.abstractmethod + def package(self) -> str: + """ + The name of the package for which this reader loads resources. + """ + + @abc.abstractmethod + def children(self) -> List['SimpleReader']: + """ + Obtain an iterable of SimpleReader for available + child containers (e.g. directories). + """ + + @abc.abstractmethod + def resources(self) -> List[str]: + """ + Obtain available named resources for this virtual package. + """ + + @abc.abstractmethod + def open_binary(self, resource: str) -> BinaryIO: + """ + Obtain a File-like for a named resource. + """ + + @property + def name(self): + return self.package.split('.')[-1] + + +class ResourceContainer(Traversable): + """ + Traversable container for a package's resources via its reader. + """ + + def __init__(self, reader: SimpleReader): + self.reader = reader + + def is_dir(self): + return True + + def is_file(self): + return False + + def iterdir(self): + files = (ResourceHandle(self, name) for name in self.reader.resources) + dirs = map(ResourceContainer, self.reader.children()) + return itertools.chain(files, dirs) + + def open(self, *args, **kwargs): + raise IsADirectoryError() + + +class ResourceHandle(Traversable): + """ + Handle to a named resource in a ResourceReader. + """ + + def __init__(self, parent: ResourceContainer, name: str): + self.parent = parent + self.name = name # type: ignore + + def is_file(self): + return True + + def is_dir(self): + return False + + def open(self, mode='r', *args, **kwargs): + stream = self.parent.reader.open_binary(self.name) + if 'b' not in mode: + stream = io.TextIOWrapper(*args, **kwargs) + return stream + + def joinpath(self, name): + raise RuntimeError("Cannot traverse into a resource") + + +class TraversableReader(TraversableResources, SimpleReader): + """ + A TraversableResources based on SimpleReader. Resource providers + may derive from this class to provide the TraversableResources + interface by supplying the SimpleReader interface. + """ + + def files(self): + return ResourceContainer(self) diff --git a/libs/common/importlib_resources/tests/__init__.py b/libs/common/importlib_resources/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/libs/common/importlib_resources/tests/_compat.py b/libs/common/importlib_resources/tests/_compat.py new file mode 100644 index 00000000..e7bf06dd --- /dev/null +++ b/libs/common/importlib_resources/tests/_compat.py @@ -0,0 +1,32 @@ +import os + + +try: + from test.support import import_helper # type: ignore +except ImportError: + # Python 3.9 and earlier + class import_helper: # type: ignore + from test.support import ( + modules_setup, + modules_cleanup, + DirsOnSysPath, + CleanImport, + ) + + +try: + from test.support import os_helper # type: ignore +except ImportError: + # Python 3.9 compat + class os_helper: # type:ignore + from test.support import temp_dir + + +try: + # Python 3.10 + from test.support.os_helper import unlink +except ImportError: + from test.support import unlink as _unlink + + def unlink(target): + return _unlink(os.fspath(target)) diff --git a/libs/common/importlib_resources/tests/_path.py b/libs/common/importlib_resources/tests/_path.py new file mode 100644 index 00000000..c630e4d3 --- /dev/null +++ b/libs/common/importlib_resources/tests/_path.py @@ -0,0 +1,50 @@ +import pathlib +import functools + + +#### +# from jaraco.path 3.4 + + +def build(spec, prefix=pathlib.Path()): + """ + Build a set of files/directories, as described by the spec. + + Each key represents a pathname, and the value represents + the content. Content may be a nested directory. + + >>> spec = { + ... 'README.txt': "A README file", + ... "foo": { + ... "__init__.py": "", + ... "bar": { + ... "__init__.py": "", + ... }, + ... "baz.py": "# Some code", + ... } + ... } + >>> tmpdir = getfixture('tmpdir') + >>> build(spec, tmpdir) + """ + for name, contents in spec.items(): + create(contents, pathlib.Path(prefix) / name) + + +@functools.singledispatch +def create(content, path): + path.mkdir(exist_ok=True) + build(content, prefix=path) # type: ignore + + +@create.register +def _(content: bytes, path): + path.write_bytes(content) + + +@create.register +def _(content: str, path): + path.write_text(content) + + +# end from jaraco.path +#### diff --git a/libs/common/importlib_resources/tests/data01/__init__.py b/libs/common/importlib_resources/tests/data01/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/libs/common/importlib_resources/tests/data01/binary.file b/libs/common/importlib_resources/tests/data01/binary.file new file mode 100644 index 00000000..eaf36c1d Binary files /dev/null and b/libs/common/importlib_resources/tests/data01/binary.file differ diff --git a/libs/common/importlib_resources/tests/data01/subdirectory/__init__.py b/libs/common/importlib_resources/tests/data01/subdirectory/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/libs/common/importlib_resources/tests/data01/subdirectory/binary.file b/libs/common/importlib_resources/tests/data01/subdirectory/binary.file new file mode 100644 index 00000000..eaf36c1d Binary files /dev/null and b/libs/common/importlib_resources/tests/data01/subdirectory/binary.file differ diff --git a/libs/common/importlib_resources/tests/data01/utf-16.file b/libs/common/importlib_resources/tests/data01/utf-16.file new file mode 100644 index 00000000..2cb77229 Binary files /dev/null and b/libs/common/importlib_resources/tests/data01/utf-16.file differ diff --git a/libs/common/importlib_resources/tests/data01/utf-8.file b/libs/common/importlib_resources/tests/data01/utf-8.file new file mode 100644 index 00000000..1c0132ad --- /dev/null +++ b/libs/common/importlib_resources/tests/data01/utf-8.file @@ -0,0 +1 @@ +Hello, UTF-8 world! diff --git a/libs/common/importlib_resources/tests/data02/__init__.py b/libs/common/importlib_resources/tests/data02/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/libs/common/importlib_resources/tests/data02/one/__init__.py b/libs/common/importlib_resources/tests/data02/one/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/libs/common/importlib_resources/tests/data02/one/resource1.txt b/libs/common/importlib_resources/tests/data02/one/resource1.txt new file mode 100644 index 00000000..61a813e4 --- /dev/null +++ b/libs/common/importlib_resources/tests/data02/one/resource1.txt @@ -0,0 +1 @@ +one resource diff --git a/libs/common/importlib_resources/tests/data02/two/__init__.py b/libs/common/importlib_resources/tests/data02/two/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/libs/common/importlib_resources/tests/data02/two/resource2.txt b/libs/common/importlib_resources/tests/data02/two/resource2.txt new file mode 100644 index 00000000..a80ce46e --- /dev/null +++ b/libs/common/importlib_resources/tests/data02/two/resource2.txt @@ -0,0 +1 @@ +two resource diff --git a/libs/common/importlib_resources/tests/namespacedata01/binary.file b/libs/common/importlib_resources/tests/namespacedata01/binary.file new file mode 100644 index 00000000..eaf36c1d Binary files /dev/null and b/libs/common/importlib_resources/tests/namespacedata01/binary.file differ diff --git a/libs/common/importlib_resources/tests/namespacedata01/utf-16.file b/libs/common/importlib_resources/tests/namespacedata01/utf-16.file new file mode 100644 index 00000000..2cb77229 Binary files /dev/null and b/libs/common/importlib_resources/tests/namespacedata01/utf-16.file differ diff --git a/libs/common/importlib_resources/tests/namespacedata01/utf-8.file b/libs/common/importlib_resources/tests/namespacedata01/utf-8.file new file mode 100644 index 00000000..1c0132ad --- /dev/null +++ b/libs/common/importlib_resources/tests/namespacedata01/utf-8.file @@ -0,0 +1 @@ +Hello, UTF-8 world! diff --git a/libs/common/importlib_resources/tests/test_compatibilty_files.py b/libs/common/importlib_resources/tests/test_compatibilty_files.py new file mode 100644 index 00000000..d92c7c56 --- /dev/null +++ b/libs/common/importlib_resources/tests/test_compatibilty_files.py @@ -0,0 +1,102 @@ +import io +import unittest + +import importlib_resources as resources + +from importlib_resources._adapters import ( + CompatibilityFiles, + wrap_spec, +) + +from . import util + + +class CompatibilityFilesTests(unittest.TestCase): + @property + def package(self): + bytes_data = io.BytesIO(b'Hello, world!') + return util.create_package( + file=bytes_data, + path='some_path', + contents=('a', 'b', 'c'), + ) + + @property + def files(self): + return resources.files(self.package) + + def test_spec_path_iter(self): + self.assertEqual( + sorted(path.name for path in self.files.iterdir()), + ['a', 'b', 'c'], + ) + + def test_child_path_iter(self): + self.assertEqual(list((self.files / 'a').iterdir()), []) + + def test_orphan_path_iter(self): + self.assertEqual(list((self.files / 'a' / 'a').iterdir()), []) + self.assertEqual(list((self.files / 'a' / 'a' / 'a').iterdir()), []) + + def test_spec_path_is(self): + self.assertFalse(self.files.is_file()) + self.assertFalse(self.files.is_dir()) + + def test_child_path_is(self): + self.assertTrue((self.files / 'a').is_file()) + self.assertFalse((self.files / 'a').is_dir()) + + def test_orphan_path_is(self): + self.assertFalse((self.files / 'a' / 'a').is_file()) + self.assertFalse((self.files / 'a' / 'a').is_dir()) + self.assertFalse((self.files / 'a' / 'a' / 'a').is_file()) + self.assertFalse((self.files / 'a' / 'a' / 'a').is_dir()) + + def test_spec_path_name(self): + self.assertEqual(self.files.name, 'testingpackage') + + def test_child_path_name(self): + self.assertEqual((self.files / 'a').name, 'a') + + def test_orphan_path_name(self): + self.assertEqual((self.files / 'a' / 'b').name, 'b') + self.assertEqual((self.files / 'a' / 'b' / 'c').name, 'c') + + def test_spec_path_open(self): + self.assertEqual(self.files.read_bytes(), b'Hello, world!') + self.assertEqual(self.files.read_text(), 'Hello, world!') + + def test_child_path_open(self): + self.assertEqual((self.files / 'a').read_bytes(), b'Hello, world!') + self.assertEqual((self.files / 'a').read_text(), 'Hello, world!') + + def test_orphan_path_open(self): + with self.assertRaises(FileNotFoundError): + (self.files / 'a' / 'b').read_bytes() + with self.assertRaises(FileNotFoundError): + (self.files / 'a' / 'b' / 'c').read_bytes() + + def test_open_invalid_mode(self): + with self.assertRaises(ValueError): + self.files.open('0') + + def test_orphan_path_invalid(self): + with self.assertRaises(ValueError): + CompatibilityFiles.OrphanPath() + + def test_wrap_spec(self): + spec = wrap_spec(self.package) + self.assertIsInstance(spec.loader.get_resource_reader(None), CompatibilityFiles) + + +class CompatibilityFilesNoReaderTests(unittest.TestCase): + @property + def package(self): + return util.create_package_from_loader(None) + + @property + def files(self): + return resources.files(self.package) + + def test_spec_path_joinpath(self): + self.assertIsInstance(self.files / 'a', CompatibilityFiles.OrphanPath) diff --git a/libs/common/importlib_resources/tests/test_contents.py b/libs/common/importlib_resources/tests/test_contents.py new file mode 100644 index 00000000..525568e8 --- /dev/null +++ b/libs/common/importlib_resources/tests/test_contents.py @@ -0,0 +1,43 @@ +import unittest +import importlib_resources as resources + +from . import data01 +from . import util + + +class ContentsTests: + expected = { + '__init__.py', + 'binary.file', + 'subdirectory', + 'utf-16.file', + 'utf-8.file', + } + + def test_contents(self): + contents = {path.name for path in resources.files(self.data).iterdir()} + assert self.expected <= contents + + +class ContentsDiskTests(ContentsTests, unittest.TestCase): + def setUp(self): + self.data = data01 + + +class ContentsZipTests(ContentsTests, util.ZipSetup, unittest.TestCase): + pass + + +class ContentsNamespaceTests(ContentsTests, unittest.TestCase): + expected = { + # no __init__ because of namespace design + # no subdirectory as incidental difference in fixture + 'binary.file', + 'utf-16.file', + 'utf-8.file', + } + + def setUp(self): + from . import namespacedata01 + + self.data = namespacedata01 diff --git a/libs/common/importlib_resources/tests/test_files.py b/libs/common/importlib_resources/tests/test_files.py new file mode 100644 index 00000000..d258fb5f --- /dev/null +++ b/libs/common/importlib_resources/tests/test_files.py @@ -0,0 +1,112 @@ +import typing +import textwrap +import unittest +import warnings +import importlib +import contextlib + +import importlib_resources as resources +from ..abc import Traversable +from . import data01 +from . import util +from . import _path +from ._compat import os_helper, import_helper + + +@contextlib.contextmanager +def suppress_known_deprecation(): + with warnings.catch_warnings(record=True) as ctx: + warnings.simplefilter('default', category=DeprecationWarning) + yield ctx + + +class FilesTests: + def test_read_bytes(self): + files = resources.files(self.data) + actual = files.joinpath('utf-8.file').read_bytes() + assert actual == b'Hello, UTF-8 world!\n' + + def test_read_text(self): + files = resources.files(self.data) + actual = files.joinpath('utf-8.file').read_text(encoding='utf-8') + assert actual == 'Hello, UTF-8 world!\n' + + @unittest.skipUnless( + hasattr(typing, 'runtime_checkable'), + "Only suitable when typing supports runtime_checkable", + ) + def test_traversable(self): + assert isinstance(resources.files(self.data), Traversable) + + def test_old_parameter(self): + """ + Files used to take a 'package' parameter. Make sure anyone + passing by name is still supported. + """ + with suppress_known_deprecation(): + resources.files(package=self.data) + + +class OpenDiskTests(FilesTests, unittest.TestCase): + def setUp(self): + self.data = data01 + + +class OpenZipTests(FilesTests, util.ZipSetup, unittest.TestCase): + pass + + +class OpenNamespaceTests(FilesTests, unittest.TestCase): + def setUp(self): + from . import namespacedata01 + + self.data = namespacedata01 + + +class SiteDir: + def setUp(self): + self.fixtures = contextlib.ExitStack() + self.addCleanup(self.fixtures.close) + self.site_dir = self.fixtures.enter_context(os_helper.temp_dir()) + self.fixtures.enter_context(import_helper.DirsOnSysPath(self.site_dir)) + self.fixtures.enter_context(import_helper.CleanImport()) + + +class ModulesFilesTests(SiteDir, unittest.TestCase): + def test_module_resources(self): + """ + A module can have resources found adjacent to the module. + """ + spec = { + 'mod.py': '', + 'res.txt': 'resources are the best', + } + _path.build(spec, self.site_dir) + import mod + + actual = resources.files(mod).joinpath('res.txt').read_text() + assert actual == spec['res.txt'] + + +class ImplicitContextFilesTests(SiteDir, unittest.TestCase): + def test_implicit_files(self): + """ + Without any parameter, files() will infer the location as the caller. + """ + spec = { + 'somepkg': { + '__init__.py': textwrap.dedent( + """ + import importlib_resources as res + val = res.files().joinpath('res.txt').read_text() + """ + ), + 'res.txt': 'resources are the best', + }, + } + _path.build(spec, self.site_dir) + assert importlib.import_module('somepkg').val == 'resources are the best' + + +if __name__ == '__main__': + unittest.main() diff --git a/libs/common/importlib_resources/tests/test_open.py b/libs/common/importlib_resources/tests/test_open.py new file mode 100644 index 00000000..87b42c3d --- /dev/null +++ b/libs/common/importlib_resources/tests/test_open.py @@ -0,0 +1,81 @@ +import unittest + +import importlib_resources as resources +from . import data01 +from . import util + + +class CommonBinaryTests(util.CommonTests, unittest.TestCase): + def execute(self, package, path): + target = resources.files(package).joinpath(path) + with target.open('rb'): + pass + + +class CommonTextTests(util.CommonTests, unittest.TestCase): + def execute(self, package, path): + target = resources.files(package).joinpath(path) + with target.open(): + pass + + +class OpenTests: + def test_open_binary(self): + target = resources.files(self.data) / 'binary.file' + with target.open('rb') as fp: + result = fp.read() + self.assertEqual(result, b'\x00\x01\x02\x03') + + def test_open_text_default_encoding(self): + target = resources.files(self.data) / 'utf-8.file' + with target.open() as fp: + result = fp.read() + self.assertEqual(result, 'Hello, UTF-8 world!\n') + + def test_open_text_given_encoding(self): + target = resources.files(self.data) / 'utf-16.file' + with target.open(encoding='utf-16', errors='strict') as fp: + result = fp.read() + self.assertEqual(result, 'Hello, UTF-16 world!\n') + + def test_open_text_with_errors(self): + # Raises UnicodeError without the 'errors' argument. + target = resources.files(self.data) / 'utf-16.file' + with target.open(encoding='utf-8', errors='strict') as fp: + self.assertRaises(UnicodeError, fp.read) + with target.open(encoding='utf-8', errors='ignore') as fp: + result = fp.read() + self.assertEqual( + result, + 'H\x00e\x00l\x00l\x00o\x00,\x00 ' + '\x00U\x00T\x00F\x00-\x001\x006\x00 ' + '\x00w\x00o\x00r\x00l\x00d\x00!\x00\n\x00', + ) + + def test_open_binary_FileNotFoundError(self): + target = resources.files(self.data) / 'does-not-exist' + self.assertRaises(FileNotFoundError, target.open, 'rb') + + def test_open_text_FileNotFoundError(self): + target = resources.files(self.data) / 'does-not-exist' + self.assertRaises(FileNotFoundError, target.open) + + +class OpenDiskTests(OpenTests, unittest.TestCase): + def setUp(self): + self.data = data01 + + +class OpenDiskNamespaceTests(OpenTests, unittest.TestCase): + def setUp(self): + from . import namespacedata01 + + self.data = namespacedata01 + + +class OpenZipTests(OpenTests, util.ZipSetup, unittest.TestCase): + pass + + +if __name__ == '__main__': + unittest.main() diff --git a/libs/common/importlib_resources/tests/test_path.py b/libs/common/importlib_resources/tests/test_path.py new file mode 100644 index 00000000..4f4d3943 --- /dev/null +++ b/libs/common/importlib_resources/tests/test_path.py @@ -0,0 +1,64 @@ +import io +import unittest + +import importlib_resources as resources +from . import data01 +from . import util + + +class CommonTests(util.CommonTests, unittest.TestCase): + def execute(self, package, path): + with resources.as_file(resources.files(package).joinpath(path)): + pass + + +class PathTests: + def test_reading(self): + # Path should be readable. + # Test also implicitly verifies the returned object is a pathlib.Path + # instance. + target = resources.files(self.data) / 'utf-8.file' + with resources.as_file(target) as path: + self.assertTrue(path.name.endswith("utf-8.file"), repr(path)) + # pathlib.Path.read_text() was introduced in Python 3.5. + with path.open('r', encoding='utf-8') as file: + text = file.read() + self.assertEqual('Hello, UTF-8 world!\n', text) + + +class PathDiskTests(PathTests, unittest.TestCase): + data = data01 + + def test_natural_path(self): + """ + Guarantee the internal implementation detail that + file-system-backed resources do not get the tempdir + treatment. + """ + target = resources.files(self.data) / 'utf-8.file' + with resources.as_file(target) as path: + assert 'data' in str(path) + + +class PathMemoryTests(PathTests, unittest.TestCase): + def setUp(self): + file = io.BytesIO(b'Hello, UTF-8 world!\n') + self.addCleanup(file.close) + self.data = util.create_package( + file=file, path=FileNotFoundError("package exists only in memory") + ) + self.data.__spec__.origin = None + self.data.__spec__.has_location = False + + +class PathZipTests(PathTests, util.ZipSetup, unittest.TestCase): + def test_remove_in_context_manager(self): + # It is not an error if the file that was temporarily stashed on the + # file system is removed inside the `with` stanza. + target = resources.files(self.data) / 'utf-8.file' + with resources.as_file(target) as path: + path.unlink() + + +if __name__ == '__main__': + unittest.main() diff --git a/libs/common/importlib_resources/tests/test_read.py b/libs/common/importlib_resources/tests/test_read.py new file mode 100644 index 00000000..41dd6db5 --- /dev/null +++ b/libs/common/importlib_resources/tests/test_read.py @@ -0,0 +1,76 @@ +import unittest +import importlib_resources as resources + +from . import data01 +from . import util +from importlib import import_module + + +class CommonBinaryTests(util.CommonTests, unittest.TestCase): + def execute(self, package, path): + resources.files(package).joinpath(path).read_bytes() + + +class CommonTextTests(util.CommonTests, unittest.TestCase): + def execute(self, package, path): + resources.files(package).joinpath(path).read_text() + + +class ReadTests: + def test_read_bytes(self): + result = resources.files(self.data).joinpath('binary.file').read_bytes() + self.assertEqual(result, b'\0\1\2\3') + + def test_read_text_default_encoding(self): + result = resources.files(self.data).joinpath('utf-8.file').read_text() + self.assertEqual(result, 'Hello, UTF-8 world!\n') + + def test_read_text_given_encoding(self): + result = ( + resources.files(self.data) + .joinpath('utf-16.file') + .read_text(encoding='utf-16') + ) + self.assertEqual(result, 'Hello, UTF-16 world!\n') + + def test_read_text_with_errors(self): + # Raises UnicodeError without the 'errors' argument. + target = resources.files(self.data) / 'utf-16.file' + self.assertRaises(UnicodeError, target.read_text, encoding='utf-8') + result = target.read_text(encoding='utf-8', errors='ignore') + self.assertEqual( + result, + 'H\x00e\x00l\x00l\x00o\x00,\x00 ' + '\x00U\x00T\x00F\x00-\x001\x006\x00 ' + '\x00w\x00o\x00r\x00l\x00d\x00!\x00\n\x00', + ) + + +class ReadDiskTests(ReadTests, unittest.TestCase): + data = data01 + + +class ReadZipTests(ReadTests, util.ZipSetup, unittest.TestCase): + def test_read_submodule_resource(self): + submodule = import_module('ziptestdata.subdirectory') + result = resources.files(submodule).joinpath('binary.file').read_bytes() + self.assertEqual(result, b'\0\1\2\3') + + def test_read_submodule_resource_by_name(self): + result = ( + resources.files('ziptestdata.subdirectory') + .joinpath('binary.file') + .read_bytes() + ) + self.assertEqual(result, b'\0\1\2\3') + + +class ReadNamespaceTests(ReadTests, unittest.TestCase): + def setUp(self): + from . import namespacedata01 + + self.data = namespacedata01 + + +if __name__ == '__main__': + unittest.main() diff --git a/libs/common/importlib_resources/tests/test_reader.py b/libs/common/importlib_resources/tests/test_reader.py new file mode 100644 index 00000000..1c8ebeeb --- /dev/null +++ b/libs/common/importlib_resources/tests/test_reader.py @@ -0,0 +1,133 @@ +import os.path +import sys +import pathlib +import unittest + +from importlib import import_module +from importlib_resources.readers import MultiplexedPath, NamespaceReader + + +class MultiplexedPathTest(unittest.TestCase): + @classmethod + def setUpClass(cls): + path = pathlib.Path(__file__).parent / 'namespacedata01' + cls.folder = str(path) + + def test_init_no_paths(self): + with self.assertRaises(FileNotFoundError): + MultiplexedPath() + + def test_init_file(self): + with self.assertRaises(NotADirectoryError): + MultiplexedPath(os.path.join(self.folder, 'binary.file')) + + def test_iterdir(self): + contents = {path.name for path in MultiplexedPath(self.folder).iterdir()} + try: + contents.remove('__pycache__') + except (KeyError, ValueError): + pass + self.assertEqual(contents, {'binary.file', 'utf-16.file', 'utf-8.file'}) + + def test_iterdir_duplicate(self): + data01 = os.path.abspath(os.path.join(__file__, '..', 'data01')) + contents = { + path.name for path in MultiplexedPath(self.folder, data01).iterdir() + } + for remove in ('__pycache__', '__init__.pyc'): + try: + contents.remove(remove) + except (KeyError, ValueError): + pass + self.assertEqual( + contents, + {'__init__.py', 'binary.file', 'subdirectory', 'utf-16.file', 'utf-8.file'}, + ) + + def test_is_dir(self): + self.assertEqual(MultiplexedPath(self.folder).is_dir(), True) + + def test_is_file(self): + self.assertEqual(MultiplexedPath(self.folder).is_file(), False) + + def test_open_file(self): + path = MultiplexedPath(self.folder) + with self.assertRaises(FileNotFoundError): + path.read_bytes() + with self.assertRaises(FileNotFoundError): + path.read_text() + with self.assertRaises(FileNotFoundError): + path.open() + + def test_join_path(self): + prefix = os.path.abspath(os.path.join(__file__, '..')) + data01 = os.path.join(prefix, 'data01') + path = MultiplexedPath(self.folder, data01) + self.assertEqual( + str(path.joinpath('binary.file'))[len(prefix) + 1 :], + os.path.join('namespacedata01', 'binary.file'), + ) + self.assertEqual( + str(path.joinpath('subdirectory'))[len(prefix) + 1 :], + os.path.join('data01', 'subdirectory'), + ) + self.assertEqual( + str(path.joinpath('imaginary'))[len(prefix) + 1 :], + os.path.join('namespacedata01', 'imaginary'), + ) + self.assertEqual(path.joinpath(), path) + + def test_join_path_compound(self): + path = MultiplexedPath(self.folder) + assert not path.joinpath('imaginary/foo.py').exists() + + def test_repr(self): + self.assertEqual( + repr(MultiplexedPath(self.folder)), + f"MultiplexedPath('{self.folder}')", + ) + + def test_name(self): + self.assertEqual( + MultiplexedPath(self.folder).name, + os.path.basename(self.folder), + ) + + +class NamespaceReaderTest(unittest.TestCase): + site_dir = str(pathlib.Path(__file__).parent) + + @classmethod + def setUpClass(cls): + sys.path.append(cls.site_dir) + + @classmethod + def tearDownClass(cls): + sys.path.remove(cls.site_dir) + + def test_init_error(self): + with self.assertRaises(ValueError): + NamespaceReader(['path1', 'path2']) + + def test_resource_path(self): + namespacedata01 = import_module('namespacedata01') + reader = NamespaceReader(namespacedata01.__spec__.submodule_search_locations) + + root = os.path.abspath(os.path.join(__file__, '..', 'namespacedata01')) + self.assertEqual( + reader.resource_path('binary.file'), os.path.join(root, 'binary.file') + ) + self.assertEqual( + reader.resource_path('imaginary'), os.path.join(root, 'imaginary') + ) + + def test_files(self): + namespacedata01 = import_module('namespacedata01') + reader = NamespaceReader(namespacedata01.__spec__.submodule_search_locations) + root = os.path.abspath(os.path.join(__file__, '..', 'namespacedata01')) + self.assertIsInstance(reader.files(), MultiplexedPath) + self.assertEqual(repr(reader.files()), f"MultiplexedPath('{root}')") + + +if __name__ == '__main__': + unittest.main() diff --git a/libs/common/importlib_resources/tests/test_resource.py b/libs/common/importlib_resources/tests/test_resource.py new file mode 100644 index 00000000..82390271 --- /dev/null +++ b/libs/common/importlib_resources/tests/test_resource.py @@ -0,0 +1,260 @@ +import sys +import unittest +import importlib_resources as resources +import uuid +import pathlib + +from . import data01 +from . import zipdata01, zipdata02 +from . import util +from importlib import import_module +from ._compat import import_helper, unlink + + +class ResourceTests: + # Subclasses are expected to set the `data` attribute. + + def test_is_file_exists(self): + target = resources.files(self.data) / 'binary.file' + self.assertTrue(target.is_file()) + + def test_is_file_missing(self): + target = resources.files(self.data) / 'not-a-file' + self.assertFalse(target.is_file()) + + def test_is_dir(self): + target = resources.files(self.data) / 'subdirectory' + self.assertFalse(target.is_file()) + self.assertTrue(target.is_dir()) + + +class ResourceDiskTests(ResourceTests, unittest.TestCase): + def setUp(self): + self.data = data01 + + +class ResourceZipTests(ResourceTests, util.ZipSetup, unittest.TestCase): + pass + + +def names(traversable): + return {item.name for item in traversable.iterdir()} + + +class ResourceLoaderTests(unittest.TestCase): + def test_resource_contents(self): + package = util.create_package( + file=data01, path=data01.__file__, contents=['A', 'B', 'C'] + ) + self.assertEqual(names(resources.files(package)), {'A', 'B', 'C'}) + + def test_is_file(self): + package = util.create_package( + file=data01, path=data01.__file__, contents=['A', 'B', 'C', 'D/E', 'D/F'] + ) + self.assertTrue(resources.files(package).joinpath('B').is_file()) + + def test_is_dir(self): + package = util.create_package( + file=data01, path=data01.__file__, contents=['A', 'B', 'C', 'D/E', 'D/F'] + ) + self.assertTrue(resources.files(package).joinpath('D').is_dir()) + + def test_resource_missing(self): + package = util.create_package( + file=data01, path=data01.__file__, contents=['A', 'B', 'C', 'D/E', 'D/F'] + ) + self.assertFalse(resources.files(package).joinpath('Z').is_file()) + + +class ResourceCornerCaseTests(unittest.TestCase): + def test_package_has_no_reader_fallback(self): + # Test odd ball packages which: + # 1. Do not have a ResourceReader as a loader + # 2. Are not on the file system + # 3. Are not in a zip file + module = util.create_package( + file=data01, path=data01.__file__, contents=['A', 'B', 'C'] + ) + # Give the module a dummy loader. + module.__loader__ = object() + # Give the module a dummy origin. + module.__file__ = '/path/which/shall/not/be/named' + module.__spec__.loader = module.__loader__ + module.__spec__.origin = module.__file__ + self.assertFalse(resources.files(module).joinpath('A').is_file()) + + +class ResourceFromZipsTest01(util.ZipSetupBase, unittest.TestCase): + ZIP_MODULE = zipdata01 # type: ignore + + def test_is_submodule_resource(self): + submodule = import_module('ziptestdata.subdirectory') + self.assertTrue(resources.files(submodule).joinpath('binary.file').is_file()) + + def test_read_submodule_resource_by_name(self): + self.assertTrue( + resources.files('ziptestdata.subdirectory') + .joinpath('binary.file') + .is_file() + ) + + def test_submodule_contents(self): + submodule = import_module('ziptestdata.subdirectory') + self.assertEqual( + names(resources.files(submodule)), {'__init__.py', 'binary.file'} + ) + + def test_submodule_contents_by_name(self): + self.assertEqual( + names(resources.files('ziptestdata.subdirectory')), + {'__init__.py', 'binary.file'}, + ) + + def test_as_file_directory(self): + with resources.as_file(resources.files('ziptestdata')) as data: + assert data.name == 'ziptestdata' + assert data.is_dir() + assert data.joinpath('subdirectory').is_dir() + assert len(list(data.iterdir())) + assert not data.parent.exists() + + +class ResourceFromZipsTest02(util.ZipSetupBase, unittest.TestCase): + ZIP_MODULE = zipdata02 # type: ignore + + def test_unrelated_contents(self): + """ + Test thata zip with two unrelated subpackages return + distinct resources. Ref python/importlib_resources#44. + """ + self.assertEqual( + names(resources.files('ziptestdata.one')), + {'__init__.py', 'resource1.txt'}, + ) + self.assertEqual( + names(resources.files('ziptestdata.two')), + {'__init__.py', 'resource2.txt'}, + ) + + +class DeletingZipsTest(unittest.TestCase): + """Having accessed resources in a zip file should not keep an open + reference to the zip. + """ + + ZIP_MODULE = zipdata01 + + def setUp(self): + modules = import_helper.modules_setup() + self.addCleanup(import_helper.modules_cleanup, *modules) + + data_path = pathlib.Path(self.ZIP_MODULE.__file__) + data_dir = data_path.parent + self.source_zip_path = data_dir / 'ziptestdata.zip' + self.zip_path = pathlib.Path(f'{uuid.uuid4()}.zip').absolute() + self.zip_path.write_bytes(self.source_zip_path.read_bytes()) + sys.path.append(str(self.zip_path)) + self.data = import_module('ziptestdata') + + def tearDown(self): + try: + sys.path.remove(str(self.zip_path)) + except ValueError: + pass + + try: + del sys.path_importer_cache[str(self.zip_path)] + del sys.modules[self.data.__name__] + except KeyError: + pass + + try: + unlink(self.zip_path) + except OSError: + # If the test fails, this will probably fail too + pass + + def test_iterdir_does_not_keep_open(self): + c = [item.name for item in resources.files('ziptestdata').iterdir()] + self.zip_path.unlink() + del c + + def test_is_file_does_not_keep_open(self): + c = resources.files('ziptestdata').joinpath('binary.file').is_file() + self.zip_path.unlink() + del c + + def test_is_file_failure_does_not_keep_open(self): + c = resources.files('ziptestdata').joinpath('not-present').is_file() + self.zip_path.unlink() + del c + + @unittest.skip("Desired but not supported.") + def test_as_file_does_not_keep_open(self): # pragma: no cover + c = resources.as_file(resources.files('ziptestdata') / 'binary.file') + self.zip_path.unlink() + del c + + def test_entered_path_does_not_keep_open(self): + # This is what certifi does on import to make its bundle + # available for the process duration. + c = resources.as_file( + resources.files('ziptestdata') / 'binary.file' + ).__enter__() + self.zip_path.unlink() + del c + + def test_read_binary_does_not_keep_open(self): + c = resources.files('ziptestdata').joinpath('binary.file').read_bytes() + self.zip_path.unlink() + del c + + def test_read_text_does_not_keep_open(self): + c = resources.files('ziptestdata').joinpath('utf-8.file').read_text() + self.zip_path.unlink() + del c + + +class ResourceFromNamespaceTest01(unittest.TestCase): + site_dir = str(pathlib.Path(__file__).parent) + + @classmethod + def setUpClass(cls): + sys.path.append(cls.site_dir) + + @classmethod + def tearDownClass(cls): + sys.path.remove(cls.site_dir) + + def test_is_submodule_resource(self): + self.assertTrue( + resources.files(import_module('namespacedata01')) + .joinpath('binary.file') + .is_file() + ) + + def test_read_submodule_resource_by_name(self): + self.assertTrue( + resources.files('namespacedata01').joinpath('binary.file').is_file() + ) + + def test_submodule_contents(self): + contents = names(resources.files(import_module('namespacedata01'))) + try: + contents.remove('__pycache__') + except KeyError: + pass + self.assertEqual(contents, {'binary.file', 'utf-8.file', 'utf-16.file'}) + + def test_submodule_contents_by_name(self): + contents = names(resources.files('namespacedata01')) + try: + contents.remove('__pycache__') + except KeyError: + pass + self.assertEqual(contents, {'binary.file', 'utf-8.file', 'utf-16.file'}) + + +if __name__ == '__main__': + unittest.main() diff --git a/libs/common/importlib_resources/tests/update-zips.py b/libs/common/importlib_resources/tests/update-zips.py new file mode 100644 index 00000000..231334aa --- /dev/null +++ b/libs/common/importlib_resources/tests/update-zips.py @@ -0,0 +1,53 @@ +""" +Generate the zip test data files. + +Run to build the tests/zipdataNN/ziptestdata.zip files from +files in tests/dataNN. + +Replaces the file with the working copy, but does commit anything +to the source repo. +""" + +import contextlib +import os +import pathlib +import zipfile + + +def main(): + """ + >>> from unittest import mock + >>> monkeypatch = getfixture('monkeypatch') + >>> monkeypatch.setattr(zipfile, 'ZipFile', mock.MagicMock()) + >>> print(); main() # print workaround for bpo-32509 + + ...data01... -> ziptestdata/... + ... + ...data02... -> ziptestdata/... + ... + """ + suffixes = '01', '02' + tuple(map(generate, suffixes)) + + +def generate(suffix): + root = pathlib.Path(__file__).parent.relative_to(os.getcwd()) + zfpath = root / f'zipdata{suffix}/ziptestdata.zip' + with zipfile.ZipFile(zfpath, 'w') as zf: + for src, rel in walk(root / f'data{suffix}'): + dst = 'ziptestdata' / pathlib.PurePosixPath(rel.as_posix()) + print(src, '->', dst) + zf.write(src, dst) + + +def walk(datapath): + for dirpath, dirnames, filenames in os.walk(datapath): + with contextlib.suppress(ValueError): + dirnames.remove('__pycache__') + for filename in filenames: + res = pathlib.Path(dirpath) / filename + rel = res.relative_to(datapath) + yield res, rel + + +__name__ == '__main__' and main() diff --git a/libs/common/importlib_resources/tests/util.py b/libs/common/importlib_resources/tests/util.py new file mode 100644 index 00000000..b596c0ce --- /dev/null +++ b/libs/common/importlib_resources/tests/util.py @@ -0,0 +1,167 @@ +import abc +import importlib +import io +import sys +import types +import pathlib + +from . import data01 +from . import zipdata01 +from ..abc import ResourceReader +from ._compat import import_helper + + +from importlib.machinery import ModuleSpec + + +class Reader(ResourceReader): + def __init__(self, **kwargs): + vars(self).update(kwargs) + + def get_resource_reader(self, package): + return self + + def open_resource(self, path): + self._path = path + if isinstance(self.file, Exception): + raise self.file + return self.file + + def resource_path(self, path_): + self._path = path_ + if isinstance(self.path, Exception): + raise self.path + return self.path + + def is_resource(self, path_): + self._path = path_ + if isinstance(self.path, Exception): + raise self.path + + def part(entry): + return entry.split('/') + + return any( + len(parts) == 1 and parts[0] == path_ for parts in map(part, self._contents) + ) + + def contents(self): + if isinstance(self.path, Exception): + raise self.path + yield from self._contents + + +def create_package_from_loader(loader, is_package=True): + name = 'testingpackage' + module = types.ModuleType(name) + spec = ModuleSpec(name, loader, origin='does-not-exist', is_package=is_package) + module.__spec__ = spec + module.__loader__ = loader + return module + + +def create_package(file=None, path=None, is_package=True, contents=()): + return create_package_from_loader( + Reader(file=file, path=path, _contents=contents), + is_package, + ) + + +class CommonTests(metaclass=abc.ABCMeta): + """ + Tests shared by test_open, test_path, and test_read. + """ + + @abc.abstractmethod + def execute(self, package, path): + """ + Call the pertinent legacy API function (e.g. open_text, path) + on package and path. + """ + + def test_package_name(self): + # Passing in the package name should succeed. + self.execute(data01.__name__, 'utf-8.file') + + def test_package_object(self): + # Passing in the package itself should succeed. + self.execute(data01, 'utf-8.file') + + def test_string_path(self): + # Passing in a string for the path should succeed. + path = 'utf-8.file' + self.execute(data01, path) + + def test_pathlib_path(self): + # Passing in a pathlib.PurePath object for the path should succeed. + path = pathlib.PurePath('utf-8.file') + self.execute(data01, path) + + def test_importing_module_as_side_effect(self): + # The anchor package can already be imported. + del sys.modules[data01.__name__] + self.execute(data01.__name__, 'utf-8.file') + + def test_missing_path(self): + # Attempting to open or read or request the path for a + # non-existent path should succeed if open_resource + # can return a viable data stream. + bytes_data = io.BytesIO(b'Hello, world!') + package = create_package(file=bytes_data, path=FileNotFoundError()) + self.execute(package, 'utf-8.file') + self.assertEqual(package.__loader__._path, 'utf-8.file') + + def test_extant_path(self): + # Attempting to open or read or request the path when the + # path does exist should still succeed. Does not assert + # anything about the result. + bytes_data = io.BytesIO(b'Hello, world!') + # any path that exists + path = __file__ + package = create_package(file=bytes_data, path=path) + self.execute(package, 'utf-8.file') + self.assertEqual(package.__loader__._path, 'utf-8.file') + + def test_useless_loader(self): + package = create_package(file=FileNotFoundError(), path=FileNotFoundError()) + with self.assertRaises(FileNotFoundError): + self.execute(package, 'utf-8.file') + + +class ZipSetupBase: + ZIP_MODULE = None + + @classmethod + def setUpClass(cls): + data_path = pathlib.Path(cls.ZIP_MODULE.__file__) + data_dir = data_path.parent + cls._zip_path = str(data_dir / 'ziptestdata.zip') + sys.path.append(cls._zip_path) + cls.data = importlib.import_module('ziptestdata') + + @classmethod + def tearDownClass(cls): + try: + sys.path.remove(cls._zip_path) + except ValueError: + pass + + try: + del sys.path_importer_cache[cls._zip_path] + del sys.modules[cls.data.__name__] + except KeyError: + pass + + try: + del cls.data + del cls._zip_path + except AttributeError: + pass + + def setUp(self): + modules = import_helper.modules_setup() + self.addCleanup(import_helper.modules_cleanup, *modules) + + +class ZipSetup(ZipSetupBase): + ZIP_MODULE = zipdata01 # type: ignore diff --git a/libs/common/importlib_resources/tests/zipdata01/__init__.py b/libs/common/importlib_resources/tests/zipdata01/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/libs/common/importlib_resources/tests/zipdata01/ziptestdata.zip b/libs/common/importlib_resources/tests/zipdata01/ziptestdata.zip new file mode 100644 index 00000000..9a3bb073 Binary files /dev/null and b/libs/common/importlib_resources/tests/zipdata01/ziptestdata.zip differ diff --git a/libs/common/importlib_resources/tests/zipdata02/__init__.py b/libs/common/importlib_resources/tests/zipdata02/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/libs/common/importlib_resources/tests/zipdata02/ziptestdata.zip b/libs/common/importlib_resources/tests/zipdata02/ziptestdata.zip new file mode 100644 index 00000000..d63ff512 Binary files /dev/null and b/libs/common/importlib_resources/tests/zipdata02/ziptestdata.zip differ diff --git a/libs/common/more_itertools/__init__.py b/libs/common/more_itertools/__init__.py deleted file mode 100644 index bba462c3..00000000 --- a/libs/common/more_itertools/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from more_itertools.more import * # noqa -from more_itertools.recipes import * # noqa diff --git a/libs/common/more_itertools/tests/test_more.py b/libs/common/more_itertools/tests/test_more.py deleted file mode 100644 index eacf8a8a..00000000 --- a/libs/common/more_itertools/tests/test_more.py +++ /dev/null @@ -1,2313 +0,0 @@ -from __future__ import division, print_function, unicode_literals - -from collections import OrderedDict -from decimal import Decimal -from doctest import DocTestSuite -from fractions import Fraction -from functools import partial, reduce -from heapq import merge -from io import StringIO -from itertools import ( - chain, - count, - groupby, - islice, - permutations, - product, - repeat, -) -from operator import add, mul, itemgetter -from unittest import TestCase - -from six.moves import filter, map, range, zip - -import more_itertools as mi - - -def load_tests(loader, tests, ignore): - # Add the doctests - tests.addTests(DocTestSuite('more_itertools.more')) - return tests - - -class CollateTests(TestCase): - """Unit tests for ``collate()``""" - # Also accidentally tests peekable, though that could use its own tests - - def test_default(self): - """Test with the default `key` function.""" - iterables = [range(4), range(7), range(3, 6)] - self.assertEqual( - sorted(reduce(list.__add__, [list(it) for it in iterables])), - list(mi.collate(*iterables)) - ) - - def test_key(self): - """Test using a custom `key` function.""" - iterables = [range(5, 0, -1), range(4, 0, -1)] - actual = sorted( - reduce(list.__add__, [list(it) for it in iterables]), reverse=True - ) - expected = list(mi.collate(*iterables, key=lambda x: -x)) - self.assertEqual(actual, expected) - - def test_empty(self): - """Be nice if passed an empty list of iterables.""" - self.assertEqual([], list(mi.collate())) - - def test_one(self): - """Work when only 1 iterable is passed.""" - self.assertEqual([0, 1], list(mi.collate(range(2)))) - - def test_reverse(self): - """Test the `reverse` kwarg.""" - iterables = [range(4, 0, -1), range(7, 0, -1), range(3, 6, -1)] - - actual = sorted( - reduce(list.__add__, [list(it) for it in iterables]), reverse=True - ) - expected = list(mi.collate(*iterables, reverse=True)) - self.assertEqual(actual, expected) - - def test_alias(self): - self.assertNotEqual(merge.__doc__, mi.collate.__doc__) - self.assertNotEqual(partial.__doc__, mi.collate.__doc__) - - -class ChunkedTests(TestCase): - """Tests for ``chunked()``""" - - def test_even(self): - """Test when ``n`` divides evenly into the length of the iterable.""" - self.assertEqual( - list(mi.chunked('ABCDEF', 3)), [['A', 'B', 'C'], ['D', 'E', 'F']] - ) - - def test_odd(self): - """Test when ``n`` does not divide evenly into the length of the - iterable. - - """ - self.assertEqual( - list(mi.chunked('ABCDE', 3)), [['A', 'B', 'C'], ['D', 'E']] - ) - - -class FirstTests(TestCase): - """Tests for ``first()``""" - - def test_many(self): - """Test that it works on many-item iterables.""" - # Also try it on a generator expression to make sure it works on - # whatever those return, across Python versions. - self.assertEqual(mi.first(x for x in range(4)), 0) - - def test_one(self): - """Test that it doesn't raise StopIteration prematurely.""" - self.assertEqual(mi.first([3]), 3) - - def test_empty_stop_iteration(self): - """It should raise StopIteration for empty iterables.""" - self.assertRaises(ValueError, lambda: mi.first([])) - - def test_default(self): - """It should return the provided default arg for empty iterables.""" - self.assertEqual(mi.first([], 'boo'), 'boo') - - -class IterOnlyRange: - """User-defined iterable class which only support __iter__. - - It is not specified to inherit ``object``, so indexing on a instance will - raise an ``AttributeError`` rather than ``TypeError`` in Python 2. - - >>> r = IterOnlyRange(5) - >>> r[0] - AttributeError: IterOnlyRange instance has no attribute '__getitem__' - - Note: In Python 3, ``TypeError`` will be raised because ``object`` is - inherited implicitly by default. - - >>> r[0] - TypeError: 'IterOnlyRange' object does not support indexing - """ - def __init__(self, n): - """Set the length of the range.""" - self.n = n - - def __iter__(self): - """Works same as range().""" - return iter(range(self.n)) - - -class LastTests(TestCase): - """Tests for ``last()``""" - - def test_many_nonsliceable(self): - """Test that it works on many-item non-slice-able iterables.""" - # Also try it on a generator expression to make sure it works on - # whatever those return, across Python versions. - self.assertEqual(mi.last(x for x in range(4)), 3) - - def test_one_nonsliceable(self): - """Test that it doesn't raise StopIteration prematurely.""" - self.assertEqual(mi.last(x for x in range(1)), 0) - - def test_empty_stop_iteration_nonsliceable(self): - """It should raise ValueError for empty non-slice-able iterables.""" - self.assertRaises(ValueError, lambda: mi.last(x for x in range(0))) - - def test_default_nonsliceable(self): - """It should return the provided default arg for empty non-slice-able - iterables. - """ - self.assertEqual(mi.last((x for x in range(0)), 'boo'), 'boo') - - def test_many_sliceable(self): - """Test that it works on many-item slice-able iterables.""" - self.assertEqual(mi.last([0, 1, 2, 3]), 3) - - def test_one_sliceable(self): - """Test that it doesn't raise StopIteration prematurely.""" - self.assertEqual(mi.last([3]), 3) - - def test_empty_stop_iteration_sliceable(self): - """It should raise ValueError for empty slice-able iterables.""" - self.assertRaises(ValueError, lambda: mi.last([])) - - def test_default_sliceable(self): - """It should return the provided default arg for empty slice-able - iterables. - """ - self.assertEqual(mi.last([], 'boo'), 'boo') - - def test_dict(self): - """last(dic) and last(dic.keys()) should return same result.""" - dic = {'a': 1, 'b': 2, 'c': 3} - self.assertEqual(mi.last(dic), mi.last(dic.keys())) - - def test_ordereddict(self): - """last(dic) should return the last key.""" - od = OrderedDict() - od['a'] = 1 - od['b'] = 2 - od['c'] = 3 - self.assertEqual(mi.last(od), 'c') - - def test_customrange(self): - """It should work on custom class where [] raises AttributeError.""" - self.assertEqual(mi.last(IterOnlyRange(5)), 4) - - -class PeekableTests(TestCase): - """Tests for ``peekable()`` behavor not incidentally covered by testing - ``collate()`` - - """ - def test_peek_default(self): - """Make sure passing a default into ``peek()`` works.""" - p = mi.peekable([]) - self.assertEqual(p.peek(7), 7) - - def test_truthiness(self): - """Make sure a ``peekable`` tests true iff there are items remaining in - the iterable. - - """ - p = mi.peekable([]) - self.assertFalse(p) - - p = mi.peekable(range(3)) - self.assertTrue(p) - - def test_simple_peeking(self): - """Make sure ``next`` and ``peek`` advance and don't advance the - iterator, respectively. - - """ - p = mi.peekable(range(10)) - self.assertEqual(next(p), 0) - self.assertEqual(p.peek(), 1) - self.assertEqual(next(p), 1) - - def test_indexing(self): - """ - Indexing into the peekable shouldn't advance the iterator. - """ - p = mi.peekable('abcdefghijkl') - - # The 0th index is what ``next()`` will return - self.assertEqual(p[0], 'a') - self.assertEqual(next(p), 'a') - - # Indexing further into the peekable shouldn't advance the itertor - self.assertEqual(p[2], 'd') - self.assertEqual(next(p), 'b') - - # The 0th index moves up with the iterator; the last index follows - self.assertEqual(p[0], 'c') - self.assertEqual(p[9], 'l') - - self.assertEqual(next(p), 'c') - self.assertEqual(p[8], 'l') - - # Negative indexing should work too - self.assertEqual(p[-2], 'k') - self.assertEqual(p[-9], 'd') - self.assertRaises(IndexError, lambda: p[-10]) - - def test_slicing(self): - """Slicing the peekable shouldn't advance the iterator.""" - seq = list('abcdefghijkl') - p = mi.peekable(seq) - - # Slicing the peekable should just be like slicing a re-iterable - self.assertEqual(p[1:4], seq[1:4]) - - # Advancing the iterator moves the slices up also - self.assertEqual(next(p), 'a') - self.assertEqual(p[1:4], seq[1:][1:4]) - - # Implicit starts and stop should work - self.assertEqual(p[:5], seq[1:][:5]) - self.assertEqual(p[:], seq[1:][:]) - - # Indexing past the end should work - self.assertEqual(p[:100], seq[1:][:100]) - - # Steps should work, including negative - self.assertEqual(p[::2], seq[1:][::2]) - self.assertEqual(p[::-1], seq[1:][::-1]) - - def test_slicing_reset(self): - """Test slicing on a fresh iterable each time""" - iterable = ['0', '1', '2', '3', '4', '5'] - indexes = list(range(-4, len(iterable) + 4)) + [None] - steps = [1, 2, 3, 4, -1, -2, -3, 4] - for slice_args in product(indexes, indexes, steps): - it = iter(iterable) - p = mi.peekable(it) - next(p) - index = slice(*slice_args) - actual = p[index] - expected = iterable[1:][index] - self.assertEqual(actual, expected, slice_args) - - def test_slicing_error(self): - iterable = '01234567' - p = mi.peekable(iter(iterable)) - - # Prime the cache - p.peek() - old_cache = list(p._cache) - - # Illegal slice - with self.assertRaises(ValueError): - p[1:-1:0] - - # Neither the cache nor the iteration should be affected - self.assertEqual(old_cache, list(p._cache)) - self.assertEqual(list(p), list(iterable)) - - def test_passthrough(self): - """Iterating a peekable without using ``peek()`` or ``prepend()`` - should just give the underlying iterable's elements (a trivial test but - useful to set a baseline in case something goes wrong)""" - expected = [1, 2, 3, 4, 5] - actual = list(mi.peekable(expected)) - self.assertEqual(actual, expected) - - # prepend() behavior tests - - def test_prepend(self): - """Tests intersperesed ``prepend()`` and ``next()`` calls""" - it = mi.peekable(range(2)) - actual = [] - - # Test prepend() before next() - it.prepend(10) - actual += [next(it), next(it)] - - # Test prepend() between next()s - it.prepend(11) - actual += [next(it), next(it)] - - # Test prepend() after source iterable is consumed - it.prepend(12) - actual += [next(it)] - - expected = [10, 0, 11, 1, 12] - self.assertEqual(actual, expected) - - def test_multi_prepend(self): - """Tests prepending multiple items and getting them in proper order""" - it = mi.peekable(range(5)) - actual = [next(it), next(it)] - it.prepend(10, 11, 12) - it.prepend(20, 21) - actual += list(it) - expected = [0, 1, 20, 21, 10, 11, 12, 2, 3, 4] - self.assertEqual(actual, expected) - - def test_empty(self): - """Tests prepending in front of an empty iterable""" - it = mi.peekable([]) - it.prepend(10) - actual = list(it) - expected = [10] - self.assertEqual(actual, expected) - - def test_prepend_truthiness(self): - """Tests that ``__bool__()`` or ``__nonzero__()`` works properly - with ``prepend()``""" - it = mi.peekable(range(5)) - self.assertTrue(it) - actual = list(it) - self.assertFalse(it) - it.prepend(10) - self.assertTrue(it) - actual += [next(it)] - self.assertFalse(it) - expected = [0, 1, 2, 3, 4, 10] - self.assertEqual(actual, expected) - - def test_multi_prepend_peek(self): - """Tests prepending multiple elements and getting them in reverse order - while peeking""" - it = mi.peekable(range(5)) - actual = [next(it), next(it)] - self.assertEqual(it.peek(), 2) - it.prepend(10, 11, 12) - self.assertEqual(it.peek(), 10) - it.prepend(20, 21) - self.assertEqual(it.peek(), 20) - actual += list(it) - self.assertFalse(it) - expected = [0, 1, 20, 21, 10, 11, 12, 2, 3, 4] - self.assertEqual(actual, expected) - - def test_prepend_after_stop(self): - """Test resuming iteration after a previous exhaustion""" - it = mi.peekable(range(3)) - self.assertEqual(list(it), [0, 1, 2]) - self.assertRaises(StopIteration, lambda: next(it)) - it.prepend(10) - self.assertEqual(next(it), 10) - self.assertRaises(StopIteration, lambda: next(it)) - - def test_prepend_slicing(self): - """Tests interaction between prepending and slicing""" - seq = list(range(20)) - p = mi.peekable(seq) - - p.prepend(30, 40, 50) - pseq = [30, 40, 50] + seq # pseq for prepended_seq - - # adapt the specific tests from test_slicing - self.assertEqual(p[0], 30) - self.assertEqual(p[1:8], pseq[1:8]) - self.assertEqual(p[1:], pseq[1:]) - self.assertEqual(p[:5], pseq[:5]) - self.assertEqual(p[:], pseq[:]) - self.assertEqual(p[:100], pseq[:100]) - self.assertEqual(p[::2], pseq[::2]) - self.assertEqual(p[::-1], pseq[::-1]) - - def test_prepend_indexing(self): - """Tests interaction between prepending and indexing""" - seq = list(range(20)) - p = mi.peekable(seq) - - p.prepend(30, 40, 50) - - self.assertEqual(p[0], 30) - self.assertEqual(next(p), 30) - self.assertEqual(p[2], 0) - self.assertEqual(next(p), 40) - self.assertEqual(p[0], 50) - self.assertEqual(p[9], 8) - self.assertEqual(next(p), 50) - self.assertEqual(p[8], 8) - self.assertEqual(p[-2], 18) - self.assertEqual(p[-9], 11) - self.assertRaises(IndexError, lambda: p[-21]) - - def test_prepend_iterable(self): - """Tests prepending from an iterable""" - it = mi.peekable(range(5)) - # Don't directly use the range() object to avoid any range-specific - # optimizations - it.prepend(*(x for x in range(5))) - actual = list(it) - expected = list(chain(range(5), range(5))) - self.assertEqual(actual, expected) - - def test_prepend_many(self): - """Tests that prepending a huge number of elements works""" - it = mi.peekable(range(5)) - # Don't directly use the range() object to avoid any range-specific - # optimizations - it.prepend(*(x for x in range(20000))) - actual = list(it) - expected = list(chain(range(20000), range(5))) - self.assertEqual(actual, expected) - - def test_prepend_reversed(self): - """Tests prepending from a reversed iterable""" - it = mi.peekable(range(3)) - it.prepend(*reversed((10, 11, 12))) - actual = list(it) - expected = [12, 11, 10, 0, 1, 2] - self.assertEqual(actual, expected) - - -class ConsumerTests(TestCase): - """Tests for ``consumer()``""" - - def test_consumer(self): - @mi.consumer - def eater(): - while True: - x = yield # noqa - - e = eater() - e.send('hi') # without @consumer, would raise TypeError - - -class DistinctPermutationsTests(TestCase): - def test_distinct_permutations(self): - """Make sure the output for ``distinct_permutations()`` is the same as - set(permutations(it)). - - """ - iterable = ['z', 'a', 'a', 'q', 'q', 'q', 'y'] - test_output = sorted(mi.distinct_permutations(iterable)) - ref_output = sorted(set(permutations(iterable))) - self.assertEqual(test_output, ref_output) - - def test_other_iterables(self): - """Make sure ``distinct_permutations()`` accepts a different type of - iterables. - - """ - # a generator - iterable = (c for c in ['z', 'a', 'a', 'q', 'q', 'q', 'y']) - test_output = sorted(mi.distinct_permutations(iterable)) - # "reload" it - iterable = (c for c in ['z', 'a', 'a', 'q', 'q', 'q', 'y']) - ref_output = sorted(set(permutations(iterable))) - self.assertEqual(test_output, ref_output) - - # an iterator - iterable = iter(['z', 'a', 'a', 'q', 'q', 'q', 'y']) - test_output = sorted(mi.distinct_permutations(iterable)) - # "reload" it - iterable = iter(['z', 'a', 'a', 'q', 'q', 'q', 'y']) - ref_output = sorted(set(permutations(iterable))) - self.assertEqual(test_output, ref_output) - - -class IlenTests(TestCase): - def test_ilen(self): - """Sanity-checks for ``ilen()``.""" - # Non-empty - self.assertEqual( - mi.ilen(filter(lambda x: x % 10 == 0, range(101))), 11 - ) - - # Empty - self.assertEqual(mi.ilen((x for x in range(0))), 0) - - # Iterable with __len__ - self.assertEqual(mi.ilen(list(range(6))), 6) - - -class WithIterTests(TestCase): - def test_with_iter(self): - s = StringIO('One fish\nTwo fish') - initial_words = [line.split()[0] for line in mi.with_iter(s)] - - # Iterable's items should be faithfully represented - self.assertEqual(initial_words, ['One', 'Two']) - # The file object should be closed - self.assertTrue(s.closed) - - -class OneTests(TestCase): - def test_basic(self): - it = iter(['item']) - self.assertEqual(mi.one(it), 'item') - - def test_too_short(self): - it = iter([]) - self.assertRaises(ValueError, lambda: mi.one(it)) - self.assertRaises(IndexError, lambda: mi.one(it, too_short=IndexError)) - - def test_too_long(self): - it = count() - self.assertRaises(ValueError, lambda: mi.one(it)) # burn 0 and 1 - self.assertEqual(next(it), 2) - self.assertRaises( - OverflowError, lambda: mi.one(it, too_long=OverflowError) - ) - - -class IntersperseTest(TestCase): - """ Tests for intersperse() """ - - def test_even(self): - iterable = (x for x in '01') - self.assertEqual( - list(mi.intersperse(None, iterable)), ['0', None, '1'] - ) - - def test_odd(self): - iterable = (x for x in '012') - self.assertEqual( - list(mi.intersperse(None, iterable)), ['0', None, '1', None, '2'] - ) - - def test_nested(self): - element = ('a', 'b') - iterable = (x for x in '012') - actual = list(mi.intersperse(element, iterable)) - expected = ['0', ('a', 'b'), '1', ('a', 'b'), '2'] - self.assertEqual(actual, expected) - - def test_not_iterable(self): - self.assertRaises(TypeError, lambda: mi.intersperse('x', 1)) - - def test_n(self): - for n, element, expected in [ - (1, '_', ['0', '_', '1', '_', '2', '_', '3', '_', '4', '_', '5']), - (2, '_', ['0', '1', '_', '2', '3', '_', '4', '5']), - (3, '_', ['0', '1', '2', '_', '3', '4', '5']), - (4, '_', ['0', '1', '2', '3', '_', '4', '5']), - (5, '_', ['0', '1', '2', '3', '4', '_', '5']), - (6, '_', ['0', '1', '2', '3', '4', '5']), - (7, '_', ['0', '1', '2', '3', '4', '5']), - (3, ['a', 'b'], ['0', '1', '2', ['a', 'b'], '3', '4', '5']), - ]: - iterable = (x for x in '012345') - actual = list(mi.intersperse(element, iterable, n=n)) - self.assertEqual(actual, expected) - - def test_n_zero(self): - self.assertRaises( - ValueError, lambda: list(mi.intersperse('x', '012', n=0)) - ) - - -class UniqueToEachTests(TestCase): - """Tests for ``unique_to_each()``""" - - def test_all_unique(self): - """When all the input iterables are unique the output should match - the input.""" - iterables = [[1, 2], [3, 4, 5], [6, 7, 8]] - self.assertEqual(mi.unique_to_each(*iterables), iterables) - - def test_duplicates(self): - """When there are duplicates in any of the input iterables that aren't - in the rest, those duplicates should be emitted.""" - iterables = ["mississippi", "missouri"] - self.assertEqual( - mi.unique_to_each(*iterables), [['p', 'p'], ['o', 'u', 'r']] - ) - - def test_mixed(self): - """When the input iterables contain different types the function should - still behave properly""" - iterables = ['x', (i for i in range(3)), [1, 2, 3], tuple()] - self.assertEqual(mi.unique_to_each(*iterables), [['x'], [0], [3], []]) - - -class WindowedTests(TestCase): - """Tests for ``windowed()``""" - - def test_basic(self): - actual = list(mi.windowed([1, 2, 3, 4, 5], 3)) - expected = [(1, 2, 3), (2, 3, 4), (3, 4, 5)] - self.assertEqual(actual, expected) - - def test_large_size(self): - """ - When the window size is larger than the iterable, and no fill value is - given,``None`` should be filled in. - """ - actual = list(mi.windowed([1, 2, 3, 4, 5], 6)) - expected = [(1, 2, 3, 4, 5, None)] - self.assertEqual(actual, expected) - - def test_fillvalue(self): - """ - When sizes don't match evenly, the given fill value should be used. - """ - iterable = [1, 2, 3, 4, 5] - - for n, kwargs, expected in [ - (6, {}, [(1, 2, 3, 4, 5, '!')]), # n > len(iterable) - (3, {'step': 3}, [(1, 2, 3), (4, 5, '!')]), # using ``step`` - ]: - actual = list(mi.windowed(iterable, n, fillvalue='!', **kwargs)) - self.assertEqual(actual, expected) - - def test_zero(self): - """When the window size is zero, an empty tuple should be emitted.""" - actual = list(mi.windowed([1, 2, 3, 4, 5], 0)) - expected = [tuple()] - self.assertEqual(actual, expected) - - def test_negative(self): - """When the window size is negative, ValueError should be raised.""" - with self.assertRaises(ValueError): - list(mi.windowed([1, 2, 3, 4, 5], -1)) - - def test_step(self): - """The window should advance by the number of steps provided""" - iterable = [1, 2, 3, 4, 5, 6, 7] - for n, step, expected in [ - (3, 2, [(1, 2, 3), (3, 4, 5), (5, 6, 7)]), # n > step - (3, 3, [(1, 2, 3), (4, 5, 6), (7, None, None)]), # n == step - (3, 4, [(1, 2, 3), (5, 6, 7)]), # line up nicely - (3, 5, [(1, 2, 3), (6, 7, None)]), # off by one - (3, 6, [(1, 2, 3), (7, None, None)]), # off by two - (3, 7, [(1, 2, 3)]), # step past the end - (7, 8, [(1, 2, 3, 4, 5, 6, 7)]), # step > len(iterable) - ]: - actual = list(mi.windowed(iterable, n, step=step)) - self.assertEqual(actual, expected) - - # Step must be greater than or equal to 1 - with self.assertRaises(ValueError): - list(mi.windowed(iterable, 3, step=0)) - - -class SubstringsTests(TestCase): - def test_basic(self): - iterable = (x for x in range(4)) - actual = list(mi.substrings(iterable)) - expected = [ - (0,), - (1,), - (2,), - (3,), - (0, 1), - (1, 2), - (2, 3), - (0, 1, 2), - (1, 2, 3), - (0, 1, 2, 3), - ] - self.assertEqual(actual, expected) - - def test_strings(self): - iterable = 'abc' - actual = list(mi.substrings(iterable)) - expected = [ - ('a',), ('b',), ('c',), ('a', 'b'), ('b', 'c'), ('a', 'b', 'c') - ] - self.assertEqual(actual, expected) - - def test_empty(self): - iterable = iter([]) - actual = list(mi.substrings(iterable)) - expected = [] - self.assertEqual(actual, expected) - - def test_order(self): - iterable = [2, 0, 1] - actual = list(mi.substrings(iterable)) - expected = [(2,), (0,), (1,), (2, 0), (0, 1), (2, 0, 1)] - self.assertEqual(actual, expected) - - -class BucketTests(TestCase): - """Tests for ``bucket()``""" - - def test_basic(self): - iterable = [10, 20, 30, 11, 21, 31, 12, 22, 23, 33] - D = mi.bucket(iterable, key=lambda x: 10 * (x // 10)) - - # In-order access - self.assertEqual(list(D[10]), [10, 11, 12]) - - # Out of order access - self.assertEqual(list(D[30]), [30, 31, 33]) - self.assertEqual(list(D[20]), [20, 21, 22, 23]) - - self.assertEqual(list(D[40]), []) # Nothing in here! - - def test_in(self): - iterable = [10, 20, 30, 11, 21, 31, 12, 22, 23, 33] - D = mi.bucket(iterable, key=lambda x: 10 * (x // 10)) - - self.assertIn(10, D) - self.assertNotIn(40, D) - self.assertIn(20, D) - self.assertNotIn(21, D) - - # Checking in-ness shouldn't advance the iterator - self.assertEqual(next(D[10]), 10) - - def test_validator(self): - iterable = count(0) - key = lambda x: int(str(x)[0]) # First digit of each number - validator = lambda x: 0 < x < 10 # No leading zeros - D = mi.bucket(iterable, key, validator=validator) - self.assertEqual(mi.take(3, D[1]), [1, 10, 11]) - self.assertNotIn(0, D) # Non-valid entries don't return True - self.assertNotIn(0, D._cache) # Don't store non-valid entries - self.assertEqual(list(D[0]), []) - - -class SpyTests(TestCase): - """Tests for ``spy()``""" - - def test_basic(self): - original_iterable = iter('abcdefg') - head, new_iterable = mi.spy(original_iterable) - self.assertEqual(head, ['a']) - self.assertEqual( - list(new_iterable), ['a', 'b', 'c', 'd', 'e', 'f', 'g'] - ) - - def test_unpacking(self): - original_iterable = iter('abcdefg') - (first, second, third), new_iterable = mi.spy(original_iterable, 3) - self.assertEqual(first, 'a') - self.assertEqual(second, 'b') - self.assertEqual(third, 'c') - self.assertEqual( - list(new_iterable), ['a', 'b', 'c', 'd', 'e', 'f', 'g'] - ) - - def test_too_many(self): - original_iterable = iter('abc') - head, new_iterable = mi.spy(original_iterable, 4) - self.assertEqual(head, ['a', 'b', 'c']) - self.assertEqual(list(new_iterable), ['a', 'b', 'c']) - - def test_zero(self): - original_iterable = iter('abc') - head, new_iterable = mi.spy(original_iterable, 0) - self.assertEqual(head, []) - self.assertEqual(list(new_iterable), ['a', 'b', 'c']) - - -class InterleaveTests(TestCase): - def test_even(self): - actual = list(mi.interleave([1, 4, 7], [2, 5, 8], [3, 6, 9])) - expected = [1, 2, 3, 4, 5, 6, 7, 8, 9] - self.assertEqual(actual, expected) - - def test_short(self): - actual = list(mi.interleave([1, 4], [2, 5, 7], [3, 6, 8])) - expected = [1, 2, 3, 4, 5, 6] - self.assertEqual(actual, expected) - - def test_mixed_types(self): - it_list = ['a', 'b', 'c', 'd'] - it_str = '12345' - it_inf = count() - actual = list(mi.interleave(it_list, it_str, it_inf)) - expected = ['a', '1', 0, 'b', '2', 1, 'c', '3', 2, 'd', '4', 3] - self.assertEqual(actual, expected) - - -class InterleaveLongestTests(TestCase): - def test_even(self): - actual = list(mi.interleave_longest([1, 4, 7], [2, 5, 8], [3, 6, 9])) - expected = [1, 2, 3, 4, 5, 6, 7, 8, 9] - self.assertEqual(actual, expected) - - def test_short(self): - actual = list(mi.interleave_longest([1, 4], [2, 5, 7], [3, 6, 8])) - expected = [1, 2, 3, 4, 5, 6, 7, 8] - self.assertEqual(actual, expected) - - def test_mixed_types(self): - it_list = ['a', 'b', 'c', 'd'] - it_str = '12345' - it_gen = (x for x in range(3)) - actual = list(mi.interleave_longest(it_list, it_str, it_gen)) - expected = ['a', '1', 0, 'b', '2', 1, 'c', '3', 2, 'd', '4', '5'] - self.assertEqual(actual, expected) - - -class TestCollapse(TestCase): - """Tests for ``collapse()``""" - - def test_collapse(self): - l = [[1], 2, [[3], 4], [[[5]]]] - self.assertEqual(list(mi.collapse(l)), [1, 2, 3, 4, 5]) - - def test_collapse_to_string(self): - l = [["s1"], "s2", [["s3"], "s4"], [[["s5"]]]] - self.assertEqual(list(mi.collapse(l)), ["s1", "s2", "s3", "s4", "s5"]) - - def test_collapse_flatten(self): - l = [[1], [2], [[3], 4], [[[5]]]] - self.assertEqual(list(mi.collapse(l, levels=1)), list(mi.flatten(l))) - - def test_collapse_to_level(self): - l = [[1], 2, [[3], 4], [[[5]]]] - self.assertEqual(list(mi.collapse(l, levels=2)), [1, 2, 3, 4, [5]]) - self.assertEqual( - list(mi.collapse(mi.collapse(l, levels=1), levels=1)), - list(mi.collapse(l, levels=2)) - ) - - def test_collapse_to_list(self): - l = (1, [2], (3, [4, (5,)], 'ab')) - actual = list(mi.collapse(l, base_type=list)) - expected = [1, [2], 3, [4, (5,)], 'ab'] - self.assertEqual(actual, expected) - - -class SideEffectTests(TestCase): - """Tests for ``side_effect()``""" - - def test_individual(self): - # The function increments the counter for each call - counter = [0] - - def func(arg): - counter[0] += 1 - - result = list(mi.side_effect(func, range(10))) - self.assertEqual(result, list(range(10))) - self.assertEqual(counter[0], 10) - - def test_chunked(self): - # The function increments the counter for each call - counter = [0] - - def func(arg): - counter[0] += 1 - - result = list(mi.side_effect(func, range(10), 2)) - self.assertEqual(result, list(range(10))) - self.assertEqual(counter[0], 5) - - def test_before_after(self): - f = StringIO() - collector = [] - - def func(item): - print(item, file=f) - collector.append(f.getvalue()) - - def it(): - yield 'a' - yield 'b' - raise RuntimeError('kaboom') - - before = lambda: print('HEADER', file=f) - after = f.close - - try: - mi.consume(mi.side_effect(func, it(), before=before, after=after)) - except RuntimeError: - pass - - # The iterable should have been written to the file - self.assertEqual(collector, ['HEADER\na\n', 'HEADER\na\nb\n']) - - # The file should be closed even though something bad happened - self.assertTrue(f.closed) - - def test_before_fails(self): - f = StringIO() - func = lambda x: print(x, file=f) - - def before(): - raise RuntimeError('ouch') - - try: - mi.consume( - mi.side_effect(func, 'abc', before=before, after=f.close) - ) - except RuntimeError: - pass - - # The file should be closed even though something bad happened in the - # before function - self.assertTrue(f.closed) - - -class SlicedTests(TestCase): - """Tests for ``sliced()``""" - - def test_even(self): - """Test when the length of the sequence is divisible by *n*""" - seq = 'ABCDEFGHI' - self.assertEqual(list(mi.sliced(seq, 3)), ['ABC', 'DEF', 'GHI']) - - def test_odd(self): - """Test when the length of the sequence is not divisible by *n*""" - seq = 'ABCDEFGHI' - self.assertEqual(list(mi.sliced(seq, 4)), ['ABCD', 'EFGH', 'I']) - - def test_not_sliceable(self): - seq = (x for x in 'ABCDEFGHI') - - with self.assertRaises(TypeError): - list(mi.sliced(seq, 3)) - - -class SplitAtTests(TestCase): - """Tests for ``split()``""" - - def comp_with_str_split(self, str_to_split, delim): - pred = lambda c: c == delim - actual = list(map(''.join, mi.split_at(str_to_split, pred))) - expected = str_to_split.split(delim) - self.assertEqual(actual, expected) - - def test_seperators(self): - test_strs = ['', 'abcba', 'aaabbbcccddd', 'e'] - for s, delim in product(test_strs, 'abcd'): - self.comp_with_str_split(s, delim) - - -class SplitBeforeTest(TestCase): - """Tests for ``split_before()``""" - - def test_starts_with_sep(self): - actual = list(mi.split_before('xooxoo', lambda c: c == 'x')) - expected = [['x', 'o', 'o'], ['x', 'o', 'o']] - self.assertEqual(actual, expected) - - def test_ends_with_sep(self): - actual = list(mi.split_before('ooxoox', lambda c: c == 'x')) - expected = [['o', 'o'], ['x', 'o', 'o'], ['x']] - self.assertEqual(actual, expected) - - def test_no_sep(self): - actual = list(mi.split_before('ooo', lambda c: c == 'x')) - expected = [['o', 'o', 'o']] - self.assertEqual(actual, expected) - - -class SplitAfterTest(TestCase): - """Tests for ``split_after()``""" - - def test_starts_with_sep(self): - actual = list(mi.split_after('xooxoo', lambda c: c == 'x')) - expected = [['x'], ['o', 'o', 'x'], ['o', 'o']] - self.assertEqual(actual, expected) - - def test_ends_with_sep(self): - actual = list(mi.split_after('ooxoox', lambda c: c == 'x')) - expected = [['o', 'o', 'x'], ['o', 'o', 'x']] - self.assertEqual(actual, expected) - - def test_no_sep(self): - actual = list(mi.split_after('ooo', lambda c: c == 'x')) - expected = [['o', 'o', 'o']] - self.assertEqual(actual, expected) - - -class SplitIntoTests(TestCase): - """Tests for ``split_into()``""" - - def test_iterable_just_right(self): - """Size of ``iterable`` equals the sum of ``sizes``.""" - iterable = [1, 2, 3, 4, 5, 6, 7, 8, 9] - sizes = [2, 3, 4] - expected = [[1, 2], [3, 4, 5], [6, 7, 8, 9]] - actual = list(mi.split_into(iterable, sizes)) - self.assertEqual(actual, expected) - - def test_iterable_too_small(self): - """Size of ``iterable`` is smaller than sum of ``sizes``. Last return - list is shorter as a result.""" - iterable = [1, 2, 3, 4, 5, 6, 7] - sizes = [2, 3, 4] - expected = [[1, 2], [3, 4, 5], [6, 7]] - actual = list(mi.split_into(iterable, sizes)) - self.assertEqual(actual, expected) - - def test_iterable_too_small_extra(self): - """Size of ``iterable`` is smaller than sum of ``sizes``. Second last - return list is shorter and last return list is empty as a result.""" - iterable = [1, 2, 3, 4, 5, 6, 7] - sizes = [2, 3, 4, 5] - expected = [[1, 2], [3, 4, 5], [6, 7], []] - actual = list(mi.split_into(iterable, sizes)) - self.assertEqual(actual, expected) - - def test_iterable_too_large(self): - """Size of ``iterable`` is larger than sum of ``sizes``. Not all - items of iterable are returned.""" - iterable = [1, 2, 3, 4, 5, 6, 7, 8, 9] - sizes = [2, 3, 2] - expected = [[1, 2], [3, 4, 5], [6, 7]] - actual = list(mi.split_into(iterable, sizes)) - self.assertEqual(actual, expected) - - def test_using_none_with_leftover(self): - """Last item of ``sizes`` is None when items still remain in - ``iterable``. Last list returned stretches to fit all remaining items - of ``iterable``.""" - iterable = [1, 2, 3, 4, 5, 6, 7, 8, 9] - sizes = [2, 3, None] - expected = [[1, 2], [3, 4, 5], [6, 7, 8, 9]] - actual = list(mi.split_into(iterable, sizes)) - self.assertEqual(actual, expected) - - def test_using_none_without_leftover(self): - """Last item of ``sizes`` is None when no items remain in - ``iterable``. Last list returned is empty.""" - iterable = [1, 2, 3, 4, 5, 6, 7, 8, 9] - sizes = [2, 3, 4, None] - expected = [[1, 2], [3, 4, 5], [6, 7, 8, 9], []] - actual = list(mi.split_into(iterable, sizes)) - self.assertEqual(actual, expected) - - def test_using_none_mid_sizes(self): - """None is present in ``sizes`` but is not the last item. Last list - returned stretches to fit all remaining items of ``iterable`` but - all items in ``sizes`` after None are ignored.""" - iterable = [1, 2, 3, 4, 5, 6, 7, 8, 9] - sizes = [2, 3, None, 4] - expected = [[1, 2], [3, 4, 5], [6, 7, 8, 9]] - actual = list(mi.split_into(iterable, sizes)) - self.assertEqual(actual, expected) - - def test_iterable_empty(self): - """``iterable`` argument is empty but ``sizes`` is not. An empty - list is returned for each item in ``sizes``.""" - iterable = [] - sizes = [2, 4, 2] - expected = [[], [], []] - actual = list(mi.split_into(iterable, sizes)) - self.assertEqual(actual, expected) - - def test_iterable_empty_using_none(self): - """``iterable`` argument is empty but ``sizes`` is not. An empty - list is returned for each item in ``sizes`` that is not after a - None item.""" - iterable = [] - sizes = [2, 4, None, 2] - expected = [[], [], []] - actual = list(mi.split_into(iterable, sizes)) - self.assertEqual(actual, expected) - - def test_sizes_empty(self): - """``sizes`` argument is empty but ``iterable`` is not. An empty - generator is returned.""" - iterable = [1, 2, 3, 4, 5, 6, 7, 8, 9] - sizes = [] - expected = [] - actual = list(mi.split_into(iterable, sizes)) - self.assertEqual(actual, expected) - - def test_both_empty(self): - """Both ``sizes`` and ``iterable`` arguments are empty. An empty - generator is returned.""" - iterable = [] - sizes = [] - expected = [] - actual = list(mi.split_into(iterable, sizes)) - self.assertEqual(actual, expected) - - def test_bool_in_sizes(self): - """A bool object is present in ``sizes`` is treated as a 1 or 0 for - ``True`` or ``False`` due to bool being an instance of int.""" - iterable = [1, 2, 3, 4, 5, 6, 7, 8, 9] - sizes = [3, True, 2, False] - expected = [[1, 2, 3], [4], [5, 6], []] - actual = list(mi.split_into(iterable, sizes)) - self.assertEqual(actual, expected) - - def test_invalid_in_sizes(self): - """A ValueError is raised if an object in ``sizes`` is neither ``None`` - or an integer.""" - iterable = [1, 2, 3, 4, 5, 6, 7, 8, 9] - sizes = [1, [], 3] - with self.assertRaises(ValueError): - list(mi.split_into(iterable, sizes)) - - def test_invalid_in_sizes_after_none(self): - """A item in ``sizes`` that is invalid will not raise a TypeError if it - comes after a ``None`` item.""" - iterable = [1, 2, 3, 4, 5, 6, 7, 8, 9] - sizes = [3, 4, None, []] - expected = [[1, 2, 3], [4, 5, 6, 7], [8, 9]] - actual = list(mi.split_into(iterable, sizes)) - self.assertEqual(actual, expected) - - def test_generator_iterable_integrity(self): - """Check that if ``iterable`` is an iterator, it is consumed only by as - many items as the sum of ``sizes``.""" - iterable = (i for i in range(10)) - sizes = [2, 3] - - expected = [[0, 1], [2, 3, 4]] - actual = list(mi.split_into(iterable, sizes)) - self.assertEqual(actual, expected) - - iterable_expected = [5, 6, 7, 8, 9] - iterable_actual = list(iterable) - self.assertEqual(iterable_actual, iterable_expected) - - def test_generator_sizes_integrity(self): - """Check that if ``sizes`` is an iterator, it is consumed only until a - ``None`` item is reached""" - iterable = [1, 2, 3, 4, 5, 6, 7, 8, 9] - sizes = (i for i in [1, 2, None, 3, 4]) - - expected = [[1], [2, 3], [4, 5, 6, 7, 8, 9]] - actual = list(mi.split_into(iterable, sizes)) - self.assertEqual(actual, expected) - - sizes_expected = [3, 4] - sizes_actual = list(sizes) - self.assertEqual(sizes_actual, sizes_expected) - - -class PaddedTest(TestCase): - """Tests for ``padded()``""" - - def test_no_n(self): - seq = [1, 2, 3] - - # No fillvalue - self.assertEqual(mi.take(5, mi.padded(seq)), [1, 2, 3, None, None]) - - # With fillvalue - self.assertEqual( - mi.take(5, mi.padded(seq, fillvalue='')), [1, 2, 3, '', ''] - ) - - def test_invalid_n(self): - self.assertRaises(ValueError, lambda: list(mi.padded([1, 2, 3], n=-1))) - self.assertRaises(ValueError, lambda: list(mi.padded([1, 2, 3], n=0))) - - def test_valid_n(self): - seq = [1, 2, 3, 4, 5] - - # No need for padding: len(seq) <= n - self.assertEqual(list(mi.padded(seq, n=4)), [1, 2, 3, 4, 5]) - self.assertEqual(list(mi.padded(seq, n=5)), [1, 2, 3, 4, 5]) - - # No fillvalue - self.assertEqual( - list(mi.padded(seq, n=7)), [1, 2, 3, 4, 5, None, None] - ) - - # With fillvalue - self.assertEqual( - list(mi.padded(seq, fillvalue='', n=7)), [1, 2, 3, 4, 5, '', ''] - ) - - def test_next_multiple(self): - seq = [1, 2, 3, 4, 5, 6] - - # No need for padding: len(seq) % n == 0 - self.assertEqual( - list(mi.padded(seq, n=3, next_multiple=True)), [1, 2, 3, 4, 5, 6] - ) - - # Padding needed: len(seq) < n - self.assertEqual( - list(mi.padded(seq, n=8, next_multiple=True)), - [1, 2, 3, 4, 5, 6, None, None] - ) - - # No padding needed: len(seq) == n - self.assertEqual( - list(mi.padded(seq, n=6, next_multiple=True)), [1, 2, 3, 4, 5, 6] - ) - - # Padding needed: len(seq) > n - self.assertEqual( - list(mi.padded(seq, n=4, next_multiple=True)), - [1, 2, 3, 4, 5, 6, None, None] - ) - - # With fillvalue - self.assertEqual( - list(mi.padded(seq, fillvalue='', n=4, next_multiple=True)), - [1, 2, 3, 4, 5, 6, '', ''] - ) - - -class DistributeTest(TestCase): - """Tests for distribute()""" - - def test_invalid_n(self): - self.assertRaises(ValueError, lambda: mi.distribute(-1, [1, 2, 3])) - self.assertRaises(ValueError, lambda: mi.distribute(0, [1, 2, 3])) - - def test_basic(self): - iterable = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - - for n, expected in [ - (1, [iterable]), - (2, [[1, 3, 5, 7, 9], [2, 4, 6, 8, 10]]), - (3, [[1, 4, 7, 10], [2, 5, 8], [3, 6, 9]]), - (10, [[n] for n in range(1, 10 + 1)]), - ]: - self.assertEqual( - [list(x) for x in mi.distribute(n, iterable)], expected - ) - - def test_large_n(self): - iterable = [1, 2, 3, 4] - self.assertEqual( - [list(x) for x in mi.distribute(6, iterable)], - [[1], [2], [3], [4], [], []] - ) - - -class StaggerTest(TestCase): - """Tests for ``stagger()``""" - - def test_default(self): - iterable = [0, 1, 2, 3] - actual = list(mi.stagger(iterable)) - expected = [(None, 0, 1), (0, 1, 2), (1, 2, 3)] - self.assertEqual(actual, expected) - - def test_offsets(self): - iterable = [0, 1, 2, 3] - for offsets, expected in [ - ((-2, 0, 2), [('', 0, 2), ('', 1, 3)]), - ((-2, -1), [('', ''), ('', 0), (0, 1), (1, 2), (2, 3)]), - ((1, 2), [(1, 2), (2, 3)]), - ]: - all_groups = mi.stagger(iterable, offsets=offsets, fillvalue='') - self.assertEqual(list(all_groups), expected) - - def test_longest(self): - iterable = [0, 1, 2, 3] - for offsets, expected in [ - ( - (-1, 0, 1), - [('', 0, 1), (0, 1, 2), (1, 2, 3), (2, 3, ''), (3, '', '')] - ), - ((-2, -1), [('', ''), ('', 0), (0, 1), (1, 2), (2, 3), (3, '')]), - ((1, 2), [(1, 2), (2, 3), (3, '')]), - ]: - all_groups = mi.stagger( - iterable, offsets=offsets, fillvalue='', longest=True - ) - self.assertEqual(list(all_groups), expected) - - -class ZipOffsetTest(TestCase): - """Tests for ``zip_offset()``""" - - def test_shortest(self): - a_1 = [0, 1, 2, 3] - a_2 = [0, 1, 2, 3, 4, 5] - a_3 = [0, 1, 2, 3, 4, 5, 6, 7] - actual = list( - mi.zip_offset(a_1, a_2, a_3, offsets=(-1, 0, 1), fillvalue='') - ) - expected = [('', 0, 1), (0, 1, 2), (1, 2, 3), (2, 3, 4), (3, 4, 5)] - self.assertEqual(actual, expected) - - def test_longest(self): - a_1 = [0, 1, 2, 3] - a_2 = [0, 1, 2, 3, 4, 5] - a_3 = [0, 1, 2, 3, 4, 5, 6, 7] - actual = list( - mi.zip_offset(a_1, a_2, a_3, offsets=(-1, 0, 1), longest=True) - ) - expected = [ - (None, 0, 1), - (0, 1, 2), - (1, 2, 3), - (2, 3, 4), - (3, 4, 5), - (None, 5, 6), - (None, None, 7), - ] - self.assertEqual(actual, expected) - - def test_mismatch(self): - iterables = [0, 1, 2], [2, 3, 4] - offsets = (-1, 0, 1) - self.assertRaises( - ValueError, - lambda: list(mi.zip_offset(*iterables, offsets=offsets)) - ) - - -class UnzipTests(TestCase): - """Tests for unzip()""" - - def test_empty_iterable(self): - self.assertEqual(list(mi.unzip([])), []) - # in reality zip([], [], []) is equivalent to iter([]) - # but it doesn't hurt to test both - self.assertEqual(list(mi.unzip(zip([], [], []))), []) - - def test_length_one_iterable(self): - xs, ys, zs = mi.unzip(zip([1], [2], [3])) - self.assertEqual(list(xs), [1]) - self.assertEqual(list(ys), [2]) - self.assertEqual(list(zs), [3]) - - def test_normal_case(self): - xs, ys, zs = range(10), range(1, 11), range(2, 12) - zipped = zip(xs, ys, zs) - xs, ys, zs = mi.unzip(zipped) - self.assertEqual(list(xs), list(range(10))) - self.assertEqual(list(ys), list(range(1, 11))) - self.assertEqual(list(zs), list(range(2, 12))) - - def test_improperly_zipped(self): - zipped = iter([(1, 2, 3), (4, 5), (6,)]) - xs, ys, zs = mi.unzip(zipped) - self.assertEqual(list(xs), [1, 4, 6]) - self.assertEqual(list(ys), [2, 5]) - self.assertEqual(list(zs), [3]) - - def test_increasingly_zipped(self): - zipped = iter([(1, 2), (3, 4, 5), (6, 7, 8, 9)]) - unzipped = mi.unzip(zipped) - # from the docstring: - # len(first tuple) is the number of iterables zipped - self.assertEqual(len(unzipped), 2) - xs, ys = unzipped - self.assertEqual(list(xs), [1, 3, 6]) - self.assertEqual(list(ys), [2, 4, 7]) - - -class SortTogetherTest(TestCase): - """Tests for sort_together()""" - - def test_key_list(self): - """tests `key_list` including default, iterables include duplicates""" - iterables = [ - ['GA', 'GA', 'GA', 'CT', 'CT', 'CT'], - ['May', 'Aug.', 'May', 'June', 'July', 'July'], - [97, 20, 100, 70, 100, 20] - ] - - self.assertEqual( - mi.sort_together(iterables), - [ - ('CT', 'CT', 'CT', 'GA', 'GA', 'GA'), - ('June', 'July', 'July', 'May', 'Aug.', 'May'), - (70, 100, 20, 97, 20, 100) - ] - ) - - self.assertEqual( - mi.sort_together(iterables, key_list=(0, 1)), - [ - ('CT', 'CT', 'CT', 'GA', 'GA', 'GA'), - ('July', 'July', 'June', 'Aug.', 'May', 'May'), - (100, 20, 70, 20, 97, 100) - ] - ) - - self.assertEqual( - mi.sort_together(iterables, key_list=(0, 1, 2)), - [ - ('CT', 'CT', 'CT', 'GA', 'GA', 'GA'), - ('July', 'July', 'June', 'Aug.', 'May', 'May'), - (20, 100, 70, 20, 97, 100) - ] - ) - - self.assertEqual( - mi.sort_together(iterables, key_list=(2,)), - [ - ('GA', 'CT', 'CT', 'GA', 'GA', 'CT'), - ('Aug.', 'July', 'June', 'May', 'May', 'July'), - (20, 20, 70, 97, 100, 100) - ] - ) - - def test_invalid_key_list(self): - """tests `key_list` for indexes not available in `iterables`""" - iterables = [ - ['GA', 'GA', 'GA', 'CT', 'CT', 'CT'], - ['May', 'Aug.', 'May', 'June', 'July', 'July'], - [97, 20, 100, 70, 100, 20] - ] - - self.assertRaises( - IndexError, lambda: mi.sort_together(iterables, key_list=(5,)) - ) - - def test_reverse(self): - """tests `reverse` to ensure a reverse sort for `key_list` iterables""" - iterables = [ - ['GA', 'GA', 'GA', 'CT', 'CT', 'CT'], - ['May', 'Aug.', 'May', 'June', 'July', 'July'], - [97, 20, 100, 70, 100, 20] - ] - - self.assertEqual( - mi.sort_together(iterables, key_list=(0, 1, 2), reverse=True), - [('GA', 'GA', 'GA', 'CT', 'CT', 'CT'), - ('May', 'May', 'Aug.', 'June', 'July', 'July'), - (100, 97, 20, 70, 100, 20)] - ) - - def test_uneven_iterables(self): - """tests trimming of iterables to the shortest length before sorting""" - iterables = [['GA', 'GA', 'GA', 'CT', 'CT', 'CT', 'MA'], - ['May', 'Aug.', 'May', 'June', 'July', 'July'], - [97, 20, 100, 70, 100, 20, 0]] - - self.assertEqual( - mi.sort_together(iterables), - [ - ('CT', 'CT', 'CT', 'GA', 'GA', 'GA'), - ('June', 'July', 'July', 'May', 'Aug.', 'May'), - (70, 100, 20, 97, 20, 100) - ] - ) - - -class DivideTest(TestCase): - """Tests for divide()""" - - def test_invalid_n(self): - self.assertRaises(ValueError, lambda: mi.divide(-1, [1, 2, 3])) - self.assertRaises(ValueError, lambda: mi.divide(0, [1, 2, 3])) - - def test_basic(self): - iterable = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - - for n, expected in [ - (1, [iterable]), - (2, [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]]), - (3, [[1, 2, 3, 4], [5, 6, 7], [8, 9, 10]]), - (10, [[n] for n in range(1, 10 + 1)]), - ]: - self.assertEqual( - [list(x) for x in mi.divide(n, iterable)], expected - ) - - def test_large_n(self): - iterable = [1, 2, 3, 4] - self.assertEqual( - [list(x) for x in mi.divide(6, iterable)], - [[1], [2], [3], [4], [], []] - ) - - -class TestAlwaysIterable(TestCase): - """Tests for always_iterable()""" - def test_single(self): - self.assertEqual(list(mi.always_iterable(1)), [1]) - - def test_strings(self): - for obj in ['foo', b'bar', 'baz']: - actual = list(mi.always_iterable(obj)) - expected = [obj] - self.assertEqual(actual, expected) - - def test_base_type(self): - dict_obj = {'a': 1, 'b': 2} - str_obj = '123' - - # Default: dicts are iterable like they normally are - default_actual = list(mi.always_iterable(dict_obj)) - default_expected = list(dict_obj) - self.assertEqual(default_actual, default_expected) - - # Unitary types set: dicts are not iterable - custom_actual = list(mi.always_iterable(dict_obj, base_type=dict)) - custom_expected = [dict_obj] - self.assertEqual(custom_actual, custom_expected) - - # With unitary types set, strings are iterable - str_actual = list(mi.always_iterable(str_obj, base_type=None)) - str_expected = list(str_obj) - self.assertEqual(str_actual, str_expected) - - def test_iterables(self): - self.assertEqual(list(mi.always_iterable([0, 1])), [0, 1]) - self.assertEqual( - list(mi.always_iterable([0, 1], base_type=list)), [[0, 1]] - ) - self.assertEqual( - list(mi.always_iterable(iter('foo'))), ['f', 'o', 'o'] - ) - self.assertEqual(list(mi.always_iterable([])), []) - - def test_none(self): - self.assertEqual(list(mi.always_iterable(None)), []) - - def test_generator(self): - def _gen(): - yield 0 - yield 1 - - self.assertEqual(list(mi.always_iterable(_gen())), [0, 1]) - - -class AdjacentTests(TestCase): - def test_typical(self): - actual = list(mi.adjacent(lambda x: x % 5 == 0, range(10))) - expected = [(True, 0), (True, 1), (False, 2), (False, 3), (True, 4), - (True, 5), (True, 6), (False, 7), (False, 8), (False, 9)] - self.assertEqual(actual, expected) - - def test_empty_iterable(self): - actual = list(mi.adjacent(lambda x: x % 5 == 0, [])) - expected = [] - self.assertEqual(actual, expected) - - def test_length_one(self): - actual = list(mi.adjacent(lambda x: x % 5 == 0, [0])) - expected = [(True, 0)] - self.assertEqual(actual, expected) - - actual = list(mi.adjacent(lambda x: x % 5 == 0, [1])) - expected = [(False, 1)] - self.assertEqual(actual, expected) - - def test_consecutive_true(self): - """Test that when the predicate matches multiple consecutive elements - it doesn't repeat elements in the output""" - actual = list(mi.adjacent(lambda x: x % 5 < 2, range(10))) - expected = [(True, 0), (True, 1), (True, 2), (False, 3), (True, 4), - (True, 5), (True, 6), (True, 7), (False, 8), (False, 9)] - self.assertEqual(actual, expected) - - def test_distance(self): - actual = list(mi.adjacent(lambda x: x % 5 == 0, range(10), distance=2)) - expected = [(True, 0), (True, 1), (True, 2), (True, 3), (True, 4), - (True, 5), (True, 6), (True, 7), (False, 8), (False, 9)] - self.assertEqual(actual, expected) - - actual = list(mi.adjacent(lambda x: x % 5 == 0, range(10), distance=3)) - expected = [(True, 0), (True, 1), (True, 2), (True, 3), (True, 4), - (True, 5), (True, 6), (True, 7), (True, 8), (False, 9)] - self.assertEqual(actual, expected) - - def test_large_distance(self): - """Test distance larger than the length of the iterable""" - iterable = range(10) - actual = list(mi.adjacent(lambda x: x % 5 == 4, iterable, distance=20)) - expected = list(zip(repeat(True), iterable)) - self.assertEqual(actual, expected) - - actual = list(mi.adjacent(lambda x: False, iterable, distance=20)) - expected = list(zip(repeat(False), iterable)) - self.assertEqual(actual, expected) - - def test_zero_distance(self): - """Test that adjacent() reduces to zip+map when distance is 0""" - iterable = range(1000) - predicate = lambda x: x % 4 == 2 - actual = mi.adjacent(predicate, iterable, 0) - expected = zip(map(predicate, iterable), iterable) - self.assertTrue(all(a == e for a, e in zip(actual, expected))) - - def test_negative_distance(self): - """Test that adjacent() raises an error with negative distance""" - pred = lambda x: x - self.assertRaises( - ValueError, lambda: mi.adjacent(pred, range(1000), -1) - ) - self.assertRaises( - ValueError, lambda: mi.adjacent(pred, range(10), -10) - ) - - def test_grouping(self): - """Test interaction of adjacent() with groupby_transform()""" - iterable = mi.adjacent(lambda x: x % 5 == 0, range(10)) - grouper = mi.groupby_transform(iterable, itemgetter(0), itemgetter(1)) - actual = [(k, list(g)) for k, g in grouper] - expected = [ - (True, [0, 1]), - (False, [2, 3]), - (True, [4, 5, 6]), - (False, [7, 8, 9]), - ] - self.assertEqual(actual, expected) - - def test_call_once(self): - """Test that the predicate is only called once per item.""" - already_seen = set() - iterable = range(10) - - def predicate(item): - self.assertNotIn(item, already_seen) - already_seen.add(item) - return True - - actual = list(mi.adjacent(predicate, iterable)) - expected = [(True, x) for x in iterable] - self.assertEqual(actual, expected) - - -class GroupByTransformTests(TestCase): - def assertAllGroupsEqual(self, groupby1, groupby2): - """Compare two groupby objects for equality, both keys and groups.""" - for a, b in zip(groupby1, groupby2): - key1, group1 = a - key2, group2 = b - self.assertEqual(key1, key2) - self.assertListEqual(list(group1), list(group2)) - self.assertRaises(StopIteration, lambda: next(groupby1)) - self.assertRaises(StopIteration, lambda: next(groupby2)) - - def test_default_funcs(self): - """Test that groupby_transform() with default args mimics groupby()""" - iterable = [(x // 5, x) for x in range(1000)] - actual = mi.groupby_transform(iterable) - expected = groupby(iterable) - self.assertAllGroupsEqual(actual, expected) - - def test_valuefunc(self): - iterable = [(int(x / 5), int(x / 3), x) for x in range(10)] - - # Test the standard usage of grouping one iterable using another's keys - grouper = mi.groupby_transform( - iterable, keyfunc=itemgetter(0), valuefunc=itemgetter(-1) - ) - actual = [(k, list(g)) for k, g in grouper] - expected = [(0, [0, 1, 2, 3, 4]), (1, [5, 6, 7, 8, 9])] - self.assertEqual(actual, expected) - - grouper = mi.groupby_transform( - iterable, keyfunc=itemgetter(1), valuefunc=itemgetter(-1) - ) - actual = [(k, list(g)) for k, g in grouper] - expected = [(0, [0, 1, 2]), (1, [3, 4, 5]), (2, [6, 7, 8]), (3, [9])] - self.assertEqual(actual, expected) - - # and now for something a little different - d = dict(zip(range(10), 'abcdefghij')) - grouper = mi.groupby_transform( - range(10), keyfunc=lambda x: x // 5, valuefunc=d.get - ) - actual = [(k, ''.join(g)) for k, g in grouper] - expected = [(0, 'abcde'), (1, 'fghij')] - self.assertEqual(actual, expected) - - def test_no_valuefunc(self): - iterable = range(1000) - - def key(x): - return x // 5 - - actual = mi.groupby_transform(iterable, key, valuefunc=None) - expected = groupby(iterable, key) - self.assertAllGroupsEqual(actual, expected) - - actual = mi.groupby_transform(iterable, key) # default valuefunc - expected = groupby(iterable, key) - self.assertAllGroupsEqual(actual, expected) - - -class NumericRangeTests(TestCase): - def test_basic(self): - for args, expected in [ - ((4,), [0, 1, 2, 3]), - ((4.0,), [0.0, 1.0, 2.0, 3.0]), - ((1.0, 4), [1.0, 2.0, 3.0]), - ((1, 4.0), [1, 2, 3]), - ((1.0, 5), [1.0, 2.0, 3.0, 4.0]), - ((0, 20, 5), [0, 5, 10, 15]), - ((0, 20, 5.0), [0.0, 5.0, 10.0, 15.0]), - ((0, 10, 3), [0, 3, 6, 9]), - ((0, 10, 3.0), [0.0, 3.0, 6.0, 9.0]), - ((0, -5, -1), [0, -1, -2, -3, -4]), - ((0.0, -5, -1), [0.0, -1.0, -2.0, -3.0, -4.0]), - ((1, 2, Fraction(1, 2)), [Fraction(1, 1), Fraction(3, 2)]), - ((0,), []), - ((0.0,), []), - ((1, 0), []), - ((1.0, 0.0), []), - ((Fraction(2, 1),), [Fraction(0, 1), Fraction(1, 1)]), - ((Decimal('2.0'),), [Decimal('0.0'), Decimal('1.0')]), - ]: - actual = list(mi.numeric_range(*args)) - self.assertEqual(actual, expected) - self.assertTrue( - all(type(a) == type(e) for a, e in zip(actual, expected)) - ) - - def test_arg_count(self): - self.assertRaises(TypeError, lambda: list(mi.numeric_range())) - self.assertRaises( - TypeError, lambda: list(mi.numeric_range(0, 1, 2, 3)) - ) - - def test_zero_step(self): - self.assertRaises( - ValueError, lambda: list(mi.numeric_range(1, 2, 0)) - ) - - -class CountCycleTests(TestCase): - def test_basic(self): - expected = [ - (0, 'a'), (0, 'b'), (0, 'c'), - (1, 'a'), (1, 'b'), (1, 'c'), - (2, 'a'), (2, 'b'), (2, 'c'), - ] - for actual in [ - mi.take(9, mi.count_cycle('abc')), # n=None - list(mi.count_cycle('abc', 3)), # n=3 - ]: - self.assertEqual(actual, expected) - - def test_empty(self): - self.assertEqual(list(mi.count_cycle('')), []) - self.assertEqual(list(mi.count_cycle('', 2)), []) - - def test_negative(self): - self.assertEqual(list(mi.count_cycle('abc', -3)), []) - - -class LocateTests(TestCase): - def test_default_pred(self): - iterable = [0, 1, 1, 0, 1, 0, 0] - actual = list(mi.locate(iterable)) - expected = [1, 2, 4] - self.assertEqual(actual, expected) - - def test_no_matches(self): - iterable = [0, 0, 0] - actual = list(mi.locate(iterable)) - expected = [] - self.assertEqual(actual, expected) - - def test_custom_pred(self): - iterable = ['0', 1, 1, '0', 1, '0', '0'] - pred = lambda x: x == '0' - actual = list(mi.locate(iterable, pred)) - expected = [0, 3, 5, 6] - self.assertEqual(actual, expected) - - def test_window_size(self): - iterable = ['0', 1, 1, '0', 1, '0', '0'] - pred = lambda *args: args == ('0', 1) - actual = list(mi.locate(iterable, pred, window_size=2)) - expected = [0, 3] - self.assertEqual(actual, expected) - - def test_window_size_large(self): - iterable = [1, 2, 3, 4] - pred = lambda a, b, c, d, e: True - actual = list(mi.locate(iterable, pred, window_size=5)) - expected = [0] - self.assertEqual(actual, expected) - - def test_window_size_zero(self): - iterable = [1, 2, 3, 4] - pred = lambda: True - with self.assertRaises(ValueError): - list(mi.locate(iterable, pred, window_size=0)) - - -class StripFunctionTests(TestCase): - def test_hashable(self): - iterable = list('www.example.com') - pred = lambda x: x in set('cmowz.') - - self.assertEqual(list(mi.lstrip(iterable, pred)), list('example.com')) - self.assertEqual(list(mi.rstrip(iterable, pred)), list('www.example')) - self.assertEqual(list(mi.strip(iterable, pred)), list('example')) - - def test_not_hashable(self): - iterable = [ - list('http://'), list('www'), list('.example'), list('.com') - ] - pred = lambda x: x in [list('http://'), list('www'), list('.com')] - - self.assertEqual(list(mi.lstrip(iterable, pred)), iterable[2:]) - self.assertEqual(list(mi.rstrip(iterable, pred)), iterable[:3]) - self.assertEqual(list(mi.strip(iterable, pred)), iterable[2: 3]) - - def test_math(self): - iterable = [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2] - pred = lambda x: x <= 2 - - self.assertEqual(list(mi.lstrip(iterable, pred)), iterable[3:]) - self.assertEqual(list(mi.rstrip(iterable, pred)), iterable[:-3]) - self.assertEqual(list(mi.strip(iterable, pred)), iterable[3:-3]) - - -class IsliceExtendedTests(TestCase): - def test_all(self): - iterable = ['0', '1', '2', '3', '4', '5'] - indexes = list(range(-4, len(iterable) + 4)) + [None] - steps = [1, 2, 3, 4, -1, -2, -3, 4] - for slice_args in product(indexes, indexes, steps): - try: - actual = list(mi.islice_extended(iterable, *slice_args)) - except Exception as e: - self.fail((slice_args, e)) - - expected = iterable[slice(*slice_args)] - self.assertEqual(actual, expected, slice_args) - - def test_zero_step(self): - with self.assertRaises(ValueError): - list(mi.islice_extended([1, 2, 3], 0, 1, 0)) - - -class ConsecutiveGroupsTest(TestCase): - def test_numbers(self): - iterable = [-10, -8, -7, -6, 1, 2, 4, 5, -1, 7] - actual = [list(g) for g in mi.consecutive_groups(iterable)] - expected = [[-10], [-8, -7, -6], [1, 2], [4, 5], [-1], [7]] - self.assertEqual(actual, expected) - - def test_custom_ordering(self): - iterable = ['1', '10', '11', '20', '21', '22', '30', '31'] - ordering = lambda x: int(x) - actual = [list(g) for g in mi.consecutive_groups(iterable, ordering)] - expected = [['1'], ['10', '11'], ['20', '21', '22'], ['30', '31']] - self.assertEqual(actual, expected) - - def test_exotic_ordering(self): - iterable = [ - ('a', 'b', 'c', 'd'), - ('a', 'c', 'b', 'd'), - ('a', 'c', 'd', 'b'), - ('a', 'd', 'b', 'c'), - ('d', 'b', 'c', 'a'), - ('d', 'c', 'a', 'b'), - ] - ordering = list(permutations('abcd')).index - actual = [list(g) for g in mi.consecutive_groups(iterable, ordering)] - expected = [ - [('a', 'b', 'c', 'd')], - [('a', 'c', 'b', 'd'), ('a', 'c', 'd', 'b'), ('a', 'd', 'b', 'c')], - [('d', 'b', 'c', 'a'), ('d', 'c', 'a', 'b')], - ] - self.assertEqual(actual, expected) - - -class DifferenceTest(TestCase): - def test_normal(self): - iterable = [10, 20, 30, 40, 50] - actual = list(mi.difference(iterable)) - expected = [10, 10, 10, 10, 10] - self.assertEqual(actual, expected) - - def test_custom(self): - iterable = [10, 20, 30, 40, 50] - actual = list(mi.difference(iterable, add)) - expected = [10, 30, 50, 70, 90] - self.assertEqual(actual, expected) - - def test_roundtrip(self): - original = list(range(100)) - accumulated = mi.accumulate(original) - actual = list(mi.difference(accumulated)) - self.assertEqual(actual, original) - - def test_one(self): - self.assertEqual(list(mi.difference([0])), [0]) - - def test_empty(self): - self.assertEqual(list(mi.difference([])), []) - - -class SeekableTest(TestCase): - def test_exhaustion_reset(self): - iterable = [str(n) for n in range(10)] - - s = mi.seekable(iterable) - self.assertEqual(list(s), iterable) # Normal iteration - self.assertEqual(list(s), []) # Iterable is exhausted - - s.seek(0) - self.assertEqual(list(s), iterable) # Back in action - - def test_partial_reset(self): - iterable = [str(n) for n in range(10)] - - s = mi.seekable(iterable) - self.assertEqual(mi.take(5, s), iterable[:5]) # Normal iteration - - s.seek(1) - self.assertEqual(list(s), iterable[1:]) # Get the rest of the iterable - - def test_forward(self): - iterable = [str(n) for n in range(10)] - - s = mi.seekable(iterable) - self.assertEqual(mi.take(1, s), iterable[:1]) # Normal iteration - - s.seek(3) # Skip over index 2 - self.assertEqual(list(s), iterable[3:]) # Result is similar to slicing - - s.seek(0) # Back to 0 - self.assertEqual(list(s), iterable) # No difference in result - - def test_past_end(self): - iterable = [str(n) for n in range(10)] - - s = mi.seekable(iterable) - self.assertEqual(mi.take(1, s), iterable[:1]) # Normal iteration - - s.seek(20) - self.assertEqual(list(s), []) # Iterable is exhausted - - s.seek(0) # Back to 0 - self.assertEqual(list(s), iterable) # No difference in result - - def test_elements(self): - iterable = map(str, count()) - - s = mi.seekable(iterable) - mi.take(10, s) - - elements = s.elements() - self.assertEqual( - [elements[i] for i in range(10)], [str(n) for n in range(10)] - ) - self.assertEqual(len(elements), 10) - - mi.take(10, s) - self.assertEqual(list(elements), [str(n) for n in range(20)]) - - -class SequenceViewTests(TestCase): - def test_init(self): - view = mi.SequenceView((1, 2, 3)) - self.assertEqual(repr(view), "SequenceView((1, 2, 3))") - self.assertRaises(TypeError, lambda: mi.SequenceView({})) - - def test_update(self): - seq = [1, 2, 3] - view = mi.SequenceView(seq) - self.assertEqual(len(view), 3) - self.assertEqual(repr(view), "SequenceView([1, 2, 3])") - - seq.pop() - self.assertEqual(len(view), 2) - self.assertEqual(repr(view), "SequenceView([1, 2])") - - def test_indexing(self): - seq = ('a', 'b', 'c', 'd', 'e', 'f') - view = mi.SequenceView(seq) - for i in range(-len(seq), len(seq)): - self.assertEqual(view[i], seq[i]) - - def test_slicing(self): - seq = ('a', 'b', 'c', 'd', 'e', 'f') - view = mi.SequenceView(seq) - n = len(seq) - indexes = list(range(-n - 1, n + 1)) + [None] - steps = list(range(-n, n + 1)) - steps.remove(0) - for slice_args in product(indexes, indexes, steps): - i = slice(*slice_args) - self.assertEqual(view[i], seq[i]) - - def test_abc_methods(self): - # collections.Sequence should provide all of this functionality - seq = ('a', 'b', 'c', 'd', 'e', 'f', 'f') - view = mi.SequenceView(seq) - - # __contains__ - self.assertIn('b', view) - self.assertNotIn('g', view) - - # __iter__ - self.assertEqual(list(iter(view)), list(seq)) - - # __reversed__ - self.assertEqual(list(reversed(view)), list(reversed(seq))) - - # index - self.assertEqual(view.index('b'), 1) - - # count - self.assertEqual(seq.count('f'), 2) - - -class RunLengthTest(TestCase): - def test_encode(self): - iterable = (int(str(n)[0]) for n in count(800)) - actual = mi.take(4, mi.run_length.encode(iterable)) - expected = [(8, 100), (9, 100), (1, 1000), (2, 1000)] - self.assertEqual(actual, expected) - - def test_decode(self): - iterable = [('d', 4), ('c', 3), ('b', 2), ('a', 1)] - actual = ''.join(mi.run_length.decode(iterable)) - expected = 'ddddcccbba' - self.assertEqual(actual, expected) - - -class ExactlyNTests(TestCase): - """Tests for ``exactly_n()``""" - - def test_true(self): - """Iterable has ``n`` ``True`` elements""" - self.assertTrue(mi.exactly_n([True, False, True], 2)) - self.assertTrue(mi.exactly_n([1, 1, 1, 0], 3)) - self.assertTrue(mi.exactly_n([False, False], 0)) - self.assertTrue(mi.exactly_n(range(100), 10, lambda x: x < 10)) - - def test_false(self): - """Iterable does not have ``n`` ``True`` elements""" - self.assertFalse(mi.exactly_n([True, False, False], 2)) - self.assertFalse(mi.exactly_n([True, True, False], 1)) - self.assertFalse(mi.exactly_n([False], 1)) - self.assertFalse(mi.exactly_n([True], -1)) - self.assertFalse(mi.exactly_n(repeat(True), 100)) - - def test_empty(self): - """Return ``True`` if the iterable is empty and ``n`` is 0""" - self.assertTrue(mi.exactly_n([], 0)) - self.assertFalse(mi.exactly_n([], 1)) - - -class AlwaysReversibleTests(TestCase): - """Tests for ``always_reversible()``""" - - def test_regular_reversed(self): - self.assertEqual(list(reversed(range(10))), - list(mi.always_reversible(range(10)))) - self.assertEqual(list(reversed([1, 2, 3])), - list(mi.always_reversible([1, 2, 3]))) - self.assertEqual(reversed([1, 2, 3]).__class__, - mi.always_reversible([1, 2, 3]).__class__) - - def test_nonseq_reversed(self): - # Create a non-reversible generator from a sequence - with self.assertRaises(TypeError): - reversed(x for x in range(10)) - - self.assertEqual(list(reversed(range(10))), - list(mi.always_reversible(x for x in range(10)))) - self.assertEqual(list(reversed([1, 2, 3])), - list(mi.always_reversible(x for x in [1, 2, 3]))) - self.assertNotEqual(reversed((1, 2)).__class__, - mi.always_reversible(x for x in (1, 2)).__class__) - - -class CircularShiftsTests(TestCase): - def test_empty(self): - # empty iterable -> empty list - self.assertEqual(list(mi.circular_shifts([])), []) - - def test_simple_circular_shifts(self): - # test the a simple iterator case - self.assertEqual( - mi.circular_shifts(range(4)), - [(0, 1, 2, 3), (1, 2, 3, 0), (2, 3, 0, 1), (3, 0, 1, 2)] - ) - - def test_duplicates(self): - # test non-distinct entries - self.assertEqual( - mi.circular_shifts([0, 1, 0, 1]), - [(0, 1, 0, 1), (1, 0, 1, 0), (0, 1, 0, 1), (1, 0, 1, 0)] - ) - - -class MakeDecoratorTests(TestCase): - def test_basic(self): - slicer = mi.make_decorator(islice) - - @slicer(1, 10, 2) - def user_function(arg_1, arg_2, kwarg_1=None): - self.assertEqual(arg_1, 'arg_1') - self.assertEqual(arg_2, 'arg_2') - self.assertEqual(kwarg_1, 'kwarg_1') - return map(str, count()) - - it = user_function('arg_1', 'arg_2', kwarg_1='kwarg_1') - actual = list(it) - expected = ['1', '3', '5', '7', '9'] - self.assertEqual(actual, expected) - - def test_result_index(self): - def stringify(*args, **kwargs): - self.assertEqual(args[0], 'arg_0') - iterable = args[1] - self.assertEqual(args[2], 'arg_2') - self.assertEqual(kwargs['kwarg_1'], 'kwarg_1') - return map(str, iterable) - - stringifier = mi.make_decorator(stringify, result_index=1) - - @stringifier('arg_0', 'arg_2', kwarg_1='kwarg_1') - def user_function(n): - return count(n) - - it = user_function(1) - actual = mi.take(5, it) - expected = ['1', '2', '3', '4', '5'] - self.assertEqual(actual, expected) - - def test_wrap_class(self): - seeker = mi.make_decorator(mi.seekable) - - @seeker() - def user_function(n): - return map(str, range(n)) - - it = user_function(5) - self.assertEqual(list(it), ['0', '1', '2', '3', '4']) - - it.seek(0) - self.assertEqual(list(it), ['0', '1', '2', '3', '4']) - - -class MapReduceTests(TestCase): - def test_default(self): - iterable = (str(x) for x in range(5)) - keyfunc = lambda x: int(x) // 2 - actual = sorted(mi.map_reduce(iterable, keyfunc).items()) - expected = [(0, ['0', '1']), (1, ['2', '3']), (2, ['4'])] - self.assertEqual(actual, expected) - - def test_valuefunc(self): - iterable = (str(x) for x in range(5)) - keyfunc = lambda x: int(x) // 2 - valuefunc = int - actual = sorted(mi.map_reduce(iterable, keyfunc, valuefunc).items()) - expected = [(0, [0, 1]), (1, [2, 3]), (2, [4])] - self.assertEqual(actual, expected) - - def test_reducefunc(self): - iterable = (str(x) for x in range(5)) - keyfunc = lambda x: int(x) // 2 - valuefunc = int - reducefunc = lambda value_list: reduce(mul, value_list, 1) - actual = sorted( - mi.map_reduce(iterable, keyfunc, valuefunc, reducefunc).items() - ) - expected = [(0, 0), (1, 6), (2, 4)] - self.assertEqual(actual, expected) - - def test_ret(self): - d = mi.map_reduce([1, 0, 2, 0, 1, 0], bool) - self.assertEqual(d, {False: [0, 0, 0], True: [1, 2, 1]}) - self.assertRaises(KeyError, lambda: d[None].append(1)) - - -class RlocateTests(TestCase): - def test_default_pred(self): - iterable = [0, 1, 1, 0, 1, 0, 0] - for it in (iterable[:], iter(iterable)): - actual = list(mi.rlocate(it)) - expected = [4, 2, 1] - self.assertEqual(actual, expected) - - def test_no_matches(self): - iterable = [0, 0, 0] - for it in (iterable[:], iter(iterable)): - actual = list(mi.rlocate(it)) - expected = [] - self.assertEqual(actual, expected) - - def test_custom_pred(self): - iterable = ['0', 1, 1, '0', 1, '0', '0'] - pred = lambda x: x == '0' - for it in (iterable[:], iter(iterable)): - actual = list(mi.rlocate(it, pred)) - expected = [6, 5, 3, 0] - self.assertEqual(actual, expected) - - def test_efficient_reversal(self): - iterable = range(9 ** 9) # Is efficiently reversible - target = 9 ** 9 - 2 - pred = lambda x: x == target # Find-able from the right - actual = next(mi.rlocate(iterable, pred)) - self.assertEqual(actual, target) - - def test_window_size(self): - iterable = ['0', 1, 1, '0', 1, '0', '0'] - pred = lambda *args: args == ('0', 1) - for it in (iterable, iter(iterable)): - actual = list(mi.rlocate(it, pred, window_size=2)) - expected = [3, 0] - self.assertEqual(actual, expected) - - def test_window_size_large(self): - iterable = [1, 2, 3, 4] - pred = lambda a, b, c, d, e: True - for it in (iterable, iter(iterable)): - actual = list(mi.rlocate(iterable, pred, window_size=5)) - expected = [0] - self.assertEqual(actual, expected) - - def test_window_size_zero(self): - iterable = [1, 2, 3, 4] - pred = lambda: True - for it in (iterable, iter(iterable)): - with self.assertRaises(ValueError): - list(mi.locate(iterable, pred, window_size=0)) - - -class ReplaceTests(TestCase): - def test_basic(self): - iterable = range(10) - pred = lambda x: x % 2 == 0 - substitutes = [] - actual = list(mi.replace(iterable, pred, substitutes)) - expected = [1, 3, 5, 7, 9] - self.assertEqual(actual, expected) - - def test_count(self): - iterable = range(10) - pred = lambda x: x % 2 == 0 - substitutes = [] - actual = list(mi.replace(iterable, pred, substitutes, count=4)) - expected = [1, 3, 5, 7, 8, 9] - self.assertEqual(actual, expected) - - def test_window_size(self): - iterable = range(10) - pred = lambda *args: args == (0, 1, 2) - substitutes = [] - actual = list(mi.replace(iterable, pred, substitutes, window_size=3)) - expected = [3, 4, 5, 6, 7, 8, 9] - self.assertEqual(actual, expected) - - def test_window_size_end(self): - iterable = range(10) - pred = lambda *args: args == (7, 8, 9) - substitutes = [] - actual = list(mi.replace(iterable, pred, substitutes, window_size=3)) - expected = [0, 1, 2, 3, 4, 5, 6] - self.assertEqual(actual, expected) - - def test_window_size_count(self): - iterable = range(10) - pred = lambda *args: (args == (0, 1, 2)) or (args == (7, 8, 9)) - substitutes = [] - actual = list( - mi.replace(iterable, pred, substitutes, count=1, window_size=3) - ) - expected = [3, 4, 5, 6, 7, 8, 9] - self.assertEqual(actual, expected) - - def test_window_size_large(self): - iterable = range(4) - pred = lambda a, b, c, d, e: True - substitutes = [5, 6, 7] - actual = list(mi.replace(iterable, pred, substitutes, window_size=5)) - expected = [5, 6, 7] - self.assertEqual(actual, expected) - - def test_window_size_zero(self): - iterable = range(10) - pred = lambda *args: True - substitutes = [] - with self.assertRaises(ValueError): - list(mi.replace(iterable, pred, substitutes, window_size=0)) - - def test_iterable_substitutes(self): - iterable = range(5) - pred = lambda x: x % 2 == 0 - substitutes = iter('__') - actual = list(mi.replace(iterable, pred, substitutes)) - expected = ['_', '_', 1, '_', '_', 3, '_', '_'] - self.assertEqual(actual, expected) diff --git a/libs/common/more_itertools/tests/test_recipes.py b/libs/common/more_itertools/tests/test_recipes.py deleted file mode 100644 index b3cfb62f..00000000 --- a/libs/common/more_itertools/tests/test_recipes.py +++ /dev/null @@ -1,616 +0,0 @@ -from doctest import DocTestSuite -from unittest import TestCase - -from itertools import combinations -from six.moves import range - -import more_itertools as mi - - -def load_tests(loader, tests, ignore): - # Add the doctests - tests.addTests(DocTestSuite('more_itertools.recipes')) - return tests - - -class AccumulateTests(TestCase): - """Tests for ``accumulate()``""" - - def test_empty(self): - """Test that an empty input returns an empty output""" - self.assertEqual(list(mi.accumulate([])), []) - - def test_default(self): - """Test accumulate with the default function (addition)""" - self.assertEqual(list(mi.accumulate([1, 2, 3])), [1, 3, 6]) - - def test_bogus_function(self): - """Test accumulate with an invalid function""" - with self.assertRaises(TypeError): - list(mi.accumulate([1, 2, 3], func=lambda x: x)) - - def test_custom_function(self): - """Test accumulate with a custom function""" - self.assertEqual( - list(mi.accumulate((1, 2, 3, 2, 1), func=max)), [1, 2, 3, 3, 3] - ) - - -class TakeTests(TestCase): - """Tests for ``take()``""" - - def test_simple_take(self): - """Test basic usage""" - t = mi.take(5, range(10)) - self.assertEqual(t, [0, 1, 2, 3, 4]) - - def test_null_take(self): - """Check the null case""" - t = mi.take(0, range(10)) - self.assertEqual(t, []) - - def test_negative_take(self): - """Make sure taking negative items results in a ValueError""" - self.assertRaises(ValueError, lambda: mi.take(-3, range(10))) - - def test_take_too_much(self): - """Taking more than an iterator has remaining should return what the - iterator has remaining. - - """ - t = mi.take(10, range(5)) - self.assertEqual(t, [0, 1, 2, 3, 4]) - - -class TabulateTests(TestCase): - """Tests for ``tabulate()``""" - - def test_simple_tabulate(self): - """Test the happy path""" - t = mi.tabulate(lambda x: x) - f = tuple([next(t) for _ in range(3)]) - self.assertEqual(f, (0, 1, 2)) - - def test_count(self): - """Ensure tabulate accepts specific count""" - t = mi.tabulate(lambda x: 2 * x, -1) - f = (next(t), next(t), next(t)) - self.assertEqual(f, (-2, 0, 2)) - - -class TailTests(TestCase): - """Tests for ``tail()``""" - - def test_greater(self): - """Length of iterable is greater than requested tail""" - self.assertEqual(list(mi.tail(3, 'ABCDEFG')), ['E', 'F', 'G']) - - def test_equal(self): - """Length of iterable is equal to the requested tail""" - self.assertEqual( - list(mi.tail(7, 'ABCDEFG')), ['A', 'B', 'C', 'D', 'E', 'F', 'G'] - ) - - def test_less(self): - """Length of iterable is less than requested tail""" - self.assertEqual( - list(mi.tail(8, 'ABCDEFG')), ['A', 'B', 'C', 'D', 'E', 'F', 'G'] - ) - - -class ConsumeTests(TestCase): - """Tests for ``consume()``""" - - def test_sanity(self): - """Test basic functionality""" - r = (x for x in range(10)) - mi.consume(r, 3) - self.assertEqual(3, next(r)) - - def test_null_consume(self): - """Check the null case""" - r = (x for x in range(10)) - mi.consume(r, 0) - self.assertEqual(0, next(r)) - - def test_negative_consume(self): - """Check that negative consumsion throws an error""" - r = (x for x in range(10)) - self.assertRaises(ValueError, lambda: mi.consume(r, -1)) - - def test_total_consume(self): - """Check that iterator is totally consumed by default""" - r = (x for x in range(10)) - mi.consume(r) - self.assertRaises(StopIteration, lambda: next(r)) - - -class NthTests(TestCase): - """Tests for ``nth()``""" - - def test_basic(self): - """Make sure the nth item is returned""" - l = range(10) - for i, v in enumerate(l): - self.assertEqual(mi.nth(l, i), v) - - def test_default(self): - """Ensure a default value is returned when nth item not found""" - l = range(3) - self.assertEqual(mi.nth(l, 100, "zebra"), "zebra") - - def test_negative_item_raises(self): - """Ensure asking for a negative item raises an exception""" - self.assertRaises(ValueError, lambda: mi.nth(range(10), -3)) - - -class AllEqualTests(TestCase): - """Tests for ``all_equal()``""" - - def test_true(self): - """Everything is equal""" - self.assertTrue(mi.all_equal('aaaaaa')) - self.assertTrue(mi.all_equal([0, 0, 0, 0])) - - def test_false(self): - """Not everything is equal""" - self.assertFalse(mi.all_equal('aaaaab')) - self.assertFalse(mi.all_equal([0, 0, 0, 1])) - - def test_tricky(self): - """Not everything is identical, but everything is equal""" - items = [1, complex(1, 0), 1.0] - self.assertTrue(mi.all_equal(items)) - - def test_empty(self): - """Return True if the iterable is empty""" - self.assertTrue(mi.all_equal('')) - self.assertTrue(mi.all_equal([])) - - def test_one(self): - """Return True if the iterable is singular""" - self.assertTrue(mi.all_equal('0')) - self.assertTrue(mi.all_equal([0])) - - -class QuantifyTests(TestCase): - """Tests for ``quantify()``""" - - def test_happy_path(self): - """Make sure True count is returned""" - q = [True, False, True] - self.assertEqual(mi.quantify(q), 2) - - def test_custom_predicate(self): - """Ensure non-default predicates return as expected""" - q = range(10) - self.assertEqual(mi.quantify(q, lambda x: x % 2 == 0), 5) - - -class PadnoneTests(TestCase): - """Tests for ``padnone()``""" - - def test_happy_path(self): - """wrapper iterator should return None indefinitely""" - r = range(2) - p = mi.padnone(r) - self.assertEqual([0, 1, None, None], [next(p) for _ in range(4)]) - - -class NcyclesTests(TestCase): - """Tests for ``nyclces()``""" - - def test_happy_path(self): - """cycle a sequence three times""" - r = ["a", "b", "c"] - n = mi.ncycles(r, 3) - self.assertEqual( - ["a", "b", "c", "a", "b", "c", "a", "b", "c"], - list(n) - ) - - def test_null_case(self): - """asking for 0 cycles should return an empty iterator""" - n = mi.ncycles(range(100), 0) - self.assertRaises(StopIteration, lambda: next(n)) - - def test_pathalogical_case(self): - """asking for negative cycles should return an empty iterator""" - n = mi.ncycles(range(100), -10) - self.assertRaises(StopIteration, lambda: next(n)) - - -class DotproductTests(TestCase): - """Tests for ``dotproduct()``'""" - - def test_happy_path(self): - """simple dotproduct example""" - self.assertEqual(400, mi.dotproduct([10, 10], [20, 20])) - - -class FlattenTests(TestCase): - """Tests for ``flatten()``""" - - def test_basic_usage(self): - """ensure list of lists is flattened one level""" - f = [[0, 1, 2], [3, 4, 5]] - self.assertEqual(list(range(6)), list(mi.flatten(f))) - - def test_single_level(self): - """ensure list of lists is flattened only one level""" - f = [[0, [1, 2]], [[3, 4], 5]] - self.assertEqual([0, [1, 2], [3, 4], 5], list(mi.flatten(f))) - - -class RepeatfuncTests(TestCase): - """Tests for ``repeatfunc()``""" - - def test_simple_repeat(self): - """test simple repeated functions""" - r = mi.repeatfunc(lambda: 5) - self.assertEqual([5, 5, 5, 5, 5], [next(r) for _ in range(5)]) - - def test_finite_repeat(self): - """ensure limited repeat when times is provided""" - r = mi.repeatfunc(lambda: 5, times=5) - self.assertEqual([5, 5, 5, 5, 5], list(r)) - - def test_added_arguments(self): - """ensure arguments are applied to the function""" - r = mi.repeatfunc(lambda x: x, 2, 3) - self.assertEqual([3, 3], list(r)) - - def test_null_times(self): - """repeat 0 should return an empty iterator""" - r = mi.repeatfunc(range, 0, 3) - self.assertRaises(StopIteration, lambda: next(r)) - - -class PairwiseTests(TestCase): - """Tests for ``pairwise()``""" - - def test_base_case(self): - """ensure an iterable will return pairwise""" - p = mi.pairwise([1, 2, 3]) - self.assertEqual([(1, 2), (2, 3)], list(p)) - - def test_short_case(self): - """ensure an empty iterator if there's not enough values to pair""" - p = mi.pairwise("a") - self.assertRaises(StopIteration, lambda: next(p)) - - -class GrouperTests(TestCase): - """Tests for ``grouper()``""" - - def test_even(self): - """Test when group size divides evenly into the length of - the iterable. - - """ - self.assertEqual( - list(mi.grouper(3, 'ABCDEF')), [('A', 'B', 'C'), ('D', 'E', 'F')] - ) - - def test_odd(self): - """Test when group size does not divide evenly into the length of the - iterable. - - """ - self.assertEqual( - list(mi.grouper(3, 'ABCDE')), [('A', 'B', 'C'), ('D', 'E', None)] - ) - - def test_fill_value(self): - """Test that the fill value is used to pad the final group""" - self.assertEqual( - list(mi.grouper(3, 'ABCDE', 'x')), - [('A', 'B', 'C'), ('D', 'E', 'x')] - ) - - -class RoundrobinTests(TestCase): - """Tests for ``roundrobin()``""" - - def test_even_groups(self): - """Ensure ordered output from evenly populated iterables""" - self.assertEqual( - list(mi.roundrobin('ABC', [1, 2, 3], range(3))), - ['A', 1, 0, 'B', 2, 1, 'C', 3, 2] - ) - - def test_uneven_groups(self): - """Ensure ordered output from unevenly populated iterables""" - self.assertEqual( - list(mi.roundrobin('ABCD', [1, 2], range(0))), - ['A', 1, 'B', 2, 'C', 'D'] - ) - - -class PartitionTests(TestCase): - """Tests for ``partition()``""" - - def test_bool(self): - """Test when pred() returns a boolean""" - lesser, greater = mi.partition(lambda x: x > 5, range(10)) - self.assertEqual(list(lesser), [0, 1, 2, 3, 4, 5]) - self.assertEqual(list(greater), [6, 7, 8, 9]) - - def test_arbitrary(self): - """Test when pred() returns an integer""" - divisibles, remainders = mi.partition(lambda x: x % 3, range(10)) - self.assertEqual(list(divisibles), [0, 3, 6, 9]) - self.assertEqual(list(remainders), [1, 2, 4, 5, 7, 8]) - - -class PowersetTests(TestCase): - """Tests for ``powerset()``""" - - def test_combinatorics(self): - """Ensure a proper enumeration""" - p = mi.powerset([1, 2, 3]) - self.assertEqual( - list(p), - [(), (1,), (2,), (3,), (1, 2), (1, 3), (2, 3), (1, 2, 3)] - ) - - -class UniqueEverseenTests(TestCase): - """Tests for ``unique_everseen()``""" - - def test_everseen(self): - """ensure duplicate elements are ignored""" - u = mi.unique_everseen('AAAABBBBCCDAABBB') - self.assertEqual( - ['A', 'B', 'C', 'D'], - list(u) - ) - - def test_custom_key(self): - """ensure the custom key comparison works""" - u = mi.unique_everseen('aAbACCc', key=str.lower) - self.assertEqual(list('abC'), list(u)) - - def test_unhashable(self): - """ensure things work for unhashable items""" - iterable = ['a', [1, 2, 3], [1, 2, 3], 'a'] - u = mi.unique_everseen(iterable) - self.assertEqual(list(u), ['a', [1, 2, 3]]) - - def test_unhashable_key(self): - """ensure things work for unhashable items with a custom key""" - iterable = ['a', [1, 2, 3], [1, 2, 3], 'a'] - u = mi.unique_everseen(iterable, key=lambda x: x) - self.assertEqual(list(u), ['a', [1, 2, 3]]) - - -class UniqueJustseenTests(TestCase): - """Tests for ``unique_justseen()``""" - - def test_justseen(self): - """ensure only last item is remembered""" - u = mi.unique_justseen('AAAABBBCCDABB') - self.assertEqual(list('ABCDAB'), list(u)) - - def test_custom_key(self): - """ensure the custom key comparison works""" - u = mi.unique_justseen('AABCcAD', str.lower) - self.assertEqual(list('ABCAD'), list(u)) - - -class IterExceptTests(TestCase): - """Tests for ``iter_except()``""" - - def test_exact_exception(self): - """ensure the exact specified exception is caught""" - l = [1, 2, 3] - i = mi.iter_except(l.pop, IndexError) - self.assertEqual(list(i), [3, 2, 1]) - - def test_generic_exception(self): - """ensure the generic exception can be caught""" - l = [1, 2] - i = mi.iter_except(l.pop, Exception) - self.assertEqual(list(i), [2, 1]) - - def test_uncaught_exception_is_raised(self): - """ensure a non-specified exception is raised""" - l = [1, 2, 3] - i = mi.iter_except(l.pop, KeyError) - self.assertRaises(IndexError, lambda: list(i)) - - def test_first(self): - """ensure first is run before the function""" - l = [1, 2, 3] - f = lambda: 25 - i = mi.iter_except(l.pop, IndexError, f) - self.assertEqual(list(i), [25, 3, 2, 1]) - - -class FirstTrueTests(TestCase): - """Tests for ``first_true()``""" - - def test_something_true(self): - """Test with no keywords""" - self.assertEqual(mi.first_true(range(10)), 1) - - def test_nothing_true(self): - """Test default return value.""" - self.assertIsNone(mi.first_true([0, 0, 0])) - - def test_default(self): - """Test with a default keyword""" - self.assertEqual(mi.first_true([0, 0, 0], default='!'), '!') - - def test_pred(self): - """Test with a custom predicate""" - self.assertEqual( - mi.first_true([2, 4, 6], pred=lambda x: x % 3 == 0), 6 - ) - - -class RandomProductTests(TestCase): - """Tests for ``random_product()`` - - Since random.choice() has different results with the same seed across - python versions 2.x and 3.x, these tests use highly probably events to - create predictable outcomes across platforms. - """ - - def test_simple_lists(self): - """Ensure that one item is chosen from each list in each pair. - Also ensure that each item from each list eventually appears in - the chosen combinations. - - Odds are roughly 1 in 7.1 * 10e16 that one item from either list will - not be chosen after 100 samplings of one item from each list. Just to - be safe, better use a known random seed, too. - - """ - nums = [1, 2, 3] - lets = ['a', 'b', 'c'] - n, m = zip(*[mi.random_product(nums, lets) for _ in range(100)]) - n, m = set(n), set(m) - self.assertEqual(n, set(nums)) - self.assertEqual(m, set(lets)) - self.assertEqual(len(n), len(nums)) - self.assertEqual(len(m), len(lets)) - - def test_list_with_repeat(self): - """ensure multiple items are chosen, and that they appear to be chosen - from one list then the next, in proper order. - - """ - nums = [1, 2, 3] - lets = ['a', 'b', 'c'] - r = list(mi.random_product(nums, lets, repeat=100)) - self.assertEqual(2 * 100, len(r)) - n, m = set(r[::2]), set(r[1::2]) - self.assertEqual(n, set(nums)) - self.assertEqual(m, set(lets)) - self.assertEqual(len(n), len(nums)) - self.assertEqual(len(m), len(lets)) - - -class RandomPermutationTests(TestCase): - """Tests for ``random_permutation()``""" - - def test_full_permutation(self): - """ensure every item from the iterable is returned in a new ordering - - 15 elements have a 1 in 1.3 * 10e12 of appearing in sorted order, so - we fix a seed value just to be sure. - - """ - i = range(15) - r = mi.random_permutation(i) - self.assertEqual(set(i), set(r)) - if i == r: - raise AssertionError("Values were not permuted") - - def test_partial_permutation(self): - """ensure all returned items are from the iterable, that the returned - permutation is of the desired length, and that all items eventually - get returned. - - Sampling 100 permutations of length 5 from a set of 15 leaves a - (2/3)^100 chance that an item will not be chosen. Multiplied by 15 - items, there is a 1 in 2.6e16 chance that at least 1 item will not - show up in the resulting output. Using a random seed will fix that. - - """ - items = range(15) - item_set = set(items) - all_items = set() - for _ in range(100): - permutation = mi.random_permutation(items, 5) - self.assertEqual(len(permutation), 5) - permutation_set = set(permutation) - self.assertLessEqual(permutation_set, item_set) - all_items |= permutation_set - self.assertEqual(all_items, item_set) - - -class RandomCombinationTests(TestCase): - """Tests for ``random_combination()``""" - - def test_pseudorandomness(self): - """ensure different subsets of the iterable get returned over many - samplings of random combinations""" - items = range(15) - all_items = set() - for _ in range(50): - combination = mi.random_combination(items, 5) - all_items |= set(combination) - self.assertEqual(all_items, set(items)) - - def test_no_replacement(self): - """ensure that elements are sampled without replacement""" - items = range(15) - for _ in range(50): - combination = mi.random_combination(items, len(items)) - self.assertEqual(len(combination), len(set(combination))) - self.assertRaises( - ValueError, lambda: mi.random_combination(items, len(items) + 1) - ) - - -class RandomCombinationWithReplacementTests(TestCase): - """Tests for ``random_combination_with_replacement()``""" - - def test_replacement(self): - """ensure that elements are sampled with replacement""" - items = range(5) - combo = mi.random_combination_with_replacement(items, len(items) * 2) - self.assertEqual(2 * len(items), len(combo)) - if len(set(combo)) == len(combo): - raise AssertionError("Combination contained no duplicates") - - def test_pseudorandomness(self): - """ensure different subsets of the iterable get returned over many - samplings of random combinations""" - items = range(15) - all_items = set() - for _ in range(50): - combination = mi.random_combination_with_replacement(items, 5) - all_items |= set(combination) - self.assertEqual(all_items, set(items)) - - -class NthCombinationTests(TestCase): - def test_basic(self): - iterable = 'abcdefg' - r = 4 - for index, expected in enumerate(combinations(iterable, r)): - actual = mi.nth_combination(iterable, r, index) - self.assertEqual(actual, expected) - - def test_long(self): - actual = mi.nth_combination(range(180), 4, 2000000) - expected = (2, 12, 35, 126) - self.assertEqual(actual, expected) - - def test_invalid_r(self): - for r in (-1, 3): - with self.assertRaises(ValueError): - mi.nth_combination([], r, 0) - - def test_invalid_index(self): - with self.assertRaises(IndexError): - mi.nth_combination('abcdefg', 3, -36) - - -class PrependTests(TestCase): - def test_basic(self): - value = 'a' - iterator = iter('bcdefg') - actual = list(mi.prepend(value, iterator)) - expected = list('abcdefg') - self.assertEqual(actual, expected) - - def test_multiple(self): - value = 'ab' - iterator = iter('cdefg') - actual = tuple(mi.prepend(value, iterator)) - expected = ('ab',) + tuple('cdefg') - self.assertEqual(actual, expected) diff --git a/libs/common/oauthlib/__init__.py b/libs/common/oauthlib/__init__.py index 69dd1136..d9a5e38e 100644 --- a/libs/common/oauthlib/__init__.py +++ b/libs/common/oauthlib/__init__.py @@ -12,7 +12,7 @@ import logging from logging import NullHandler __author__ = 'The OAuthlib Community' -__version__ = '3.1.0' +__version__ = '3.2.2' logging.getLogger('oauthlib').addHandler(NullHandler()) diff --git a/libs/common/oauthlib/common.py b/libs/common/oauthlib/common.py index 5aeb0150..395e75ef 100644 --- a/libs/common/oauthlib/common.py +++ b/libs/common/oauthlib/common.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ oauthlib.common ~~~~~~~~~~~~~~ @@ -6,34 +5,22 @@ oauthlib.common This module provides data structures and utilities common to all implementations of OAuth. """ -from __future__ import absolute_import, unicode_literals - import collections import datetime import logging import re -import sys import time +import urllib.parse as urlparse +from urllib.parse import ( + quote as _quote, unquote as _unquote, urlencode as _urlencode, +) + from . import get_debug try: - from secrets import randbits - from secrets import SystemRandom + from secrets import SystemRandom, randbits except ImportError: - from random import getrandbits as randbits - from random import SystemRandom -try: - from urllib import quote as _quote - from urllib import unquote as _unquote - from urllib import urlencode as _urlencode -except ImportError: - from urllib.parse import quote as _quote - from urllib.parse import unquote as _unquote - from urllib.parse import urlencode as _urlencode -try: - import urlparse -except ImportError: - import urllib.parse as urlparse + from random import SystemRandom, getrandbits as randbits UNICODE_ASCII_CHARACTER_SET = ('abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' @@ -51,17 +38,10 @@ always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ' log = logging.getLogger('oauthlib') -PY3 = sys.version_info[0] == 3 - -if PY3: - unicode_type = str -else: - unicode_type = unicode - # 'safe' must be bytes (Python 2.6 requires bytes, other versions allow either) def quote(s, safe=b'/'): - s = s.encode('utf-8') if isinstance(s, unicode_type) else s + s = s.encode('utf-8') if isinstance(s, str) else s s = _quote(s, safe) # PY3 always returns unicode. PY2 may return either, depending on whether # it had to modify the string. @@ -83,7 +63,7 @@ def unquote(s): def urlencode(params): utf8_params = encode_params_utf8(params) urlencoded = _urlencode(utf8_params) - if isinstance(urlencoded, unicode_type): # PY3 returns unicode + if isinstance(urlencoded, str): return urlencoded else: return urlencoded.decode("utf-8") @@ -96,8 +76,8 @@ def encode_params_utf8(params): encoded = [] for k, v in params: encoded.append(( - k.encode('utf-8') if isinstance(k, unicode_type) else k, - v.encode('utf-8') if isinstance(v, unicode_type) else v)) + k.encode('utf-8') if isinstance(k, str) else k, + v.encode('utf-8') if isinstance(v, str) else v)) return encoded @@ -141,22 +121,6 @@ def urldecode(query): if INVALID_HEX_PATTERN.search(query): raise ValueError('Invalid hex encoding in query string.') - # We encode to utf-8 prior to parsing because parse_qsl behaves - # differently on unicode input in python 2 and 3. - # Python 2.7 - # >>> urlparse.parse_qsl(u'%E5%95%A6%E5%95%A6') - # u'\xe5\x95\xa6\xe5\x95\xa6' - # Python 2.7, non unicode input gives the same - # >>> urlparse.parse_qsl('%E5%95%A6%E5%95%A6') - # '\xe5\x95\xa6\xe5\x95\xa6' - # but now we can decode it to unicode - # >>> urlparse.parse_qsl('%E5%95%A6%E5%95%A6').decode('utf-8') - # u'\u5566\u5566' - # Python 3.3 however - # >>> urllib.parse.parse_qsl(u'%E5%95%A6%E5%95%A6') - # u'\u5566\u5566' - query = query.encode( - 'utf-8') if not PY3 and isinstance(query, unicode_type) else query # We want to allow queries such as "c2" whereas urlparse.parse_qsl # with the strict_parsing flag will not. params = urlparse.parse_qsl(query, keep_blank_values=True) @@ -173,7 +137,7 @@ def extract_params(raw): empty list of parameters. Any other input will result in a return value of None. """ - if isinstance(raw, (bytes, unicode_type)): + if isinstance(raw, (bytes, str)): try: params = urldecode(raw) except ValueError: @@ -206,7 +170,7 @@ def generate_nonce(): .. _`section 3.2.1`: https://tools.ietf.org/html/draft-ietf-oauth-v2-http-mac-01#section-3.2.1 .. _`section 3.3`: https://tools.ietf.org/html/rfc5849#section-3.3 """ - return unicode_type(unicode_type(randbits(64)) + generate_timestamp()) + return str(str(randbits(64)) + generate_timestamp()) def generate_timestamp(): @@ -218,7 +182,7 @@ def generate_timestamp(): .. _`section 3.2.1`: https://tools.ietf.org/html/draft-ietf-oauth-v2-http-mac-01#section-3.2.1 .. _`section 3.3`: https://tools.ietf.org/html/rfc5849#section-3.3 """ - return unicode_type(int(time.time())) + return str(int(time.time())) def generate_token(length=30, chars=UNICODE_ASCII_CHARACTER_SET): @@ -305,11 +269,11 @@ def safe_string_equals(a, b): def to_unicode(data, encoding='UTF-8'): """Convert a number of different types of objects to unicode.""" - if isinstance(data, unicode_type): + if isinstance(data, str): return data if isinstance(data, bytes): - return unicode_type(data, encoding=encoding) + return str(data, encoding=encoding) if hasattr(data, '__iter__'): try: @@ -323,7 +287,7 @@ def to_unicode(data, encoding='UTF-8'): # We support 2.6 which lacks dict comprehensions if hasattr(data, 'items'): data = data.items() - return dict(((to_unicode(k, encoding), to_unicode(v, encoding)) for k, v in data)) + return {to_unicode(k, encoding): to_unicode(v, encoding) for k, v in data} return data @@ -335,7 +299,7 @@ class CaseInsensitiveDict(dict): proxy = {} def __init__(self, data): - self.proxy = dict((k.lower(), k) for k in data) + self.proxy = {k.lower(): k for k in data} for k in data: self[k] = data[k] @@ -344,27 +308,27 @@ class CaseInsensitiveDict(dict): def __delitem__(self, k): key = self.proxy[k.lower()] - super(CaseInsensitiveDict, self).__delitem__(key) + super().__delitem__(key) del self.proxy[k.lower()] def __getitem__(self, k): key = self.proxy[k.lower()] - return super(CaseInsensitiveDict, self).__getitem__(key) + return super().__getitem__(key) def get(self, k, default=None): return self[k] if k in self else default def __setitem__(self, k, v): - super(CaseInsensitiveDict, self).__setitem__(k, v) + super().__setitem__(k, v) self.proxy[k.lower()] = k def update(self, *args, **kwargs): - super(CaseInsensitiveDict, self).update(*args, **kwargs) + super().update(*args, **kwargs) for k in dict(*args, **kwargs): self.proxy[k.lower()] = k -class Request(object): +class Request: """A malleable representation of a signable HTTP request. @@ -444,7 +408,7 @@ class Request(object): body = SANITIZE_PATTERN.sub('\1', str(body)) if 'Authorization' in headers: headers['Authorization'] = '' - return '' % ( + return ''.format( self.uri, self.http_method, headers, body) @property diff --git a/libs/common/oauthlib/oauth1/__init__.py b/libs/common/oauthlib/oauth1/__init__.py index dc908d4e..9caf12a9 100644 --- a/libs/common/oauthlib/oauth1/__init__.py +++ b/libs/common/oauthlib/oauth1/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ oauthlib.oauth1 ~~~~~~~~~~~~~~ @@ -6,14 +5,19 @@ oauthlib.oauth1 This module is a wrapper for the most recent implementation of OAuth 1.0 Client and Server classes. """ -from __future__ import absolute_import, unicode_literals - -from .rfc5849 import Client -from .rfc5849 import SIGNATURE_HMAC, SIGNATURE_HMAC_SHA1, SIGNATURE_HMAC_SHA256, SIGNATURE_RSA, SIGNATURE_PLAINTEXT -from .rfc5849 import SIGNATURE_TYPE_AUTH_HEADER, SIGNATURE_TYPE_QUERY -from .rfc5849 import SIGNATURE_TYPE_BODY +from .rfc5849 import ( + SIGNATURE_HMAC, SIGNATURE_HMAC_SHA1, SIGNATURE_HMAC_SHA256, + SIGNATURE_HMAC_SHA512, SIGNATURE_PLAINTEXT, SIGNATURE_RSA, + SIGNATURE_RSA_SHA1, SIGNATURE_RSA_SHA256, SIGNATURE_RSA_SHA512, + SIGNATURE_TYPE_AUTH_HEADER, SIGNATURE_TYPE_BODY, SIGNATURE_TYPE_QUERY, + Client, +) +from .rfc5849.endpoints import ( + AccessTokenEndpoint, AuthorizationEndpoint, RequestTokenEndpoint, + ResourceEndpoint, SignatureOnlyEndpoint, WebApplicationServer, +) +from .rfc5849.errors import ( + InsecureTransportError, InvalidClientError, InvalidRequestError, + InvalidSignatureMethodError, OAuth1Error, +) from .rfc5849.request_validator import RequestValidator -from .rfc5849.endpoints import RequestTokenEndpoint, AuthorizationEndpoint -from .rfc5849.endpoints import AccessTokenEndpoint, ResourceEndpoint -from .rfc5849.endpoints import SignatureOnlyEndpoint, WebApplicationServer -from .rfc5849.errors import InsecureTransportError, InvalidClientError, InvalidRequestError, InvalidSignatureMethodError, OAuth1Error diff --git a/libs/common/oauthlib/oauth1/rfc5849/__init__.py b/libs/common/oauthlib/oauth1/rfc5849/__init__.py index 4f462bbc..c559251f 100644 --- a/libs/common/oauthlib/oauth1/rfc5849/__init__.py +++ b/libs/common/oauthlib/oauth1/rfc5849/__init__.py @@ -1,33 +1,68 @@ -# -*- coding: utf-8 -*- """ oauthlib.oauth1.rfc5849 ~~~~~~~~~~~~~~ This module is an implementation of various logic needed for signing and checking OAuth 1.0 RFC 5849 requests. + +It supports all three standard signature methods defined in RFC 5849: + +- HMAC-SHA1 +- RSA-SHA1 +- PLAINTEXT + +It also supports signature methods that are not defined in RFC 5849. These are +based on the standard ones but replace SHA-1 with the more secure SHA-256: + +- HMAC-SHA256 +- RSA-SHA256 + """ -from __future__ import absolute_import, unicode_literals import base64 import hashlib import logging +import urllib.parse as urlparse + +from oauthlib.common import ( + Request, generate_nonce, generate_timestamp, to_unicode, urlencode, +) + +from . import parameters, signature + log = logging.getLogger(__name__) -import sys -try: - import urlparse -except ImportError: - import urllib.parse as urlparse - -from oauthlib.common import Request, urlencode, generate_nonce -from oauthlib.common import generate_timestamp, to_unicode -from . import parameters, signature +# Available signature methods +# +# Note: SIGNATURE_HMAC and SIGNATURE_RSA are kept for backward compatibility +# with previous versions of this library, when it the only HMAC-based and +# RSA-based signature methods were HMAC-SHA1 and RSA-SHA1. But now that it +# supports other hashing algorithms besides SHA1, explicitly identifying which +# hashing algorithm is being used is recommended. +# +# Note: if additional values are defined here, don't forget to update the +# imports in "../__init__.py" so they are available outside this module. SIGNATURE_HMAC_SHA1 = "HMAC-SHA1" SIGNATURE_HMAC_SHA256 = "HMAC-SHA256" -SIGNATURE_HMAC = SIGNATURE_HMAC_SHA1 -SIGNATURE_RSA = "RSA-SHA1" +SIGNATURE_HMAC_SHA512 = "HMAC-SHA512" +SIGNATURE_HMAC = SIGNATURE_HMAC_SHA1 # deprecated variable for HMAC-SHA1 + +SIGNATURE_RSA_SHA1 = "RSA-SHA1" +SIGNATURE_RSA_SHA256 = "RSA-SHA256" +SIGNATURE_RSA_SHA512 = "RSA-SHA512" +SIGNATURE_RSA = SIGNATURE_RSA_SHA1 # deprecated variable for RSA-SHA1 + SIGNATURE_PLAINTEXT = "PLAINTEXT" -SIGNATURE_METHODS = (SIGNATURE_HMAC_SHA1, SIGNATURE_HMAC_SHA256, SIGNATURE_RSA, SIGNATURE_PLAINTEXT) + +SIGNATURE_METHODS = ( + SIGNATURE_HMAC_SHA1, + SIGNATURE_HMAC_SHA256, + SIGNATURE_HMAC_SHA512, + SIGNATURE_RSA_SHA1, + SIGNATURE_RSA_SHA256, + SIGNATURE_RSA_SHA512, + SIGNATURE_PLAINTEXT +) SIGNATURE_TYPE_AUTH_HEADER = 'AUTH_HEADER' SIGNATURE_TYPE_QUERY = 'QUERY' @@ -36,13 +71,16 @@ SIGNATURE_TYPE_BODY = 'BODY' CONTENT_TYPE_FORM_URLENCODED = 'application/x-www-form-urlencoded' -class Client(object): +class Client: """A client used to sign OAuth 1.0 RFC 5849 requests.""" SIGNATURE_METHODS = { SIGNATURE_HMAC_SHA1: signature.sign_hmac_sha1_with_client, SIGNATURE_HMAC_SHA256: signature.sign_hmac_sha256_with_client, - SIGNATURE_RSA: signature.sign_rsa_sha1_with_client, + SIGNATURE_HMAC_SHA512: signature.sign_hmac_sha512_with_client, + SIGNATURE_RSA_SHA1: signature.sign_rsa_sha1_with_client, + SIGNATURE_RSA_SHA256: signature.sign_rsa_sha256_with_client, + SIGNATURE_RSA_SHA512: signature.sign_rsa_sha512_with_client, SIGNATURE_PLAINTEXT: signature.sign_plaintext_with_client } @@ -106,8 +144,8 @@ class Client(object): attrs['rsa_key'] = '****' if attrs['rsa_key'] else None attrs[ 'resource_owner_secret'] = '****' if attrs['resource_owner_secret'] else None - attribute_str = ', '.join('%s=%s' % (k, v) for k, v in attrs.items()) - return '<%s %s>' % (self.__class__.__name__, attribute_str) + attribute_str = ', '.join('{}={}'.format(k, v) for k, v in attrs.items()) + return '<{} {}>'.format(self.__class__.__name__, attribute_str) def get_oauth_signature(self, request): """Get an OAuth signature to be used in signing a request @@ -130,24 +168,24 @@ class Client(object): uri_query=urlparse.urlparse(uri).query, body=body, headers=headers) - log.debug("Collected params: {0}".format(collected_params)) + log.debug("Collected params: {}".format(collected_params)) normalized_params = signature.normalize_parameters(collected_params) normalized_uri = signature.base_string_uri(uri, headers.get('Host', None)) - log.debug("Normalized params: {0}".format(normalized_params)) - log.debug("Normalized URI: {0}".format(normalized_uri)) + log.debug("Normalized params: {}".format(normalized_params)) + log.debug("Normalized URI: {}".format(normalized_uri)) base_string = signature.signature_base_string(request.http_method, normalized_uri, normalized_params) - log.debug("Signing: signature base string: {0}".format(base_string)) + log.debug("Signing: signature base string: {}".format(base_string)) if self.signature_method not in self.SIGNATURE_METHODS: raise ValueError('Invalid signature method.') sig = self.SIGNATURE_METHODS[self.signature_method](base_string, self) - log.debug("Signature: {0}".format(sig)) + log.debug("Signature: {}".format(sig)) return sig def get_oauth_params(self, request): @@ -278,8 +316,8 @@ class Client(object): # header field set to "application/x-www-form-urlencoded". elif not should_have_params and has_params: raise ValueError( - "Body contains parameters but Content-Type header was {0} " - "instead of {1}".format(content_type or "not set", + "Body contains parameters but Content-Type header was {} " + "instead of {}".format(content_type or "not set", CONTENT_TYPE_FORM_URLENCODED)) # 3.5.2. Form-Encoded Body diff --git a/libs/common/oauthlib/oauth1/rfc5849/endpoints/__init__.py b/libs/common/oauthlib/oauth1/rfc5849/endpoints/__init__.py index b16ccba6..9f30389f 100644 --- a/libs/common/oauthlib/oauth1/rfc5849/endpoints/__init__.py +++ b/libs/common/oauthlib/oauth1/rfc5849/endpoints/__init__.py @@ -1,9 +1,8 @@ -from __future__ import absolute_import - +from .access_token import AccessTokenEndpoint +from .authorization import AuthorizationEndpoint from .base import BaseEndpoint from .request_token import RequestTokenEndpoint -from .authorization import AuthorizationEndpoint -from .access_token import AccessTokenEndpoint from .resource import ResourceEndpoint from .signature_only import SignatureOnlyEndpoint -from .pre_configured import WebApplicationServer + +from .pre_configured import WebApplicationServer # isort:skip diff --git a/libs/common/oauthlib/oauth1/rfc5849/endpoints/access_token.py b/libs/common/oauthlib/oauth1/rfc5849/endpoints/access_token.py index bea82741..13665db0 100644 --- a/libs/common/oauthlib/oauth1/rfc5849/endpoints/access_token.py +++ b/libs/common/oauthlib/oauth1/rfc5849/endpoints/access_token.py @@ -8,8 +8,6 @@ OAuth 1.0 RFC 5849. It validates the correctness of access token requests, creates and persists tokens as well as create the proper response to be returned to the client. """ -from __future__ import absolute_import, unicode_literals - import logging from oauthlib.common import urlencode diff --git a/libs/common/oauthlib/oauth1/rfc5849/endpoints/authorization.py b/libs/common/oauthlib/oauth1/rfc5849/endpoints/authorization.py index b465946f..00d9576b 100644 --- a/libs/common/oauthlib/oauth1/rfc5849/endpoints/authorization.py +++ b/libs/common/oauthlib/oauth1/rfc5849/endpoints/authorization.py @@ -6,18 +6,13 @@ oauthlib.oauth1.rfc5849.endpoints.authorization This module is an implementation of various logic needed for signing and checking OAuth 1.0 RFC 5849 requests. """ -from __future__ import absolute_import, unicode_literals +from urllib.parse import urlencode -from oauthlib.common import Request, add_params_to_uri +from oauthlib.common import add_params_to_uri from .. import errors from .base import BaseEndpoint -try: - from urllib import urlencode -except ImportError: - from urllib.parse import urlencode - class AuthorizationEndpoint(BaseEndpoint): diff --git a/libs/common/oauthlib/oauth1/rfc5849/endpoints/base.py b/libs/common/oauthlib/oauth1/rfc5849/endpoints/base.py index f005256b..7831be7c 100644 --- a/libs/common/oauthlib/oauth1/rfc5849/endpoints/base.py +++ b/libs/common/oauthlib/oauth1/rfc5849/endpoints/base.py @@ -6,18 +6,19 @@ oauthlib.oauth1.rfc5849.endpoints.base This module is an implementation of various logic needed for signing and checking OAuth 1.0 RFC 5849 requests. """ -from __future__ import absolute_import, unicode_literals - import time from oauthlib.common import CaseInsensitiveDict, Request, generate_token -from .. import (CONTENT_TYPE_FORM_URLENCODED, SIGNATURE_HMAC_SHA1, SIGNATURE_HMAC_SHA256, SIGNATURE_RSA, - SIGNATURE_TYPE_AUTH_HEADER, SIGNATURE_TYPE_BODY, - SIGNATURE_TYPE_QUERY, errors, signature, utils) +from .. import ( + CONTENT_TYPE_FORM_URLENCODED, SIGNATURE_HMAC_SHA1, SIGNATURE_HMAC_SHA256, + SIGNATURE_HMAC_SHA512, SIGNATURE_PLAINTEXT, SIGNATURE_RSA_SHA1, + SIGNATURE_RSA_SHA256, SIGNATURE_RSA_SHA512, SIGNATURE_TYPE_AUTH_HEADER, + SIGNATURE_TYPE_BODY, SIGNATURE_TYPE_QUERY, errors, signature, utils, +) -class BaseEndpoint(object): +class BaseEndpoint: def __init__(self, request_validator, token_generator=None): self.request_validator = request_validator @@ -131,7 +132,7 @@ class BaseEndpoint(object): if (not request.signature_method in self.request_validator.allowed_signature_methods): raise errors.InvalidSignatureMethodError( - description="Invalid signature, %s not in %r." % ( + description="Invalid signature, {} not in {!r}.".format( request.signature_method, self.request_validator.allowed_signature_methods)) @@ -179,38 +180,65 @@ class BaseEndpoint(object): def _check_signature(self, request, is_token_request=False): # ---- RSA Signature verification ---- - if request.signature_method == SIGNATURE_RSA: + if request.signature_method == SIGNATURE_RSA_SHA1 or \ + request.signature_method == SIGNATURE_RSA_SHA256 or \ + request.signature_method == SIGNATURE_RSA_SHA512: + # RSA-based signature method + # The server verifies the signature per `[RFC3447] section 8.2.2`_ # .. _`[RFC3447] section 8.2.2`: https://tools.ietf.org/html/rfc3447#section-8.2.1 + rsa_key = self.request_validator.get_rsa_key( request.client_key, request) - valid_signature = signature.verify_rsa_sha1(request, rsa_key) + + if request.signature_method == SIGNATURE_RSA_SHA1: + valid_signature = signature.verify_rsa_sha1(request, rsa_key) + elif request.signature_method == SIGNATURE_RSA_SHA256: + valid_signature = signature.verify_rsa_sha256(request, rsa_key) + elif request.signature_method == SIGNATURE_RSA_SHA512: + valid_signature = signature.verify_rsa_sha512(request, rsa_key) + else: + valid_signature = False # ---- HMAC or Plaintext Signature verification ---- else: + # Non-RSA based signature method + # Servers receiving an authenticated request MUST validate it by: # Recalculating the request signature independently as described in # `Section 3.4`_ and comparing it to the value received from the # client via the "oauth_signature" parameter. # .. _`Section 3.4`: https://tools.ietf.org/html/rfc5849#section-3.4 + client_secret = self.request_validator.get_client_secret( request.client_key, request) + resource_owner_secret = None if request.resource_owner_key: if is_token_request: - resource_owner_secret = self.request_validator.get_request_token_secret( - request.client_key, request.resource_owner_key, request) + resource_owner_secret = \ + self.request_validator.get_request_token_secret( + request.client_key, request.resource_owner_key, + request) else: - resource_owner_secret = self.request_validator.get_access_token_secret( - request.client_key, request.resource_owner_key, request) + resource_owner_secret = \ + self.request_validator.get_access_token_secret( + request.client_key, request.resource_owner_key, + request) if request.signature_method == SIGNATURE_HMAC_SHA1: - valid_signature = signature.verify_hmac_sha1(request, - client_secret, resource_owner_secret) + valid_signature = signature.verify_hmac_sha1( + request, client_secret, resource_owner_secret) elif request.signature_method == SIGNATURE_HMAC_SHA256: - valid_signature = signature.verify_hmac_sha256(request, - client_secret, resource_owner_secret) + valid_signature = signature.verify_hmac_sha256( + request, client_secret, resource_owner_secret) + elif request.signature_method == SIGNATURE_HMAC_SHA512: + valid_signature = signature.verify_hmac_sha512( + request, client_secret, resource_owner_secret) + elif request.signature_method == SIGNATURE_PLAINTEXT: + valid_signature = signature.verify_plaintext( + request, client_secret, resource_owner_secret) else: - valid_signature = signature.verify_plaintext(request, - client_secret, resource_owner_secret) + valid_signature = False + return valid_signature diff --git a/libs/common/oauthlib/oauth1/rfc5849/endpoints/pre_configured.py b/libs/common/oauthlib/oauth1/rfc5849/endpoints/pre_configured.py index f89393a5..23e3cfc8 100644 --- a/libs/common/oauthlib/oauth1/rfc5849/endpoints/pre_configured.py +++ b/libs/common/oauthlib/oauth1/rfc5849/endpoints/pre_configured.py @@ -1,7 +1,7 @@ -from __future__ import absolute_import, unicode_literals - -from . import (AccessTokenEndpoint, AuthorizationEndpoint, - RequestTokenEndpoint, ResourceEndpoint) +from . import ( + AccessTokenEndpoint, AuthorizationEndpoint, RequestTokenEndpoint, + ResourceEndpoint, +) class WebApplicationServer(RequestTokenEndpoint, AuthorizationEndpoint, diff --git a/libs/common/oauthlib/oauth1/rfc5849/endpoints/request_token.py b/libs/common/oauthlib/oauth1/rfc5849/endpoints/request_token.py index e9ca331d..0323cfb8 100644 --- a/libs/common/oauthlib/oauth1/rfc5849/endpoints/request_token.py +++ b/libs/common/oauthlib/oauth1/rfc5849/endpoints/request_token.py @@ -8,8 +8,6 @@ OAuth 1.0 RFC 5849. It validates the correctness of request token requests, creates and persists tokens as well as create the proper response to be returned to the client. """ -from __future__ import absolute_import, unicode_literals - import logging from oauthlib.common import urlencode @@ -129,7 +127,7 @@ class RequestTokenEndpoint(BaseEndpoint): request.client_key, request) if not self.request_validator.check_realms(request.realms): raise errors.InvalidRequestError( - description='Invalid realm %s. Allowed are %r.' % ( + description='Invalid realm {}. Allowed are {!r}.'.format( request.realms, self.request_validator.realms)) if not request.redirect_uri: @@ -154,7 +152,7 @@ class RequestTokenEndpoint(BaseEndpoint): request.client_key = self.request_validator.dummy_client # Note that `realm`_ is only used in authorization headers and how - # it should be interepreted is not included in the OAuth spec. + # it should be interpreted is not included in the OAuth spec. # However they could be seen as a scope or realm to which the # client has access and as such every client should be checked # to ensure it is authorized access to that scope or realm. @@ -166,7 +164,7 @@ class RequestTokenEndpoint(BaseEndpoint): # workflow where a client requests access to a specific realm. # This first step (obtaining request token) need not require a realm # and can then be identified by checking the require_resource_owner - # flag and abscence of realm. + # flag and absence of realm. # # Clients obtaining an access token will not supply a realm and it will # not be checked. Instead the previously requested realm should be diff --git a/libs/common/oauthlib/oauth1/rfc5849/endpoints/resource.py b/libs/common/oauthlib/oauth1/rfc5849/endpoints/resource.py index f82e8b1b..8641152e 100644 --- a/libs/common/oauthlib/oauth1/rfc5849/endpoints/resource.py +++ b/libs/common/oauthlib/oauth1/rfc5849/endpoints/resource.py @@ -6,8 +6,6 @@ oauthlib.oauth1.rfc5849.endpoints.resource This module is an implementation of the resource protection provider logic of OAuth 1.0 RFC 5849. """ -from __future__ import absolute_import, unicode_literals - import logging from .. import errors @@ -115,7 +113,7 @@ class ResourceEndpoint(BaseEndpoint): request.resource_owner_key = self.request_validator.dummy_access_token # Note that `realm`_ is only used in authorization headers and how - # it should be interepreted is not included in the OAuth spec. + # it should be interpreted is not included in the OAuth spec. # However they could be seen as a scope or realm to which the # client has access and as such every client should be checked # to ensure it is authorized access to that scope or realm. @@ -127,7 +125,7 @@ class ResourceEndpoint(BaseEndpoint): # workflow where a client requests access to a specific realm. # This first step (obtaining request token) need not require a realm # and can then be identified by checking the require_resource_owner - # flag and abscence of realm. + # flag and absence of realm. # # Clients obtaining an access token will not supply a realm and it will # not be checked. Instead the previously requested realm should be diff --git a/libs/common/oauthlib/oauth1/rfc5849/endpoints/signature_only.py b/libs/common/oauthlib/oauth1/rfc5849/endpoints/signature_only.py index 42977706..d693ccb7 100644 --- a/libs/common/oauthlib/oauth1/rfc5849/endpoints/signature_only.py +++ b/libs/common/oauthlib/oauth1/rfc5849/endpoints/signature_only.py @@ -6,8 +6,6 @@ oauthlib.oauth1.rfc5849.endpoints.signature_only This module is an implementation of the signing logic of OAuth 1.0 RFC 5849. """ -from __future__ import absolute_import, unicode_literals - import logging from .. import errors diff --git a/libs/common/oauthlib/oauth1/rfc5849/errors.py b/libs/common/oauthlib/oauth1/rfc5849/errors.py index a5c59bdd..8774d407 100644 --- a/libs/common/oauthlib/oauth1/rfc5849/errors.py +++ b/libs/common/oauthlib/oauth1/rfc5849/errors.py @@ -1,4 +1,3 @@ -# coding=utf-8 """ oauthlib.oauth1.rfc5849.errors ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -6,8 +5,6 @@ oauthlib.oauth1.rfc5849.errors Error used both by OAuth 1 clients and provicers to represent the spec defined error responses for all four core grant types. """ -from __future__ import unicode_literals - from oauthlib.common import add_params_to_uri, urlencode @@ -37,10 +34,10 @@ class OAuth1Error(Exception): request: Oauthlib Request object """ self.description = description or self.description - message = '(%s) %s' % (self.error, self.description) + message = '({}) {}'.format(self.error, self.description) if request: message += ' ' + repr(request) - super(OAuth1Error, self).__init__(message) + super().__init__(message) self.uri = uri self.status_code = status_code diff --git a/libs/common/oauthlib/oauth1/rfc5849/parameters.py b/libs/common/oauthlib/oauth1/rfc5849/parameters.py index db4400e9..2163772d 100644 --- a/libs/common/oauthlib/oauth1/rfc5849/parameters.py +++ b/libs/common/oauthlib/oauth1/rfc5849/parameters.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ oauthlib.parameters ~~~~~~~~~~~~~~~~~~~ @@ -7,17 +6,12 @@ This module contains methods related to `section 3.5`_ of the OAuth 1.0a spec. .. _`section 3.5`: https://tools.ietf.org/html/rfc5849#section-3.5 """ -from __future__ import absolute_import, unicode_literals +from urllib.parse import urlparse, urlunparse from oauthlib.common import extract_params, urlencode from . import utils -try: - from urlparse import urlparse, urlunparse -except ImportError: # noqa - from urllib.parse import urlparse, urlunparse - # TODO: do we need filter_params now that oauth_params are handled by Request? # We can easily pass in just oauth protocol params. @@ -61,7 +55,7 @@ def prepare_headers(oauth_params, headers=None, realm=None): # 2. Each parameter's name is immediately followed by an "=" character # (ASCII code 61), a """ character (ASCII code 34), the parameter # value (MAY be empty), and another """ character (ASCII code 34). - part = '{0}="{1}"'.format(escaped_name, escaped_value) + part = '{}="{}"'.format(escaped_name, escaped_value) authorization_header_parameters_parts.append(part) diff --git a/libs/common/oauthlib/oauth1/rfc5849/request_validator.py b/libs/common/oauthlib/oauth1/rfc5849/request_validator.py index 330bcbb8..e937aabf 100644 --- a/libs/common/oauthlib/oauth1/rfc5849/request_validator.py +++ b/libs/common/oauthlib/oauth1/rfc5849/request_validator.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ oauthlib.oauth1.rfc5849 ~~~~~~~~~~~~~~ @@ -6,14 +5,10 @@ oauthlib.oauth1.rfc5849 This module is an implementation of various logic needed for signing and checking OAuth 1.0 RFC 5849 requests. """ -from __future__ import absolute_import, unicode_literals - -import sys - from . import SIGNATURE_METHODS, utils -class RequestValidator(object): +class RequestValidator: """A validator/datastore interaction base class for OAuth 1 providers. @@ -24,7 +19,7 @@ class RequestValidator(object): Methods used to check the format of input parameters. Common tests include length, character set, membership, range or pattern. These tests are referred to as `whitelisting or blacklisting`_. Whitelisting is better - but blacklisting can be usefull to spot malicious activity. + but blacklisting can be useful to spot malicious activity. The following have methods a default implementation: - check_client_key @@ -197,7 +192,7 @@ class RequestValidator(object): def check_realms(self, realms): """Check that the realm is one of a set allowed realms.""" - return all((r in self.realms for r in realms)) + return all(r in self.realms for r in realms) def _subclass_must_implement(self, fn): """ @@ -448,7 +443,7 @@ class RequestValidator(object): :type request: oauthlib.common.Request :returns: None - Per `Section 2.3`__ of the spec: + Per `Section 2.3`_ of the spec: "The server MUST (...) ensure that the temporary credentials have not expired or been used before." @@ -836,7 +831,7 @@ class RequestValidator(object): """Associate an authorization verifier with a request token. :param token: A request token string. - :param verifier A dictionary containing the oauth_verifier and + :param verifier: A dictionary containing the oauth_verifier and oauth_token :param request: OAuthlib request. :type request: oauthlib.common.Request diff --git a/libs/common/oauthlib/oauth1/rfc5849/signature.py b/libs/common/oauthlib/oauth1/rfc5849/signature.py index a60bee29..9cb1a517 100644 --- a/libs/common/oauthlib/oauth1/rfc5849/signature.py +++ b/libs/common/oauthlib/oauth1/rfc5849/signature.py @@ -1,74 +1,70 @@ -# -*- coding: utf-8 -*- """ -oauthlib.oauth1.rfc5849.signature -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +This module is an implementation of `section 3.4`_ of RFC 5849. -This module represents a direct implementation of `section 3.4`_ of the spec. - -Terminology: - * Client: software interfacing with an OAuth API - * Server: the API provider - * Resource Owner: the user who is granting authorization to the client +**Usage** Steps for signing a request: -1. Collect parameters from the uri query, auth header, & body -2. Normalize those parameters -3. Normalize the uri -4. Pass the normalized uri, normalized parameters, and http method to - construct the base string -5. Pass the base string and any keys needed to a signing function +1. Collect parameters from the request using ``collect_parameters``. +2. Normalize those parameters using ``normalize_parameters``. +3. Create the *base string URI* using ``base_string_uri``. +4. Create the *signature base string* from the above three components + using ``signature_base_string``. +5. Pass the *signature base string* and the client credentials to one of the + sign-with-client functions. The HMAC-based signing functions needs + client credentials with secrets. The RSA-based signing functions needs + client credentials with an RSA private key. + +To verify a request, pass the request and credentials to one of the verify +functions. The HMAC-based signing functions needs the shared secrets. The +RSA-based verify functions needs the RSA public key. + +**Scope** + +All of the functions in this module should be considered internal to OAuthLib, +since they are not imported into the "oauthlib.oauth1" module. Programs using +OAuthLib should not use directly invoke any of the functions in this module. + +**Deprecated functions** + +The "sign_" methods that are not "_with_client" have been deprecated. They may +be removed in a future release. Since they are all internal functions, this +should have no impact on properly behaving programs. .. _`section 3.4`: https://tools.ietf.org/html/rfc5849#section-3.4 """ -from __future__ import absolute_import, unicode_literals import binascii import hashlib import hmac +import ipaddress import logging +import urllib.parse as urlparse +import warnings -from oauthlib.common import (extract_params, safe_string_equals, - unicode_type, urldecode) +from oauthlib.common import extract_params, safe_string_equals, urldecode from . import utils -try: - import urlparse -except ImportError: - import urllib.parse as urlparse - log = logging.getLogger(__name__) -def signature_base_string(http_method, base_str_uri, - normalized_encoded_request_parameters): - """**Construct the signature base string.** - Per `section 3.4.1.1`_ of the spec. +# ==== Common functions ========================================== - For example, the HTTP request:: +def signature_base_string( + http_method: str, + base_str_uri: str, + normalized_encoded_request_parameters: str) -> str: + """ + Construct the signature base string. - POST /request?b5=%3D%253D&a3=a&c%40=&a2=r%20b HTTP/1.1 - Host: example.com - Content-Type: application/x-www-form-urlencoded - Authorization: OAuth realm="Example", - oauth_consumer_key="9djdj82h48djs9d2", - oauth_token="kkk9d7dh3k39sjv7", - oauth_signature_method="HMAC-SHA1", - oauth_timestamp="137131201", - oauth_nonce="7d8f3e4a", - oauth_signature="bYT5CMsGcbgUdFHObYMEfcx6bsw%3D" + The *signature base string* is the value that is calculated and signed by + the client. It is also independently calculated by the server to verify + the signature, and therefore must produce the exact same value at both + ends or the signature won't verify. - c2&a3=2+q - - is represented by the following signature base string (line breaks - are for display purposes only):: - - POST&http%3A%2F%2Fexample.com%2Frequest&a2%3Dr%2520b%26a3%3D2%2520q - %26a3%3Da%26b5%3D%253D%25253D%26c%2540%3D%26c2%3D%26oauth_consumer_ - key%3D9djdj82h48djs9d2%26oauth_nonce%3D7d8f3e4a%26oauth_signature_m - ethod%3DHMAC-SHA1%26oauth_timestamp%3D137131201%26oauth_token%3Dkkk - 9d7dh3k39sjv7 + The rules for calculating the *signature base string* are defined in + section 3.4.1.1`_ of RFC 5849. .. _`section 3.4.1.1`: https://tools.ietf.org/html/rfc5849#section-3.4.1.1 """ @@ -90,7 +86,7 @@ def signature_base_string(http_method, base_str_uri, # (`Section 3.6`_). # # .. _`Section 3.4.1.2`: https://tools.ietf.org/html/rfc5849#section-3.4.1.2 - # .. _`Section 3.4.6`: https://tools.ietf.org/html/rfc5849#section-3.4.6 + # .. _`Section 3.6`: https://tools.ietf.org/html/rfc5849#section-3.6 base_string += utils.escape(base_str_uri) # 4. An "&" character (ASCII code 38). @@ -99,40 +95,48 @@ def signature_base_string(http_method, base_str_uri, # 5. The request parameters as normalized in `Section 3.4.1.3.2`_, after # being encoded (`Section 3.6`). # - # .. _`Section 3.4.1.3.2`: https://tools.ietf.org/html/rfc5849#section-3.4.1.3.2 - # .. _`Section 3.4.6`: https://tools.ietf.org/html/rfc5849#section-3.4.6 + # .. _`Sec 3.4.1.3.2`: https://tools.ietf.org/html/rfc5849#section-3.4.1.3.2 + # .. _`Section 3.6`: https://tools.ietf.org/html/rfc5849#section-3.6 base_string += utils.escape(normalized_encoded_request_parameters) return base_string -def base_string_uri(uri, host=None): - """**Base String URI** - Per `section 3.4.1.2`_ of RFC 5849. +def base_string_uri(uri: str, host: str = None) -> str: + """ + Calculates the _base string URI_. - For example, the HTTP request:: + The *base string URI* is one of the components that make up the + *signature base string*. - GET /r%20v/X?id=123 HTTP/1.1 - Host: EXAMPLE.COM:80 + The ``host`` is optional. If provided, it is used to override any host and + port values in the ``uri``. The value for ``host`` is usually extracted from + the "Host" request header from the HTTP request. Its value may be just the + hostname, or the hostname followed by a colon and a TCP/IP port number + (hostname:port). If a value for the``host`` is provided but it does not + contain a port number, the default port number is used (i.e. if the ``uri`` + contained a port number, it will be discarded). - is represented by the base string URI: "http://example.com/r%20v/X". - - In another example, the HTTPS request:: - - GET /?q=1 HTTP/1.1 - Host: www.example.net:8080 - - is represented by the base string URI: "https://www.example.net:8080/". + The rules for calculating the *base string URI* are defined in + section 3.4.1.2`_ of RFC 5849. .. _`section 3.4.1.2`: https://tools.ietf.org/html/rfc5849#section-3.4.1.2 - The host argument overrides the netloc part of the uri argument. + :param uri: URI + :param host: hostname with optional port number, separated by a colon + :return: base string URI """ - if not isinstance(uri, unicode_type): - raise ValueError('uri must be a unicode object.') + + if not isinstance(uri, str): + raise ValueError('uri must be a string.') # FIXME: urlparse does not support unicode - scheme, netloc, path, params, query, fragment = urlparse.urlparse(uri) + output = urlparse.urlparse(uri) + scheme = output.scheme + hostname = output.hostname + port = output.port + path = output.path + params = output.params # The scheme, authority, and path of the request resource URI `RFC3986` # are included by constructing an "http" or "https" URI representing @@ -140,26 +144,36 @@ def base_string_uri(uri, host=None): # # .. _`RFC3986`: https://tools.ietf.org/html/rfc3986 - if not scheme or not netloc: - raise ValueError('uri must include a scheme and netloc') + if not scheme: + raise ValueError('missing scheme') # Per `RFC 2616 section 5.1.2`_: # # Note that the absolute path cannot be empty; if none is present in # the original URI, it MUST be given as "/" (the server root). # - # .. _`RFC 2616 section 5.1.2`: https://tools.ietf.org/html/rfc2616#section-5.1.2 + # .. _`RFC 2616 5.1.2`: https://tools.ietf.org/html/rfc2616#section-5.1.2 if not path: path = '/' # 1. The scheme and host MUST be in lowercase. scheme = scheme.lower() - netloc = netloc.lower() + # Note: if ``host`` is used, it will be converted to lowercase below + if hostname is not None: + hostname = hostname.lower() # 2. The host and port values MUST match the content of the HTTP # request "Host" header field. if host is not None: - netloc = host.lower() + # NOTE: override value in uri with provided host + # Host argument is equal to netloc. It means it's missing scheme. + # Add it back, before parsing. + + host = host.lower() + host = f"{scheme}://{host}" + output = urlparse.urlparse(host) + hostname = output.hostname + port = output.port # 3. The port MUST be included if it is not the default port for the # scheme, and MUST be excluded if it is the default. Specifically, @@ -169,14 +183,29 @@ def base_string_uri(uri, host=None): # # .. _`RFC2616`: https://tools.ietf.org/html/rfc2616 # .. _`RFC2818`: https://tools.ietf.org/html/rfc2818 - default_ports = ( - ('http', '80'), - ('https', '443'), - ) - if ':' in netloc: - host, port = netloc.split(':', 1) - if (scheme, port) in default_ports: - netloc = host + + if hostname is None: + raise ValueError('missing host') + + # NOTE: Try guessing if we're dealing with IP or hostname + try: + hostname = ipaddress.ip_address(hostname) + except ValueError: + pass + + if isinstance(hostname, ipaddress.IPv6Address): + hostname = f"[{hostname}]" + elif isinstance(hostname, ipaddress.IPv4Address): + hostname = f"{hostname}" + + if port is not None and not (0 < port <= 65535): + raise ValueError('port out of range') # 16-bit unsigned ints + if (scheme, port) in (('http', 80), ('https', 443)): + netloc = hostname # default port for scheme: exclude port num + elif port: + netloc = f"{hostname}:{port}" # use hostname:port + else: + netloc = hostname v = urlparse.urlunparse((scheme, netloc, path, params, '', '')) @@ -205,77 +234,29 @@ def base_string_uri(uri, host=None): return v.replace(' ', '%20') -# ** Request Parameters ** -# -# Per `section 3.4.1.3`_ of the spec. -# -# In order to guarantee a consistent and reproducible representation of -# the request parameters, the parameters are collected and decoded to -# their original decoded form. They are then sorted and encoded in a -# particular manner that is often different from their original -# encoding scheme, and concatenated into a single string. -# -# .. _`section 3.4.1.3`: https://tools.ietf.org/html/rfc5849#section-3.4.1.3 - -def collect_parameters(uri_query='', body=[], headers=None, +def collect_parameters(uri_query='', body=None, headers=None, exclude_oauth_signature=True, with_realm=False): - """**Parameter Sources** + """ + Gather the request parameters from all the parameter sources. + + This function is used to extract all the parameters, which are then passed + to ``normalize_parameters`` to produce one of the components that make up + the *signature base string*. Parameters starting with `oauth_` will be unescaped. Body parameters must be supplied as a dict, a list of 2-tuples, or a - formencoded query string. + form encoded query string. Headers must be supplied as a dict. - Per `section 3.4.1.3.1`_ of the spec. + The rules where the parameters must be sourced from are defined in + `section 3.4.1.3.1`_ of RFC 5849. - For example, the HTTP request:: - - POST /request?b5=%3D%253D&a3=a&c%40=&a2=r%20b HTTP/1.1 - Host: example.com - Content-Type: application/x-www-form-urlencoded - Authorization: OAuth realm="Example", - oauth_consumer_key="9djdj82h48djs9d2", - oauth_token="kkk9d7dh3k39sjv7", - oauth_signature_method="HMAC-SHA1", - oauth_timestamp="137131201", - oauth_nonce="7d8f3e4a", - oauth_signature="djosJKDKJSD8743243%2Fjdk33klY%3D" - - c2&a3=2+q - - contains the following (fully decoded) parameters used in the - signature base sting:: - - +------------------------+------------------+ - | Name | Value | - +------------------------+------------------+ - | b5 | =%3D | - | a3 | a | - | c@ | | - | a2 | r b | - | oauth_consumer_key | 9djdj82h48djs9d2 | - | oauth_token | kkk9d7dh3k39sjv7 | - | oauth_signature_method | HMAC-SHA1 | - | oauth_timestamp | 137131201 | - | oauth_nonce | 7d8f3e4a | - | c2 | | - | a3 | 2 q | - +------------------------+------------------+ - - Note that the value of "b5" is "=%3D" and not "==". Both "c@" and - "c2" have empty values. While the encoding rules specified in this - specification for the purpose of constructing the signature base - string exclude the use of a "+" character (ASCII code 43) to - represent an encoded space character (ASCII code 32), this practice - is widely used in "application/x-www-form-urlencoded" encoded values, - and MUST be properly decoded, as demonstrated by one of the "a3" - parameter instances (the "a3" parameter is used twice in this - request). - - .. _`section 3.4.1.3.1`: https://tools.ietf.org/html/rfc5849#section-3.4.1.3.1 + .. _`Sec 3.4.1.3.1`: https://tools.ietf.org/html/rfc5849#section-3.4.1.3.1 """ + if body is None: + body = [] headers = headers or {} params = [] @@ -286,11 +267,11 @@ def collect_parameters(uri_query='', body=[], headers=None, # `RFC3986, Section 3.4`_. The query component is parsed into a list # of name/value pairs by treating it as an # "application/x-www-form-urlencoded" string, separating the names - # and values and decoding them as defined by - # `W3C.REC-html40-19980424`_, Section 17.13.4. + # and values and decoding them as defined by W3C.REC-html40-19980424 + # `W3C-HTML-4.0`_, Section 17.13.4. # - # .. _`RFC3986, Section 3.4`: https://tools.ietf.org/html/rfc3986#section-3.4 - # .. _`W3C.REC-html40-19980424`: https://tools.ietf.org/html/rfc5849#ref-W3C.REC-html40-19980424 + # .. _`RFC3986, Sec 3.4`: https://tools.ietf.org/html/rfc3986#section-3.4 + # .. _`W3C-HTML-4.0`: https://www.w3.org/TR/1998/REC-html40-19980424/ if uri_query: params.extend(urldecode(uri_query)) @@ -301,7 +282,7 @@ def collect_parameters(uri_query='', body=[], headers=None, # # .. _`Section 3.5.1`: https://tools.ietf.org/html/rfc5849#section-3.5.1 if headers: - headers_lower = dict((k.lower(), v) for k, v in headers.items()) + headers_lower = {k.lower(): v for k, v in headers.items()} authorization_header = headers_lower.get('authorization') if authorization_header is not None: params.extend([i for i in utils.parse_authorization_header( @@ -313,12 +294,12 @@ def collect_parameters(uri_query='', body=[], headers=None, # # * The entity-body follows the encoding requirements of the # "application/x-www-form-urlencoded" content-type as defined by - # `W3C.REC-html40-19980424`_. + # W3C.REC-html40-19980424 `W3C-HTML-4.0`_. # * The HTTP request entity-header includes the "Content-Type" # header field set to "application/x-www-form-urlencoded". # - # .._`W3C.REC-html40-19980424`: https://tools.ietf.org/html/rfc5849#ref-W3C.REC-html40-19980424 + # .. _`W3C-HTML-4.0`: https://www.w3.org/TR/1998/REC-html40-19980424/ # TODO: enforce header param inclusion conditions bodyparams = extract_params(body) or [] @@ -340,75 +321,17 @@ def collect_parameters(uri_query='', body=[], headers=None, return unescaped_params -def normalize_parameters(params): - """**Parameters Normalization** - Per `section 3.4.1.3.2`_ of the spec. +def normalize_parameters(params) -> str: + """ + Calculate the normalized request parameters. - For example, the list of parameters from the previous section would - be normalized as follows: + The *normalized request parameters* is one of the components that make up + the *signature base string*. - Encoded:: + The rules for parameter normalization are defined in `section 3.4.1.3.2`_ of + RFC 5849. - +------------------------+------------------+ - | Name | Value | - +------------------------+------------------+ - | b5 | %3D%253D | - | a3 | a | - | c%40 | | - | a2 | r%20b | - | oauth_consumer_key | 9djdj82h48djs9d2 | - | oauth_token | kkk9d7dh3k39sjv7 | - | oauth_signature_method | HMAC-SHA1 | - | oauth_timestamp | 137131201 | - | oauth_nonce | 7d8f3e4a | - | c2 | | - | a3 | 2%20q | - +------------------------+------------------+ - - Sorted:: - - +------------------------+------------------+ - | Name | Value | - +------------------------+------------------+ - | a2 | r%20b | - | a3 | 2%20q | - | a3 | a | - | b5 | %3D%253D | - | c%40 | | - | c2 | | - | oauth_consumer_key | 9djdj82h48djs9d2 | - | oauth_nonce | 7d8f3e4a | - | oauth_signature_method | HMAC-SHA1 | - | oauth_timestamp | 137131201 | - | oauth_token | kkk9d7dh3k39sjv7 | - +------------------------+------------------+ - - Concatenated Pairs:: - - +-------------------------------------+ - | Name=Value | - +-------------------------------------+ - | a2=r%20b | - | a3=2%20q | - | a3=a | - | b5=%3D%253D | - | c%40= | - | c2= | - | oauth_consumer_key=9djdj82h48djs9d2 | - | oauth_nonce=7d8f3e4a | - | oauth_signature_method=HMAC-SHA1 | - | oauth_timestamp=137131201 | - | oauth_token=kkk9d7dh3k39sjv7 | - +-------------------------------------+ - - and concatenated together into a single string (line breaks are for - display purposes only):: - - a2=r%20b&a3=2%20q&a3=a&b5=%3D%253D&c%40=&c2=&oauth_consumer_key=9dj - dj82h48djs9d2&oauth_nonce=7d8f3e4a&oauth_signature_method=HMAC-SHA1 - &oauth_timestamp=137131201&oauth_token=kkk9d7dh3k39sjv7 - - .. _`section 3.4.1.3.2`: https://tools.ietf.org/html/rfc5849#section-3.4.1.3.2 + .. _`Sec 3.4.1.3.2`: https://tools.ietf.org/html/rfc5849#section-3.4.1.3.2 """ # The parameters collected in `Section 3.4.1.3`_ are normalized into a @@ -430,7 +353,7 @@ def normalize_parameters(params): # 3. The name of each parameter is concatenated to its corresponding # value using an "=" character (ASCII code 61) as a separator, even # if the value is empty. - parameter_parts = ['{0}={1}'.format(k, v) for k, v in key_values] + parameter_parts = ['{}={}'.format(k, v) for k, v in key_values] # 4. The sorted name/value pairs are concatenated together into a # single string by using an "&" character (ASCII code 38) as @@ -438,72 +361,14 @@ def normalize_parameters(params): return '&'.join(parameter_parts) -def sign_hmac_sha1_with_client(base_string, client): - return sign_hmac_sha1(base_string, - client.client_secret, - client.resource_owner_secret - ) +# ==== Common functions for HMAC-based signature methods ========= - -def sign_hmac_sha1(base_string, client_secret, resource_owner_secret): - """**HMAC-SHA1** - - The "HMAC-SHA1" signature method uses the HMAC-SHA1 signature - algorithm as defined in `RFC2104`_:: - - digest = HMAC-SHA1 (key, text) - - Per `section 3.4.2`_ of the spec. - - .. _`RFC2104`: https://tools.ietf.org/html/rfc2104 - .. _`section 3.4.2`: https://tools.ietf.org/html/rfc5849#section-3.4.2 +def _sign_hmac(hash_algorithm_name: str, + sig_base_str: str, + client_secret: str, + resource_owner_secret: str): """ - - # The HMAC-SHA1 function variables are used in following way: - - # text is set to the value of the signature base string from - # `Section 3.4.1.1`_. - # - # .. _`Section 3.4.1.1`: https://tools.ietf.org/html/rfc5849#section-3.4.1.1 - text = base_string - - # key is set to the concatenated values of: - # 1. The client shared-secret, after being encoded (`Section 3.6`_). - # - # .. _`Section 3.6`: https://tools.ietf.org/html/rfc5849#section-3.6 - key = utils.escape(client_secret or '') - - # 2. An "&" character (ASCII code 38), which MUST be included - # even when either secret is empty. - key += '&' - - # 3. The token shared-secret, after being encoded (`Section 3.6`_). - # - # .. _`Section 3.6`: https://tools.ietf.org/html/rfc5849#section-3.6 - key += utils.escape(resource_owner_secret or '') - - # FIXME: HMAC does not support unicode! - key_utf8 = key.encode('utf-8') - text_utf8 = text.encode('utf-8') - signature = hmac.new(key_utf8, text_utf8, hashlib.sha1) - - # digest is used to set the value of the "oauth_signature" protocol - # parameter, after the result octet string is base64-encoded - # per `RFC2045, Section 6.8`. - # - # .. _`RFC2045, Section 6.8`: https://tools.ietf.org/html/rfc2045#section-6.8 - return binascii.b2a_base64(signature.digest())[:-1].decode('utf-8') - - -def sign_hmac_sha256_with_client(base_string, client): - return sign_hmac_sha256(base_string, - client.client_secret, - client.resource_owner_secret - ) - - -def sign_hmac_sha256(base_string, client_secret, resource_owner_secret): - """**HMAC-SHA256** + **HMAC-SHA256** The "HMAC-SHA256" signature method uses the HMAC-SHA256 signature algorithm as defined in `RFC4634`_:: @@ -522,7 +387,7 @@ def sign_hmac_sha256(base_string, client_secret, resource_owner_secret): # `Section 3.4.1.1`_. # # .. _`Section 3.4.1.1`: https://tools.ietf.org/html/rfc5849#section-3.4.1.1 - text = base_string + text = sig_base_str # key is set to the concatenated values of: # 1. The client shared-secret, after being encoded (`Section 3.6`_). @@ -539,57 +404,403 @@ def sign_hmac_sha256(base_string, client_secret, resource_owner_secret): # .. _`Section 3.6`: https://tools.ietf.org/html/rfc5849#section-3.6 key += utils.escape(resource_owner_secret or '') + # Get the hashing algorithm to use + + m = { + 'SHA-1': hashlib.sha1, + 'SHA-256': hashlib.sha256, + 'SHA-512': hashlib.sha512, + } + hash_alg = m[hash_algorithm_name] + + # Calculate the signature + # FIXME: HMAC does not support unicode! key_utf8 = key.encode('utf-8') text_utf8 = text.encode('utf-8') - signature = hmac.new(key_utf8, text_utf8, hashlib.sha256) + signature = hmac.new(key_utf8, text_utf8, hash_alg) # digest is used to set the value of the "oauth_signature" protocol # parameter, after the result octet string is base64-encoded # per `RFC2045, Section 6.8`. # - # .. _`RFC2045, Section 6.8`: https://tools.ietf.org/html/rfc2045#section-6.8 + # .. _`RFC2045, Sec 6.8`: https://tools.ietf.org/html/rfc2045#section-6.8 return binascii.b2a_base64(signature.digest())[:-1].decode('utf-8') -_jwtrs1 = None -#jwt has some nice pycrypto/cryptography abstractions -def _jwt_rs1_signing_algorithm(): - global _jwtrs1 - if _jwtrs1 is None: - import jwt.algorithms as jwtalgo - _jwtrs1 = jwtalgo.RSAAlgorithm(jwtalgo.hashes.SHA1) - return _jwtrs1 +def _verify_hmac(hash_algorithm_name: str, + request, + client_secret=None, + resource_owner_secret=None): + """Verify a HMAC-SHA1 signature. -def sign_rsa_sha1(base_string, rsa_private_key): - """**RSA-SHA1** + Per `section 3.4`_ of the spec. - Per `section 3.4.3`_ of the spec. + .. _`section 3.4`: https://tools.ietf.org/html/rfc5849#section-3.4 - The "RSA-SHA1" signature method uses the RSASSA-PKCS1-v1_5 signature - algorithm as defined in `RFC3447, Section 8.2`_ (also known as - PKCS#1), using SHA-1 as the hash function for EMSA-PKCS1-v1_5. To + To satisfy `RFC2616 section 5.2`_ item 1, the request argument's uri + attribute MUST be an absolute URI whose netloc part identifies the + origin server or gateway on which the resource resides. Any Host + item of the request argument's headers dict attribute will be + ignored. + + .. _`RFC2616 section 5.2`: https://tools.ietf.org/html/rfc2616#section-5.2 + + """ + norm_params = normalize_parameters(request.params) + bs_uri = base_string_uri(request.uri) + sig_base_str = signature_base_string(request.http_method, bs_uri, + norm_params) + signature = _sign_hmac(hash_algorithm_name, sig_base_str, + client_secret, resource_owner_secret) + match = safe_string_equals(signature, request.signature) + if not match: + log.debug('Verify HMAC failed: signature base string: %s', sig_base_str) + return match + + +# ==== HMAC-SHA1 ================================================= + +def sign_hmac_sha1_with_client(sig_base_str, client): + return _sign_hmac('SHA-1', sig_base_str, + client.client_secret, client.resource_owner_secret) + + +def verify_hmac_sha1(request, client_secret=None, resource_owner_secret=None): + return _verify_hmac('SHA-1', request, client_secret, resource_owner_secret) + + +def sign_hmac_sha1(base_string, client_secret, resource_owner_secret): + """ + Deprecated function for calculating a HMAC-SHA1 signature. + + This function has been replaced by invoking ``sign_hmac`` with "SHA-1" + as the hash algorithm name. + + This function was invoked by sign_hmac_sha1_with_client and + test_signatures.py, but does any application invoke it directly? If not, + it can be removed. + """ + warnings.warn('use sign_hmac_sha1_with_client instead of sign_hmac_sha1', + DeprecationWarning) + + # For some unknown reason, the original implementation assumed base_string + # could either be bytes or str. The signature base string calculating + # function always returned a str, so the new ``sign_rsa`` only expects that. + + base_string = base_string.decode('ascii') \ + if isinstance(base_string, bytes) else base_string + + return _sign_hmac('SHA-1', base_string, + client_secret, resource_owner_secret) + + +# ==== HMAC-SHA256 =============================================== + +def sign_hmac_sha256_with_client(sig_base_str, client): + return _sign_hmac('SHA-256', sig_base_str, + client.client_secret, client.resource_owner_secret) + + +def verify_hmac_sha256(request, client_secret=None, resource_owner_secret=None): + return _verify_hmac('SHA-256', request, + client_secret, resource_owner_secret) + + +def sign_hmac_sha256(base_string, client_secret, resource_owner_secret): + """ + Deprecated function for calculating a HMAC-SHA256 signature. + + This function has been replaced by invoking ``sign_hmac`` with "SHA-256" + as the hash algorithm name. + + This function was invoked by sign_hmac_sha256_with_client and + test_signatures.py, but does any application invoke it directly? If not, + it can be removed. + """ + warnings.warn( + 'use sign_hmac_sha256_with_client instead of sign_hmac_sha256', + DeprecationWarning) + + # For some unknown reason, the original implementation assumed base_string + # could either be bytes or str. The signature base string calculating + # function always returned a str, so the new ``sign_rsa`` only expects that. + + base_string = base_string.decode('ascii') \ + if isinstance(base_string, bytes) else base_string + + return _sign_hmac('SHA-256', base_string, + client_secret, resource_owner_secret) + + +# ==== HMAC-SHA512 =============================================== + +def sign_hmac_sha512_with_client(sig_base_str: str, + client): + return _sign_hmac('SHA-512', sig_base_str, + client.client_secret, client.resource_owner_secret) + + +def verify_hmac_sha512(request, + client_secret: str = None, + resource_owner_secret: str = None): + return _verify_hmac('SHA-512', request, + client_secret, resource_owner_secret) + + +# ==== Common functions for RSA-based signature methods ========== + +_jwt_rsa = {} # cache of RSA-hash implementations from PyJWT jwt.algorithms + + +def _get_jwt_rsa_algorithm(hash_algorithm_name: str): + """ + Obtains an RSAAlgorithm object that implements RSA with the hash algorithm. + + This method maintains the ``_jwt_rsa`` cache. + + Returns a jwt.algorithm.RSAAlgorithm. + """ + if hash_algorithm_name in _jwt_rsa: + # Found in cache: return it + return _jwt_rsa[hash_algorithm_name] + else: + # Not in cache: instantiate a new RSAAlgorithm + + # PyJWT has some nice pycrypto/cryptography abstractions + import jwt.algorithms as jwt_algorithms + m = { + 'SHA-1': jwt_algorithms.hashes.SHA1, + 'SHA-256': jwt_algorithms.hashes.SHA256, + 'SHA-512': jwt_algorithms.hashes.SHA512, + } + v = jwt_algorithms.RSAAlgorithm(m[hash_algorithm_name]) + + _jwt_rsa[hash_algorithm_name] = v # populate cache + + return v + + +def _prepare_key_plus(alg, keystr): + """ + Prepare a PEM encoded key (public or private), by invoking the `prepare_key` + method on alg with the keystr. + + The keystr should be a string or bytes. If the keystr is bytes, it is + decoded as UTF-8 before being passed to prepare_key. Otherwise, it + is passed directly. + """ + if isinstance(keystr, bytes): + keystr = keystr.decode('utf-8') + return alg.prepare_key(keystr) + + +def _sign_rsa(hash_algorithm_name: str, + sig_base_str: str, + rsa_private_key: str): + """ + Calculate the signature for an RSA-based signature method. + + The ``alg`` is used to calculate the digest over the signature base string. + For the "RSA_SHA1" signature method, the alg must be SHA-1. While OAuth 1.0a + only defines the RSA-SHA1 signature method, this function can be used for + other non-standard signature methods that only differ from RSA-SHA1 by the + digest algorithm. + + Signing for the RSA-SHA1 signature method is defined in + `section 3.4.3`_ of RFC 5849. + + The RSASSA-PKCS1-v1_5 signature algorithm used defined by + `RFC3447, Section 8.2`_ (also known as PKCS#1), with the `alg` as the + hash function for EMSA-PKCS1-v1_5. To use this method, the client MUST have established client credentials with the server that included its RSA public key (in a manner that is beyond the scope of this specification). .. _`section 3.4.3`: https://tools.ietf.org/html/rfc5849#section-3.4.3 .. _`RFC3447, Section 8.2`: https://tools.ietf.org/html/rfc3447#section-8.2 - """ - if isinstance(base_string, unicode_type): - base_string = base_string.encode('utf-8') - # TODO: finish RSA documentation - alg = _jwt_rs1_signing_algorithm() + + # Get the implementation of RSA-hash + + alg = _get_jwt_rsa_algorithm(hash_algorithm_name) + + # Check private key + + if not rsa_private_key: + raise ValueError('rsa_private_key required for RSA with ' + + alg.hash_alg.name + ' signature method') + + # Convert the "signature base string" into a sequence of bytes (M) + # + # The signature base string, by definition, only contain printable US-ASCII + # characters. So encoding it as 'ascii' will always work. It will raise a + # ``UnicodeError`` if it can't encode the value, which will never happen + # if the signature base string was created correctly. Therefore, using + # 'ascii' encoding provides an extra level of error checking. + + m = sig_base_str.encode('ascii') + + # Perform signing: S = RSASSA-PKCS1-V1_5-SIGN (K, M) + key = _prepare_key_plus(alg, rsa_private_key) - s=alg.sign(base_string, key) - return binascii.b2a_base64(s)[:-1].decode('utf-8') + s = alg.sign(m, key) + + # base64-encoded per RFC2045 section 6.8. + # + # 1. While b2a_base64 implements base64 defined by RFC 3548. As used here, + # it is the same as base64 defined by RFC 2045. + # 2. b2a_base64 includes a "\n" at the end of its result ([:-1] removes it) + # 3. b2a_base64 produces a binary string. Use decode to produce a str. + # It should only contain only printable US-ASCII characters. + + return binascii.b2a_base64(s)[:-1].decode('ascii') -def sign_rsa_sha1_with_client(base_string, client): - if not client.rsa_key: - raise ValueError('rsa_key is required when using RSA signature method.') - return sign_rsa_sha1(base_string, client.rsa_key) +def _verify_rsa(hash_algorithm_name: str, + request, + rsa_public_key: str): + """ + Verify a base64 encoded signature for a RSA-based signature method. + + The ``alg`` is used to calculate the digest over the signature base string. + For the "RSA_SHA1" signature method, the alg must be SHA-1. While OAuth 1.0a + only defines the RSA-SHA1 signature method, this function can be used for + other non-standard signature methods that only differ from RSA-SHA1 by the + digest algorithm. + + Verification for the RSA-SHA1 signature method is defined in + `section 3.4.3`_ of RFC 5849. + + .. _`section 3.4.3`: https://tools.ietf.org/html/rfc5849#section-3.4.3 + + To satisfy `RFC2616 section 5.2`_ item 1, the request argument's uri + attribute MUST be an absolute URI whose netloc part identifies the + origin server or gateway on which the resource resides. Any Host + item of the request argument's headers dict attribute will be + ignored. + + .. _`RFC2616 Sec 5.2`: https://tools.ietf.org/html/rfc2616#section-5.2 + """ + + try: + # Calculate the *signature base string* of the actual received request + + norm_params = normalize_parameters(request.params) + bs_uri = base_string_uri(request.uri) + sig_base_str = signature_base_string( + request.http_method, bs_uri, norm_params) + + # Obtain the signature that was received in the request + + sig = binascii.a2b_base64(request.signature.encode('ascii')) + + # Get the implementation of RSA-with-hash algorithm to use + + alg = _get_jwt_rsa_algorithm(hash_algorithm_name) + + # Verify the received signature was produced by the private key + # corresponding to the `rsa_public_key`, signing exact same + # *signature base string*. + # + # RSASSA-PKCS1-V1_5-VERIFY ((n, e), M, S) + + key = _prepare_key_plus(alg, rsa_public_key) + + # The signature base string only contain printable US-ASCII characters. + # The ``encode`` method with the default "strict" error handling will + # raise a ``UnicodeError`` if it can't encode the value. So using + # "ascii" will always work. + + verify_ok = alg.verify(sig_base_str.encode('ascii'), key, sig) + + if not verify_ok: + log.debug('Verify failed: RSA with ' + alg.hash_alg.name + + ': signature base string=%s' + sig_base_str) + return verify_ok + + except UnicodeError: + # A properly encoded signature will only contain printable US-ASCII + # characters. The ``encode`` method with the default "strict" error + # handling will raise a ``UnicodeError`` if it can't decode the value. + # So using "ascii" will work with all valid signatures. But an + # incorrectly or maliciously produced signature could contain other + # bytes. + # + # This implementation treats that situation as equivalent to the + # signature verification having failed. + # + # Note: simply changing the encode to use 'utf-8' will not remove this + # case, since an incorrect or malicious request can contain bytes which + # are invalid as UTF-8. + return False + + +# ==== RSA-SHA1 ================================================== + +def sign_rsa_sha1_with_client(sig_base_str, client): + # For some reason, this function originally accepts both str and bytes. + # This behaviour is preserved here. But won't be done for the newer + # sign_rsa_sha256_with_client and sign_rsa_sha512_with_client functions, + # which will only accept strings. The function to calculate a + # "signature base string" always produces a string, so it is not clear + # why support for bytes would ever be needed. + sig_base_str = sig_base_str.decode('ascii')\ + if isinstance(sig_base_str, bytes) else sig_base_str + + return _sign_rsa('SHA-1', sig_base_str, client.rsa_key) + + +def verify_rsa_sha1(request, rsa_public_key: str): + return _verify_rsa('SHA-1', request, rsa_public_key) + + +def sign_rsa_sha1(base_string, rsa_private_key): + """ + Deprecated function for calculating a RSA-SHA1 signature. + + This function has been replaced by invoking ``sign_rsa`` with "SHA-1" + as the hash algorithm name. + + This function was invoked by sign_rsa_sha1_with_client and + test_signatures.py, but does any application invoke it directly? If not, + it can be removed. + """ + warnings.warn('use _sign_rsa("SHA-1", ...) instead of sign_rsa_sha1', + DeprecationWarning) + + if isinstance(base_string, bytes): + base_string = base_string.decode('ascii') + + return _sign_rsa('SHA-1', base_string, rsa_private_key) + + +# ==== RSA-SHA256 ================================================ + +def sign_rsa_sha256_with_client(sig_base_str: str, client): + return _sign_rsa('SHA-256', sig_base_str, client.rsa_key) + + +def verify_rsa_sha256(request, rsa_public_key: str): + return _verify_rsa('SHA-256', request, rsa_public_key) + + +# ==== RSA-SHA512 ================================================ + +def sign_rsa_sha512_with_client(sig_base_str: str, client): + return _sign_rsa('SHA-512', sig_base_str, client.rsa_key) + + +def verify_rsa_sha512(request, rsa_public_key: str): + return _verify_rsa('SHA-512', request, rsa_public_key) + + +# ==== PLAINTEXT ================================================= + +def sign_plaintext_with_client(_signature_base_string, client): + # _signature_base_string is not used because the signature with PLAINTEXT + # is just the secret: it isn't a real signature. + return sign_plaintext(client.client_secret, client.resource_owner_secret) def sign_plaintext(client_secret, resource_owner_secret): @@ -627,108 +838,6 @@ def sign_plaintext(client_secret, resource_owner_secret): return signature -def sign_plaintext_with_client(base_string, client): - return sign_plaintext(client.client_secret, client.resource_owner_secret) - - -def verify_hmac_sha1(request, client_secret=None, - resource_owner_secret=None): - """Verify a HMAC-SHA1 signature. - - Per `section 3.4`_ of the spec. - - .. _`section 3.4`: https://tools.ietf.org/html/rfc5849#section-3.4 - - To satisfy `RFC2616 section 5.2`_ item 1, the request argument's uri - attribute MUST be an absolute URI whose netloc part identifies the - origin server or gateway on which the resource resides. Any Host - item of the request argument's headers dict attribute will be - ignored. - - .. _`RFC2616 section 5.2`: https://tools.ietf.org/html/rfc2616#section-5.2 - - """ - norm_params = normalize_parameters(request.params) - bs_uri = base_string_uri(request.uri) - sig_base_str = signature_base_string(request.http_method, bs_uri, - norm_params) - signature = sign_hmac_sha1(sig_base_str, client_secret, - resource_owner_secret) - match = safe_string_equals(signature, request.signature) - if not match: - log.debug('Verify HMAC-SHA1 failed: signature base string: %s', - sig_base_str) - return match - - -def verify_hmac_sha256(request, client_secret=None, - resource_owner_secret=None): - """Verify a HMAC-SHA256 signature. - - Per `section 3.4`_ of the spec. - - .. _`section 3.4`: https://tools.ietf.org/html/rfc5849#section-3.4 - - To satisfy `RFC2616 section 5.2`_ item 1, the request argument's uri - attribute MUST be an absolute URI whose netloc part identifies the - origin server or gateway on which the resource resides. Any Host - item of the request argument's headers dict attribute will be - ignored. - - .. _`RFC2616 section 5.2`: https://tools.ietf.org/html/rfc2616#section-5.2 - - """ - norm_params = normalize_parameters(request.params) - bs_uri = base_string_uri(request.uri) - sig_base_str = signature_base_string(request.http_method, bs_uri, - norm_params) - signature = sign_hmac_sha256(sig_base_str, client_secret, - resource_owner_secret) - match = safe_string_equals(signature, request.signature) - if not match: - log.debug('Verify HMAC-SHA256 failed: signature base string: %s', - sig_base_str) - return match - - -def _prepare_key_plus(alg, keystr): - if isinstance(keystr, bytes): - keystr = keystr.decode('utf-8') - return alg.prepare_key(keystr) - -def verify_rsa_sha1(request, rsa_public_key): - """Verify a RSASSA-PKCS #1 v1.5 base64 encoded signature. - - Per `section 3.4.3`_ of the spec. - - Note this method requires the jwt and cryptography libraries. - - .. _`section 3.4.3`: https://tools.ietf.org/html/rfc5849#section-3.4.3 - - To satisfy `RFC2616 section 5.2`_ item 1, the request argument's uri - attribute MUST be an absolute URI whose netloc part identifies the - origin server or gateway on which the resource resides. Any Host - item of the request argument's headers dict attribute will be - ignored. - - .. _`RFC2616 section 5.2`: https://tools.ietf.org/html/rfc2616#section-5.2 - """ - norm_params = normalize_parameters(request.params) - bs_uri = base_string_uri(request.uri) - sig_base_str = signature_base_string(request.http_method, bs_uri, - norm_params).encode('utf-8') - sig = binascii.a2b_base64(request.signature.encode('utf-8')) - - alg = _jwt_rs1_signing_algorithm() - key = _prepare_key_plus(alg, rsa_public_key) - - verify_ok = alg.verify(sig_base_str, key, sig) - if not verify_ok: - log.debug('Verify RSA-SHA1 failed: signature base string: %s', - sig_base_str) - return verify_ok - - def verify_plaintext(request, client_secret=None, resource_owner_secret=None): """Verify a PLAINTEXT signature. diff --git a/libs/common/oauthlib/oauth1/rfc5849/utils.py b/libs/common/oauthlib/oauth1/rfc5849/utils.py index 735f21d6..8fb8302e 100644 --- a/libs/common/oauthlib/oauth1/rfc5849/utils.py +++ b/libs/common/oauthlib/oauth1/rfc5849/utils.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ oauthlib.utils ~~~~~~~~~~~~~~ @@ -6,15 +5,9 @@ oauthlib.utils This module contains utility methods used by various parts of the OAuth spec. """ -from __future__ import absolute_import, unicode_literals - -from oauthlib.common import quote, unicode_type, unquote - -try: - import urllib2 -except ImportError: - import urllib.request as urllib2 +import urllib.request as urllib2 +from oauthlib.common import quote, unquote UNICODE_ASCII_CHARACTER_SET = ('abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' @@ -52,16 +45,16 @@ def escape(u): .. _`section 3.6`: https://tools.ietf.org/html/rfc5849#section-3.6 """ - if not isinstance(u, unicode_type): + if not isinstance(u, str): raise ValueError('Only unicode objects are escapable. ' + - 'Got %r of type %s.' % (u, type(u))) + 'Got {!r} of type {}.'.format(u, type(u))) # Letters, digits, and the characters '_.-' are already treated as safe # by urllib.quote(). We need to add '~' to fully support rfc5849. return quote(u, safe=b'~') def unescape(u): - if not isinstance(u, unicode_type): + if not isinstance(u, str): raise ValueError('Only unicode objects are unescapable.') return unquote(u) diff --git a/libs/common/oauthlib/oauth2/__init__.py b/libs/common/oauthlib/oauth2/__init__.py index 3f437556..deefb1af 100644 --- a/libs/common/oauthlib/oauth2/__init__.py +++ b/libs/common/oauthlib/oauth2/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ oauthlib.oauth2 ~~~~~~~~~~~~~~ @@ -6,31 +5,32 @@ oauthlib.oauth2 This module is a wrapper for the most recent implementation of OAuth 2.0 Client and Server classes. """ -from __future__ import absolute_import, unicode_literals - -from .rfc6749.clients import Client -from .rfc6749.clients import WebApplicationClient -from .rfc6749.clients import MobileApplicationClient -from .rfc6749.clients import LegacyApplicationClient -from .rfc6749.clients import BackendApplicationClient -from .rfc6749.clients import ServiceApplicationClient -from .rfc6749.endpoints import AuthorizationEndpoint -from .rfc6749.endpoints import IntrospectEndpoint -from .rfc6749.endpoints import MetadataEndpoint -from .rfc6749.endpoints import TokenEndpoint -from .rfc6749.endpoints import ResourceEndpoint -from .rfc6749.endpoints import RevocationEndpoint -from .rfc6749.endpoints import Server -from .rfc6749.endpoints import WebApplicationServer -from .rfc6749.endpoints import MobileApplicationServer -from .rfc6749.endpoints import LegacyApplicationServer -from .rfc6749.endpoints import BackendApplicationServer -from .rfc6749.errors import AccessDeniedError, OAuth2Error, FatalClientError, InsecureTransportError, InvalidClientError, InvalidClientIdError, InvalidGrantError, InvalidRedirectURIError, InvalidRequestError, InvalidRequestFatalError, InvalidScopeError, MismatchingRedirectURIError, MismatchingStateError, MissingClientIdError, MissingCodeError, MissingRedirectURIError, MissingResponseTypeError, MissingTokenError, MissingTokenTypeError, ServerError, TemporarilyUnavailableError, TokenExpiredError, UnauthorizedClientError, UnsupportedGrantTypeError, UnsupportedResponseTypeError, UnsupportedTokenTypeError -from .rfc6749.grant_types import AuthorizationCodeGrant -from .rfc6749.grant_types import ImplicitGrant -from .rfc6749.grant_types import ResourceOwnerPasswordCredentialsGrant -from .rfc6749.grant_types import ClientCredentialsGrant -from .rfc6749.grant_types import RefreshTokenGrant +from .rfc6749.clients import ( + BackendApplicationClient, Client, LegacyApplicationClient, + MobileApplicationClient, ServiceApplicationClient, WebApplicationClient, +) +from .rfc6749.endpoints import ( + AuthorizationEndpoint, BackendApplicationServer, IntrospectEndpoint, + LegacyApplicationServer, MetadataEndpoint, MobileApplicationServer, + ResourceEndpoint, RevocationEndpoint, Server, TokenEndpoint, + WebApplicationServer, +) +from .rfc6749.errors import ( + AccessDeniedError, FatalClientError, InsecureTransportError, + InvalidClientError, InvalidClientIdError, InvalidGrantError, + InvalidRedirectURIError, InvalidRequestError, InvalidRequestFatalError, + InvalidScopeError, MismatchingRedirectURIError, MismatchingStateError, + MissingClientIdError, MissingCodeError, MissingRedirectURIError, + MissingResponseTypeError, MissingTokenError, MissingTokenTypeError, + OAuth2Error, ServerError, TemporarilyUnavailableError, TokenExpiredError, + UnauthorizedClientError, UnsupportedGrantTypeError, + UnsupportedResponseTypeError, UnsupportedTokenTypeError, +) +from .rfc6749.grant_types import ( + AuthorizationCodeGrant, ClientCredentialsGrant, ImplicitGrant, + RefreshTokenGrant, ResourceOwnerPasswordCredentialsGrant, +) from .rfc6749.request_validator import RequestValidator from .rfc6749.tokens import BearerToken, OAuth2Token from .rfc6749.utils import is_secure_transport +from .rfc8628.clients import DeviceClient diff --git a/libs/common/oauthlib/oauth2/rfc6749/__init__.py b/libs/common/oauthlib/oauth2/rfc6749/__init__.py index 1a4128c5..4b75a8a1 100644 --- a/libs/common/oauthlib/oauth2/rfc6749/__init__.py +++ b/libs/common/oauthlib/oauth2/rfc6749/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ oauthlib.oauth2.rfc6749 ~~~~~~~~~~~~~~~~~~~~~~~ @@ -6,15 +5,12 @@ oauthlib.oauth2.rfc6749 This module is an implementation of various logic needed for consuming and providing OAuth 2.0 RFC6749. """ -from __future__ import absolute_import, unicode_literals - import functools import logging -from .endpoints.base import BaseEndpoint -from .endpoints.base import catch_errors_and_unavailability -from .errors import TemporarilyUnavailableError, ServerError -from .errors import FatalClientError, OAuth2Error - +from .endpoints.base import BaseEndpoint, catch_errors_and_unavailability +from .errors import ( + FatalClientError, OAuth2Error, ServerError, TemporarilyUnavailableError, +) log = logging.getLogger(__name__) diff --git a/libs/common/oauthlib/oauth2/rfc6749/clients/__init__.py b/libs/common/oauthlib/oauth2/rfc6749/clients/__init__.py index 17d00236..8fc6c955 100644 --- a/libs/common/oauthlib/oauth2/rfc6749/clients/__init__.py +++ b/libs/common/oauthlib/oauth2/rfc6749/clients/__init__.py @@ -6,11 +6,9 @@ oauthlib.oauth2.rfc6749 This module is an implementation of various logic needed for consuming OAuth 2.0 RFC6749. """ -from __future__ import absolute_import, unicode_literals - -from .base import Client, AUTH_HEADER, URI_QUERY, BODY -from .web_application import WebApplicationClient -from .mobile_application import MobileApplicationClient -from .legacy_application import LegacyApplicationClient from .backend_application import BackendApplicationClient +from .base import AUTH_HEADER, BODY, URI_QUERY, Client +from .legacy_application import LegacyApplicationClient +from .mobile_application import MobileApplicationClient from .service_application import ServiceApplicationClient +from .web_application import WebApplicationClient diff --git a/libs/common/oauthlib/oauth2/rfc6749/clients/backend_application.py b/libs/common/oauthlib/oauth2/rfc6749/clients/backend_application.py index 57378140..e11e8fae 100644 --- a/libs/common/oauthlib/oauth2/rfc6749/clients/backend_application.py +++ b/libs/common/oauthlib/oauth2/rfc6749/clients/backend_application.py @@ -6,9 +6,7 @@ oauthlib.oauth2.rfc6749 This module is an implementation of various logic needed for consuming and providing OAuth 2.0 RFC6749. """ -from __future__ import absolute_import, unicode_literals - -from ..parameters import parse_token_response, prepare_token_request +from ..parameters import prepare_token_request from .base import Client @@ -41,7 +39,7 @@ class BackendApplicationClient(Client): format per `Appendix B`_ in the HTTP request entity-body: :param body: Existing request body (URL encoded string) to embed parameters - into. This may contain extra paramters. Default ''. + into. This may contain extra parameters. Default ''. :param scope: The scope of the access request as described by `Section 3.3`_. diff --git a/libs/common/oauthlib/oauth2/rfc6749/clients/base.py b/libs/common/oauthlib/oauth2/rfc6749/clients/base.py index 9b05ad5d..d5eb0cc1 100644 --- a/libs/common/oauthlib/oauth2/rfc6749/clients/base.py +++ b/libs/common/oauthlib/oauth2/rfc6749/clients/base.py @@ -6,18 +6,22 @@ oauthlib.oauth2.rfc6749 This module is an implementation of various logic needed for consuming OAuth 2.0 RFC6749. """ -from __future__ import absolute_import, unicode_literals - +import base64 +import hashlib +import re +import secrets import time import warnings from oauthlib.common import generate_token from oauthlib.oauth2.rfc6749 import tokens -from oauthlib.oauth2.rfc6749.errors import (InsecureTransportError, - TokenExpiredError) -from oauthlib.oauth2.rfc6749.parameters import (parse_token_response, - prepare_token_request, - prepare_token_revocation_request) +from oauthlib.oauth2.rfc6749.errors import ( + InsecureTransportError, TokenExpiredError, +) +from oauthlib.oauth2.rfc6749.parameters import ( + parse_token_response, prepare_token_request, + prepare_token_revocation_request, +) from oauthlib.oauth2.rfc6749.utils import is_secure_transport AUTH_HEADER = 'auth_header' @@ -29,7 +33,7 @@ FORM_ENC_HEADERS = { } -class Client(object): +class Client: """Base OAuth2 client responsible for access token management. This class also acts as a generic interface providing methods common to all @@ -61,6 +65,9 @@ class Client(object): state=None, redirect_url=None, state_generator=generate_token, + code_verifier=None, + code_challenge=None, + code_challenge_method=None, **kwargs): """Initialize a client with commonly used attributes. @@ -99,6 +106,15 @@ class Client(object): :param state_generator: A no argument state generation callable. Defaults to :py:meth:`oauthlib.common.generate_token`. + + :param code_verifier: PKCE parameter. A cryptographically random string that is used to correlate the + authorization request to the token request. + + :param code_challenge: PKCE parameter. A challenge derived from the code verifier that is sent in the + authorization request, to be verified against later. + + :param code_challenge_method: PKCE parameter. A method that was used to derive code challenge. + Defaults to "plain" if not present in the request. """ self.client_id = client_id @@ -113,6 +129,9 @@ class Client(object): self.state_generator = state_generator self.state = state self.redirect_url = redirect_url + self.code_verifier = code_verifier + self.code_challenge = code_challenge + self.code_challenge_method = code_challenge_method self.code = None self.expires_in = None self._expires_at = None @@ -186,8 +205,8 @@ class Client(object): token_placement = token_placement or self.default_token_placement - case_insensitive_token_types = dict( - (k.lower(), v) for k, v in self.token_types.items()) + case_insensitive_token_types = { + k.lower(): v for k, v in self.token_types.items()} if not self.token_type.lower() in case_insensitive_token_types: raise ValueError("Unsupported token type: %s" % self.token_type) @@ -209,23 +228,21 @@ class Client(object): required parameters to the authorization URL. :param authorization_url: Provider authorization endpoint URL. - :param state: CSRF protection string. Will be automatically created if - not provided. The generated state is available via the ``state`` - attribute. Clients should verify that the state is unchanged and - present in the authorization response. This verification is done - automatically if using the ``authorization_response`` parameter - with ``prepare_token_request``. - + not provided. The generated state is available via the ``state`` + attribute. Clients should verify that the state is unchanged and + present in the authorization response. This verification is done + automatically if using the ``authorization_response`` parameter + with ``prepare_token_request``. :param redirect_url: Redirect URL to which the user will be returned - after authorization. Must be provided unless previously setup with - the provider. If provided then it must also be provided in the - token request. - - :param scope: - + after authorization. Must be provided unless previously setup with + the provider. If provided then it must also be provided in the + token request. + :param scope: List of scopes to request. Must be equal to + or a subset of the scopes granted when obtaining the refresh + token. If none is provided, the ones provided in the constructor are + used. :param kwargs: Additional parameters to included in the request. - :returns: The prepared request tuple with (url, headers, body). """ if not is_secure_transport(authorization_url): @@ -233,10 +250,11 @@ class Client(object): self.state = state or self.state_generator() self.redirect_url = redirect_url or self.redirect_url - self.scope = scope or self.scope + # do not assign scope to self automatically anymore + scope = self.scope if scope is None else scope auth_url = self.prepare_request_uri( authorization_url, redirect_uri=self.redirect_url, - scope=self.scope, state=self.state, **kwargs) + scope=scope, state=self.state, **kwargs) return auth_url, FORM_ENC_HEADERS, '' def prepare_token_request(self, token_url, authorization_response=None, @@ -248,22 +266,16 @@ class Client(object): credentials. :param token_url: Provider token creation endpoint URL. - :param authorization_response: The full redirection URL string, i.e. - the location to which the user was redirected after successfull - authorization. Used to mine credentials needed to obtain a token - in this step, such as authorization code. - + the location to which the user was redirected after successful + authorization. Used to mine credentials needed to obtain a token + in this step, such as authorization code. :param redirect_url: The redirect_url supplied with the authorization - request (if there was one). - + request (if there was one). :param state: - :param body: Existing request body (URL encoded string) to embed parameters - into. This may contain extra paramters. Default ''. - + into. This may contain extra parameters. Default ''. :param kwargs: Additional parameters to included in the request. - :returns: The prepared request tuple with (url, headers, body). """ if not is_secure_transport(token_url): @@ -289,26 +301,23 @@ class Client(object): obtain a new access token, and possibly a new refresh token. :param token_url: Provider token refresh endpoint URL. - :param refresh_token: Refresh token string. - :param body: Existing request body (URL encoded string) to embed parameters - into. This may contain extra paramters. Default ''. - + into. This may contain extra parameters. Default ''. :param scope: List of scopes to request. Must be equal to - or a subset of the scopes granted when obtaining the refresh - token. - + or a subset of the scopes granted when obtaining the refresh + token. If none is provided, the ones provided in the constructor are + used. :param kwargs: Additional parameters to included in the request. - :returns: The prepared request tuple with (url, headers, body). """ if not is_secure_transport(token_url): raise InsecureTransportError() - self.scope = scope or self.scope + # do not assign scope to self automatically anymore + scope = self.scope if scope is None else scope body = self.prepare_refresh_body(body=body, - refresh_token=refresh_token, scope=self.scope, **kwargs) + refresh_token=refresh_token, scope=scope, **kwargs) return token_url, FORM_ENC_HEADERS, body def prepare_token_revocation_request(self, revocation_url, token, @@ -316,20 +325,14 @@ class Client(object): """Prepare a token revocation request. :param revocation_url: Provider token revocation endpoint URL. - :param token: The access or refresh token to be revoked (string). - :param token_type_hint: ``"access_token"`` (default) or - ``"refresh_token"``. This is optional and if you wish to not pass it you - must provide ``token_type_hint=None``. - + ``"refresh_token"``. This is optional and if you wish to not pass it you + must provide ``token_type_hint=None``. :param body: - :param callback: A jsonp callback such as ``package.callback`` to be invoked - upon receiving the response. Not that it should not include a () suffix. - + upon receiving the response. Not that it should not include a () suffix. :param kwargs: Additional parameters to included in the request. - :returns: The prepared request tuple with (url, headers, body). Note that JSONP request may use GET requests as the parameters will @@ -337,7 +340,7 @@ class Client(object): An example of a revocation request - .. code-block: http + .. code-block:: http POST /revoke HTTP/1.1 Host: server.example.com @@ -348,7 +351,7 @@ class Client(object): An example of a jsonp revocation request - .. code-block: http + .. code-block:: http GET /revoke?token=agabcdefddddafdd&callback=package.myCallback HTTP/1.1 Host: server.example.com @@ -357,9 +360,9 @@ class Client(object): and an error response - .. code-block: http + .. code-block:: javascript - package.myCallback({"error":"unsupported_token_type"}); + package.myCallback({"error":"unsupported_token_type"}); Note that these requests usually require client credentials, client_id in the case for public clients and provider specific authentication @@ -382,9 +385,11 @@ class Client(object): returns an error response as described in `Section 5.2`_. :param body: The response body from the token request. - :param scope: Scopes originally requested. + :param scope: Scopes originally requested. If none is provided, the ones + provided in the constructor are used. :return: Dictionary of token parameters. - :raises: Warning if scope has changed. OAuth2Error if response is invalid. + :raises: Warning if scope has changed. :py:class:`oauthlib.oauth2.errors.OAuth2Error` + if response is invalid. These response are json encoded and could easily be parsed without the assistance of OAuthLib. However, there are a few subtle issues @@ -410,7 +415,7 @@ class Client(object): If omitted, the authorization server SHOULD provide the expiration time via other means or document the default value. - **scope** + **scope** Providers may supply this in all responses but are required to only if it has changed since the authorization request. @@ -418,6 +423,7 @@ class Client(object): .. _`Section 5.2`: https://tools.ietf.org/html/rfc6749#section-5.2 .. _`Section 7.1`: https://tools.ietf.org/html/rfc6749#section-7.1 """ + scope = self.scope if scope is None else scope self.token = parse_token_response(body, scope=scope) self.populate_token_attributes(self.token) return self.token @@ -427,21 +433,19 @@ class Client(object): If the authorization server issued a refresh token to the client, the client makes a refresh request to the token endpoint by adding the - following parameters using the "application/x-www-form-urlencoded" + following parameters using the `application/x-www-form-urlencoded` format in the HTTP request entity-body: - grant_type - REQUIRED. Value MUST be set to "refresh_token". - refresh_token - REQUIRED. The refresh token issued to the client. - scope - OPTIONAL. The scope of the access request as described by - Section 3.3. The requested scope MUST NOT include any scope - not originally granted by the resource owner, and if omitted is - treated as equal to the scope originally granted by the - resource owner. + :param refresh_token: REQUIRED. The refresh token issued to the client. + :param scope: OPTIONAL. The scope of the access request as described by + Section 3.3. The requested scope MUST NOT include any scope + not originally granted by the resource owner, and if omitted is + treated as equal to the scope originally granted by the + resource owner. Note that if none is provided, the ones provided + in the constructor are used if any. """ refresh_token = refresh_token or self.refresh_token + scope = self.scope if scope is None else scope return prepare_token_request(self.refresh_token_key, body=body, scope=scope, refresh_token=refresh_token, **kwargs) @@ -461,6 +465,91 @@ class Client(object): raise ValueError("Invalid token placement.") return uri, headers, body + def create_code_verifier(self, length): + """Create PKCE **code_verifier** used in computing **code_challenge**. + See `RFC7636 Section 4.1`_ + + :param length: REQUIRED. The length of the code_verifier. + + The client first creates a code verifier, "code_verifier", for each + OAuth 2.0 [RFC6749] Authorization Request, in the following manner: + + .. code-block:: text + + code_verifier = high-entropy cryptographic random STRING using the + unreserved characters [A-Z] / [a-z] / [0-9] / "-" / "." / "_" / "~" + from Section 2.3 of [RFC3986], with a minimum length of 43 characters + and a maximum length of 128 characters. + + .. _`RFC7636 Section 4.1`: https://tools.ietf.org/html/rfc7636#section-4.1 + """ + code_verifier = None + + if not length >= 43: + raise ValueError("Length must be greater than or equal to 43") + + if not length <= 128: + raise ValueError("Length must be less than or equal to 128") + + allowed_characters = re.compile('^[A-Zaa-z0-9-._~]') + code_verifier = secrets.token_urlsafe(length) + + if not re.search(allowed_characters, code_verifier): + raise ValueError("code_verifier contains invalid characters") + + self.code_verifier = code_verifier + + return code_verifier + + def create_code_challenge(self, code_verifier, code_challenge_method=None): + """Create PKCE **code_challenge** derived from the **code_verifier**. + See `RFC7636 Section 4.2`_ + + :param code_verifier: REQUIRED. The **code_verifier** generated from `create_code_verifier()`. + :param code_challenge_method: OPTIONAL. The method used to derive the **code_challenge**. Acceptable values include `S256`. DEFAULT is `plain`. + + The client then creates a code challenge derived from the code + verifier by using one of the following transformations on the code + verifier:: + + plain + code_challenge = code_verifier + S256 + code_challenge = BASE64URL-ENCODE(SHA256(ASCII(code_verifier))) + + If the client is capable of using `S256`, it MUST use `S256`, as + `S256` is Mandatory To Implement (MTI) on the server. Clients are + permitted to use `plain` only if they cannot support `S256` for some + technical reason and know via out-of-band configuration that the + server supports `plain`. + + The plain transformation is for compatibility with existing + deployments and for constrained environments that can't use the S256 transformation. + + .. _`RFC7636 Section 4.2`: https://tools.ietf.org/html/rfc7636#section-4.2 + """ + code_challenge = None + + if code_verifier == None: + raise ValueError("Invalid code_verifier") + + if code_challenge_method == None: + code_challenge_method = "plain" + self.code_challenge_method = code_challenge_method + code_challenge = code_verifier + self.code_challenge = code_challenge + + if code_challenge_method == "S256": + h = hashlib.sha256() + h.update(code_verifier.encode(encoding='ascii')) + sha256_val = h.digest() + code_challenge = bytes.decode(base64.urlsafe_b64encode(sha256_val)) + # replace '+' with '-', '/' with '_', and remove trailing '=' + code_challenge = code_challenge.replace("+", "-").replace("/", "_").replace("=", "") + self.code_challenge = code_challenge + + return code_challenge + def _add_mac_token(self, uri, http_method='GET', body=None, headers=None, token_placement=AUTH_HEADER, ext=None, **kwargs): """Add a MAC token to the request authorization header. @@ -503,7 +592,10 @@ class Client(object): self._expires_at = time.time() + int(self.expires_in) if 'expires_at' in response: - self._expires_at = int(response.get('expires_at')) + try: + self._expires_at = int(response.get('expires_at')) + except: + self._expires_at = None if 'mac_key' in response: self.mac_key = response.get('mac_key') diff --git a/libs/common/oauthlib/oauth2/rfc6749/clients/legacy_application.py b/libs/common/oauthlib/oauth2/rfc6749/clients/legacy_application.py index ca218e45..9920981d 100644 --- a/libs/common/oauthlib/oauth2/rfc6749/clients/legacy_application.py +++ b/libs/common/oauthlib/oauth2/rfc6749/clients/legacy_application.py @@ -6,9 +6,7 @@ oauthlib.oauth2.rfc6749 This module is an implementation of various logic needed for consuming and providing OAuth 2.0 RFC6749. """ -from __future__ import absolute_import, unicode_literals - -from ..parameters import parse_token_response, prepare_token_request +from ..parameters import prepare_token_request from .base import Client @@ -38,7 +36,7 @@ class LegacyApplicationClient(Client): grant_type = 'password' def __init__(self, client_id, **kwargs): - super(LegacyApplicationClient, self).__init__(client_id, **kwargs) + super().__init__(client_id, **kwargs) def prepare_request_body(self, username, password, body='', scope=None, include_client_id=False, **kwargs): @@ -51,7 +49,7 @@ class LegacyApplicationClient(Client): :param username: The resource owner username. :param password: The resource owner password. :param body: Existing request body (URL encoded string) to embed parameters - into. This may contain extra paramters. Default ''. + into. This may contain extra parameters. Default ''. :param scope: The scope of the access request as described by `Section 3.3`_. :param include_client_id: `True` to send the `client_id` in the @@ -81,5 +79,6 @@ class LegacyApplicationClient(Client): """ kwargs['client_id'] = self.client_id kwargs['include_client_id'] = include_client_id + scope = self.scope if scope is None else scope return prepare_token_request(self.grant_type, body=body, username=username, password=password, scope=scope, **kwargs) diff --git a/libs/common/oauthlib/oauth2/rfc6749/clients/mobile_application.py b/libs/common/oauthlib/oauth2/rfc6749/clients/mobile_application.py index 11c6c51c..b10b41ce 100644 --- a/libs/common/oauthlib/oauth2/rfc6749/clients/mobile_application.py +++ b/libs/common/oauthlib/oauth2/rfc6749/clients/mobile_application.py @@ -6,8 +6,6 @@ oauthlib.oauth2.rfc6749 This module is an implementation of various logic needed for consuming and providing OAuth 2.0 RFC6749. """ -from __future__ import absolute_import, unicode_literals - from ..parameters import parse_implicit_response, prepare_grant_uri from .base import Client @@ -57,7 +55,7 @@ class MobileApplicationClient(Client): using the "application/x-www-form-urlencoded" format, per `Appendix B`_: :param redirect_uri: OPTIONAL. The redirect URI must be an absolute URI - and it should have been registerd with the OAuth + and it should have been registered with the OAuth provider prior to use. As described in `Section 3.1.2`_. :param scope: OPTIONAL. The scope of the access request as described by @@ -93,6 +91,7 @@ class MobileApplicationClient(Client): .. _`Section 3.3`: https://tools.ietf.org/html/rfc6749#section-3.3 .. _`Section 10.12`: https://tools.ietf.org/html/rfc6749#section-10.12 """ + scope = self.scope if scope is None else scope return prepare_grant_uri(uri, self.client_id, self.response_type, redirect_uri=redirect_uri, state=state, scope=scope, **kwargs) @@ -169,6 +168,7 @@ class MobileApplicationClient(Client): .. _`Section 7.1`: https://tools.ietf.org/html/rfc6749#section-7.1 .. _`Section 3.3`: https://tools.ietf.org/html/rfc6749#section-3.3 """ + scope = self.scope if scope is None else scope self.token = parse_implicit_response(uri, state=state, scope=scope) self.populate_token_attributes(self.token) return self.token diff --git a/libs/common/oauthlib/oauth2/rfc6749/clients/service_application.py b/libs/common/oauthlib/oauth2/rfc6749/clients/service_application.py index ea946cec..8fb17377 100644 --- a/libs/common/oauthlib/oauth2/rfc6749/clients/service_application.py +++ b/libs/common/oauthlib/oauth2/rfc6749/clients/service_application.py @@ -6,13 +6,11 @@ oauthlib.oauth2.rfc6749 This module is an implementation of various logic needed for consuming and providing OAuth 2.0 RFC6749. """ -from __future__ import absolute_import, unicode_literals - import time from oauthlib.common import to_unicode -from ..parameters import parse_token_response, prepare_token_request +from ..parameters import prepare_token_request from .base import Client @@ -33,7 +31,7 @@ class ServiceApplicationClient(Client): def __init__(self, client_id, private_key=None, subject=None, issuer=None, audience=None, **kwargs): - """Initalize a JWT client with defaults for implicit use later. + """Initialize a JWT client with defaults for implicit use later. :param client_id: Client identifier given by the OAuth provider upon registration. @@ -57,7 +55,7 @@ class ServiceApplicationClient(Client): state and token. See ``Client.__init__.__doc__`` for details. """ - super(ServiceApplicationClient, self).__init__(client_id, **kwargs) + super().__init__(client_id, **kwargs) self.private_key = private_key self.subject = subject self.issuer = issuer @@ -101,7 +99,7 @@ class ServiceApplicationClient(Client): :param extra_claims: A dict of additional claims to include in the JWT. :param body: Existing request body (URL encoded string) to embed parameters - into. This may contain extra paramters. Default ''. + into. This may contain extra parameters. Default ''. :param scope: The scope of the access request. @@ -183,6 +181,7 @@ class ServiceApplicationClient(Client): kwargs['client_id'] = self.client_id kwargs['include_client_id'] = include_client_id + scope = self.scope if scope is None else scope return prepare_token_request(self.grant_type, body=body, assertion=assertion, diff --git a/libs/common/oauthlib/oauth2/rfc6749/clients/web_application.py b/libs/common/oauthlib/oauth2/rfc6749/clients/web_application.py index 0cd39ce5..50890fbf 100644 --- a/libs/common/oauthlib/oauth2/rfc6749/clients/web_application.py +++ b/libs/common/oauthlib/oauth2/rfc6749/clients/web_application.py @@ -6,13 +6,12 @@ oauthlib.oauth2.rfc6749 This module is an implementation of various logic needed for consuming and providing OAuth 2.0 RFC6749. """ -from __future__ import absolute_import, unicode_literals - import warnings -from ..parameters import (parse_authorization_code_response, - parse_token_response, prepare_grant_uri, - prepare_token_request) +from ..parameters import ( + parse_authorization_code_response, prepare_grant_uri, + prepare_token_request, +) from .base import Client @@ -38,11 +37,11 @@ class WebApplicationClient(Client): grant_type = 'authorization_code' def __init__(self, client_id, code=None, **kwargs): - super(WebApplicationClient, self).__init__(client_id, **kwargs) + super().__init__(client_id, **kwargs) self.code = code def prepare_request_uri(self, uri, redirect_uri=None, scope=None, - state=None, **kwargs): + state=None, code_challenge=None, code_challenge_method='plain', **kwargs): """Prepare the authorization code request URI The client constructs the request URI by adding the following @@ -50,7 +49,7 @@ class WebApplicationClient(Client): using the "application/x-www-form-urlencoded" format, per `Appendix B`_: :param redirect_uri: OPTIONAL. The redirect URI must be an absolute URI - and it should have been registerd with the OAuth + and it should have been registered with the OAuth provider prior to use. As described in `Section 3.1.2`_. :param scope: OPTIONAL. The scope of the access request as described by @@ -63,6 +62,13 @@ class WebApplicationClient(Client): to the client. The parameter SHOULD be used for preventing cross-site request forgery as described in `Section 10.12`_. + :param code_challenge: OPTIONAL. PKCE parameter. REQUIRED if PKCE is enforced. + A challenge derived from the code_verifier that is sent in the + authorization request, to be verified against later. + + :param code_challenge_method: OPTIONAL. PKCE parameter. A method that was used to derive code challenge. + Defaults to "plain" if not present in the request. + :param kwargs: Extra arguments to include in the request URI. In addition to supplied parameters, OAuthLib will append the ``client_id`` @@ -77,6 +83,10 @@ class WebApplicationClient(Client): 'https://example.com?client_id=your_id&response_type=code&redirect_uri=https%3A%2F%2Fa.b%2Fcallback' >>> client.prepare_request_uri('https://example.com', scope=['profile', 'pictures']) 'https://example.com?client_id=your_id&response_type=code&scope=profile+pictures' + >>> client.prepare_request_uri('https://example.com', code_challenge='kjasBS523KdkAILD2k78NdcJSk2k3KHG6') + 'https://example.com?client_id=your_id&response_type=code&code_challenge=kjasBS523KdkAILD2k78NdcJSk2k3KHG6' + >>> client.prepare_request_uri('https://example.com', code_challenge_method='S256') + 'https://example.com?client_id=your_id&response_type=code&code_challenge_method=S256' >>> client.prepare_request_uri('https://example.com', foo='bar') 'https://example.com?client_id=your_id&response_type=code&foo=bar' @@ -86,11 +96,13 @@ class WebApplicationClient(Client): .. _`Section 3.3`: https://tools.ietf.org/html/rfc6749#section-3.3 .. _`Section 10.12`: https://tools.ietf.org/html/rfc6749#section-10.12 """ + scope = self.scope if scope is None else scope return prepare_grant_uri(uri, self.client_id, 'code', - redirect_uri=redirect_uri, scope=scope, state=state, **kwargs) + redirect_uri=redirect_uri, scope=scope, state=state, code_challenge=code_challenge, + code_challenge_method=code_challenge_method, **kwargs) def prepare_request_body(self, code=None, redirect_uri=None, body='', - include_client_id=True, **kwargs): + include_client_id=True, code_verifier=None, **kwargs): """Prepare the access token request body. The client makes a request to the token endpoint by adding the @@ -105,7 +117,7 @@ class WebApplicationClient(Client): values MUST be identical. :param body: Existing request body (URL encoded string) to embed parameters - into. This may contain extra paramters. Default ''. + into. This may contain extra parameters. Default ''. :param include_client_id: `True` (default) to send the `client_id` in the body of the upstream request. This is required @@ -113,6 +125,9 @@ class WebApplicationClient(Client): authorization server as described in `Section 3.2.1`_. :type include_client_id: Boolean + :param code_verifier: OPTIONAL. A cryptographically random string that is used to correlate the + authorization request to the token request. + :param kwargs: Extra parameters to include in the token request. In addition OAuthLib will add the ``grant_type`` parameter set to @@ -127,6 +142,8 @@ class WebApplicationClient(Client): >>> client = WebApplicationClient('your_id') >>> client.prepare_request_body(code='sh35ksdf09sf') 'grant_type=authorization_code&code=sh35ksdf09sf' + >>> client.prepare_request_body(code_verifier='KB46DCKJ873NCGXK5GD682NHDKK34GR') + 'grant_type=authorization_code&code_verifier=KB46DCKJ873NCGXK5GD682NHDKK34GR' >>> client.prepare_request_body(code='sh35ksdf09sf', foo='bar') 'grant_type=authorization_code&code=sh35ksdf09sf&foo=bar' @@ -154,7 +171,7 @@ class WebApplicationClient(Client): kwargs['client_id'] = self.client_id kwargs['include_client_id'] = include_client_id return prepare_token_request(self.grant_type, code=code, body=body, - redirect_uri=redirect_uri, **kwargs) + redirect_uri=redirect_uri, code_verifier=code_verifier, **kwargs) def parse_request_uri_response(self, uri, state=None): """Parse the URI query for code and state. diff --git a/libs/common/oauthlib/oauth2/rfc6749/endpoints/__init__.py b/libs/common/oauthlib/oauth2/rfc6749/endpoints/__init__.py index 51e173df..1695b41b 100644 --- a/libs/common/oauthlib/oauth2/rfc6749/endpoints/__init__.py +++ b/libs/common/oauthlib/oauth2/rfc6749/endpoints/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ oauthlib.oauth2.rfc6749 ~~~~~~~~~~~~~~~~~~~~~~~ @@ -6,16 +5,13 @@ oauthlib.oauth2.rfc6749 This module is an implementation of various logic needed for consuming and providing OAuth 2.0 RFC6749. """ -from __future__ import absolute_import, unicode_literals - from .authorization import AuthorizationEndpoint from .introspect import IntrospectEndpoint from .metadata import MetadataEndpoint -from .token import TokenEndpoint +from .pre_configured import ( + BackendApplicationServer, LegacyApplicationServer, MobileApplicationServer, + Server, WebApplicationServer, +) from .resource import ResourceEndpoint from .revocation import RevocationEndpoint -from .pre_configured import Server -from .pre_configured import WebApplicationServer -from .pre_configured import MobileApplicationServer -from .pre_configured import LegacyApplicationServer -from .pre_configured import BackendApplicationServer +from .token import TokenEndpoint diff --git a/libs/common/oauthlib/oauth2/rfc6749/endpoints/authorization.py b/libs/common/oauthlib/oauth2/rfc6749/endpoints/authorization.py index 92cde346..71967865 100644 --- a/libs/common/oauthlib/oauth2/rfc6749/endpoints/authorization.py +++ b/libs/common/oauthlib/oauth2/rfc6749/endpoints/authorization.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ oauthlib.oauth2.rfc6749 ~~~~~~~~~~~~~~~~~~~~~~~ @@ -6,8 +5,6 @@ oauthlib.oauth2.rfc6749 This module is an implementation of various logic needed for consuming and providing OAuth 2.0 RFC6749. """ -from __future__ import absolute_import, unicode_literals - import logging from oauthlib.common import Request diff --git a/libs/common/oauthlib/oauth2/rfc6749/endpoints/base.py b/libs/common/oauthlib/oauth2/rfc6749/endpoints/base.py index e39232f4..3f239917 100644 --- a/libs/common/oauthlib/oauth2/rfc6749/endpoints/base.py +++ b/libs/common/oauthlib/oauth2/rfc6749/endpoints/base.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ oauthlib.oauth2.rfc6749 ~~~~~~~~~~~~~~~~~~~~~~~ @@ -6,21 +5,18 @@ oauthlib.oauth2.rfc6749 This module is an implementation of various logic needed for consuming and providing OAuth 2.0 RFC6749. """ -from __future__ import absolute_import, unicode_literals - import functools import logging -from ..errors import (FatalClientError, OAuth2Error, ServerError, - TemporarilyUnavailableError, InvalidRequestError, - InvalidClientError, UnsupportedTokenTypeError) - -from oauthlib.common import CaseInsensitiveDict, urldecode +from ..errors import ( + FatalClientError, InvalidClientError, InvalidRequestError, OAuth2Error, + ServerError, TemporarilyUnavailableError, UnsupportedTokenTypeError, +) log = logging.getLogger(__name__) -class BaseEndpoint(object): +class BaseEndpoint: def __init__(self): self._available = True diff --git a/libs/common/oauthlib/oauth2/rfc6749/endpoints/introspect.py b/libs/common/oauthlib/oauth2/rfc6749/endpoints/introspect.py index 4accbdc1..3cc61e66 100644 --- a/libs/common/oauthlib/oauth2/rfc6749/endpoints/introspect.py +++ b/libs/common/oauthlib/oauth2/rfc6749/endpoints/introspect.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ oauthlib.oauth2.rfc6749.endpoint.introspect ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -7,14 +6,12 @@ An implementation of the OAuth 2.0 `Token Introspection`. .. _`Token Introspection`: https://tools.ietf.org/html/rfc7662 """ -from __future__ import absolute_import, unicode_literals - import json import logging from oauthlib.common import Request -from ..errors import OAuth2Error, UnsupportedTokenTypeError +from ..errors import OAuth2Error from .base import BaseEndpoint, catch_errors_and_unavailability log = logging.getLogger(__name__) @@ -89,9 +86,9 @@ class IntrospectEndpoint(BaseEndpoint): an HTTP POST request with parameters sent as "application/x-www-form-urlencoded". - token REQUIRED. The string value of the token. + * token REQUIRED. The string value of the token. + * token_type_hint OPTIONAL. - token_type_hint OPTIONAL. A hint about the type of the token submitted for introspection. The protected resource MAY pass this parameter to help the authorization server optimize the token lookup. If the @@ -99,11 +96,9 @@ class IntrospectEndpoint(BaseEndpoint): extend its search across all of its supported token types. An authorization server MAY ignore this parameter, particularly if it is able to detect the token type automatically. - * access_token: An Access Token as defined in [`RFC6749`], - `section 1.4`_ - * refresh_token: A Refresh Token as defined in [`RFC6749`], - `section 1.5`_ + * access_token: An Access Token as defined in [`RFC6749`], `section 1.4`_ + * refresh_token: A Refresh Token as defined in [`RFC6749`], `section 1.5`_ The introspection endpoint MAY accept other OPTIONAL parameters to provide further context to the query. For diff --git a/libs/common/oauthlib/oauth2/rfc6749/endpoints/metadata.py b/libs/common/oauthlib/oauth2/rfc6749/endpoints/metadata.py index 936e8788..a2820f28 100644 --- a/libs/common/oauthlib/oauth2/rfc6749/endpoints/metadata.py +++ b/libs/common/oauthlib/oauth2/rfc6749/endpoints/metadata.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ oauthlib.oauth2.rfc6749.endpoint.metadata ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -7,20 +6,16 @@ An implementation of the `OAuth 2.0 Authorization Server Metadata`. .. _`OAuth 2.0 Authorization Server Metadata`: https://tools.ietf.org/html/rfc8414 """ -from __future__ import absolute_import, unicode_literals - import copy import json import logging -from ....common import unicode_type -from .base import BaseEndpoint, catch_errors_and_unavailability +from .. import grant_types, utils from .authorization import AuthorizationEndpoint +from .base import BaseEndpoint, catch_errors_and_unavailability from .introspect import IntrospectEndpoint -from .token import TokenEndpoint from .revocation import RevocationEndpoint -from .. import grant_types - +from .token import TokenEndpoint log = logging.getLogger(__name__) @@ -59,7 +54,8 @@ class MetadataEndpoint(BaseEndpoint): """Create metadata response """ headers = { - 'Content-Type': 'application/json' + 'Content-Type': 'application/json', + 'Access-Control-Allow-Origin': '*', } return headers, json.dumps(self.claims), 200 @@ -72,7 +68,7 @@ class MetadataEndpoint(BaseEndpoint): raise ValueError("key {} is a mandatory metadata.".format(key)) elif is_issuer: - if not array[key].startswith("https"): + if not utils.is_secure_transport(array[key]): raise ValueError("key {}: {} must be an HTTPS URL".format(key, array[key])) if "?" in array[key] or "&" in array[key] or "#" in array[key]: raise ValueError("key {}: {} must not contain query or fragment components".format(key, array[key])) @@ -85,7 +81,7 @@ class MetadataEndpoint(BaseEndpoint): if not isinstance(array[key], list): raise ValueError("key {}: {} must be an Array".format(key, array[key])) for elem in array[key]: - if not isinstance(elem, unicode_type): + if not isinstance(elem, str): raise ValueError("array {}: {} must contains only string (not {})".format(key, array[key], elem)) def validate_metadata_token(self, claims, endpoint): @@ -166,10 +162,10 @@ class MetadataEndpoint(BaseEndpoint): response_types_supported REQUIRED. - * Other OPTIONAL fields: - jwks_uri - registration_endpoint - response_modes_supported + Other OPTIONAL fields: + jwks_uri, + registration_endpoint, + response_modes_supported grant_types_supported OPTIONAL. JSON array containing a list of the OAuth 2.0 grant diff --git a/libs/common/oauthlib/oauth2/rfc6749/endpoints/pre_configured.py b/libs/common/oauthlib/oauth2/rfc6749/endpoints/pre_configured.py index e2cc9db7..d64a1663 100644 --- a/libs/common/oauthlib/oauth2/rfc6749/endpoints/pre_configured.py +++ b/libs/common/oauthlib/oauth2/rfc6749/endpoints/pre_configured.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ oauthlib.oauth2.rfc6749.endpoints.pre_configured ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -6,13 +5,10 @@ oauthlib.oauth2.rfc6749.endpoints.pre_configured This module is an implementation of various endpoints needed for providing OAuth 2.0 RFC6749 servers. """ -from __future__ import absolute_import, unicode_literals - -from ..grant_types import (AuthorizationCodeGrant, - ClientCredentialsGrant, - ImplicitGrant, - RefreshTokenGrant, - ResourceOwnerPasswordCredentialsGrant) +from ..grant_types import ( + AuthorizationCodeGrant, ClientCredentialsGrant, ImplicitGrant, + RefreshTokenGrant, ResourceOwnerPasswordCredentialsGrant, +) from ..tokens import BearerToken from .authorization import AuthorizationEndpoint from .introspect import IntrospectEndpoint @@ -42,34 +38,34 @@ class Server(AuthorizationEndpoint, IntrospectEndpoint, TokenEndpoint, :param kwargs: Extra parameters to pass to authorization-, token-, resource-, and revocation-endpoint constructors. """ - auth_grant = AuthorizationCodeGrant(request_validator) - implicit_grant = ImplicitGrant(request_validator) - password_grant = ResourceOwnerPasswordCredentialsGrant( + self.auth_grant = AuthorizationCodeGrant(request_validator) + self.implicit_grant = ImplicitGrant(request_validator) + self.password_grant = ResourceOwnerPasswordCredentialsGrant( request_validator) - credentials_grant = ClientCredentialsGrant(request_validator) - refresh_grant = RefreshTokenGrant(request_validator) + self.credentials_grant = ClientCredentialsGrant(request_validator) + self.refresh_grant = RefreshTokenGrant(request_validator) - bearer = BearerToken(request_validator, token_generator, + self.bearer = BearerToken(request_validator, token_generator, token_expires_in, refresh_token_generator) AuthorizationEndpoint.__init__(self, default_response_type='code', response_types={ - 'code': auth_grant, - 'token': implicit_grant, - 'none': auth_grant + 'code': self.auth_grant, + 'token': self.implicit_grant, + 'none': self.auth_grant }, - default_token_type=bearer) + default_token_type=self.bearer) TokenEndpoint.__init__(self, default_grant_type='authorization_code', grant_types={ - 'authorization_code': auth_grant, - 'password': password_grant, - 'client_credentials': credentials_grant, - 'refresh_token': refresh_grant, + 'authorization_code': self.auth_grant, + 'password': self.password_grant, + 'client_credentials': self.credentials_grant, + 'refresh_token': self.refresh_grant, }, - default_token_type=bearer) + default_token_type=self.bearer) ResourceEndpoint.__init__(self, default_token='Bearer', - token_types={'Bearer': bearer}) + token_types={'Bearer': self.bearer}) RevocationEndpoint.__init__(self, request_validator) IntrospectEndpoint.__init__(self, request_validator) @@ -94,21 +90,21 @@ class WebApplicationServer(AuthorizationEndpoint, IntrospectEndpoint, TokenEndpo :param kwargs: Extra parameters to pass to authorization-, token-, resource-, and revocation-endpoint constructors. """ - auth_grant = AuthorizationCodeGrant(request_validator) - refresh_grant = RefreshTokenGrant(request_validator) - bearer = BearerToken(request_validator, token_generator, + self.auth_grant = AuthorizationCodeGrant(request_validator) + self.refresh_grant = RefreshTokenGrant(request_validator) + self.bearer = BearerToken(request_validator, token_generator, token_expires_in, refresh_token_generator) AuthorizationEndpoint.__init__(self, default_response_type='code', - response_types={'code': auth_grant}, - default_token_type=bearer) + response_types={'code': self.auth_grant}, + default_token_type=self.bearer) TokenEndpoint.__init__(self, default_grant_type='authorization_code', grant_types={ - 'authorization_code': auth_grant, - 'refresh_token': refresh_grant, + 'authorization_code': self.auth_grant, + 'refresh_token': self.refresh_grant, }, - default_token_type=bearer) + default_token_type=self.bearer) ResourceEndpoint.__init__(self, default_token='Bearer', - token_types={'Bearer': bearer}) + token_types={'Bearer': self.bearer}) RevocationEndpoint.__init__(self, request_validator) IntrospectEndpoint.__init__(self, request_validator) @@ -133,15 +129,15 @@ class MobileApplicationServer(AuthorizationEndpoint, IntrospectEndpoint, :param kwargs: Extra parameters to pass to authorization-, token-, resource-, and revocation-endpoint constructors. """ - implicit_grant = ImplicitGrant(request_validator) - bearer = BearerToken(request_validator, token_generator, + self.implicit_grant = ImplicitGrant(request_validator) + self.bearer = BearerToken(request_validator, token_generator, token_expires_in, refresh_token_generator) AuthorizationEndpoint.__init__(self, default_response_type='token', response_types={ - 'token': implicit_grant}, - default_token_type=bearer) + 'token': self.implicit_grant}, + default_token_type=self.bearer) ResourceEndpoint.__init__(self, default_token='Bearer', - token_types={'Bearer': bearer}) + token_types={'Bearer': self.bearer}) RevocationEndpoint.__init__(self, request_validator, supported_token_types=['access_token']) IntrospectEndpoint.__init__(self, request_validator, @@ -168,19 +164,19 @@ class LegacyApplicationServer(TokenEndpoint, IntrospectEndpoint, :param kwargs: Extra parameters to pass to authorization-, token-, resource-, and revocation-endpoint constructors. """ - password_grant = ResourceOwnerPasswordCredentialsGrant( + self.password_grant = ResourceOwnerPasswordCredentialsGrant( request_validator) - refresh_grant = RefreshTokenGrant(request_validator) - bearer = BearerToken(request_validator, token_generator, + self.refresh_grant = RefreshTokenGrant(request_validator) + self.bearer = BearerToken(request_validator, token_generator, token_expires_in, refresh_token_generator) TokenEndpoint.__init__(self, default_grant_type='password', grant_types={ - 'password': password_grant, - 'refresh_token': refresh_grant, + 'password': self.password_grant, + 'refresh_token': self.refresh_grant, }, - default_token_type=bearer) + default_token_type=self.bearer) ResourceEndpoint.__init__(self, default_token='Bearer', - token_types={'Bearer': bearer}) + token_types={'Bearer': self.bearer}) RevocationEndpoint.__init__(self, request_validator) IntrospectEndpoint.__init__(self, request_validator) @@ -205,15 +201,15 @@ class BackendApplicationServer(TokenEndpoint, IntrospectEndpoint, :param kwargs: Extra parameters to pass to authorization-, token-, resource-, and revocation-endpoint constructors. """ - credentials_grant = ClientCredentialsGrant(request_validator) - bearer = BearerToken(request_validator, token_generator, + self.credentials_grant = ClientCredentialsGrant(request_validator) + self.bearer = BearerToken(request_validator, token_generator, token_expires_in, refresh_token_generator) TokenEndpoint.__init__(self, default_grant_type='client_credentials', grant_types={ - 'client_credentials': credentials_grant}, - default_token_type=bearer) + 'client_credentials': self.credentials_grant}, + default_token_type=self.bearer) ResourceEndpoint.__init__(self, default_token='Bearer', - token_types={'Bearer': bearer}) + token_types={'Bearer': self.bearer}) RevocationEndpoint.__init__(self, request_validator, supported_token_types=['access_token']) IntrospectEndpoint.__init__(self, request_validator, diff --git a/libs/common/oauthlib/oauth2/rfc6749/endpoints/resource.py b/libs/common/oauthlib/oauth2/rfc6749/endpoints/resource.py index f19c60c3..f7562255 100644 --- a/libs/common/oauthlib/oauth2/rfc6749/endpoints/resource.py +++ b/libs/common/oauthlib/oauth2/rfc6749/endpoints/resource.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ oauthlib.oauth2.rfc6749 ~~~~~~~~~~~~~~~~~~~~~~~ @@ -6,8 +5,6 @@ oauthlib.oauth2.rfc6749 This module is an implementation of various logic needed for consuming and providing OAuth 2.0 RFC6749. """ -from __future__ import absolute_import, unicode_literals - import logging from oauthlib.common import Request diff --git a/libs/common/oauthlib/oauth2/rfc6749/endpoints/revocation.py b/libs/common/oauthlib/oauth2/rfc6749/endpoints/revocation.py index 1fabd038..596d0860 100644 --- a/libs/common/oauthlib/oauth2/rfc6749/endpoints/revocation.py +++ b/libs/common/oauthlib/oauth2/rfc6749/endpoints/revocation.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ oauthlib.oauth2.rfc6749.endpoint.revocation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -7,13 +6,11 @@ An implementation of the OAuth 2 `Token Revocation`_ spec (draft 11). .. _`Token Revocation`: https://tools.ietf.org/html/draft-ietf-oauth-revocation-11 """ -from __future__ import absolute_import, unicode_literals - import logging from oauthlib.common import Request -from ..errors import OAuth2Error, UnsupportedTokenTypeError +from ..errors import OAuth2Error from .base import BaseEndpoint, catch_errors_and_unavailability log = logging.getLogger(__name__) @@ -45,7 +42,7 @@ class RevocationEndpoint(BaseEndpoint): The authorization server responds with HTTP status code 200 if the - token has been revoked sucessfully or if the client submitted an + token has been revoked successfully or if the client submitted an invalid token. Note: invalid tokens do not cause an error response since the client @@ -73,7 +70,7 @@ class RevocationEndpoint(BaseEndpoint): log.debug('Client error during validation of %r. %r.', request, e) response_body = e.json if self.enable_jsonp and request.callback: - response_body = '%s(%s);' % (request.callback, response_body) + response_body = '{}({});'.format(request.callback, response_body) resp_headers.update(e.headers) return resp_headers, response_body, e.status_code @@ -98,7 +95,7 @@ class RevocationEndpoint(BaseEndpoint): submitted for revocation. Clients MAY pass this parameter in order to help the authorization server to optimize the token lookup. If the server is unable to locate the token using the given hint, it MUST - extend its search accross all of its supported token types. An + extend its search across all of its supported token types. An authorization server MAY ignore this parameter, particularly if it is able to detect the token type automatically. This specification defines two such values: diff --git a/libs/common/oauthlib/oauth2/rfc6749/endpoints/token.py b/libs/common/oauthlib/oauth2/rfc6749/endpoints/token.py index bc87e9bd..ab9e0918 100644 --- a/libs/common/oauthlib/oauth2/rfc6749/endpoints/token.py +++ b/libs/common/oauthlib/oauth2/rfc6749/endpoints/token.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ oauthlib.oauth2.rfc6749 ~~~~~~~~~~~~~~~~~~~~~~~ @@ -6,8 +5,6 @@ oauthlib.oauth2.rfc6749 This module is an implementation of various logic needed for consuming and providing OAuth 2.0 RFC6749. """ -from __future__ import absolute_import, unicode_literals - import logging from oauthlib.common import Request @@ -39,7 +36,6 @@ class TokenEndpoint(BaseEndpoint): https://example.com/path?query=component # OK https://example.com/path?query=component#fragment # Not OK - Since requests to the authorization endpoint result in user Since requests to the token endpoint result in the transmission of clear-text credentials (in the HTTP request and response), the authorization server MUST require the use of TLS as described in diff --git a/libs/common/oauthlib/oauth2/rfc6749/errors.py b/libs/common/oauthlib/oauth2/rfc6749/errors.py index d2a14026..da24feab 100644 --- a/libs/common/oauthlib/oauth2/rfc6749/errors.py +++ b/libs/common/oauthlib/oauth2/rfc6749/errors.py @@ -1,4 +1,3 @@ -# coding=utf-8 """ oauthlib.oauth2.rfc6749.errors ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -6,8 +5,6 @@ oauthlib.oauth2.rfc6749.errors Error used both by OAuth 2 clients and providers to represent the spec defined error responses for all four core grant types. """ -from __future__ import unicode_literals - import json from oauthlib.common import add_params_to_uri, urlencode @@ -45,10 +42,10 @@ class OAuth2Error(Exception): if description is not None: self.description = description - message = '(%s) %s' % (self.error, self.description) + message = '({}) {}'.format(self.error, self.description) if request: message += ' ' + repr(request) - super(OAuth2Error, self).__init__(message) + super().__init__(message) self.uri = uri self.state = state @@ -106,15 +103,12 @@ class OAuth2Error(Exception): value "Bearer". This scheme MUST be followed by one or more auth-param values. """ - authvalues = [ - "Bearer", - 'error="{}"'.format(self.error) - ] + authvalues = ['error="{}"'.format(self.error)] if self.description: authvalues.append('error_description="{}"'.format(self.description)) if self.uri: authvalues.append('error_uri="{}"'.format(self.uri)) - return {"WWW-Authenticate": ", ".join(authvalues)} + return {"WWW-Authenticate": "Bearer " + ", ".join(authvalues)} return {} @@ -389,7 +383,7 @@ class CustomOAuth2Error(OAuth2Error): """ def __init__(self, error, *args, **kwargs): self.error = error - super(CustomOAuth2Error, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) def raise_from_error(error, params=None): diff --git a/libs/common/oauthlib/oauth2/rfc6749/grant_types/__init__.py b/libs/common/oauthlib/oauth2/rfc6749/grant_types/__init__.py index 2ec8e4fa..eb88cfc2 100644 --- a/libs/common/oauthlib/oauth2/rfc6749/grant_types/__init__.py +++ b/libs/common/oauthlib/oauth2/rfc6749/grant_types/__init__.py @@ -1,12 +1,11 @@ -# -*- coding: utf-8 -*- """ oauthlib.oauth2.rfc6749.grant_types ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ """ -from __future__ import unicode_literals, absolute_import - from .authorization_code import AuthorizationCodeGrant -from .implicit import ImplicitGrant -from .resource_owner_password_credentials import ResourceOwnerPasswordCredentialsGrant from .client_credentials import ClientCredentialsGrant +from .implicit import ImplicitGrant from .refresh_token import RefreshTokenGrant +from .resource_owner_password_credentials import ( + ResourceOwnerPasswordCredentialsGrant, +) diff --git a/libs/common/oauthlib/oauth2/rfc6749/grant_types/authorization_code.py b/libs/common/oauthlib/oauth2/rfc6749/grant_types/authorization_code.py index 9b84c4c4..858855a1 100644 --- a/libs/common/oauthlib/oauth2/rfc6749/grant_types/authorization_code.py +++ b/libs/common/oauthlib/oauth2/rfc6749/grant_types/authorization_code.py @@ -1,10 +1,7 @@ -# -*- coding: utf-8 -*- """ oauthlib.oauth2.rfc6749.grant_types ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ """ -from __future__ import absolute_import, unicode_literals - import base64 import hashlib import json @@ -275,6 +272,8 @@ class AuthorizationCodeGrant(GrantTypeBase): grant = self.create_authorization_code(request) for modifier in self._code_modifiers: grant = modifier(grant, token_handler, request) + if 'access_token' in grant: + self.request_validator.save_token(grant, request) log.debug('Saving grant %r for %r.', grant, request) self.request_validator.save_authorization_code( request.client_id, grant, request) @@ -313,6 +312,7 @@ class AuthorizationCodeGrant(GrantTypeBase): self.request_validator.save_token(token, request) self.request_validator.invalidate_authorization_code( request.client_id, request.code, request) + headers.update(self._create_cors_headers(request)) return headers, json.dumps(token), 200 def validate_authorization_request(self, request): diff --git a/libs/common/oauthlib/oauth2/rfc6749/grant_types/base.py b/libs/common/oauthlib/oauth2/rfc6749/grant_types/base.py index f0772e28..ca343a11 100644 --- a/libs/common/oauthlib/oauth2/rfc6749/grant_types/base.py +++ b/libs/common/oauthlib/oauth2/rfc6749/grant_types/base.py @@ -1,23 +1,21 @@ -# -*- coding: utf-8 -*- """ oauthlib.oauth2.rfc6749.grant_types ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ """ -from __future__ import absolute_import, unicode_literals - import logging from itertools import chain from oauthlib.common import add_params_to_uri -from oauthlib.uri_validate import is_absolute_uri from oauthlib.oauth2.rfc6749 import errors, utils +from oauthlib.uri_validate import is_absolute_uri from ..request_validator import RequestValidator +from ..utils import is_secure_transport log = logging.getLogger(__name__) -class ValidatorsContainer(object): +class ValidatorsContainer: """ Container object for holding custom validator callables to be invoked as part of the grant type `validate_authorization_request()` or @@ -74,7 +72,7 @@ class ValidatorsContainer(object): return chain(self.post_auth, self.post_token) -class GrantTypeBase(object): +class GrantTypeBase: error_uri = None request_validator = None default_response_mode = 'fragment' @@ -251,3 +249,20 @@ class GrantTypeBase(object): raise errors.MissingRedirectURIError(request=request) if not is_absolute_uri(request.redirect_uri): raise errors.InvalidRedirectURIError(request=request) + + def _create_cors_headers(self, request): + """If CORS is allowed, create the appropriate headers.""" + if 'origin' not in request.headers: + return {} + + origin = request.headers['origin'] + if not is_secure_transport(origin): + log.debug('Origin "%s" is not HTTPS, CORS not allowed.', origin) + return {} + elif not self.request_validator.is_origin_allowed( + request.client_id, origin, request): + log.debug('Invalid origin "%s", CORS not allowed.', origin) + return {} + else: + log.debug('Valid origin "%s", injecting CORS headers.', origin) + return {'Access-Control-Allow-Origin': origin} diff --git a/libs/common/oauthlib/oauth2/rfc6749/grant_types/client_credentials.py b/libs/common/oauthlib/oauth2/rfc6749/grant_types/client_credentials.py index 7e508577..e7b46189 100644 --- a/libs/common/oauthlib/oauth2/rfc6749/grant_types/client_credentials.py +++ b/libs/common/oauthlib/oauth2/rfc6749/grant_types/client_credentials.py @@ -1,15 +1,11 @@ -# -*- coding: utf-8 -*- """ oauthlib.oauth2.rfc6749.grant_types ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ """ -from __future__ import absolute_import, unicode_literals - import json import logging from .. import errors -from ..request_validator import RequestValidator from .base import GrantTypeBase log = logging.getLogger(__name__) @@ -119,8 +115,8 @@ class ClientCredentialsGrant(GrantTypeBase): # Ensure client is authorized use of this grant type self.validate_grant_type(request) - log.debug('Authorizing access to user %r.', request.user) request.client_id = request.client_id or request.client.client_id + log.debug('Authorizing access to client %r.', request.client_id) self.validate_scopes(request) for validator in self.custom_validators.post_token: diff --git a/libs/common/oauthlib/oauth2/rfc6749/grant_types/implicit.py b/libs/common/oauthlib/oauth2/rfc6749/grant_types/implicit.py index 48bae7a5..6110b6f3 100644 --- a/libs/common/oauthlib/oauth2/rfc6749/grant_types/implicit.py +++ b/libs/common/oauthlib/oauth2/rfc6749/grant_types/implicit.py @@ -1,10 +1,7 @@ -# -*- coding: utf-8 -*- """ oauthlib.oauth2.rfc6749.grant_types ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ """ -from __future__ import absolute_import, unicode_literals - import logging from oauthlib import common diff --git a/libs/common/oauthlib/oauth2/rfc6749/grant_types/refresh_token.py b/libs/common/oauthlib/oauth2/rfc6749/grant_types/refresh_token.py index fc61d65c..ce33df0e 100644 --- a/libs/common/oauthlib/oauth2/rfc6749/grant_types/refresh_token.py +++ b/libs/common/oauthlib/oauth2/rfc6749/grant_types/refresh_token.py @@ -1,15 +1,11 @@ -# -*- coding: utf-8 -*- """ oauthlib.oauth2.rfc6749.grant_types ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ """ -from __future__ import absolute_import, unicode_literals - import json import logging from .. import errors, utils -from ..request_validator import RequestValidator from .base import GrantTypeBase log = logging.getLogger(__name__) @@ -25,7 +21,7 @@ class RefreshTokenGrant(GrantTypeBase): def __init__(self, request_validator=None, issue_new_refresh_tokens=True, **kwargs): - super(RefreshTokenGrant, self).__init__( + super().__init__( request_validator, issue_new_refresh_tokens=issue_new_refresh_tokens, **kwargs) @@ -67,12 +63,13 @@ class RefreshTokenGrant(GrantTypeBase): refresh_token=self.issue_new_refresh_tokens) for modifier in self._token_modifiers: - token = modifier(token) + token = modifier(token, token_handler, request) self.request_validator.save_token(token, request) log.debug('Issuing new token to client id %r (%r), %r.', request.client_id, request.client, token) + headers.update(self._create_cors_headers(request)) return headers, json.dumps(token), 200 def validate_token_request(self, request): @@ -126,7 +123,7 @@ class RefreshTokenGrant(GrantTypeBase): if request.scope: request.scopes = utils.scope_to_list(request.scope) - if (not all((s in original_scopes for s in request.scopes)) + if (not all(s in original_scopes for s in request.scopes) and not self.request_validator.is_within_original_scope( request.scopes, request.refresh_token, request)): log.debug('Refresh token %s lack requested scopes, %r.', diff --git a/libs/common/oauthlib/oauth2/rfc6749/grant_types/resource_owner_password_credentials.py b/libs/common/oauthlib/oauth2/rfc6749/grant_types/resource_owner_password_credentials.py index 5929afb4..4b0de5bf 100644 --- a/libs/common/oauthlib/oauth2/rfc6749/grant_types/resource_owner_password_credentials.py +++ b/libs/common/oauthlib/oauth2/rfc6749/grant_types/resource_owner_password_credentials.py @@ -1,15 +1,11 @@ -# -*- coding: utf-8 -*- """ oauthlib.oauth2.rfc6749.grant_types ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ """ -from __future__ import absolute_import, unicode_literals - import json import logging from .. import errors -from ..request_validator import RequestValidator from .base import GrantTypeBase log = logging.getLogger(__name__) diff --git a/libs/common/oauthlib/oauth2/rfc6749/parameters.py b/libs/common/oauthlib/oauth2/rfc6749/parameters.py index 14d4c0d8..8f6ce2c7 100644 --- a/libs/common/oauthlib/oauth2/rfc6749/parameters.py +++ b/libs/common/oauthlib/oauth2/rfc6749/parameters.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ oauthlib.oauth2.rfc6749.parameters ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -7,29 +6,24 @@ This module contains methods related to `Section 4`_ of the OAuth 2 RFC. .. _`Section 4`: https://tools.ietf.org/html/rfc6749#section-4 """ -from __future__ import absolute_import, unicode_literals - import json import os import time +import urllib.parse as urlparse -from oauthlib.common import add_params_to_qs, add_params_to_uri, unicode_type +from oauthlib.common import add_params_to_qs, add_params_to_uri from oauthlib.signals import scope_changed -from .errors import (InsecureTransportError, MismatchingStateError, - MissingCodeError, MissingTokenError, - MissingTokenTypeError, raise_from_error) +from .errors import ( + InsecureTransportError, MismatchingStateError, MissingCodeError, + MissingTokenError, MissingTokenTypeError, raise_from_error, +) from .tokens import OAuth2Token from .utils import is_secure_transport, list_to_scope, scope_to_list -try: - import urlparse -except ImportError: - import urllib.parse as urlparse - def prepare_grant_uri(uri, client_id, response_type, redirect_uri=None, - scope=None, state=None, **kwargs): + scope=None, state=None, code_challenge=None, code_challenge_method='plain', **kwargs): """Prepare the authorization grant request URI. The client constructs the request URI by adding the following @@ -51,6 +45,11 @@ def prepare_grant_uri(uri, client_id, response_type, redirect_uri=None, back to the client. The parameter SHOULD be used for preventing cross-site request forgery as described in `Section 10.12`_. + :param code_challenge: PKCE parameter. A challenge derived from the + code_verifier that is sent in the authorization + request, to be verified against later. + :param code_challenge_method: PKCE parameter. A method that was used to derive the + code_challenge. Defaults to "plain" if not present in the request. :param kwargs: Extra arguments to embed in the grant/authorization URL. An example of an authorization code grant authorization URL: @@ -58,6 +57,7 @@ def prepare_grant_uri(uri, client_id, response_type, redirect_uri=None, .. code-block:: http GET /authorize?response_type=code&client_id=s6BhdRkqt3&state=xyz + &code_challenge=kjasBS523KdkAILD2k78NdcJSk2k3KHG6&code_challenge_method=S256 &redirect_uri=https%3A%2F%2Fclient%2Eexample%2Ecom%2Fcb HTTP/1.1 Host: server.example.com @@ -79,15 +79,18 @@ def prepare_grant_uri(uri, client_id, response_type, redirect_uri=None, params.append(('scope', list_to_scope(scope))) if state: params.append(('state', state)) + if code_challenge is not None: + params.append(('code_challenge', code_challenge)) + params.append(('code_challenge_method', code_challenge_method)) for k in kwargs: if kwargs[k]: - params.append((unicode_type(k), kwargs[k])) + params.append((str(k), kwargs[k])) return add_params_to_uri(uri, params) -def prepare_token_request(grant_type, body='', include_client_id=True, **kwargs): +def prepare_token_request(grant_type, body='', include_client_id=True, code_verifier=None, **kwargs): """Prepare the access token request. The client makes a request to the token endpoint by adding the @@ -122,6 +125,9 @@ def prepare_token_request(grant_type, body='', include_client_id=True, **kwargs) authorization request as described in `Section 4.1.1`_, and their values MUST be identical. * + :param code_verifier: PKCE parameter. A cryptographically random string that is used to correlate the + authorization request to the token request. + :param kwargs: Extra arguments to embed in the request body. Parameters marked with a `*` above are not explicit arguments in the @@ -146,18 +152,22 @@ def prepare_token_request(grant_type, body='', include_client_id=True, **kwargs) client_id = kwargs.pop('client_id', None) if include_client_id: if client_id is not None: - params.append((unicode_type('client_id'), client_id)) + params.append(('client_id', client_id)) + + # use code_verifier if code_challenge was passed in the authorization request + if code_verifier is not None: + params.append(('code_verifier', code_verifier)) # the kwargs iteration below only supports including boolean truth (truthy) # values, but some servers may require an empty string for `client_secret` client_secret = kwargs.pop('client_secret', None) if client_secret is not None: - params.append((unicode_type('client_secret'), client_secret)) + params.append(('client_secret', client_secret)) # this handles: `code`, `redirect_uri`, and other undocumented params for k in kwargs: if kwargs[k]: - params.append((unicode_type(k), kwargs[k])) + params.append((str(k), kwargs[k])) return add_params_to_qs(body, params) @@ -167,7 +177,7 @@ def prepare_token_revocation_request(url, token, token_type_hint="access_token", """Prepare a token revocation request. The client constructs the request by including the following parameters - using the "application/x-www-form-urlencoded" format in the HTTP request + using the ``application/x-www-form-urlencoded`` format in the HTTP request entity-body: :param token: REQUIRED. The token that the client wants to get revoked. @@ -209,7 +219,7 @@ def prepare_token_revocation_request(url, token, token_type_hint="access_token", for k in kwargs: if kwargs[k]: - params.append((unicode_type(k), kwargs[k])) + params.append((str(k), kwargs[k])) headers = {'Content-Type': 'application/x-www-form-urlencoded'} @@ -433,7 +443,7 @@ def parse_token_response(body, scope=None): def validate_token_parameters(params): - """Ensures token precence, token type, expiration and scope in params.""" + """Ensures token presence, token type, expiration and scope in params.""" if 'error' in params: raise_from_error(params.get('error'), params) diff --git a/libs/common/oauthlib/oauth2/rfc6749/request_validator.py b/libs/common/oauthlib/oauth2/rfc6749/request_validator.py index 86509b62..3910c0b9 100644 --- a/libs/common/oauthlib/oauth2/rfc6749/request_validator.py +++ b/libs/common/oauthlib/oauth2/rfc6749/request_validator.py @@ -1,16 +1,13 @@ -# -*- coding: utf-8 -*- """ oauthlib.oauth2.rfc6749.request_validator ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ """ -from __future__ import absolute_import, unicode_literals - import logging log = logging.getLogger(__name__) -class RequestValidator(object): +class RequestValidator: def client_authentication_required(self, request, *args, **kwargs): """Determine if client authentication is required for current request. @@ -51,6 +48,17 @@ class RequestValidator(object): Headers may be accesses through request.headers and parameters found in both body and query can be obtained by direct attribute access, i.e. request.client_id for client_id in the URL query. + + The authentication process is required to contain the identification of + the client (i.e. search the database based on the client_id). In case the + client doesn't exist based on the received client_id, this method has to + return False and the HTTP response created by the library will contain + 'invalid_client' message. + + After the client identification succeeds, this method needs to set the + client on the request, i.e. request.client = client. A client object's + class must contain the 'client_id' attribute and the 'client_id' must have + a value. :param request: OAuthlib request. :type request: oauthlib.common.Request @@ -183,6 +191,7 @@ class RequestValidator(object): claims associated, or `None` in case the token is unknown. Below the list of registered claims you should be interested in: + - scope : space-separated list of scopes - client_id : client identifier - username : human-readable identifier for the resource owner @@ -196,10 +205,10 @@ class RequestValidator(object): - jti : string identifier for the token Note that most of them are coming directly from JWT RFC. More details - can be found in `Introspect Claims`_ or `_JWT Claims`_. + can be found in `Introspect Claims`_ or `JWT Claims`_. The implementation can use *token_type_hint* to improve lookup - efficency, but must fallback to other types to be compliant with RFC. + efficiency, but must fallback to other types to be compliant with RFC. The dict of claims is added to request.token after this method. @@ -435,6 +444,7 @@ class RequestValidator(object): - request.user - request.scopes - request.claims (if given) + OBS! The request.user attribute should be set to the resource owner associated with this authorization code. Similarly request.scopes must also be set. @@ -443,6 +453,7 @@ class RequestValidator(object): If PKCE is enabled (see 'is_pkce_required' and 'save_authorization_code') you MUST set the following based on the information stored: + - request.code_challenge - request.code_challenge_method @@ -553,7 +564,7 @@ class RequestValidator(object): OBS! The validation should also set the user attribute of the request to a valid resource owner, i.e. request.user = username or similar. If not set you will be unable to associate a token with a user in the - persistance method used (commonly, save_bearer_token). + persistence method used (commonly, save_bearer_token). :param username: Unicode username. :param password: Unicode password. @@ -641,3 +652,29 @@ class RequestValidator(object): """ raise NotImplementedError('Subclasses must implement this method.') + + def is_origin_allowed(self, client_id, origin, request, *args, **kwargs): + """Indicate if the given origin is allowed to access the token endpoint + via Cross-Origin Resource Sharing (CORS). CORS is used by browser-based + clients, such as Single-Page Applications, to perform the Authorization + Code Grant. + + (Note: If performing Authorization Code Grant via a public client such + as a browser, you should use PKCE as well.) + + If this method returns true, the appropriate CORS headers will be added + to the response. By default this method always returns False, meaning + CORS is disabled. + + :param client_id: Unicode client identifier. + :param redirect_uri: Unicode origin. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :rtype: bool + + Method is used by: + - Authorization Code Grant + - Refresh Token Grant + + """ + return False diff --git a/libs/common/oauthlib/oauth2/rfc6749/tokens.py b/libs/common/oauthlib/oauth2/rfc6749/tokens.py index 3587af43..0757d07e 100644 --- a/libs/common/oauthlib/oauth2/rfc6749/tokens.py +++ b/libs/common/oauthlib/oauth2/rfc6749/tokens.py @@ -7,28 +7,22 @@ This module contains methods for adding two types of access tokens to requests. - Bearer https://tools.ietf.org/html/rfc6750 - MAC https://tools.ietf.org/html/draft-ietf-oauth-v2-http-mac-01 """ -from __future__ import absolute_import, unicode_literals - import hashlib import hmac -from binascii import b2a_base64 import warnings +from binascii import b2a_base64 +from urllib.parse import urlparse from oauthlib import common -from oauthlib.common import add_params_to_qs, add_params_to_uri, unicode_type +from oauthlib.common import add_params_to_qs, add_params_to_uri from . import utils -try: - from urlparse import urlparse -except ImportError: - from urllib.parse import urlparse - class OAuth2Token(dict): def __init__(self, params, old_scope=None): - super(OAuth2Token, self).__init__(params) + super().__init__(params) self._new_scope = None if 'scope' in params and params['scope']: self._new_scope = set(utils.scope_to_list(params['scope'])) @@ -121,7 +115,7 @@ def prepare_mac_header(token, uri, key, http_method, raise ValueError('unknown hash algorithm') if draft == 0: - nonce = nonce or '{0}:{1}'.format(utils.generate_age(issue_time), + nonce = nonce or '{}:{}'.format(utils.generate_age(issue_time), common.generate_nonce()) else: ts = common.generate_timestamp() @@ -158,7 +152,7 @@ def prepare_mac_header(token, uri, key, http_method, base_string = '\n'.join(base) + '\n' # hmac struggles with unicode strings - http://bugs.python.org/issue5285 - if isinstance(key, unicode_type): + if isinstance(key, str): key = key.encode('utf-8') sign = hmac.new(key, base_string.encode('utf-8'), h) sign = b2a_base64(sign.digest())[:-1].decode('utf-8') @@ -262,7 +256,8 @@ def get_token_from_header(request): return token -class TokenBase(object): +class TokenBase: + __slots__ = () def __call__(self, request, refresh_token=False): raise NotImplementedError('Subclasses must implement this method.') diff --git a/libs/common/oauthlib/oauth2/rfc6749/utils.py b/libs/common/oauthlib/oauth2/rfc6749/utils.py index f67019d3..7dc27b3d 100644 --- a/libs/common/oauthlib/oauth2/rfc6749/utils.py +++ b/libs/common/oauthlib/oauth2/rfc6749/utils.py @@ -1,33 +1,22 @@ -# -*- coding: utf-8 -*- """ oauthlib.utils ~~~~~~~~~~~~~~ This module contains utility methods used by various parts of the OAuth 2 spec. """ -from __future__ import absolute_import, unicode_literals - import datetime import os +from urllib.parse import quote, urlparse -from oauthlib.common import unicode_type, urldecode - -try: - from urllib import quote -except ImportError: - from urllib.parse import quote -try: - from urlparse import urlparse -except ImportError: - from urllib.parse import urlparse +from oauthlib.common import urldecode def list_to_scope(scope): """Convert a list of scopes to a space separated string.""" - if isinstance(scope, unicode_type) or scope is None: + if isinstance(scope, str) or scope is None: return scope elif isinstance(scope, (set, tuple, list)): - return " ".join([unicode_type(s) for s in scope]) + return " ".join([str(s) for s in scope]) else: raise ValueError("Invalid scope (%s), must be string, tuple, set, or list." % scope) @@ -35,7 +24,7 @@ def list_to_scope(scope): def scope_to_list(scope): """Convert a space separated string to a list of scopes.""" if isinstance(scope, (tuple, list, set)): - return [unicode_type(s) for s in scope] + return [str(s) for s in scope] elif scope is None: return None else: @@ -74,7 +63,7 @@ def escape(u): TODO: verify whether this can in fact be used for OAuth 2 """ - if not isinstance(u, unicode_type): + if not isinstance(u, str): raise ValueError('Only unicode objects are escapable.') return quote(u.encode('utf-8'), safe=b'~') @@ -84,7 +73,7 @@ def generate_age(issue_time): td = datetime.datetime.now() - issue_time age = (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10 ** 6) / 10 ** 6 - return unicode_type(age) + return str(age) def is_secure_transport(uri): diff --git a/libs/common/oauthlib/oauth2/rfc8628/__init__.py b/libs/common/oauthlib/oauth2/rfc8628/__init__.py new file mode 100644 index 00000000..531929dc --- /dev/null +++ b/libs/common/oauthlib/oauth2/rfc8628/__init__.py @@ -0,0 +1,10 @@ +""" +oauthlib.oauth2.rfc8628 +~~~~~~~~~~~~~~~~~~~~~~~ + +This module is an implementation of various logic needed +for consuming and providing OAuth 2.0 Device Authorization RFC8628. +""" +import logging + +log = logging.getLogger(__name__) diff --git a/libs/common/oauthlib/oauth2/rfc8628/clients/__init__.py b/libs/common/oauthlib/oauth2/rfc8628/clients/__init__.py new file mode 100644 index 00000000..130b52e3 --- /dev/null +++ b/libs/common/oauthlib/oauth2/rfc8628/clients/__init__.py @@ -0,0 +1,8 @@ +""" +oauthlib.oauth2.rfc8628 +~~~~~~~~~~~~~~~~~~~~~~~ + +This module is an implementation of various logic needed +for consuming OAuth 2.0 Device Authorization RFC8628. +""" +from .device import DeviceClient diff --git a/libs/common/oauthlib/oauth2/rfc8628/clients/device.py b/libs/common/oauthlib/oauth2/rfc8628/clients/device.py new file mode 100644 index 00000000..b9ba2150 --- /dev/null +++ b/libs/common/oauthlib/oauth2/rfc8628/clients/device.py @@ -0,0 +1,95 @@ +""" +oauthlib.oauth2.rfc8628 +~~~~~~~~~~~~~~~~~~~~~~~ + +This module is an implementation of various logic needed +for consuming and providing OAuth 2.0 Device Authorization RFC8628. +""" +from oauthlib.common import add_params_to_uri +from oauthlib.oauth2 import BackendApplicationClient, Client +from oauthlib.oauth2.rfc6749.errors import InsecureTransportError +from oauthlib.oauth2.rfc6749.parameters import prepare_token_request +from oauthlib.oauth2.rfc6749.utils import is_secure_transport, list_to_scope + + +class DeviceClient(Client): + + """A public client utilizing the device authorization workflow. + + The client can request an access token using a device code and + a public client id associated with the device code as defined + in RFC8628. + + The device authorization grant type can be used to obtain both + access tokens and refresh tokens and is intended to be used in + a scenario where the device being authorized does not have a + user interface that is suitable for performing authentication. + """ + + grant_type = 'urn:ietf:params:oauth:grant-type:device_code' + + def __init__(self, client_id, **kwargs): + super().__init__(client_id, **kwargs) + self.client_secret = kwargs.get('client_secret') + + def prepare_request_uri(self, uri, scope=None, **kwargs): + if not is_secure_transport(uri): + raise InsecureTransportError() + + scope = self.scope if scope is None else scope + params = [(('client_id', self.client_id)), (('grant_type', self.grant_type))] + + if self.client_secret is not None: + params.append(('client_secret', self.client_secret)) + + if scope: + params.append(('scope', list_to_scope(scope))) + + for k in kwargs: + if kwargs[k]: + params.append((str(k), kwargs[k])) + + return add_params_to_uri(uri, params) + + def prepare_request_body(self, device_code, body='', scope=None, + include_client_id=False, **kwargs): + """Add device_code to request body + + The client makes a request to the token endpoint by adding the + device_code as a parameter using the + "application/x-www-form-urlencoded" format to the HTTP request + body. + + :param body: Existing request body (URL encoded string) to embed parameters + into. This may contain extra parameters. Default ''. + :param scope: The scope of the access request as described by + `Section 3.3`_. + + :param include_client_id: `True` to send the `client_id` in the + body of the upstream request. This is required + if the client is not authenticating with the + authorization server as described in + `Section 3.2.1`_. False otherwise (default). + :type include_client_id: Boolean + + :param kwargs: Extra credentials to include in the token request. + + The prepared body will include all provided device_code as well as + the ``grant_type`` parameter set to + ``urn:ietf:params:oauth:grant-type:device_code``:: + + >>> from oauthlib.oauth2 import DeviceClient + >>> client = DeviceClient('your_id', 'your_code') + >>> client.prepare_request_body(scope=['hello', 'world']) + 'grant_type=urn:ietf:params:oauth:grant-type:device_code&scope=hello+world' + + .. _`Section 3.2.1`: https://datatracker.ietf.org/doc/html/rfc6749#section-3.2.1 + .. _`Section 3.3`: https://datatracker.ietf.org/doc/html/rfc6749#section-3.3 + .. _`Section 3.4`: https://datatracker.ietf.org/doc/html/rfc8628#section-3.4 + """ + + kwargs['client_id'] = self.client_id + kwargs['include_client_id'] = include_client_id + scope = self.scope if scope is None else scope + return prepare_token_request(self.grant_type, body=body, device_code=device_code, + scope=scope, **kwargs) diff --git a/libs/common/oauthlib/openid/__init__.py b/libs/common/oauthlib/openid/__init__.py index 8157c297..e3174374 100644 --- a/libs/common/oauthlib/openid/__init__.py +++ b/libs/common/oauthlib/openid/__init__.py @@ -1,11 +1,7 @@ -# -*- coding: utf-8 -*- """ oauthlib.openid ~~~~~~~~~~~~~~ """ -from __future__ import absolute_import, unicode_literals - -from .connect.core.endpoints import Server -from .connect.core.endpoints import UserInfoEndpoint +from .connect.core.endpoints import Server, UserInfoEndpoint from .connect.core.request_validator import RequestValidator diff --git a/libs/common/oauthlib/openid/connect/core/endpoints/__init__.py b/libs/common/oauthlib/openid/connect/core/endpoints/__init__.py index 528841f4..7017ff4f 100644 --- a/libs/common/oauthlib/openid/connect/core/endpoints/__init__.py +++ b/libs/common/oauthlib/openid/connect/core/endpoints/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ oauthlib.oopenid.core ~~~~~~~~~~~~~~~~~~~~~~~ @@ -6,7 +5,5 @@ oauthlib.oopenid.core This module is an implementation of various logic needed for consuming and providing OpenID Connect """ -from __future__ import absolute_import, unicode_literals - from .pre_configured import Server from .userinfo import UserInfoEndpoint diff --git a/libs/common/oauthlib/openid/connect/core/endpoints/pre_configured.py b/libs/common/oauthlib/openid/connect/core/endpoints/pre_configured.py index fde2739e..8ce8bee6 100644 --- a/libs/common/oauthlib/openid/connect/core/endpoints/pre_configured.py +++ b/libs/common/oauthlib/openid/connect/core/endpoints/pre_configured.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ oauthlib.openid.connect.core.endpoints.pre_configured ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -6,32 +5,21 @@ oauthlib.openid.connect.core.endpoints.pre_configured This module is an implementation of various endpoints needed for providing OpenID Connect servers. """ -from __future__ import absolute_import, unicode_literals - from oauthlib.oauth2.rfc6749.endpoints import ( - AuthorizationEndpoint, - IntrospectEndpoint, - ResourceEndpoint, - RevocationEndpoint, - TokenEndpoint + AuthorizationEndpoint, IntrospectEndpoint, ResourceEndpoint, + RevocationEndpoint, TokenEndpoint, ) from oauthlib.oauth2.rfc6749.grant_types import ( AuthorizationCodeGrant as OAuth2AuthorizationCodeGrant, - ImplicitGrant as OAuth2ImplicitGrant, - ClientCredentialsGrant, - RefreshTokenGrant, - ResourceOwnerPasswordCredentialsGrant + ClientCredentialsGrant, ImplicitGrant as OAuth2ImplicitGrant, + RefreshTokenGrant, ResourceOwnerPasswordCredentialsGrant, ) from oauthlib.oauth2.rfc6749.tokens import BearerToken -from ..grant_types import ( - AuthorizationCodeGrant, - ImplicitGrant, - HybridGrant, -) + +from ..grant_types import AuthorizationCodeGrant, HybridGrant, ImplicitGrant from ..grant_types.dispatchers import ( - AuthorizationCodeGrantDispatcher, + AuthorizationCodeGrantDispatcher, AuthorizationTokenGrantDispatcher, ImplicitTokenGrantDispatcher, - AuthorizationTokenGrantDispatcher ) from ..tokens import JWTToken from .userinfo import UserInfoEndpoint @@ -58,52 +46,52 @@ class Server(AuthorizationEndpoint, IntrospectEndpoint, TokenEndpoint, :param kwargs: Extra parameters to pass to authorization-, token-, resource-, and revocation-endpoint constructors. """ - auth_grant = OAuth2AuthorizationCodeGrant(request_validator) - implicit_grant = OAuth2ImplicitGrant(request_validator) - password_grant = ResourceOwnerPasswordCredentialsGrant( + self.auth_grant = OAuth2AuthorizationCodeGrant(request_validator) + self.implicit_grant = OAuth2ImplicitGrant(request_validator) + self.password_grant = ResourceOwnerPasswordCredentialsGrant( request_validator) - credentials_grant = ClientCredentialsGrant(request_validator) - refresh_grant = RefreshTokenGrant(request_validator) - openid_connect_auth = AuthorizationCodeGrant(request_validator) - openid_connect_implicit = ImplicitGrant(request_validator) - openid_connect_hybrid = HybridGrant(request_validator) + self.credentials_grant = ClientCredentialsGrant(request_validator) + self.refresh_grant = RefreshTokenGrant(request_validator) + self.openid_connect_auth = AuthorizationCodeGrant(request_validator) + self.openid_connect_implicit = ImplicitGrant(request_validator) + self.openid_connect_hybrid = HybridGrant(request_validator) - bearer = BearerToken(request_validator, token_generator, + self.bearer = BearerToken(request_validator, token_generator, token_expires_in, refresh_token_generator) - jwt = JWTToken(request_validator, token_generator, + self.jwt = JWTToken(request_validator, token_generator, token_expires_in, refresh_token_generator) - auth_grant_choice = AuthorizationCodeGrantDispatcher(default_grant=auth_grant, oidc_grant=openid_connect_auth) - implicit_grant_choice = ImplicitTokenGrantDispatcher(default_grant=implicit_grant, oidc_grant=openid_connect_implicit) + self.auth_grant_choice = AuthorizationCodeGrantDispatcher(default_grant=self.auth_grant, oidc_grant=self.openid_connect_auth) + self.implicit_grant_choice = ImplicitTokenGrantDispatcher(default_grant=self.implicit_grant, oidc_grant=self.openid_connect_implicit) # See http://openid.net/specs/oauth-v2-multiple-response-types-1_0.html#Combinations for valid combinations # internally our AuthorizationEndpoint will ensure they can appear in any order for any valid combination AuthorizationEndpoint.__init__(self, default_response_type='code', response_types={ - 'code': auth_grant_choice, - 'token': implicit_grant_choice, - 'id_token': openid_connect_implicit, - 'id_token token': openid_connect_implicit, - 'code token': openid_connect_hybrid, - 'code id_token': openid_connect_hybrid, - 'code id_token token': openid_connect_hybrid, - 'none': auth_grant + 'code': self.auth_grant_choice, + 'token': self.implicit_grant_choice, + 'id_token': self.openid_connect_implicit, + 'id_token token': self.openid_connect_implicit, + 'code token': self.openid_connect_hybrid, + 'code id_token': self.openid_connect_hybrid, + 'code id_token token': self.openid_connect_hybrid, + 'none': self.auth_grant }, - default_token_type=bearer) + default_token_type=self.bearer) - token_grant_choice = AuthorizationTokenGrantDispatcher(request_validator, default_grant=auth_grant, oidc_grant=openid_connect_auth) + self.token_grant_choice = AuthorizationTokenGrantDispatcher(request_validator, default_grant=self.auth_grant, oidc_grant=self.openid_connect_auth) TokenEndpoint.__init__(self, default_grant_type='authorization_code', grant_types={ - 'authorization_code': token_grant_choice, - 'password': password_grant, - 'client_credentials': credentials_grant, - 'refresh_token': refresh_grant, + 'authorization_code': self.token_grant_choice, + 'password': self.password_grant, + 'client_credentials': self.credentials_grant, + 'refresh_token': self.refresh_grant, }, - default_token_type=bearer) + default_token_type=self.bearer) ResourceEndpoint.__init__(self, default_token='Bearer', - token_types={'Bearer': bearer, 'JWT': jwt}) + token_types={'Bearer': self.bearer, 'JWT': self.jwt}) RevocationEndpoint.__init__(self, request_validator) IntrospectEndpoint.__init__(self, request_validator) UserInfoEndpoint.__init__(self, request_validator) diff --git a/libs/common/oauthlib/openid/connect/core/endpoints/userinfo.py b/libs/common/oauthlib/openid/connect/core/endpoints/userinfo.py index 7a39f76b..7aa2bbe9 100644 --- a/libs/common/oauthlib/openid/connect/core/endpoints/userinfo.py +++ b/libs/common/oauthlib/openid/connect/core/endpoints/userinfo.py @@ -4,18 +4,15 @@ oauthlib.openid.connect.core.endpoints.userinfo This module is an implementation of userinfo endpoint. """ -from __future__ import absolute_import, unicode_literals - import json import logging from oauthlib.common import Request -from oauthlib.common import unicode_type -from oauthlib.oauth2.rfc6749.endpoints.base import BaseEndpoint -from oauthlib.oauth2.rfc6749.endpoints.base import catch_errors_and_unavailability -from oauthlib.oauth2.rfc6749.tokens import BearerToken from oauthlib.oauth2.rfc6749 import errors - +from oauthlib.oauth2.rfc6749.endpoints.base import ( + BaseEndpoint, catch_errors_and_unavailability, +) +from oauthlib.oauth2.rfc6749.tokens import BearerToken log = logging.getLogger(__name__) @@ -55,7 +52,7 @@ class UserInfoEndpoint(BaseEndpoint): log.error('Userinfo MUST have "sub" for %r.', request) raise errors.ServerError(status_code=500) body = json.dumps(claims) - elif isinstance(claims, unicode_type): + elif isinstance(claims, str): resp_headers = { 'Content-Type': 'application/jwt' } @@ -72,7 +69,7 @@ class UserInfoEndpoint(BaseEndpoint): 5.3.1. UserInfo Request The Client sends the UserInfo Request using either HTTP GET or HTTP POST. The Access Token obtained from an OpenID Connect Authentication - Request MUST be sent as a Bearer Token, per Section 2 of OAuth 2.0 + Request MUST be sent as a Bearer Token, per `Section 2`_ of OAuth 2.0 Bearer Token Usage [RFC6750]. It is RECOMMENDED that the request use the HTTP GET method and the @@ -80,21 +77,28 @@ class UserInfoEndpoint(BaseEndpoint): The following is a non-normative example of a UserInfo Request: - GET /userinfo HTTP/1.1 - Host: server.example.com - Authorization: Bearer SlAV32hkKG + .. code-block:: http + + GET /userinfo HTTP/1.1 + Host: server.example.com + Authorization: Bearer SlAV32hkKG 5.3.3. UserInfo Error Response When an error condition occurs, the UserInfo Endpoint returns an Error - Response as defined in Section 3 of OAuth 2.0 Bearer Token Usage + Response as defined in `Section 3`_ of OAuth 2.0 Bearer Token Usage [RFC6750]. (HTTP errors unrelated to RFC 6750 are returned to the User Agent using the appropriate HTTP status code.) The following is a non-normative example of a UserInfo Error Response: - HTTP/1.1 401 Unauthorized - WWW-Authenticate: Bearer error="invalid_token", + .. code-block:: http + + HTTP/1.1 401 Unauthorized + WWW-Authenticate: Bearer error="invalid_token", error_description="The Access Token expired" + + .. _`Section 2`: https://datatracker.ietf.org/doc/html/rfc6750#section-2 + .. _`Section 3`: https://datatracker.ietf.org/doc/html/rfc6750#section-3 """ if not self.bearer.validate_request(request): raise errors.InvalidTokenError() diff --git a/libs/common/oauthlib/openid/connect/core/exceptions.py b/libs/common/oauthlib/openid/connect/core/exceptions.py index 8b08d21f..099b84e2 100644 --- a/libs/common/oauthlib/openid/connect/core/exceptions.py +++ b/libs/common/oauthlib/openid/connect/core/exceptions.py @@ -1,4 +1,3 @@ -# coding=utf-8 """ oauthlib.oauth2.rfc6749.errors ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -6,8 +5,6 @@ oauthlib.oauth2.rfc6749.errors Error used both by OAuth 2 clients and providers to represent the spec defined error responses for all four core grant types. """ -from __future__ import unicode_literals - from oauthlib.oauth2.rfc6749.errors import FatalClientError, OAuth2Error diff --git a/libs/common/oauthlib/openid/connect/core/grant_types/__init__.py b/libs/common/oauthlib/openid/connect/core/grant_types/__init__.py index 63f30ac9..8dad5f60 100644 --- a/libs/common/oauthlib/openid/connect/core/grant_types/__init__.py +++ b/libs/common/oauthlib/openid/connect/core/grant_types/__init__.py @@ -1,17 +1,13 @@ -# -*- coding: utf-8 -*- """ oauthlib.openid.connect.core.grant_types ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ """ -from __future__ import unicode_literals, absolute_import - from .authorization_code import AuthorizationCodeGrant -from .implicit import ImplicitGrant from .base import GrantTypeBase -from .hybrid import HybridGrant -from .exceptions import OIDCNoPrompt from .dispatchers import ( - AuthorizationCodeGrantDispatcher, + AuthorizationCodeGrantDispatcher, AuthorizationTokenGrantDispatcher, ImplicitTokenGrantDispatcher, - AuthorizationTokenGrantDispatcher ) +from .hybrid import HybridGrant +from .implicit import ImplicitGrant +from .refresh_token import RefreshTokenGrant diff --git a/libs/common/oauthlib/openid/connect/core/grant_types/authorization_code.py b/libs/common/oauthlib/openid/connect/core/grant_types/authorization_code.py index becfcfab..6b2dcc3b 100644 --- a/libs/common/oauthlib/openid/connect/core/grant_types/authorization_code.py +++ b/libs/common/oauthlib/openid/connect/core/grant_types/authorization_code.py @@ -1,13 +1,12 @@ -# -*- coding: utf-8 -*- """ oauthlib.openid.connect.core.grant_types ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ """ -from __future__ import absolute_import, unicode_literals - import logging -from oauthlib.oauth2.rfc6749.grant_types.authorization_code import AuthorizationCodeGrant as OAuth2AuthorizationCodeGrant +from oauthlib.oauth2.rfc6749.grant_types.authorization_code import ( + AuthorizationCodeGrant as OAuth2AuthorizationCodeGrant, +) from .base import GrantTypeBase @@ -41,4 +40,4 @@ class AuthorizationCodeGrant(GrantTypeBase): request.redirect_uri, request ) - return super(AuthorizationCodeGrant, self).add_id_token(token, token_handler, request, nonce=nonce) + return super().add_id_token(token, token_handler, request, nonce=nonce) diff --git a/libs/common/oauthlib/openid/connect/core/grant_types/base.py b/libs/common/oauthlib/openid/connect/core/grant_types/base.py index 32a21b62..33411dad 100644 --- a/libs/common/oauthlib/openid/connect/core/grant_types/base.py +++ b/libs/common/oauthlib/openid/connect/core/grant_types/base.py @@ -1,17 +1,17 @@ -from .exceptions import OIDCNoPrompt - import base64 import hashlib import logging import time from json import loads -from oauthlib.oauth2.rfc6749.errors import ConsentRequired, InvalidRequestError, LoginRequired +from oauthlib.oauth2.rfc6749.errors import ( + ConsentRequired, InvalidRequestError, LoginRequired, +) log = logging.getLogger(__name__) -class GrantTypeBase(object): +class GrantTypeBase: # Just proxy the majority of method calls through to the # proxy_target grant type handler, which will usually be either @@ -20,7 +20,7 @@ class GrantTypeBase(object): return getattr(self.proxy_target, attr) def __setattr__(self, attr, value): - proxied_attrs = set(('refresh_token', 'response_types')) + proxied_attrs = {'refresh_token', 'response_types'} if attr in proxied_attrs: setattr(self.proxy_target, attr, value) else: @@ -31,13 +31,7 @@ class GrantTypeBase(object): :returns: (list of scopes, dict of request info) """ - # If request.prompt is 'none' then no login/authorization form should - # be presented to the user. Instead, a silent login/authorization - # should be performed. - if request.prompt == 'none': - raise OIDCNoPrompt() - else: - return self.proxy_target.validate_authorization_request(request) + return self.proxy_target.validate_authorization_request(request) def _inflate_claims(self, request): # this may be called multiple times in a single request so make sure we only de-serialize the claims once diff --git a/libs/common/oauthlib/openid/connect/core/grant_types/dispatchers.py b/libs/common/oauthlib/openid/connect/core/grant_types/dispatchers.py index be8e2f3c..5aa7d469 100644 --- a/libs/common/oauthlib/openid/connect/core/grant_types/dispatchers.py +++ b/libs/common/oauthlib/openid/connect/core/grant_types/dispatchers.py @@ -1,16 +1,19 @@ import logging + log = logging.getLogger(__name__) -class Dispatcher(object): +class Dispatcher: default_grant = None oidc_grant = None class AuthorizationCodeGrantDispatcher(Dispatcher): """ - This is an adapter class that will route simple Authorization Code requests, those that have response_type=code and a scope - including 'openid' to either the default_grant or the oidc_grant based on the scopes requested. + This is an adapter class that will route simple Authorization Code + requests, those that have `response_type=code` and a scope including + `openid` to either the `default_grant` or the `oidc_grant` based on + the scopes requested. """ def __init__(self, default_grant=None, oidc_grant=None): self.default_grant = default_grant @@ -26,16 +29,20 @@ class AuthorizationCodeGrantDispatcher(Dispatcher): return handler def create_authorization_response(self, request, token_handler): + """Read scope and route to the designated handler.""" return self._handler_for_request(request).create_authorization_response(request, token_handler) def validate_authorization_request(self, request): + """Read scope and route to the designated handler.""" return self._handler_for_request(request).validate_authorization_request(request) class ImplicitTokenGrantDispatcher(Dispatcher): """ - This is an adapter class that will route simple Authorization Code requests, those that have response_type=code and a scope - including 'openid' to either the default_grant or the oidc_grant based on the scopes requested. + This is an adapter class that will route simple Authorization + requests, those that have `id_token` in `response_type` and a scope + including `openid` to either the `default_grant` or the `oidc_grant` + based on the scopes requested. """ def __init__(self, default_grant=None, oidc_grant=None): self.default_grant = default_grant @@ -51,9 +58,11 @@ class ImplicitTokenGrantDispatcher(Dispatcher): return handler def create_authorization_response(self, request, token_handler): + """Read scope and route to the designated handler.""" return self._handler_for_request(request).create_authorization_response(request, token_handler) def validate_authorization_request(self, request): + """Read scope and route to the designated handler.""" return self._handler_for_request(request).validate_authorization_request(request) @@ -75,7 +84,7 @@ class AuthorizationTokenGrantDispatcher(Dispatcher): code = parameters.get('code', None) redirect_uri = parameters.get('redirect_uri', None) - # If code is not pressent fallback to `default_grant` wich will + # If code is not present fallback to `default_grant` which will # raise an error for the missing `code` in `create_token_response` step. if code: scopes = self.request_validator.get_authorization_code_scopes(client_id, code, redirect_uri, request) @@ -87,5 +96,6 @@ class AuthorizationTokenGrantDispatcher(Dispatcher): return handler def create_token_response(self, request, token_handler): + """Read scope and route to the designated handler.""" handler = self._handler_for_request(request) return handler.create_token_response(request, token_handler) diff --git a/libs/common/oauthlib/openid/connect/core/grant_types/exceptions.py b/libs/common/oauthlib/openid/connect/core/grant_types/exceptions.py deleted file mode 100644 index 809f1b3b..00000000 --- a/libs/common/oauthlib/openid/connect/core/grant_types/exceptions.py +++ /dev/null @@ -1,32 +0,0 @@ -class OIDCNoPrompt(Exception): - """Exception used to inform users that no explicit authorization is needed. - - Normally users authorize requests after validation of the request is done. - Then post-authorization validation is again made and a response containing - an auth code or token is created. However, when OIDC clients request - no prompting of user authorization the final response is created directly. - - Example (without the shortcut for no prompt) - - scopes, req_info = endpoint.validate_authorization_request(url, ...) - authorization_view = create_fancy_auth_form(scopes, req_info) - return authorization_view - - Example (with the no prompt shortcut) - try: - scopes, req_info = endpoint.validate_authorization_request(url, ...) - authorization_view = create_fancy_auth_form(scopes, req_info) - return authorization_view - except OIDCNoPrompt: - # Note: Location will be set for you - headers, body, status = endpoint.create_authorization_response(url, ...) - redirect_view = create_redirect(headers, body, status) - return redirect_view - """ - - def __init__(self): - msg = ("OIDC request for no user interaction received. Do not ask user " - "for authorization, it should been done using silent " - "authentication through create_authorization_response. " - "See OIDCNoPrompt.__doc__ for more details.") - super(OIDCNoPrompt, self).__init__(msg) diff --git a/libs/common/oauthlib/openid/connect/core/grant_types/hybrid.py b/libs/common/oauthlib/openid/connect/core/grant_types/hybrid.py index 685fa08a..7cb0758b 100644 --- a/libs/common/oauthlib/openid/connect/core/grant_types/hybrid.py +++ b/libs/common/oauthlib/openid/connect/core/grant_types/hybrid.py @@ -1,17 +1,16 @@ -# -*- coding: utf-8 -*- """ oauthlib.openid.connect.core.grant_types ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ """ -from __future__ import absolute_import, unicode_literals - import logging -from oauthlib.oauth2.rfc6749.grant_types.authorization_code import AuthorizationCodeGrant as OAuth2AuthorizationCodeGrant from oauthlib.oauth2.rfc6749.errors import InvalidRequestError +from oauthlib.oauth2.rfc6749.grant_types.authorization_code import ( + AuthorizationCodeGrant as OAuth2AuthorizationCodeGrant, +) -from .base import GrantTypeBase from ..request_validator import RequestValidator +from .base import GrantTypeBase log = logging.getLogger(__name__) @@ -36,10 +35,13 @@ class HybridGrant(GrantTypeBase): self.register_code_modifier(self.add_id_token) self.register_token_modifier(self.add_id_token) + def add_id_token(self, token, token_handler, request): + return super().add_id_token(token, token_handler, request, nonce=request.nonce) + def openid_authorization_validator(self, request): """Additional validation when following the Authorization Code flow. """ - request_info = super(HybridGrant, self).openid_authorization_validator(request) + request_info = super().openid_authorization_validator(request) if not request_info: # returns immediately if OAuth2.0 return request_info diff --git a/libs/common/oauthlib/openid/connect/core/grant_types/implicit.py b/libs/common/oauthlib/openid/connect/core/grant_types/implicit.py index c2dbc278..a4fe6049 100644 --- a/libs/common/oauthlib/openid/connect/core/grant_types/implicit.py +++ b/libs/common/oauthlib/openid/connect/core/grant_types/implicit.py @@ -1,16 +1,15 @@ -# -*- coding: utf-8 -*- """ oauthlib.openid.connect.core.grant_types ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ """ -from __future__ import absolute_import, unicode_literals - import logging -from .base import GrantTypeBase - -from oauthlib.oauth2.rfc6749.grant_types.implicit import ImplicitGrant as OAuth2ImplicitGrant from oauthlib.oauth2.rfc6749.errors import InvalidRequestError +from oauthlib.oauth2.rfc6749.grant_types.implicit import ( + ImplicitGrant as OAuth2ImplicitGrant, +) + +from .base import GrantTypeBase log = logging.getLogger(__name__) @@ -29,12 +28,12 @@ class ImplicitGrant(GrantTypeBase): def add_id_token(self, token, token_handler, request): if 'state' not in token and request.state: token['state'] = request.state - return super(ImplicitGrant, self).add_id_token(token, token_handler, request, nonce=request.nonce) + return super().add_id_token(token, token_handler, request, nonce=request.nonce) def openid_authorization_validator(self, request): """Additional validation when following the implicit flow. """ - request_info = super(ImplicitGrant, self).openid_authorization_validator(request) + request_info = super().openid_authorization_validator(request) if not request_info: # returns immediately if OAuth2.0 return request_info diff --git a/libs/common/oauthlib/openid/connect/core/grant_types/refresh_token.py b/libs/common/oauthlib/openid/connect/core/grant_types/refresh_token.py new file mode 100644 index 00000000..43e4499c --- /dev/null +++ b/libs/common/oauthlib/openid/connect/core/grant_types/refresh_token.py @@ -0,0 +1,34 @@ +""" +oauthlib.openid.connect.core.grant_types +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +""" +import logging + +from oauthlib.oauth2.rfc6749.grant_types.refresh_token import ( + RefreshTokenGrant as OAuth2RefreshTokenGrant, +) + +from .base import GrantTypeBase + +log = logging.getLogger(__name__) + + +class RefreshTokenGrant(GrantTypeBase): + + def __init__(self, request_validator=None, **kwargs): + self.proxy_target = OAuth2RefreshTokenGrant( + request_validator=request_validator, **kwargs) + self.register_token_modifier(self.add_id_token) + + def add_id_token(self, token, token_handler, request): + """ + Construct an initial version of id_token, and let the + request_validator sign or encrypt it. + + The authorization_code version of this method is used to + retrieve the nonce accordingly to the code storage. + """ + if not self.request_validator.refresh_id_token(request): + return token + + return super().add_id_token(token, token_handler, request) diff --git a/libs/common/oauthlib/openid/connect/core/request_validator.py b/libs/common/oauthlib/openid/connect/core/request_validator.py index e853d39c..47c4cd94 100644 --- a/libs/common/oauthlib/openid/connect/core/request_validator.py +++ b/libs/common/oauthlib/openid/connect/core/request_validator.py @@ -1,13 +1,12 @@ -# -*- coding: utf-8 -*- """ oauthlib.openid.connect.core.request_validator ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ """ -from __future__ import absolute_import, unicode_literals - import logging -from oauthlib.oauth2.rfc6749.request_validator import RequestValidator as OAuth2RequestValidator +from oauthlib.oauth2.rfc6749.request_validator import ( + RequestValidator as OAuth2RequestValidator, +) log = logging.getLogger(__name__) @@ -307,3 +306,15 @@ class RequestValidator(OAuth2RequestValidator): Method is used by: UserInfoEndpoint """ + + def refresh_id_token(self, request): + """Whether the id token should be refreshed. Default, True + + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :rtype: True or False + + Method is used by: + RefreshTokenGrant + """ + return True diff --git a/libs/common/oauthlib/openid/connect/core/tokens.py b/libs/common/oauthlib/openid/connect/core/tokens.py index b67cdf2a..936ab52e 100644 --- a/libs/common/oauthlib/openid/connect/core/tokens.py +++ b/libs/common/oauthlib/openid/connect/core/tokens.py @@ -4,10 +4,9 @@ authlib.openid.connect.core.tokens This module contains methods for adding JWT tokens to requests. """ -from __future__ import absolute_import, unicode_literals - - -from oauthlib.oauth2.rfc6749.tokens import TokenBase, random_token_generator +from oauthlib.oauth2.rfc6749.tokens import ( + TokenBase, get_token_from_header, random_token_generator, +) class JWTToken(TokenBase): @@ -38,17 +37,12 @@ class JWTToken(TokenBase): return self.request_validator.get_jwt_bearer_token(None, None, request) def validate_request(self, request): - token = None - if 'Authorization' in request.headers: - token = request.headers.get('Authorization')[7:] - else: - token = request.access_token + token = get_token_from_header(request) return self.request_validator.validate_jwt_bearer_token( token, request.scopes, request) def estimate_type(self, request): - token = request.headers.get('Authorization', '')[7:] - if token.startswith('ey') and token.count('.') in (2, 4): + token = get_token_from_header(request) + if token and token.startswith('ey') and token.count('.') in (2, 4): return 10 - else: - return 0 + return 0 diff --git a/libs/common/oauthlib/signals.py b/libs/common/oauthlib/signals.py index 22d47a41..8fd347a5 100644 --- a/libs/common/oauthlib/signals.py +++ b/libs/common/oauthlib/signals.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ Implements signals based on blinker if available, otherwise falls silently back to a noop. Shamelessly stolen from flask.signals: @@ -9,11 +8,11 @@ try: from blinker import Namespace signals_available = True except ImportError: # noqa - class Namespace(object): + class Namespace: def signal(self, name, doc=None): return _FakeSignal(name, doc) - class _FakeSignal(object): + class _FakeSignal: """If blinker is unavailable, create a fake class with the same interface that allows sending of signals but will fail with an error on anything else. Instead of doing anything on send, it diff --git a/libs/common/oauthlib/uri_validate.py b/libs/common/oauthlib/uri_validate.py index ce8ea402..a6fe0fb2 100644 --- a/libs/common/oauthlib/uri_validate.py +++ b/libs/common/oauthlib/uri_validate.py @@ -8,8 +8,6 @@ They should be processed with re.VERBOSE. Thanks Mark Nottingham for this code - https://gist.github.com/138549 """ -from __future__ import unicode_literals - import re # basics @@ -67,32 +65,8 @@ dec_octet = r"""(?: %(DIGIT)s | IPv4address = r"%(dec_octet)s \. %(dec_octet)s \. %(dec_octet)s \. %(dec_octet)s" % locals( ) -# h16 = 1*4HEXDIG -h16 = r"(?: %(HEXDIG)s ){1,4}" % locals() - -# ls32 = ( h16 ":" h16 ) / IPv4address -ls32 = r"(?: (?: %(h16)s : %(h16)s ) | %(IPv4address)s )" % locals() - -# IPv6address = 6( h16 ":" ) ls32 -# / "::" 5( h16 ":" ) ls32 -# / [ h16 ] "::" 4( h16 ":" ) ls32 -# / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 -# / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 -# / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 -# / [ *4( h16 ":" ) h16 ] "::" ls32 -# / [ *5( h16 ":" ) h16 ] "::" h16 -# / [ *6( h16 ":" ) h16 ] "::" -IPv6address = r"""(?: (?: %(h16)s : ){6} %(ls32)s | - :: (?: %(h16)s : ){5} %(ls32)s | - %(h16)s :: (?: %(h16)s : ){4} %(ls32)s | - (?: %(h16)s : ) %(h16)s :: (?: %(h16)s : ){3} %(ls32)s | - (?: %(h16)s : ){2} %(h16)s :: (?: %(h16)s : ){2} %(ls32)s | - (?: %(h16)s : ){3} %(h16)s :: %(h16)s : %(ls32)s | - (?: %(h16)s : ){4} %(h16)s :: %(ls32)s | - (?: %(h16)s : ){5} %(h16)s :: %(h16)s | - (?: %(h16)s : ){6} %(h16)s :: - ) -""" % locals() +# IPv6address +IPv6address = r"([A-Fa-f0-9:]+[:$])[A-Fa-f0-9]{1,4}" # IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) IPvFuture = r"v %(HEXDIG)s+ \. (?: %(unreserved)s | %(sub_delims)s | : )+" % locals() diff --git a/libs/common/pkg_resources/__init__.py b/libs/common/pkg_resources/__init__.py index 2f5aa64a..d59226af 100644 --- a/libs/common/pkg_resources/__init__.py +++ b/libs/common/pkg_resources/__init__.py @@ -1,4 +1,3 @@ -# coding: utf-8 """ Package resource API -------------------- @@ -15,8 +14,6 @@ The package resource API is designed to work with normal filesystem packages, method. """ -from __future__ import absolute_import - import sys import os import io @@ -41,6 +38,7 @@ import itertools import inspect import ntpath import posixpath +import importlib from pkgutil import get_importer try: @@ -54,9 +52,6 @@ try: except NameError: FileExistsError = OSError -from pkg_resources.extern import six -from pkg_resources.extern.six.moves import urllib, map, filter - # capture these to bypass sandboxing from os import utime try: @@ -76,26 +71,23 @@ try: except ImportError: importlib_machinery = None -from . import py31compat +from pkg_resources.extern.jaraco.text import ( + yield_lines, + drop_comment, + join_continuation, +) + from pkg_resources.extern import appdirs from pkg_resources.extern import packaging __import__('pkg_resources.extern.packaging.version') __import__('pkg_resources.extern.packaging.specifiers') __import__('pkg_resources.extern.packaging.requirements') __import__('pkg_resources.extern.packaging.markers') +__import__('pkg_resources.extern.packaging.utils') - -__metaclass__ = type - - -if (3, 0) < sys.version_info < (3, 5): +if sys.version_info < (3, 5): raise RuntimeError("Python 3.5 or later is required") -if six.PY2: - # Those builtin exceptions are only defined in Python 3 - PermissionError = None - NotADirectoryError = None - # declare some globals that will be defined later to # satisfy the linters. require = None @@ -128,6 +120,11 @@ def parse_version(v): try: return packaging.version.Version(v) except packaging.version.InvalidVersion: + warnings.warn( + f"{v} is an invalid version and will not be supported in " + "a future release", + PkgResourcesDeprecationWarning, + ) return packaging.version.LegacyVersion(v) @@ -178,10 +175,10 @@ def get_supported_platform(): """Return this platform's maximum compatible version. distutils.util.get_platform() normally reports the minimum version - of Mac OS X that would be required to *use* extensions produced by + of macOS that would be required to *use* extensions produced by distutils. But what we want when checking compatibility is to know the - version of Mac OS X that we are *running*. To allow usage of packages that - explicitly require a newer version of Mac OS X, we must also know the + version of macOS that we are *running*. To allow usage of packages that + explicitly require a newer version of macOS, we must also know the current version of the OS. If this condition occurs for any other platform with a version in its @@ -191,9 +188,9 @@ def get_supported_platform(): m = macosVersionString.match(plat) if m is not None and sys.platform == "darwin": try: - plat = 'macosx-%s-%s' % ('.'.join(_macosx_vers()[:2]), m.group(3)) + plat = 'macosx-%s-%s' % ('.'.join(_macos_vers()[:2]), m.group(3)) except ValueError: - # not Mac OS X + # not macOS pass return plat @@ -364,7 +361,7 @@ def get_provider(moduleOrReq): return _find_adapter(_provider_factories, loader)(module) -def _macosx_vers(_cache=[]): +def _macos_vers(_cache=[]): if not _cache: version = platform.mac_ver()[0] # fallback for MacPorts @@ -380,7 +377,7 @@ def _macosx_vers(_cache=[]): return _cache[0] -def _macosx_arch(machine): +def _macos_arch(machine): return {'PowerPC': 'ppc', 'Power_Macintosh': 'ppc'}.get(machine, machine) @@ -388,18 +385,18 @@ def get_build_platform(): """Return this platform's string for platform-specific distributions XXX Currently this is the same as ``distutils.util.get_platform()``, but it - needs some hacks for Linux and Mac OS X. + needs some hacks for Linux and macOS. """ from sysconfig import get_platform plat = get_platform() if sys.platform == "darwin" and not plat.startswith('macosx-'): try: - version = _macosx_vers() + version = _macos_vers() machine = os.uname()[4].replace(" ", "_") return "macosx-%d.%d-%s" % ( int(version[0]), int(version[1]), - _macosx_arch(machine), + _macos_arch(machine), ) except ValueError: # if someone is running a non-Mac darwin system, this will fall @@ -425,7 +422,7 @@ def compatible_platforms(provided, required): # easy case return True - # Mac OS X special cases + # macOS special cases reqMac = macosVersionString.match(required) if reqMac: provMac = macosVersionString.match(provided) @@ -434,7 +431,7 @@ def compatible_platforms(provided, required): if not provMac: # this is backwards compatibility for packages built before # setuptools 0.6. All packages built after this point will - # use the new macosx designation. + # use the new macOS designation. provDarwin = darwinVersionString.match(provided) if provDarwin: dversion = int(provDarwin.group(1)) @@ -442,7 +439,7 @@ def compatible_platforms(provided, required): if dversion == 7 and macosversion >= "10.3" or \ dversion == 8 and macosversion >= "10.4": return True - # egg isn't macosx or legacy darwin + # egg isn't macOS or legacy darwin return False # are they the same major version and machine type? @@ -475,7 +472,7 @@ run_main = run_script def get_distribution(dist): """Return a current distribution object for a Requirement or string""" - if isinstance(dist, six.string_types): + if isinstance(dist, str): dist = Requirement.parse(dist) if isinstance(dist, Requirement): dist = get_provider(dist) @@ -558,6 +555,7 @@ class WorkingSet: self.entries = [] self.entry_keys = {} self.by_key = {} + self.normalized_to_canonical_keys = {} self.callbacks = [] if entries is None: @@ -638,6 +636,14 @@ class WorkingSet: is returned. """ dist = self.by_key.get(req.key) + + if dist is None: + canonical_key = self.normalized_to_canonical_keys.get(req.key) + + if canonical_key is not None: + req.key = canonical_key + dist = self.by_key.get(canonical_key) + if dist is not None and dist not in req: # XXX add more info raise VersionConflict(dist, req) @@ -706,13 +712,16 @@ class WorkingSet: return self.by_key[dist.key] = dist + normalized_name = packaging.utils.canonicalize_name(dist.key) + self.normalized_to_canonical_keys[normalized_name] = dist.key if dist.key not in keys: keys.append(dist.key) if dist.key not in keys2: keys2.append(dist.key) self._added_new(dist) - def resolve(self, requirements, env=None, installer=None, + # FIXME: 'WorkingSet.resolve' is too complex (11) + def resolve(self, requirements, env=None, installer=None, # noqa: C901 replace_conflicting=False, extras=None): """List all distributions needed to (recursively) meet `requirements` @@ -925,14 +934,15 @@ class WorkingSet: def __getstate__(self): return ( self.entries[:], self.entry_keys.copy(), self.by_key.copy(), - self.callbacks[:] + self.normalized_to_canonical_keys.copy(), self.callbacks[:] ) - def __setstate__(self, e_k_b_c): - entries, keys, by_key, callbacks = e_k_b_c + def __setstate__(self, e_k_b_n_c): + entries, keys, by_key, normalized_to_canonical_keys, callbacks = e_k_b_n_c self.entries = entries[:] self.entry_keys = keys.copy() self.by_key = by_key.copy() + self.normalized_to_canonical_keys = normalized_to_canonical_keys.copy() self.callbacks = callbacks[:] @@ -1234,12 +1244,13 @@ class ResourceManager: mode = os.stat(path).st_mode if mode & stat.S_IWOTH or mode & stat.S_IWGRP: msg = ( - "%s is writable by group/others and vulnerable to attack " - "when " - "used with get_resource_filename. Consider a more secure " + "Extraction path is writable by group/others " + "and vulnerable to attack when " + "used with get_resource_filename ({path}). " + "Consider a more secure " "location (set with .set_extraction_path or the " - "PYTHON_EGG_CACHE environment variable)." % path - ) + "PYTHON_EGG_CACHE environment variable)." + ).format(**locals()) warnings.warn(msg, UserWarning) def postprocess(self, tempname, filename): @@ -1377,7 +1388,7 @@ def evaluate_marker(text, extra=None): marker = packaging.markers.Marker(text) return marker.evaluate() except packaging.markers.InvalidMarker as e: - raise SyntaxError(e) + raise SyntaxError(e) from e class NullProvider: @@ -1418,8 +1429,6 @@ class NullProvider: return "" path = self._get_metadata_path(name) value = self._get(path) - if six.PY2: - return value try: return value.decode('utf-8') except UnicodeDecodeError as exc: @@ -1457,7 +1466,8 @@ class NullProvider: script_filename = self._fn(self.egg_info, script) namespace['__file__'] = script_filename if os.path.exists(script_filename): - source = open(script_filename).read() + with open(script_filename) as fid: + source = fid.read() code = compile(source, script_filename, 'exec') exec(code, namespace, namespace) else: @@ -1493,7 +1503,7 @@ class NullProvider: def _validate_resource_path(path): """ Validate the resource paths according to the docs. - https://setuptools.readthedocs.io/en/latest/pkg_resources.html#basic-resource-access + https://setuptools.pypa.io/en/latest/pkg_resources.html#basic-resource-access >>> warned = getfixture('recwarn') >>> warnings.simplefilter('always') @@ -1575,26 +1585,35 @@ is not allowed. register_loader_type(object, NullProvider) +def _parents(path): + """ + yield all parents of path including path + """ + last = None + while path != last: + yield path + last = path + path, _ = os.path.split(path) + + class EggProvider(NullProvider): """Provider based on a virtual filesystem""" def __init__(self, module): - NullProvider.__init__(self, module) + super().__init__(module) self._setup_prefix() def _setup_prefix(self): - # we assume here that our metadata may be nested inside a "basket" - # of multiple eggs; that's why we use module_path instead of .archive - path = self.module_path - old = None - while path != old: - if _is_egg_path(path): - self.egg_name = os.path.basename(path) - self.egg_info = os.path.join(path, 'EGG-INFO') - self.egg_root = path - break - old = path - path, base = os.path.split(path) + # Assume that metadata may be nested inside a "basket" + # of multiple eggs and use module_path instead of .archive. + eggs = filter(_is_egg_path, _parents(self.module_path)) + egg = next(eggs, None) + egg and self._set_egg(egg) + + def _set_egg(self, path): + self.egg_name = os.path.basename(path) + self.egg_info = os.path.join(path, 'EGG-INFO') + self.egg_root = path class DefaultProvider(EggProvider): @@ -1701,7 +1720,7 @@ class ZipProvider(EggProvider): _zip_manifests = MemoizedZipManifests() def __init__(self, module): - EggProvider.__init__(self, module) + super().__init__(module) self.zip_pre = self.loader.archive + os.sep def _zipinfo_name(self, fspath): @@ -1752,7 +1771,8 @@ class ZipProvider(EggProvider): timestamp = time.mktime(date_time) return timestamp, size - def _extract_resource(self, manager, zip_path): + # FIXME: 'ZipProvider._extract_resource' is too complex (12) + def _extract_resource(self, manager, zip_path): # noqa: C901 if zip_path in self._index(): for name in self._index()[zip_path]: @@ -1900,8 +1920,7 @@ class FileMetadata(EmptyProvider): return metadata def _warn_on_replacement(self, metadata): - # Python 2.7 compat for: replacement_char = '�' - replacement_char = b'\xef\xbf\xbd'.decode('utf-8') + replacement_char = '�' if replacement_char in metadata: tmpl = "{self.path} could not be properly decoded in UTF-8" msg = tmpl.format(**locals()) @@ -1991,7 +2010,7 @@ def find_eggs_in_zip(importer, path_item, only=False): dists = find_eggs_in_zip(zipimport.zipimporter(subpath), subpath) for dist in dists: yield dist - elif subitem.lower().endswith('.dist-info'): + elif subitem.lower().endswith(('.dist-info', '.egg-info')): subpath = os.path.join(path_item, subitem) submeta = EggMetadata(zipimport.zipimporter(subpath)) submeta.egg_info = subpath @@ -2015,7 +2034,7 @@ def _by_version_descending(names): >>> names = 'bar', 'foo', 'Python-2.7.10.egg', 'Python-2.7.2.egg' >>> _by_version_descending(names) - ['Python-2.7.10.egg', 'Python-2.7.2.egg', 'foo', 'bar'] + ['Python-2.7.10.egg', 'Python-2.7.2.egg', 'bar', 'foo'] >>> names = 'Setuptools-1.2.3b1.egg', 'Setuptools-1.2.3.egg' >>> _by_version_descending(names) ['Setuptools-1.2.3.egg', 'Setuptools-1.2.3b1.egg'] @@ -2023,13 +2042,22 @@ def _by_version_descending(names): >>> _by_version_descending(names) ['Setuptools-1.2.3.post1.egg', 'Setuptools-1.2.3b1.egg'] """ + def try_parse(name): + """ + Attempt to parse as a version or return a null version. + """ + try: + return packaging.version.Version(name) + except Exception: + return packaging.version.Version('0') + def _by_version(name): """ Parse each component of the filename """ name, ext = os.path.splitext(name) parts = itertools.chain(name.split('-'), [ext]) - return [packaging.version.parse(part) for part in parts] + return [try_parse(part) for part in parts] return sorted(names, key=_by_version, reverse=True) @@ -2046,7 +2074,10 @@ def find_on_path(importer, path_item, only=False): ) return - entries = safe_listdir(path_item) + entries = ( + os.path.join(path_item, child) + for child in safe_listdir(path_item) + ) # for performance, before sorting by version, # screen entries for only those that will yield @@ -2067,11 +2098,14 @@ def find_on_path(importer, path_item, only=False): def dist_factory(path_item, entry, only): - """ - Return a dist_factory for a path_item and entry - """ + """Return a dist_factory for the given entry.""" lower = entry.lower() - is_meta = any(map(lower.endswith, ('.egg-info', '.dist-info'))) + is_egg_info = lower.endswith('.egg-info') + is_dist_info = ( + lower.endswith('.dist-info') and + os.path.isdir(os.path.join(path_item, entry)) + ) + is_meta = is_egg_info or is_dist_info return ( distributions_from_metadata if is_meta else @@ -2093,8 +2127,6 @@ class NoDists: """ def __bool__(self): return False - if six.PY2: - __nonzero__ = __bool__ def __call__(self, fullpath): return iter(()) @@ -2111,12 +2143,7 @@ def safe_listdir(path): except OSError as e: # Ignore the directory if does not exist, not a directory or # permission denied - ignorable = ( - e.errno in (errno.ENOTDIR, errno.EACCES, errno.ENOENT) - # Python 2 on Windows needs to be handled this way :( - or getattr(e, "winerror", None) == 267 - ) - if not ignorable: + if e.errno not in (errno.ENOTDIR, errno.EACCES, errno.ENOENT): raise return () @@ -2195,10 +2222,16 @@ def _handle_ns(packageName, path_item): if importer is None: return None - # capture warnings due to #1111 - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - loader = importer.find_module(packageName) + # use find_spec (PEP 451) and fall-back to find_module (PEP 302) + try: + spec = importer.find_spec(packageName) + except AttributeError: + # capture warnings due to #1111 + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + loader = importer.find_module(packageName) + else: + loader = spec.loader if spec else None if loader is None: return None @@ -2214,7 +2247,7 @@ def _handle_ns(packageName, path_item): if subpath is not None: path = module.__path__ path.append(subpath) - loader.load_module(packageName) + importlib.import_module(packageName) _rebuild_mod_path(path, packageName, module) return subpath @@ -2270,8 +2303,8 @@ def declare_namespace(packageName): __import__(parent) try: path = sys.modules[parent].__path__ - except AttributeError: - raise TypeError("Not a package:", parent) + except AttributeError as e: + raise TypeError("Not a package:", parent) from e # Track what packages are namespaces, so when new path items are added, # they can be updated @@ -2328,7 +2361,8 @@ register_namespace_handler(object, null_ns_handler) def normalize_path(filename): """Normalize a file/dir name for comparison purposes""" - return os.path.normcase(os.path.realpath(os.path.normpath(_cygwin_patch(filename)))) + return os.path.normcase(os.path.realpath(os.path.normpath( + _cygwin_patch(filename)))) def _cygwin_patch(filename): # pragma: nocover @@ -2354,7 +2388,15 @@ def _is_egg_path(path): """ Determine if given path appears to be an egg. """ - return path.lower().endswith('.egg') + return _is_zip_egg(path) or _is_unpacked_egg(path) + + +def _is_zip_egg(path): + return ( + path.lower().endswith('.egg') and + os.path.isfile(path) and + zipfile.is_zipfile(path) + ) def _is_unpacked_egg(path): @@ -2362,7 +2404,7 @@ def _is_unpacked_egg(path): Determine if given path appears to be an unpacked egg. """ return ( - _is_egg_path(path) and + path.lower().endswith('.egg') and os.path.isfile(os.path.join(path, 'EGG-INFO', 'PKG-INFO')) ) @@ -2375,20 +2417,6 @@ def _set_parent_ns(packageName): setattr(sys.modules[parent], name, sys.modules[packageName]) -def yield_lines(strs): - """Yield non-empty/non-comment lines of a string or sequence""" - if isinstance(strs, six.string_types): - for s in strs.splitlines(): - s = s.strip() - # skip blank lines/comments - if s and not s.startswith('#'): - yield s - else: - for ss in strs: - for s in yield_lines(ss): - yield s - - MODULE = re.compile(r"\w+(\.\w+)*$").match EGG_NAME = re.compile( r""" @@ -2450,7 +2478,7 @@ class EntryPoint: try: return functools.reduce(getattr, self.attrs, module) except AttributeError as exc: - raise ImportError(str(exc)) + raise ImportError(str(exc)) from exc def require(self, env=None, installer=None): if self.extras and not self.dist: @@ -2536,15 +2564,6 @@ class EntryPoint: return maps -def _remove_md5_fragment(location): - if not location: - return '' - parsed = urllib.parse.urlparse(location) - if parsed[-1].startswith('md5='): - return urllib.parse.urlunparse(parsed[:-1] + ('',)) - return location - - def _version_from_file(lines): """ Given an iterable of lines from a Metadata file, return @@ -2601,7 +2620,7 @@ class Distribution: self.parsed_version, self.precedence, self.key, - _remove_md5_fragment(self.location), + self.location, self.py_version or '', self.platform or '', ) @@ -2679,14 +2698,14 @@ class Distribution: def version(self): try: return self._version - except AttributeError: + except AttributeError as e: version = self._get_version() if version is None: path = self._get_metadata_path_for_display(self.PKG_INFO) msg = ( "Missing 'Version:' header and/or {} file at path: {}" ).format(self.PKG_INFO, path) - raise ValueError(msg, self) + raise ValueError(msg, self) from e return version @@ -2739,10 +2758,10 @@ class Distribution: for ext in extras: try: deps.extend(dm[safe_extra(ext)]) - except KeyError: + except KeyError as e: raise UnknownExtra( "%s has no such extra feature %r" % (self, ext) - ) + ) from e return deps def _get_metadata_path_for_display(self, name): @@ -2824,10 +2843,6 @@ class Distribution: ) ) - if not hasattr(object, '__dir__'): - # python 2.7 not supported - del __dir__ - @classmethod def from_filename(cls, filename, metadata=None, **kw): return cls.from_location( @@ -2867,7 +2882,8 @@ class Distribution: """Return the EntryPoint object for `group`+`name`, or ``None``""" return self.get_entry_map(group).get(name) - def insert_on(self, path, loc=None, replace=False): + # FIXME: 'Distribution.insert_on' is too complex (13) + def insert_on(self, path, loc=None, replace=False): # noqa: C901 """Ensure self.location is on path If replace=False (default): @@ -3037,12 +3053,12 @@ class DistInfoDistribution(Distribution): if not req.marker or req.marker.evaluate({'extra': extra}): yield req - common = frozenset(reqs_for_extra(None)) + common = types.MappingProxyType(dict.fromkeys(reqs_for_extra(None))) dm[None].extend(common) for extra in self._parsed_pkg_info.get_all('Provides-Extra') or []: s_extra = safe_extra(extra.strip()) - dm[s_extra] = list(frozenset(reqs_for_extra(extra)) - common) + dm[s_extra] = [r for r in reqs_for_extra(extra) if r not in common] return dm @@ -3067,40 +3083,23 @@ def issue_warning(*args, **kw): warnings.warn(stacklevel=level + 1, *args, **kw) -class RequirementParseError(ValueError): - def __str__(self): - return ' '.join(self.args) - - def parse_requirements(strs): - """Yield ``Requirement`` objects for each specification in `strs` + """ + Yield ``Requirement`` objects for each specification in `strs`. `strs` must be a string, or a (possibly-nested) iterable thereof. """ - # create a steppable iterator, so we can handle \-continuations - lines = iter(yield_lines(strs)) + return map(Requirement, join_continuation(map(drop_comment, yield_lines(strs)))) - for line in lines: - # Drop comments -- a hash without a space may be in a URL. - if ' #' in line: - line = line[:line.find(' #')] - # If there is a line continuation, drop it, and append the next line. - if line.endswith('\\'): - line = line[:-2].strip() - try: - line += next(lines) - except StopIteration: - return - yield Requirement(line) + +class RequirementParseError(packaging.requirements.InvalidRequirement): + "Compatibility wrapper for InvalidRequirement" class Requirement(packaging.requirements.Requirement): def __init__(self, requirement_string): """DO NOT CALL THIS UNDOCUMENTED METHOD; use Requirement.parse()!""" - try: - super(Requirement, self).__init__(requirement_string) - except packaging.requirements.InvalidRequirement as e: - raise RequirementParseError(str(e)) + super(Requirement, self).__init__(requirement_string) self.unsafe_name = self.name project_name = safe_name(self.name) self.project_name, self.key = project_name, project_name.lower() @@ -3170,7 +3169,7 @@ def _find_adapter(registry, ob): def ensure_directory(path): """Ensure that the parent directory of `path` exists""" dirname = os.path.dirname(path) - py31compat.makedirs(dirname, exist_ok=True) + os.makedirs(dirname, exist_ok=True) def _bypass_ensure_directory(path): @@ -3248,6 +3247,15 @@ def _initialize(g=globals()): ) +class PkgResourcesDeprecationWarning(Warning): + """ + Base class for warning about deprecations in ``pkg_resources`` + + This class is not derived from ``DeprecationWarning``, and as such is + visible by default. + """ + + @_call_aside def _initialize_master_working_set(): """ @@ -3286,11 +3294,3 @@ def _initialize_master_working_set(): # match order list(map(working_set.add_entry, sys.path)) globals().update(locals()) - -class PkgResourcesDeprecationWarning(Warning): - """ - Base class for warning about deprecations in ``pkg_resources`` - - This class is not derived from ``DeprecationWarning``, and as such is - visible by default. - """ diff --git a/libs/common/pkg_resources/_vendor/importlib_resources/__init__.py b/libs/common/pkg_resources/_vendor/importlib_resources/__init__.py new file mode 100644 index 00000000..34e3a995 --- /dev/null +++ b/libs/common/pkg_resources/_vendor/importlib_resources/__init__.py @@ -0,0 +1,36 @@ +"""Read resources contained within a package.""" + +from ._common import ( + as_file, + files, + Package, +) + +from ._legacy import ( + contents, + open_binary, + read_binary, + open_text, + read_text, + is_resource, + path, + Resource, +) + +from .abc import ResourceReader + + +__all__ = [ + 'Package', + 'Resource', + 'ResourceReader', + 'as_file', + 'contents', + 'files', + 'is_resource', + 'open_binary', + 'open_text', + 'path', + 'read_binary', + 'read_text', +] diff --git a/libs/common/pkg_resources/_vendor/importlib_resources/_adapters.py b/libs/common/pkg_resources/_vendor/importlib_resources/_adapters.py new file mode 100644 index 00000000..ea363d86 --- /dev/null +++ b/libs/common/pkg_resources/_vendor/importlib_resources/_adapters.py @@ -0,0 +1,170 @@ +from contextlib import suppress +from io import TextIOWrapper + +from . import abc + + +class SpecLoaderAdapter: + """ + Adapt a package spec to adapt the underlying loader. + """ + + def __init__(self, spec, adapter=lambda spec: spec.loader): + self.spec = spec + self.loader = adapter(spec) + + def __getattr__(self, name): + return getattr(self.spec, name) + + +class TraversableResourcesLoader: + """ + Adapt a loader to provide TraversableResources. + """ + + def __init__(self, spec): + self.spec = spec + + def get_resource_reader(self, name): + return CompatibilityFiles(self.spec)._native() + + +def _io_wrapper(file, mode='r', *args, **kwargs): + if mode == 'r': + return TextIOWrapper(file, *args, **kwargs) + elif mode == 'rb': + return file + raise ValueError( + "Invalid mode value '{}', only 'r' and 'rb' are supported".format(mode) + ) + + +class CompatibilityFiles: + """ + Adapter for an existing or non-existent resource reader + to provide a compatibility .files(). + """ + + class SpecPath(abc.Traversable): + """ + Path tied to a module spec. + Can be read and exposes the resource reader children. + """ + + def __init__(self, spec, reader): + self._spec = spec + self._reader = reader + + def iterdir(self): + if not self._reader: + return iter(()) + return iter( + CompatibilityFiles.ChildPath(self._reader, path) + for path in self._reader.contents() + ) + + def is_file(self): + return False + + is_dir = is_file + + def joinpath(self, other): + if not self._reader: + return CompatibilityFiles.OrphanPath(other) + return CompatibilityFiles.ChildPath(self._reader, other) + + @property + def name(self): + return self._spec.name + + def open(self, mode='r', *args, **kwargs): + return _io_wrapper(self._reader.open_resource(None), mode, *args, **kwargs) + + class ChildPath(abc.Traversable): + """ + Path tied to a resource reader child. + Can be read but doesn't expose any meaningful children. + """ + + def __init__(self, reader, name): + self._reader = reader + self._name = name + + def iterdir(self): + return iter(()) + + def is_file(self): + return self._reader.is_resource(self.name) + + def is_dir(self): + return not self.is_file() + + def joinpath(self, other): + return CompatibilityFiles.OrphanPath(self.name, other) + + @property + def name(self): + return self._name + + def open(self, mode='r', *args, **kwargs): + return _io_wrapper( + self._reader.open_resource(self.name), mode, *args, **kwargs + ) + + class OrphanPath(abc.Traversable): + """ + Orphan path, not tied to a module spec or resource reader. + Can't be read and doesn't expose any meaningful children. + """ + + def __init__(self, *path_parts): + if len(path_parts) < 1: + raise ValueError('Need at least one path part to construct a path') + self._path = path_parts + + def iterdir(self): + return iter(()) + + def is_file(self): + return False + + is_dir = is_file + + def joinpath(self, other): + return CompatibilityFiles.OrphanPath(*self._path, other) + + @property + def name(self): + return self._path[-1] + + def open(self, mode='r', *args, **kwargs): + raise FileNotFoundError("Can't open orphan path") + + def __init__(self, spec): + self.spec = spec + + @property + def _reader(self): + with suppress(AttributeError): + return self.spec.loader.get_resource_reader(self.spec.name) + + def _native(self): + """ + Return the native reader if it supports files(). + """ + reader = self._reader + return reader if hasattr(reader, 'files') else self + + def __getattr__(self, attr): + return getattr(self._reader, attr) + + def files(self): + return CompatibilityFiles.SpecPath(self.spec, self._reader) + + +def wrap_spec(package): + """ + Construct a package spec with traversable compatibility + on the spec/loader/reader. + """ + return SpecLoaderAdapter(package.__spec__, TraversableResourcesLoader) diff --git a/libs/common/pkg_resources/_vendor/importlib_resources/_common.py b/libs/common/pkg_resources/_vendor/importlib_resources/_common.py new file mode 100644 index 00000000..a12e2c75 --- /dev/null +++ b/libs/common/pkg_resources/_vendor/importlib_resources/_common.py @@ -0,0 +1,104 @@ +import os +import pathlib +import tempfile +import functools +import contextlib +import types +import importlib + +from typing import Union, Optional +from .abc import ResourceReader, Traversable + +from ._compat import wrap_spec + +Package = Union[types.ModuleType, str] + + +def files(package): + # type: (Package) -> Traversable + """ + Get a Traversable resource from a package + """ + return from_package(get_package(package)) + + +def get_resource_reader(package): + # type: (types.ModuleType) -> Optional[ResourceReader] + """ + Return the package's loader if it's a ResourceReader. + """ + # We can't use + # a issubclass() check here because apparently abc.'s __subclasscheck__() + # hook wants to create a weak reference to the object, but + # zipimport.zipimporter does not support weak references, resulting in a + # TypeError. That seems terrible. + spec = package.__spec__ + reader = getattr(spec.loader, 'get_resource_reader', None) # type: ignore + if reader is None: + return None + return reader(spec.name) # type: ignore + + +def resolve(cand): + # type: (Package) -> types.ModuleType + return cand if isinstance(cand, types.ModuleType) else importlib.import_module(cand) + + +def get_package(package): + # type: (Package) -> types.ModuleType + """Take a package name or module object and return the module. + + Raise an exception if the resolved module is not a package. + """ + resolved = resolve(package) + if wrap_spec(resolved).submodule_search_locations is None: + raise TypeError(f'{package!r} is not a package') + return resolved + + +def from_package(package): + """ + Return a Traversable object for the given package. + + """ + spec = wrap_spec(package) + reader = spec.loader.get_resource_reader(spec.name) + return reader.files() + + +@contextlib.contextmanager +def _tempfile(reader, suffix=''): + # Not using tempfile.NamedTemporaryFile as it leads to deeper 'try' + # blocks due to the need to close the temporary file to work on Windows + # properly. + fd, raw_path = tempfile.mkstemp(suffix=suffix) + try: + try: + os.write(fd, reader()) + finally: + os.close(fd) + del reader + yield pathlib.Path(raw_path) + finally: + try: + os.remove(raw_path) + except FileNotFoundError: + pass + + +@functools.singledispatch +def as_file(path): + """ + Given a Traversable object, return that object as a + path on the local file system in a context manager. + """ + return _tempfile(path.read_bytes, suffix=path.name) + + +@as_file.register(pathlib.Path) +@contextlib.contextmanager +def _(path): + """ + Degenerate behavior for pathlib.Path objects. + """ + yield path diff --git a/libs/common/pkg_resources/_vendor/importlib_resources/_compat.py b/libs/common/pkg_resources/_vendor/importlib_resources/_compat.py new file mode 100644 index 00000000..cb9fc820 --- /dev/null +++ b/libs/common/pkg_resources/_vendor/importlib_resources/_compat.py @@ -0,0 +1,98 @@ +# flake8: noqa + +import abc +import sys +import pathlib +from contextlib import suppress + +if sys.version_info >= (3, 10): + from zipfile import Path as ZipPath # type: ignore +else: + from ..zipp import Path as ZipPath # type: ignore + + +try: + from typing import runtime_checkable # type: ignore +except ImportError: + + def runtime_checkable(cls): # type: ignore + return cls + + +try: + from typing import Protocol # type: ignore +except ImportError: + Protocol = abc.ABC # type: ignore + + +class TraversableResourcesLoader: + """ + Adapt loaders to provide TraversableResources and other + compatibility. + + Used primarily for Python 3.9 and earlier where the native + loaders do not yet implement TraversableResources. + """ + + def __init__(self, spec): + self.spec = spec + + @property + def path(self): + return self.spec.origin + + def get_resource_reader(self, name): + from . import readers, _adapters + + def _zip_reader(spec): + with suppress(AttributeError): + return readers.ZipReader(spec.loader, spec.name) + + def _namespace_reader(spec): + with suppress(AttributeError, ValueError): + return readers.NamespaceReader(spec.submodule_search_locations) + + def _available_reader(spec): + with suppress(AttributeError): + return spec.loader.get_resource_reader(spec.name) + + def _native_reader(spec): + reader = _available_reader(spec) + return reader if hasattr(reader, 'files') else None + + def _file_reader(spec): + try: + path = pathlib.Path(self.path) + except TypeError: + return None + if path.exists(): + return readers.FileReader(self) + + return ( + # native reader if it supplies 'files' + _native_reader(self.spec) + or + # local ZipReader if a zip module + _zip_reader(self.spec) + or + # local NamespaceReader if a namespace module + _namespace_reader(self.spec) + or + # local FileReader + _file_reader(self.spec) + # fallback - adapt the spec ResourceReader to TraversableReader + or _adapters.CompatibilityFiles(self.spec) + ) + + +def wrap_spec(package): + """ + Construct a package spec with traversable compatibility + on the spec/loader/reader. + + Supersedes _adapters.wrap_spec to use TraversableResourcesLoader + from above for older Python compatibility (<3.10). + """ + from . import _adapters + + return _adapters.SpecLoaderAdapter(package.__spec__, TraversableResourcesLoader) diff --git a/libs/common/pkg_resources/_vendor/importlib_resources/_itertools.py b/libs/common/pkg_resources/_vendor/importlib_resources/_itertools.py new file mode 100644 index 00000000..cce05582 --- /dev/null +++ b/libs/common/pkg_resources/_vendor/importlib_resources/_itertools.py @@ -0,0 +1,35 @@ +from itertools import filterfalse + +from typing import ( + Callable, + Iterable, + Iterator, + Optional, + Set, + TypeVar, + Union, +) + +# Type and type variable definitions +_T = TypeVar('_T') +_U = TypeVar('_U') + + +def unique_everseen( + iterable: Iterable[_T], key: Optional[Callable[[_T], _U]] = None +) -> Iterator[_T]: + "List unique elements, preserving order. Remember all elements ever seen." + # unique_everseen('AAAABBBCCDAABBB') --> A B C D + # unique_everseen('ABBCcAD', str.lower) --> A B C D + seen: Set[Union[_T, _U]] = set() + seen_add = seen.add + if key is None: + for element in filterfalse(seen.__contains__, iterable): + seen_add(element) + yield element + else: + for element in iterable: + k = key(element) + if k not in seen: + seen_add(k) + yield element diff --git a/libs/common/pkg_resources/_vendor/importlib_resources/_legacy.py b/libs/common/pkg_resources/_vendor/importlib_resources/_legacy.py new file mode 100644 index 00000000..1d5d3f1f --- /dev/null +++ b/libs/common/pkg_resources/_vendor/importlib_resources/_legacy.py @@ -0,0 +1,121 @@ +import functools +import os +import pathlib +import types +import warnings + +from typing import Union, Iterable, ContextManager, BinaryIO, TextIO, Any + +from . import _common + +Package = Union[types.ModuleType, str] +Resource = str + + +def deprecated(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + warnings.warn( + f"{func.__name__} is deprecated. Use files() instead. " + "Refer to https://importlib-resources.readthedocs.io" + "/en/latest/using.html#migrating-from-legacy for migration advice.", + DeprecationWarning, + stacklevel=2, + ) + return func(*args, **kwargs) + + return wrapper + + +def normalize_path(path): + # type: (Any) -> str + """Normalize a path by ensuring it is a string. + + If the resulting string contains path separators, an exception is raised. + """ + str_path = str(path) + parent, file_name = os.path.split(str_path) + if parent: + raise ValueError(f'{path!r} must be only a file name') + return file_name + + +@deprecated +def open_binary(package: Package, resource: Resource) -> BinaryIO: + """Return a file-like object opened for binary reading of the resource.""" + return (_common.files(package) / normalize_path(resource)).open('rb') + + +@deprecated +def read_binary(package: Package, resource: Resource) -> bytes: + """Return the binary contents of the resource.""" + return (_common.files(package) / normalize_path(resource)).read_bytes() + + +@deprecated +def open_text( + package: Package, + resource: Resource, + encoding: str = 'utf-8', + errors: str = 'strict', +) -> TextIO: + """Return a file-like object opened for text reading of the resource.""" + return (_common.files(package) / normalize_path(resource)).open( + 'r', encoding=encoding, errors=errors + ) + + +@deprecated +def read_text( + package: Package, + resource: Resource, + encoding: str = 'utf-8', + errors: str = 'strict', +) -> str: + """Return the decoded string of the resource. + + The decoding-related arguments have the same semantics as those of + bytes.decode(). + """ + with open_text(package, resource, encoding, errors) as fp: + return fp.read() + + +@deprecated +def contents(package: Package) -> Iterable[str]: + """Return an iterable of entries in `package`. + + Note that not all entries are resources. Specifically, directories are + not considered resources. Use `is_resource()` on each entry returned here + to check if it is a resource or not. + """ + return [path.name for path in _common.files(package).iterdir()] + + +@deprecated +def is_resource(package: Package, name: str) -> bool: + """True if `name` is a resource inside `package`. + + Directories are *not* resources. + """ + resource = normalize_path(name) + return any( + traversable.name == resource and traversable.is_file() + for traversable in _common.files(package).iterdir() + ) + + +@deprecated +def path( + package: Package, + resource: Resource, +) -> ContextManager[pathlib.Path]: + """A context manager providing a file path object to the resource. + + If the resource does not already exist on its own on the file system, + a temporary file will be created. If the file was created, the file + will be deleted upon exiting the context manager (no exception is + raised if the file was deleted prior to the context manager + exiting). + """ + return _common.as_file(_common.files(package) / normalize_path(resource)) diff --git a/libs/common/pkg_resources/_vendor/importlib_resources/abc.py b/libs/common/pkg_resources/_vendor/importlib_resources/abc.py new file mode 100644 index 00000000..d39dc1ad --- /dev/null +++ b/libs/common/pkg_resources/_vendor/importlib_resources/abc.py @@ -0,0 +1,137 @@ +import abc +from typing import BinaryIO, Iterable, Text + +from ._compat import runtime_checkable, Protocol + + +class ResourceReader(metaclass=abc.ABCMeta): + """Abstract base class for loaders to provide resource reading support.""" + + @abc.abstractmethod + def open_resource(self, resource: Text) -> BinaryIO: + """Return an opened, file-like object for binary reading. + + The 'resource' argument is expected to represent only a file name. + If the resource cannot be found, FileNotFoundError is raised. + """ + # This deliberately raises FileNotFoundError instead of + # NotImplementedError so that if this method is accidentally called, + # it'll still do the right thing. + raise FileNotFoundError + + @abc.abstractmethod + def resource_path(self, resource: Text) -> Text: + """Return the file system path to the specified resource. + + The 'resource' argument is expected to represent only a file name. + If the resource does not exist on the file system, raise + FileNotFoundError. + """ + # This deliberately raises FileNotFoundError instead of + # NotImplementedError so that if this method is accidentally called, + # it'll still do the right thing. + raise FileNotFoundError + + @abc.abstractmethod + def is_resource(self, path: Text) -> bool: + """Return True if the named 'path' is a resource. + + Files are resources, directories are not. + """ + raise FileNotFoundError + + @abc.abstractmethod + def contents(self) -> Iterable[str]: + """Return an iterable of entries in `package`.""" + raise FileNotFoundError + + +@runtime_checkable +class Traversable(Protocol): + """ + An object with a subset of pathlib.Path methods suitable for + traversing directories and opening files. + """ + + @abc.abstractmethod + def iterdir(self): + """ + Yield Traversable objects in self + """ + + def read_bytes(self): + """ + Read contents of self as bytes + """ + with self.open('rb') as strm: + return strm.read() + + def read_text(self, encoding=None): + """ + Read contents of self as text + """ + with self.open(encoding=encoding) as strm: + return strm.read() + + @abc.abstractmethod + def is_dir(self) -> bool: + """ + Return True if self is a directory + """ + + @abc.abstractmethod + def is_file(self) -> bool: + """ + Return True if self is a file + """ + + @abc.abstractmethod + def joinpath(self, child): + """ + Return Traversable child in self + """ + + def __truediv__(self, child): + """ + Return Traversable child in self + """ + return self.joinpath(child) + + @abc.abstractmethod + def open(self, mode='r', *args, **kwargs): + """ + mode may be 'r' or 'rb' to open as text or binary. Return a handle + suitable for reading (same as pathlib.Path.open). + + When opening as text, accepts encoding parameters such as those + accepted by io.TextIOWrapper. + """ + + @abc.abstractproperty + def name(self) -> str: + """ + The base name of this object without any parent references. + """ + + +class TraversableResources(ResourceReader): + """ + The required interface for providing traversable + resources. + """ + + @abc.abstractmethod + def files(self): + """Return a Traversable object for the loaded package.""" + + def open_resource(self, resource): + return self.files().joinpath(resource).open('rb') + + def resource_path(self, resource): + raise FileNotFoundError(resource) + + def is_resource(self, path): + return self.files().joinpath(path).is_file() + + def contents(self): + return (item.name for item in self.files().iterdir()) diff --git a/libs/common/pkg_resources/_vendor/importlib_resources/readers.py b/libs/common/pkg_resources/_vendor/importlib_resources/readers.py new file mode 100644 index 00000000..f1190ca4 --- /dev/null +++ b/libs/common/pkg_resources/_vendor/importlib_resources/readers.py @@ -0,0 +1,122 @@ +import collections +import pathlib +import operator + +from . import abc + +from ._itertools import unique_everseen +from ._compat import ZipPath + + +def remove_duplicates(items): + return iter(collections.OrderedDict.fromkeys(items)) + + +class FileReader(abc.TraversableResources): + def __init__(self, loader): + self.path = pathlib.Path(loader.path).parent + + def resource_path(self, resource): + """ + Return the file system path to prevent + `resources.path()` from creating a temporary + copy. + """ + return str(self.path.joinpath(resource)) + + def files(self): + return self.path + + +class ZipReader(abc.TraversableResources): + def __init__(self, loader, module): + _, _, name = module.rpartition('.') + self.prefix = loader.prefix.replace('\\', '/') + name + '/' + self.archive = loader.archive + + def open_resource(self, resource): + try: + return super().open_resource(resource) + except KeyError as exc: + raise FileNotFoundError(exc.args[0]) + + def is_resource(self, path): + # workaround for `zipfile.Path.is_file` returning true + # for non-existent paths. + target = self.files().joinpath(path) + return target.is_file() and target.exists() + + def files(self): + return ZipPath(self.archive, self.prefix) + + +class MultiplexedPath(abc.Traversable): + """ + Given a series of Traversable objects, implement a merged + version of the interface across all objects. Useful for + namespace packages which may be multihomed at a single + name. + """ + + def __init__(self, *paths): + self._paths = list(map(pathlib.Path, remove_duplicates(paths))) + if not self._paths: + message = 'MultiplexedPath must contain at least one path' + raise FileNotFoundError(message) + if not all(path.is_dir() for path in self._paths): + raise NotADirectoryError('MultiplexedPath only supports directories') + + def iterdir(self): + files = (file for path in self._paths for file in path.iterdir()) + return unique_everseen(files, key=operator.attrgetter('name')) + + def read_bytes(self): + raise FileNotFoundError(f'{self} is not a file') + + def read_text(self, *args, **kwargs): + raise FileNotFoundError(f'{self} is not a file') + + def is_dir(self): + return True + + def is_file(self): + return False + + def joinpath(self, child): + # first try to find child in current paths + for file in self.iterdir(): + if file.name == child: + return file + # if it does not exist, construct it with the first path + return self._paths[0] / child + + __truediv__ = joinpath + + def open(self, *args, **kwargs): + raise FileNotFoundError(f'{self} is not a file') + + @property + def name(self): + return self._paths[0].name + + def __repr__(self): + paths = ', '.join(f"'{path}'" for path in self._paths) + return f'MultiplexedPath({paths})' + + +class NamespaceReader(abc.TraversableResources): + def __init__(self, namespace_path): + if 'NamespacePath' not in str(namespace_path): + raise ValueError('Invalid path') + self.path = MultiplexedPath(*list(namespace_path)) + + def resource_path(self, resource): + """ + Return the file system path to prevent + `resources.path()` from creating a temporary + copy. + """ + return str(self.path.joinpath(resource)) + + def files(self): + return self.path diff --git a/libs/common/pkg_resources/_vendor/importlib_resources/simple.py b/libs/common/pkg_resources/_vendor/importlib_resources/simple.py new file mode 100644 index 00000000..da073cbd --- /dev/null +++ b/libs/common/pkg_resources/_vendor/importlib_resources/simple.py @@ -0,0 +1,116 @@ +""" +Interface adapters for low-level readers. +""" + +import abc +import io +import itertools +from typing import BinaryIO, List + +from .abc import Traversable, TraversableResources + + +class SimpleReader(abc.ABC): + """ + The minimum, low-level interface required from a resource + provider. + """ + + @abc.abstractproperty + def package(self): + # type: () -> str + """ + The name of the package for which this reader loads resources. + """ + + @abc.abstractmethod + def children(self): + # type: () -> List['SimpleReader'] + """ + Obtain an iterable of SimpleReader for available + child containers (e.g. directories). + """ + + @abc.abstractmethod + def resources(self): + # type: () -> List[str] + """ + Obtain available named resources for this virtual package. + """ + + @abc.abstractmethod + def open_binary(self, resource): + # type: (str) -> BinaryIO + """ + Obtain a File-like for a named resource. + """ + + @property + def name(self): + return self.package.split('.')[-1] + + +class ResourceHandle(Traversable): + """ + Handle to a named resource in a ResourceReader. + """ + + def __init__(self, parent, name): + # type: (ResourceContainer, str) -> None + self.parent = parent + self.name = name # type: ignore + + def is_file(self): + return True + + def is_dir(self): + return False + + def open(self, mode='r', *args, **kwargs): + stream = self.parent.reader.open_binary(self.name) + if 'b' not in mode: + stream = io.TextIOWrapper(*args, **kwargs) + return stream + + def joinpath(self, name): + raise RuntimeError("Cannot traverse into a resource") + + +class ResourceContainer(Traversable): + """ + Traversable container for a package's resources via its reader. + """ + + def __init__(self, reader): + # type: (SimpleReader) -> None + self.reader = reader + + def is_dir(self): + return True + + def is_file(self): + return False + + def iterdir(self): + files = (ResourceHandle(self, name) for name in self.reader.resources) + dirs = map(ResourceContainer, self.reader.children()) + return itertools.chain(files, dirs) + + def open(self, *args, **kwargs): + raise IsADirectoryError() + + def joinpath(self, name): + return next( + traversable for traversable in self.iterdir() if traversable.name == name + ) + + +class TraversableReader(TraversableResources, SimpleReader): + """ + A TraversableResources based on SimpleReader. Resource providers + may derive from this class to provide the TraversableResources + interface by supplying the SimpleReader interface. + """ + + def files(self): + return ResourceContainer(self) diff --git a/libs/common/pkg_resources/_vendor/jaraco/__init__.py b/libs/common/pkg_resources/_vendor/jaraco/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/libs/common/pkg_resources/_vendor/jaraco/context.py b/libs/common/pkg_resources/_vendor/jaraco/context.py new file mode 100644 index 00000000..87a4e3dc --- /dev/null +++ b/libs/common/pkg_resources/_vendor/jaraco/context.py @@ -0,0 +1,213 @@ +import os +import subprocess +import contextlib +import functools +import tempfile +import shutil +import operator + + +@contextlib.contextmanager +def pushd(dir): + orig = os.getcwd() + os.chdir(dir) + try: + yield dir + finally: + os.chdir(orig) + + +@contextlib.contextmanager +def tarball_context(url, target_dir=None, runner=None, pushd=pushd): + """ + Get a tarball, extract it, change to that directory, yield, then + clean up. + `runner` is the function to invoke commands. + `pushd` is a context manager for changing the directory. + """ + if target_dir is None: + target_dir = os.path.basename(url).replace('.tar.gz', '').replace('.tgz', '') + if runner is None: + runner = functools.partial(subprocess.check_call, shell=True) + # In the tar command, use --strip-components=1 to strip the first path and + # then + # use -C to cause the files to be extracted to {target_dir}. This ensures + # that we always know where the files were extracted. + runner('mkdir {target_dir}'.format(**vars())) + try: + getter = 'wget {url} -O -' + extract = 'tar x{compression} --strip-components=1 -C {target_dir}' + cmd = ' | '.join((getter, extract)) + runner(cmd.format(compression=infer_compression(url), **vars())) + with pushd(target_dir): + yield target_dir + finally: + runner('rm -Rf {target_dir}'.format(**vars())) + + +def infer_compression(url): + """ + Given a URL or filename, infer the compression code for tar. + """ + # cheat and just assume it's the last two characters + compression_indicator = url[-2:] + mapping = dict(gz='z', bz='j', xz='J') + # Assume 'z' (gzip) if no match + return mapping.get(compression_indicator, 'z') + + +@contextlib.contextmanager +def temp_dir(remover=shutil.rmtree): + """ + Create a temporary directory context. Pass a custom remover + to override the removal behavior. + """ + temp_dir = tempfile.mkdtemp() + try: + yield temp_dir + finally: + remover(temp_dir) + + +@contextlib.contextmanager +def repo_context(url, branch=None, quiet=True, dest_ctx=temp_dir): + """ + Check out the repo indicated by url. + + If dest_ctx is supplied, it should be a context manager + to yield the target directory for the check out. + """ + exe = 'git' if 'git' in url else 'hg' + with dest_ctx() as repo_dir: + cmd = [exe, 'clone', url, repo_dir] + if branch: + cmd.extend(['--branch', branch]) + devnull = open(os.path.devnull, 'w') + stdout = devnull if quiet else None + subprocess.check_call(cmd, stdout=stdout) + yield repo_dir + + +@contextlib.contextmanager +def null(): + yield + + +class ExceptionTrap: + """ + A context manager that will catch certain exceptions and provide an + indication they occurred. + + >>> with ExceptionTrap() as trap: + ... raise Exception() + >>> bool(trap) + True + + >>> with ExceptionTrap() as trap: + ... pass + >>> bool(trap) + False + + >>> with ExceptionTrap(ValueError) as trap: + ... raise ValueError("1 + 1 is not 3") + >>> bool(trap) + True + + >>> with ExceptionTrap(ValueError) as trap: + ... raise Exception() + Traceback (most recent call last): + ... + Exception + + >>> bool(trap) + False + """ + + exc_info = None, None, None + + def __init__(self, exceptions=(Exception,)): + self.exceptions = exceptions + + def __enter__(self): + return self + + @property + def type(self): + return self.exc_info[0] + + @property + def value(self): + return self.exc_info[1] + + @property + def tb(self): + return self.exc_info[2] + + def __exit__(self, *exc_info): + type = exc_info[0] + matches = type and issubclass(type, self.exceptions) + if matches: + self.exc_info = exc_info + return matches + + def __bool__(self): + return bool(self.type) + + def raises(self, func, *, _test=bool): + """ + Wrap func and replace the result with the truth + value of the trap (True if an exception occurred). + + First, give the decorator an alias to support Python 3.8 + Syntax. + + >>> raises = ExceptionTrap(ValueError).raises + + Now decorate a function that always fails. + + >>> @raises + ... def fail(): + ... raise ValueError('failed') + >>> fail() + True + """ + + @functools.wraps(func) + def wrapper(*args, **kwargs): + with ExceptionTrap(self.exceptions) as trap: + func(*args, **kwargs) + return _test(trap) + + return wrapper + + def passes(self, func): + """ + Wrap func and replace the result with the truth + value of the trap (True if no exception). + + First, give the decorator an alias to support Python 3.8 + Syntax. + + >>> passes = ExceptionTrap(ValueError).passes + + Now decorate a function that always fails. + + >>> @passes + ... def fail(): + ... raise ValueError('failed') + + >>> fail() + False + """ + return self.raises(func, _test=operator.not_) + + +class suppress(contextlib.suppress, contextlib.ContextDecorator): + """ + A version of contextlib.suppress with decorator support. + + >>> @suppress(KeyError) + ... def key_error(): + ... {}[''] + >>> key_error() + """ diff --git a/libs/common/pkg_resources/_vendor/jaraco/functools.py b/libs/common/pkg_resources/_vendor/jaraco/functools.py new file mode 100644 index 00000000..a3fea3a1 --- /dev/null +++ b/libs/common/pkg_resources/_vendor/jaraco/functools.py @@ -0,0 +1,525 @@ +import functools +import time +import inspect +import collections +import types +import itertools + +import pkg_resources.extern.more_itertools + +from typing import Callable, TypeVar + + +CallableT = TypeVar("CallableT", bound=Callable[..., object]) + + +def compose(*funcs): + """ + Compose any number of unary functions into a single unary function. + + >>> import textwrap + >>> expected = str.strip(textwrap.dedent(compose.__doc__)) + >>> strip_and_dedent = compose(str.strip, textwrap.dedent) + >>> strip_and_dedent(compose.__doc__) == expected + True + + Compose also allows the innermost function to take arbitrary arguments. + + >>> round_three = lambda x: round(x, ndigits=3) + >>> f = compose(round_three, int.__truediv__) + >>> [f(3*x, x+1) for x in range(1,10)] + [1.5, 2.0, 2.25, 2.4, 2.5, 2.571, 2.625, 2.667, 2.7] + """ + + def compose_two(f1, f2): + return lambda *args, **kwargs: f1(f2(*args, **kwargs)) + + return functools.reduce(compose_two, funcs) + + +def method_caller(method_name, *args, **kwargs): + """ + Return a function that will call a named method on the + target object with optional positional and keyword + arguments. + + >>> lower = method_caller('lower') + >>> lower('MyString') + 'mystring' + """ + + def call_method(target): + func = getattr(target, method_name) + return func(*args, **kwargs) + + return call_method + + +def once(func): + """ + Decorate func so it's only ever called the first time. + + This decorator can ensure that an expensive or non-idempotent function + will not be expensive on subsequent calls and is idempotent. + + >>> add_three = once(lambda a: a+3) + >>> add_three(3) + 6 + >>> add_three(9) + 6 + >>> add_three('12') + 6 + + To reset the stored value, simply clear the property ``saved_result``. + + >>> del add_three.saved_result + >>> add_three(9) + 12 + >>> add_three(8) + 12 + + Or invoke 'reset()' on it. + + >>> add_three.reset() + >>> add_three(-3) + 0 + >>> add_three(0) + 0 + """ + + @functools.wraps(func) + def wrapper(*args, **kwargs): + if not hasattr(wrapper, 'saved_result'): + wrapper.saved_result = func(*args, **kwargs) + return wrapper.saved_result + + wrapper.reset = lambda: vars(wrapper).__delitem__('saved_result') + return wrapper + + +def method_cache( + method: CallableT, + cache_wrapper: Callable[ + [CallableT], CallableT + ] = functools.lru_cache(), # type: ignore[assignment] +) -> CallableT: + """ + Wrap lru_cache to support storing the cache data in the object instances. + + Abstracts the common paradigm where the method explicitly saves an + underscore-prefixed protected property on first call and returns that + subsequently. + + >>> class MyClass: + ... calls = 0 + ... + ... @method_cache + ... def method(self, value): + ... self.calls += 1 + ... return value + + >>> a = MyClass() + >>> a.method(3) + 3 + >>> for x in range(75): + ... res = a.method(x) + >>> a.calls + 75 + + Note that the apparent behavior will be exactly like that of lru_cache + except that the cache is stored on each instance, so values in one + instance will not flush values from another, and when an instance is + deleted, so are the cached values for that instance. + + >>> b = MyClass() + >>> for x in range(35): + ... res = b.method(x) + >>> b.calls + 35 + >>> a.method(0) + 0 + >>> a.calls + 75 + + Note that if method had been decorated with ``functools.lru_cache()``, + a.calls would have been 76 (due to the cached value of 0 having been + flushed by the 'b' instance). + + Clear the cache with ``.cache_clear()`` + + >>> a.method.cache_clear() + + Same for a method that hasn't yet been called. + + >>> c = MyClass() + >>> c.method.cache_clear() + + Another cache wrapper may be supplied: + + >>> cache = functools.lru_cache(maxsize=2) + >>> MyClass.method2 = method_cache(lambda self: 3, cache_wrapper=cache) + >>> a = MyClass() + >>> a.method2() + 3 + + Caution - do not subsequently wrap the method with another decorator, such + as ``@property``, which changes the semantics of the function. + + See also + http://code.activestate.com/recipes/577452-a-memoize-decorator-for-instance-methods/ + for another implementation and additional justification. + """ + + def wrapper(self: object, *args: object, **kwargs: object) -> object: + # it's the first call, replace the method with a cached, bound method + bound_method: CallableT = types.MethodType( # type: ignore[assignment] + method, self + ) + cached_method = cache_wrapper(bound_method) + setattr(self, method.__name__, cached_method) + return cached_method(*args, **kwargs) + + # Support cache clear even before cache has been created. + wrapper.cache_clear = lambda: None # type: ignore[attr-defined] + + return ( # type: ignore[return-value] + _special_method_cache(method, cache_wrapper) or wrapper + ) + + +def _special_method_cache(method, cache_wrapper): + """ + Because Python treats special methods differently, it's not + possible to use instance attributes to implement the cached + methods. + + Instead, install the wrapper method under a different name + and return a simple proxy to that wrapper. + + https://github.com/jaraco/jaraco.functools/issues/5 + """ + name = method.__name__ + special_names = '__getattr__', '__getitem__' + if name not in special_names: + return + + wrapper_name = '__cached' + name + + def proxy(self, *args, **kwargs): + if wrapper_name not in vars(self): + bound = types.MethodType(method, self) + cache = cache_wrapper(bound) + setattr(self, wrapper_name, cache) + else: + cache = getattr(self, wrapper_name) + return cache(*args, **kwargs) + + return proxy + + +def apply(transform): + """ + Decorate a function with a transform function that is + invoked on results returned from the decorated function. + + >>> @apply(reversed) + ... def get_numbers(start): + ... "doc for get_numbers" + ... return range(start, start+3) + >>> list(get_numbers(4)) + [6, 5, 4] + >>> get_numbers.__doc__ + 'doc for get_numbers' + """ + + def wrap(func): + return functools.wraps(func)(compose(transform, func)) + + return wrap + + +def result_invoke(action): + r""" + Decorate a function with an action function that is + invoked on the results returned from the decorated + function (for its side-effect), then return the original + result. + + >>> @result_invoke(print) + ... def add_two(a, b): + ... return a + b + >>> x = add_two(2, 3) + 5 + >>> x + 5 + """ + + def wrap(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + result = func(*args, **kwargs) + action(result) + return result + + return wrapper + + return wrap + + +def call_aside(f, *args, **kwargs): + """ + Call a function for its side effect after initialization. + + >>> @call_aside + ... def func(): print("called") + called + >>> func() + called + + Use functools.partial to pass parameters to the initial call + + >>> @functools.partial(call_aside, name='bingo') + ... def func(name): print("called with", name) + called with bingo + """ + f(*args, **kwargs) + return f + + +class Throttler: + """ + Rate-limit a function (or other callable) + """ + + def __init__(self, func, max_rate=float('Inf')): + if isinstance(func, Throttler): + func = func.func + self.func = func + self.max_rate = max_rate + self.reset() + + def reset(self): + self.last_called = 0 + + def __call__(self, *args, **kwargs): + self._wait() + return self.func(*args, **kwargs) + + def _wait(self): + "ensure at least 1/max_rate seconds from last call" + elapsed = time.time() - self.last_called + must_wait = 1 / self.max_rate - elapsed + time.sleep(max(0, must_wait)) + self.last_called = time.time() + + def __get__(self, obj, type=None): + return first_invoke(self._wait, functools.partial(self.func, obj)) + + +def first_invoke(func1, func2): + """ + Return a function that when invoked will invoke func1 without + any parameters (for its side-effect) and then invoke func2 + with whatever parameters were passed, returning its result. + """ + + def wrapper(*args, **kwargs): + func1() + return func2(*args, **kwargs) + + return wrapper + + +def retry_call(func, cleanup=lambda: None, retries=0, trap=()): + """ + Given a callable func, trap the indicated exceptions + for up to 'retries' times, invoking cleanup on the + exception. On the final attempt, allow any exceptions + to propagate. + """ + attempts = itertools.count() if retries == float('inf') else range(retries) + for attempt in attempts: + try: + return func() + except trap: + cleanup() + + return func() + + +def retry(*r_args, **r_kwargs): + """ + Decorator wrapper for retry_call. Accepts arguments to retry_call + except func and then returns a decorator for the decorated function. + + Ex: + + >>> @retry(retries=3) + ... def my_func(a, b): + ... "this is my funk" + ... print(a, b) + >>> my_func.__doc__ + 'this is my funk' + """ + + def decorate(func): + @functools.wraps(func) + def wrapper(*f_args, **f_kwargs): + bound = functools.partial(func, *f_args, **f_kwargs) + return retry_call(bound, *r_args, **r_kwargs) + + return wrapper + + return decorate + + +def print_yielded(func): + """ + Convert a generator into a function that prints all yielded elements + + >>> @print_yielded + ... def x(): + ... yield 3; yield None + >>> x() + 3 + None + """ + print_all = functools.partial(map, print) + print_results = compose(more_itertools.consume, print_all, func) + return functools.wraps(func)(print_results) + + +def pass_none(func): + """ + Wrap func so it's not called if its first param is None + + >>> print_text = pass_none(print) + >>> print_text('text') + text + >>> print_text(None) + """ + + @functools.wraps(func) + def wrapper(param, *args, **kwargs): + if param is not None: + return func(param, *args, **kwargs) + + return wrapper + + +def assign_params(func, namespace): + """ + Assign parameters from namespace where func solicits. + + >>> def func(x, y=3): + ... print(x, y) + >>> assigned = assign_params(func, dict(x=2, z=4)) + >>> assigned() + 2 3 + + The usual errors are raised if a function doesn't receive + its required parameters: + + >>> assigned = assign_params(func, dict(y=3, z=4)) + >>> assigned() + Traceback (most recent call last): + TypeError: func() ...argument... + + It even works on methods: + + >>> class Handler: + ... def meth(self, arg): + ... print(arg) + >>> assign_params(Handler().meth, dict(arg='crystal', foo='clear'))() + crystal + """ + sig = inspect.signature(func) + params = sig.parameters.keys() + call_ns = {k: namespace[k] for k in params if k in namespace} + return functools.partial(func, **call_ns) + + +def save_method_args(method): + """ + Wrap a method such that when it is called, the args and kwargs are + saved on the method. + + >>> class MyClass: + ... @save_method_args + ... def method(self, a, b): + ... print(a, b) + >>> my_ob = MyClass() + >>> my_ob.method(1, 2) + 1 2 + >>> my_ob._saved_method.args + (1, 2) + >>> my_ob._saved_method.kwargs + {} + >>> my_ob.method(a=3, b='foo') + 3 foo + >>> my_ob._saved_method.args + () + >>> my_ob._saved_method.kwargs == dict(a=3, b='foo') + True + + The arguments are stored on the instance, allowing for + different instance to save different args. + + >>> your_ob = MyClass() + >>> your_ob.method({str('x'): 3}, b=[4]) + {'x': 3} [4] + >>> your_ob._saved_method.args + ({'x': 3},) + >>> my_ob._saved_method.args + () + """ + args_and_kwargs = collections.namedtuple('args_and_kwargs', 'args kwargs') + + @functools.wraps(method) + def wrapper(self, *args, **kwargs): + attr_name = '_saved_' + method.__name__ + attr = args_and_kwargs(args, kwargs) + setattr(self, attr_name, attr) + return method(self, *args, **kwargs) + + return wrapper + + +def except_(*exceptions, replace=None, use=None): + """ + Replace the indicated exceptions, if raised, with the indicated + literal replacement or evaluated expression (if present). + + >>> safe_int = except_(ValueError)(int) + >>> safe_int('five') + >>> safe_int('5') + 5 + + Specify a literal replacement with ``replace``. + + >>> safe_int_r = except_(ValueError, replace=0)(int) + >>> safe_int_r('five') + 0 + + Provide an expression to ``use`` to pass through particular parameters. + + >>> safe_int_pt = except_(ValueError, use='args[0]')(int) + >>> safe_int_pt('five') + 'five' + + """ + + def decorate(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + try: + return func(*args, **kwargs) + except exceptions: + try: + return eval(use) + except TypeError: + return replace + + return wrapper + + return decorate diff --git a/libs/common/pkg_resources/_vendor/jaraco/text/__init__.py b/libs/common/pkg_resources/_vendor/jaraco/text/__init__.py new file mode 100644 index 00000000..c466378c --- /dev/null +++ b/libs/common/pkg_resources/_vendor/jaraco/text/__init__.py @@ -0,0 +1,599 @@ +import re +import itertools +import textwrap +import functools + +try: + from importlib.resources import files # type: ignore +except ImportError: # pragma: nocover + from pkg_resources.extern.importlib_resources import files # type: ignore + +from pkg_resources.extern.jaraco.functools import compose, method_cache +from pkg_resources.extern.jaraco.context import ExceptionTrap + + +def substitution(old, new): + """ + Return a function that will perform a substitution on a string + """ + return lambda s: s.replace(old, new) + + +def multi_substitution(*substitutions): + """ + Take a sequence of pairs specifying substitutions, and create + a function that performs those substitutions. + + >>> multi_substitution(('foo', 'bar'), ('bar', 'baz'))('foo') + 'baz' + """ + substitutions = itertools.starmap(substitution, substitutions) + # compose function applies last function first, so reverse the + # substitutions to get the expected order. + substitutions = reversed(tuple(substitutions)) + return compose(*substitutions) + + +class FoldedCase(str): + """ + A case insensitive string class; behaves just like str + except compares equal when the only variation is case. + + >>> s = FoldedCase('hello world') + + >>> s == 'Hello World' + True + + >>> 'Hello World' == s + True + + >>> s != 'Hello World' + False + + >>> s.index('O') + 4 + + >>> s.split('O') + ['hell', ' w', 'rld'] + + >>> sorted(map(FoldedCase, ['GAMMA', 'alpha', 'Beta'])) + ['alpha', 'Beta', 'GAMMA'] + + Sequence membership is straightforward. + + >>> "Hello World" in [s] + True + >>> s in ["Hello World"] + True + + You may test for set inclusion, but candidate and elements + must both be folded. + + >>> FoldedCase("Hello World") in {s} + True + >>> s in {FoldedCase("Hello World")} + True + + String inclusion works as long as the FoldedCase object + is on the right. + + >>> "hello" in FoldedCase("Hello World") + True + + But not if the FoldedCase object is on the left: + + >>> FoldedCase('hello') in 'Hello World' + False + + In that case, use ``in_``: + + >>> FoldedCase('hello').in_('Hello World') + True + + >>> FoldedCase('hello') > FoldedCase('Hello') + False + """ + + def __lt__(self, other): + return self.lower() < other.lower() + + def __gt__(self, other): + return self.lower() > other.lower() + + def __eq__(self, other): + return self.lower() == other.lower() + + def __ne__(self, other): + return self.lower() != other.lower() + + def __hash__(self): + return hash(self.lower()) + + def __contains__(self, other): + return super().lower().__contains__(other.lower()) + + def in_(self, other): + "Does self appear in other?" + return self in FoldedCase(other) + + # cache lower since it's likely to be called frequently. + @method_cache + def lower(self): + return super().lower() + + def index(self, sub): + return self.lower().index(sub.lower()) + + def split(self, splitter=' ', maxsplit=0): + pattern = re.compile(re.escape(splitter), re.I) + return pattern.split(self, maxsplit) + + +# Python 3.8 compatibility +_unicode_trap = ExceptionTrap(UnicodeDecodeError) + + +@_unicode_trap.passes +def is_decodable(value): + r""" + Return True if the supplied value is decodable (using the default + encoding). + + >>> is_decodable(b'\xff') + False + >>> is_decodable(b'\x32') + True + """ + value.decode() + + +def is_binary(value): + r""" + Return True if the value appears to be binary (that is, it's a byte + string and isn't decodable). + + >>> is_binary(b'\xff') + True + >>> is_binary('\xff') + False + """ + return isinstance(value, bytes) and not is_decodable(value) + + +def trim(s): + r""" + Trim something like a docstring to remove the whitespace that + is common due to indentation and formatting. + + >>> trim("\n\tfoo = bar\n\t\tbar = baz\n") + 'foo = bar\n\tbar = baz' + """ + return textwrap.dedent(s).strip() + + +def wrap(s): + """ + Wrap lines of text, retaining existing newlines as + paragraph markers. + + >>> print(wrap(lorem_ipsum)) + Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do + eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad + minim veniam, quis nostrud exercitation ullamco laboris nisi ut + aliquip ex ea commodo consequat. Duis aute irure dolor in + reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla + pariatur. Excepteur sint occaecat cupidatat non proident, sunt in + culpa qui officia deserunt mollit anim id est laborum. + + Curabitur pretium tincidunt lacus. Nulla gravida orci a odio. Nullam + varius, turpis et commodo pharetra, est eros bibendum elit, nec luctus + magna felis sollicitudin mauris. Integer in mauris eu nibh euismod + gravida. Duis ac tellus et risus vulputate vehicula. Donec lobortis + risus a elit. Etiam tempor. Ut ullamcorper, ligula eu tempor congue, + eros est euismod turpis, id tincidunt sapien risus a quam. Maecenas + fermentum consequat mi. Donec fermentum. Pellentesque malesuada nulla + a mi. Duis sapien sem, aliquet nec, commodo eget, consequat quis, + neque. Aliquam faucibus, elit ut dictum aliquet, felis nisl adipiscing + sapien, sed malesuada diam lacus eget erat. Cras mollis scelerisque + nunc. Nullam arcu. Aliquam consequat. Curabitur augue lorem, dapibus + quis, laoreet et, pretium ac, nisi. Aenean magna nisl, mollis quis, + molestie eu, feugiat in, orci. In hac habitasse platea dictumst. + """ + paragraphs = s.splitlines() + wrapped = ('\n'.join(textwrap.wrap(para)) for para in paragraphs) + return '\n\n'.join(wrapped) + + +def unwrap(s): + r""" + Given a multi-line string, return an unwrapped version. + + >>> wrapped = wrap(lorem_ipsum) + >>> wrapped.count('\n') + 20 + >>> unwrapped = unwrap(wrapped) + >>> unwrapped.count('\n') + 1 + >>> print(unwrapped) + Lorem ipsum dolor sit amet, consectetur adipiscing ... + Curabitur pretium tincidunt lacus. Nulla gravida orci ... + + """ + paragraphs = re.split(r'\n\n+', s) + cleaned = (para.replace('\n', ' ') for para in paragraphs) + return '\n'.join(cleaned) + + + + +class Splitter(object): + """object that will split a string with the given arguments for each call + + >>> s = Splitter(',') + >>> s('hello, world, this is your, master calling') + ['hello', ' world', ' this is your', ' master calling'] + """ + + def __init__(self, *args): + self.args = args + + def __call__(self, s): + return s.split(*self.args) + + +def indent(string, prefix=' ' * 4): + """ + >>> indent('foo') + ' foo' + """ + return prefix + string + + +class WordSet(tuple): + """ + Given an identifier, return the words that identifier represents, + whether in camel case, underscore-separated, etc. + + >>> WordSet.parse("camelCase") + ('camel', 'Case') + + >>> WordSet.parse("under_sep") + ('under', 'sep') + + Acronyms should be retained + + >>> WordSet.parse("firstSNL") + ('first', 'SNL') + + >>> WordSet.parse("you_and_I") + ('you', 'and', 'I') + + >>> WordSet.parse("A simple test") + ('A', 'simple', 'test') + + Multiple caps should not interfere with the first cap of another word. + + >>> WordSet.parse("myABCClass") + ('my', 'ABC', 'Class') + + The result is a WordSet, so you can get the form you need. + + >>> WordSet.parse("myABCClass").underscore_separated() + 'my_ABC_Class' + + >>> WordSet.parse('a-command').camel_case() + 'ACommand' + + >>> WordSet.parse('someIdentifier').lowered().space_separated() + 'some identifier' + + Slices of the result should return another WordSet. + + >>> WordSet.parse('taken-out-of-context')[1:].underscore_separated() + 'out_of_context' + + >>> WordSet.from_class_name(WordSet()).lowered().space_separated() + 'word set' + + >>> example = WordSet.parse('figured it out') + >>> example.headless_camel_case() + 'figuredItOut' + >>> example.dash_separated() + 'figured-it-out' + + """ + + _pattern = re.compile('([A-Z]?[a-z]+)|([A-Z]+(?![a-z]))') + + def capitalized(self): + return WordSet(word.capitalize() for word in self) + + def lowered(self): + return WordSet(word.lower() for word in self) + + def camel_case(self): + return ''.join(self.capitalized()) + + def headless_camel_case(self): + words = iter(self) + first = next(words).lower() + new_words = itertools.chain((first,), WordSet(words).camel_case()) + return ''.join(new_words) + + def underscore_separated(self): + return '_'.join(self) + + def dash_separated(self): + return '-'.join(self) + + def space_separated(self): + return ' '.join(self) + + def trim_right(self, item): + """ + Remove the item from the end of the set. + + >>> WordSet.parse('foo bar').trim_right('foo') + ('foo', 'bar') + >>> WordSet.parse('foo bar').trim_right('bar') + ('foo',) + >>> WordSet.parse('').trim_right('bar') + () + """ + return self[:-1] if self and self[-1] == item else self + + def trim_left(self, item): + """ + Remove the item from the beginning of the set. + + >>> WordSet.parse('foo bar').trim_left('foo') + ('bar',) + >>> WordSet.parse('foo bar').trim_left('bar') + ('foo', 'bar') + >>> WordSet.parse('').trim_left('bar') + () + """ + return self[1:] if self and self[0] == item else self + + def trim(self, item): + """ + >>> WordSet.parse('foo bar').trim('foo') + ('bar',) + """ + return self.trim_left(item).trim_right(item) + + def __getitem__(self, item): + result = super(WordSet, self).__getitem__(item) + if isinstance(item, slice): + result = WordSet(result) + return result + + @classmethod + def parse(cls, identifier): + matches = cls._pattern.finditer(identifier) + return WordSet(match.group(0) for match in matches) + + @classmethod + def from_class_name(cls, subject): + return cls.parse(subject.__class__.__name__) + + +# for backward compatibility +words = WordSet.parse + + +def simple_html_strip(s): + r""" + Remove HTML from the string `s`. + + >>> str(simple_html_strip('')) + '' + + >>> print(simple_html_strip('A stormy day in paradise')) + A stormy day in paradise + + >>> print(simple_html_strip('Somebody tell the truth.')) + Somebody tell the truth. + + >>> print(simple_html_strip('What about
\nmultiple lines?')) + What about + multiple lines? + """ + html_stripper = re.compile('()|(<[^>]*>)|([^<]+)', re.DOTALL) + texts = (match.group(3) or '' for match in html_stripper.finditer(s)) + return ''.join(texts) + + +class SeparatedValues(str): + """ + A string separated by a separator. Overrides __iter__ for getting + the values. + + >>> list(SeparatedValues('a,b,c')) + ['a', 'b', 'c'] + + Whitespace is stripped and empty values are discarded. + + >>> list(SeparatedValues(' a, b , c, ')) + ['a', 'b', 'c'] + """ + + separator = ',' + + def __iter__(self): + parts = self.split(self.separator) + return filter(None, (part.strip() for part in parts)) + + +class Stripper: + r""" + Given a series of lines, find the common prefix and strip it from them. + + >>> lines = [ + ... 'abcdefg\n', + ... 'abc\n', + ... 'abcde\n', + ... ] + >>> res = Stripper.strip_prefix(lines) + >>> res.prefix + 'abc' + >>> list(res.lines) + ['defg\n', '\n', 'de\n'] + + If no prefix is common, nothing should be stripped. + + >>> lines = [ + ... 'abcd\n', + ... '1234\n', + ... ] + >>> res = Stripper.strip_prefix(lines) + >>> res.prefix = '' + >>> list(res.lines) + ['abcd\n', '1234\n'] + """ + + def __init__(self, prefix, lines): + self.prefix = prefix + self.lines = map(self, lines) + + @classmethod + def strip_prefix(cls, lines): + prefix_lines, lines = itertools.tee(lines) + prefix = functools.reduce(cls.common_prefix, prefix_lines) + return cls(prefix, lines) + + def __call__(self, line): + if not self.prefix: + return line + null, prefix, rest = line.partition(self.prefix) + return rest + + @staticmethod + def common_prefix(s1, s2): + """ + Return the common prefix of two lines. + """ + index = min(len(s1), len(s2)) + while s1[:index] != s2[:index]: + index -= 1 + return s1[:index] + + +def remove_prefix(text, prefix): + """ + Remove the prefix from the text if it exists. + + >>> remove_prefix('underwhelming performance', 'underwhelming ') + 'performance' + + >>> remove_prefix('something special', 'sample') + 'something special' + """ + null, prefix, rest = text.rpartition(prefix) + return rest + + +def remove_suffix(text, suffix): + """ + Remove the suffix from the text if it exists. + + >>> remove_suffix('name.git', '.git') + 'name' + + >>> remove_suffix('something special', 'sample') + 'something special' + """ + rest, suffix, null = text.partition(suffix) + return rest + + +def normalize_newlines(text): + r""" + Replace alternate newlines with the canonical newline. + + >>> normalize_newlines('Lorem Ipsum\u2029') + 'Lorem Ipsum\n' + >>> normalize_newlines('Lorem Ipsum\r\n') + 'Lorem Ipsum\n' + >>> normalize_newlines('Lorem Ipsum\x85') + 'Lorem Ipsum\n' + """ + newlines = ['\r\n', '\r', '\n', '\u0085', '\u2028', '\u2029'] + pattern = '|'.join(newlines) + return re.sub(pattern, '\n', text) + + +def _nonblank(str): + return str and not str.startswith('#') + + +@functools.singledispatch +def yield_lines(iterable): + r""" + Yield valid lines of a string or iterable. + + >>> list(yield_lines('')) + [] + >>> list(yield_lines(['foo', 'bar'])) + ['foo', 'bar'] + >>> list(yield_lines('foo\nbar')) + ['foo', 'bar'] + >>> list(yield_lines('\nfoo\n#bar\nbaz #comment')) + ['foo', 'baz #comment'] + >>> list(yield_lines(['foo\nbar', 'baz', 'bing\n\n\n'])) + ['foo', 'bar', 'baz', 'bing'] + """ + return itertools.chain.from_iterable(map(yield_lines, iterable)) + + +@yield_lines.register(str) +def _(text): + return filter(_nonblank, map(str.strip, text.splitlines())) + + +def drop_comment(line): + """ + Drop comments. + + >>> drop_comment('foo # bar') + 'foo' + + A hash without a space may be in a URL. + + >>> drop_comment('http://example.com/foo#bar') + 'http://example.com/foo#bar' + """ + return line.partition(' #')[0] + + +def join_continuation(lines): + r""" + Join lines continued by a trailing backslash. + + >>> list(join_continuation(['foo \\', 'bar', 'baz'])) + ['foobar', 'baz'] + >>> list(join_continuation(['foo \\', 'bar', 'baz'])) + ['foobar', 'baz'] + >>> list(join_continuation(['foo \\', 'bar \\', 'baz'])) + ['foobarbaz'] + + Not sure why, but... + The character preceeding the backslash is also elided. + + >>> list(join_continuation(['goo\\', 'dly'])) + ['godly'] + + A terrible idea, but... + If no line is available to continue, suppress the lines. + + >>> list(join_continuation(['foo', 'bar\\', 'baz\\'])) + ['foo'] + """ + lines = iter(lines) + for item in lines: + while item.endswith('\\'): + try: + item = item[:-2].strip() + next(lines) + except StopIteration: + return + yield item diff --git a/libs/common/pkg_resources/_vendor/more_itertools/__init__.py b/libs/common/pkg_resources/_vendor/more_itertools/__init__.py new file mode 100644 index 00000000..ea38bef1 --- /dev/null +++ b/libs/common/pkg_resources/_vendor/more_itertools/__init__.py @@ -0,0 +1,4 @@ +from .more import * # noqa +from .recipes import * # noqa + +__version__ = '8.12.0' diff --git a/libs/common/pkg_resources/_vendor/more_itertools/more.py b/libs/common/pkg_resources/_vendor/more_itertools/more.py new file mode 100644 index 00000000..6b6a5cab --- /dev/null +++ b/libs/common/pkg_resources/_vendor/more_itertools/more.py @@ -0,0 +1,4316 @@ +import warnings + +from collections import Counter, defaultdict, deque, abc +from collections.abc import Sequence +from functools import partial, reduce, wraps +from heapq import merge, heapify, heapreplace, heappop +from itertools import ( + chain, + compress, + count, + cycle, + dropwhile, + groupby, + islice, + repeat, + starmap, + takewhile, + tee, + zip_longest, +) +from math import exp, factorial, floor, log +from queue import Empty, Queue +from random import random, randrange, uniform +from operator import itemgetter, mul, sub, gt, lt, ge, le +from sys import hexversion, maxsize +from time import monotonic + +from .recipes import ( + consume, + flatten, + pairwise, + powerset, + take, + unique_everseen, +) + +__all__ = [ + 'AbortThread', + 'SequenceView', + 'UnequalIterablesError', + 'adjacent', + 'all_unique', + 'always_iterable', + 'always_reversible', + 'bucket', + 'callback_iter', + 'chunked', + 'chunked_even', + 'circular_shifts', + 'collapse', + 'collate', + 'combination_index', + 'consecutive_groups', + 'consumer', + 'count_cycle', + 'countable', + 'difference', + 'distinct_combinations', + 'distinct_permutations', + 'distribute', + 'divide', + 'duplicates_everseen', + 'duplicates_justseen', + 'exactly_n', + 'filter_except', + 'first', + 'groupby_transform', + 'ichunked', + 'ilen', + 'interleave', + 'interleave_evenly', + 'interleave_longest', + 'intersperse', + 'is_sorted', + 'islice_extended', + 'iterate', + 'last', + 'locate', + 'lstrip', + 'make_decorator', + 'map_except', + 'map_if', + 'map_reduce', + 'mark_ends', + 'minmax', + 'nth_or_last', + 'nth_permutation', + 'nth_product', + 'numeric_range', + 'one', + 'only', + 'padded', + 'partitions', + 'peekable', + 'permutation_index', + 'product_index', + 'raise_', + 'repeat_each', + 'repeat_last', + 'replace', + 'rlocate', + 'rstrip', + 'run_length', + 'sample', + 'seekable', + 'set_partitions', + 'side_effect', + 'sliced', + 'sort_together', + 'split_after', + 'split_at', + 'split_before', + 'split_into', + 'split_when', + 'spy', + 'stagger', + 'strip', + 'strictly_n', + 'substrings', + 'substrings_indexes', + 'time_limited', + 'unique_in_window', + 'unique_to_each', + 'unzip', + 'value_chain', + 'windowed', + 'windowed_complete', + 'with_iter', + 'zip_broadcast', + 'zip_equal', + 'zip_offset', +] + + +_marker = object() + + +def chunked(iterable, n, strict=False): + """Break *iterable* into lists of length *n*: + + >>> list(chunked([1, 2, 3, 4, 5, 6], 3)) + [[1, 2, 3], [4, 5, 6]] + + By the default, the last yielded list will have fewer than *n* elements + if the length of *iterable* is not divisible by *n*: + + >>> list(chunked([1, 2, 3, 4, 5, 6, 7, 8], 3)) + [[1, 2, 3], [4, 5, 6], [7, 8]] + + To use a fill-in value instead, see the :func:`grouper` recipe. + + If the length of *iterable* is not divisible by *n* and *strict* is + ``True``, then ``ValueError`` will be raised before the last + list is yielded. + + """ + iterator = iter(partial(take, n, iter(iterable)), []) + if strict: + if n is None: + raise ValueError('n must not be None when using strict mode.') + + def ret(): + for chunk in iterator: + if len(chunk) != n: + raise ValueError('iterable is not divisible by n.') + yield chunk + + return iter(ret()) + else: + return iterator + + +def first(iterable, default=_marker): + """Return the first item of *iterable*, or *default* if *iterable* is + empty. + + >>> first([0, 1, 2, 3]) + 0 + >>> first([], 'some default') + 'some default' + + If *default* is not provided and there are no items in the iterable, + raise ``ValueError``. + + :func:`first` is useful when you have a generator of expensive-to-retrieve + values and want any arbitrary one. It is marginally shorter than + ``next(iter(iterable), default)``. + + """ + try: + return next(iter(iterable)) + except StopIteration as e: + if default is _marker: + raise ValueError( + 'first() was called on an empty iterable, and no ' + 'default value was provided.' + ) from e + return default + + +def last(iterable, default=_marker): + """Return the last item of *iterable*, or *default* if *iterable* is + empty. + + >>> last([0, 1, 2, 3]) + 3 + >>> last([], 'some default') + 'some default' + + If *default* is not provided and there are no items in the iterable, + raise ``ValueError``. + """ + try: + if isinstance(iterable, Sequence): + return iterable[-1] + # Work around https://bugs.python.org/issue38525 + elif hasattr(iterable, '__reversed__') and (hexversion != 0x030800F0): + return next(reversed(iterable)) + else: + return deque(iterable, maxlen=1)[-1] + except (IndexError, TypeError, StopIteration): + if default is _marker: + raise ValueError( + 'last() was called on an empty iterable, and no default was ' + 'provided.' + ) + return default + + +def nth_or_last(iterable, n, default=_marker): + """Return the nth or the last item of *iterable*, + or *default* if *iterable* is empty. + + >>> nth_or_last([0, 1, 2, 3], 2) + 2 + >>> nth_or_last([0, 1], 2) + 1 + >>> nth_or_last([], 0, 'some default') + 'some default' + + If *default* is not provided and there are no items in the iterable, + raise ``ValueError``. + """ + return last(islice(iterable, n + 1), default=default) + + +class peekable: + """Wrap an iterator to allow lookahead and prepending elements. + + Call :meth:`peek` on the result to get the value that will be returned + by :func:`next`. This won't advance the iterator: + + >>> p = peekable(['a', 'b']) + >>> p.peek() + 'a' + >>> next(p) + 'a' + + Pass :meth:`peek` a default value to return that instead of raising + ``StopIteration`` when the iterator is exhausted. + + >>> p = peekable([]) + >>> p.peek('hi') + 'hi' + + peekables also offer a :meth:`prepend` method, which "inserts" items + at the head of the iterable: + + >>> p = peekable([1, 2, 3]) + >>> p.prepend(10, 11, 12) + >>> next(p) + 10 + >>> p.peek() + 11 + >>> list(p) + [11, 12, 1, 2, 3] + + peekables can be indexed. Index 0 is the item that will be returned by + :func:`next`, index 1 is the item after that, and so on: + The values up to the given index will be cached. + + >>> p = peekable(['a', 'b', 'c', 'd']) + >>> p[0] + 'a' + >>> p[1] + 'b' + >>> next(p) + 'a' + + Negative indexes are supported, but be aware that they will cache the + remaining items in the source iterator, which may require significant + storage. + + To check whether a peekable is exhausted, check its truth value: + + >>> p = peekable(['a', 'b']) + >>> if p: # peekable has items + ... list(p) + ['a', 'b'] + >>> if not p: # peekable is exhausted + ... list(p) + [] + + """ + + def __init__(self, iterable): + self._it = iter(iterable) + self._cache = deque() + + def __iter__(self): + return self + + def __bool__(self): + try: + self.peek() + except StopIteration: + return False + return True + + def peek(self, default=_marker): + """Return the item that will be next returned from ``next()``. + + Return ``default`` if there are no items left. If ``default`` is not + provided, raise ``StopIteration``. + + """ + if not self._cache: + try: + self._cache.append(next(self._it)) + except StopIteration: + if default is _marker: + raise + return default + return self._cache[0] + + def prepend(self, *items): + """Stack up items to be the next ones returned from ``next()`` or + ``self.peek()``. The items will be returned in + first in, first out order:: + + >>> p = peekable([1, 2, 3]) + >>> p.prepend(10, 11, 12) + >>> next(p) + 10 + >>> list(p) + [11, 12, 1, 2, 3] + + It is possible, by prepending items, to "resurrect" a peekable that + previously raised ``StopIteration``. + + >>> p = peekable([]) + >>> next(p) + Traceback (most recent call last): + ... + StopIteration + >>> p.prepend(1) + >>> next(p) + 1 + >>> next(p) + Traceback (most recent call last): + ... + StopIteration + + """ + self._cache.extendleft(reversed(items)) + + def __next__(self): + if self._cache: + return self._cache.popleft() + + return next(self._it) + + def _get_slice(self, index): + # Normalize the slice's arguments + step = 1 if (index.step is None) else index.step + if step > 0: + start = 0 if (index.start is None) else index.start + stop = maxsize if (index.stop is None) else index.stop + elif step < 0: + start = -1 if (index.start is None) else index.start + stop = (-maxsize - 1) if (index.stop is None) else index.stop + else: + raise ValueError('slice step cannot be zero') + + # If either the start or stop index is negative, we'll need to cache + # the rest of the iterable in order to slice from the right side. + if (start < 0) or (stop < 0): + self._cache.extend(self._it) + # Otherwise we'll need to find the rightmost index and cache to that + # point. + else: + n = min(max(start, stop) + 1, maxsize) + cache_len = len(self._cache) + if n >= cache_len: + self._cache.extend(islice(self._it, n - cache_len)) + + return list(self._cache)[index] + + def __getitem__(self, index): + if isinstance(index, slice): + return self._get_slice(index) + + cache_len = len(self._cache) + if index < 0: + self._cache.extend(self._it) + elif index >= cache_len: + self._cache.extend(islice(self._it, index + 1 - cache_len)) + + return self._cache[index] + + +def collate(*iterables, **kwargs): + """Return a sorted merge of the items from each of several already-sorted + *iterables*. + + >>> list(collate('ACDZ', 'AZ', 'JKL')) + ['A', 'A', 'C', 'D', 'J', 'K', 'L', 'Z', 'Z'] + + Works lazily, keeping only the next value from each iterable in memory. Use + :func:`collate` to, for example, perform a n-way mergesort of items that + don't fit in memory. + + If a *key* function is specified, the iterables will be sorted according + to its result: + + >>> key = lambda s: int(s) # Sort by numeric value, not by string + >>> list(collate(['1', '10'], ['2', '11'], key=key)) + ['1', '2', '10', '11'] + + + If the *iterables* are sorted in descending order, set *reverse* to + ``True``: + + >>> list(collate([5, 3, 1], [4, 2, 0], reverse=True)) + [5, 4, 3, 2, 1, 0] + + If the elements of the passed-in iterables are out of order, you might get + unexpected results. + + On Python 3.5+, this function is an alias for :func:`heapq.merge`. + + """ + warnings.warn( + "collate is no longer part of more_itertools, use heapq.merge", + DeprecationWarning, + ) + return merge(*iterables, **kwargs) + + +def consumer(func): + """Decorator that automatically advances a PEP-342-style "reverse iterator" + to its first yield point so you don't have to call ``next()`` on it + manually. + + >>> @consumer + ... def tally(): + ... i = 0 + ... while True: + ... print('Thing number %s is %s.' % (i, (yield))) + ... i += 1 + ... + >>> t = tally() + >>> t.send('red') + Thing number 0 is red. + >>> t.send('fish') + Thing number 1 is fish. + + Without the decorator, you would have to call ``next(t)`` before + ``t.send()`` could be used. + + """ + + @wraps(func) + def wrapper(*args, **kwargs): + gen = func(*args, **kwargs) + next(gen) + return gen + + return wrapper + + +def ilen(iterable): + """Return the number of items in *iterable*. + + >>> ilen(x for x in range(1000000) if x % 3 == 0) + 333334 + + This consumes the iterable, so handle with care. + + """ + # This approach was selected because benchmarks showed it's likely the + # fastest of the known implementations at the time of writing. + # See GitHub tracker: #236, #230. + counter = count() + deque(zip(iterable, counter), maxlen=0) + return next(counter) + + +def iterate(func, start): + """Return ``start``, ``func(start)``, ``func(func(start))``, ... + + >>> from itertools import islice + >>> list(islice(iterate(lambda x: 2*x, 1), 10)) + [1, 2, 4, 8, 16, 32, 64, 128, 256, 512] + + """ + while True: + yield start + start = func(start) + + +def with_iter(context_manager): + """Wrap an iterable in a ``with`` statement, so it closes once exhausted. + + For example, this will close the file when the iterator is exhausted:: + + upper_lines = (line.upper() for line in with_iter(open('foo'))) + + Any context manager which returns an iterable is a candidate for + ``with_iter``. + + """ + with context_manager as iterable: + yield from iterable + + +def one(iterable, too_short=None, too_long=None): + """Return the first item from *iterable*, which is expected to contain only + that item. Raise an exception if *iterable* is empty or has more than one + item. + + :func:`one` is useful for ensuring that an iterable contains only one item. + For example, it can be used to retrieve the result of a database query + that is expected to return a single row. + + If *iterable* is empty, ``ValueError`` will be raised. You may specify a + different exception with the *too_short* keyword: + + >>> it = [] + >>> one(it) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + ValueError: too many items in iterable (expected 1)' + >>> too_short = IndexError('too few items') + >>> one(it, too_short=too_short) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + IndexError: too few items + + Similarly, if *iterable* contains more than one item, ``ValueError`` will + be raised. You may specify a different exception with the *too_long* + keyword: + + >>> it = ['too', 'many'] + >>> one(it) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + ValueError: Expected exactly one item in iterable, but got 'too', + 'many', and perhaps more. + >>> too_long = RuntimeError + >>> one(it, too_long=too_long) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + RuntimeError + + Note that :func:`one` attempts to advance *iterable* twice to ensure there + is only one item. See :func:`spy` or :func:`peekable` to check iterable + contents less destructively. + + """ + it = iter(iterable) + + try: + first_value = next(it) + except StopIteration as e: + raise ( + too_short or ValueError('too few items in iterable (expected 1)') + ) from e + + try: + second_value = next(it) + except StopIteration: + pass + else: + msg = ( + 'Expected exactly one item in iterable, but got {!r}, {!r}, ' + 'and perhaps more.'.format(first_value, second_value) + ) + raise too_long or ValueError(msg) + + return first_value + + +def raise_(exception, *args): + raise exception(*args) + + +def strictly_n(iterable, n, too_short=None, too_long=None): + """Validate that *iterable* has exactly *n* items and return them if + it does. If it has fewer than *n* items, call function *too_short* + with those items. If it has more than *n* items, call function + *too_long* with the first ``n + 1`` items. + + >>> iterable = ['a', 'b', 'c', 'd'] + >>> n = 4 + >>> list(strictly_n(iterable, n)) + ['a', 'b', 'c', 'd'] + + By default, *too_short* and *too_long* are functions that raise + ``ValueError``. + + >>> list(strictly_n('ab', 3)) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + ValueError: too few items in iterable (got 2) + + >>> list(strictly_n('abc', 2)) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + ValueError: too many items in iterable (got at least 3) + + You can instead supply functions that do something else. + *too_short* will be called with the number of items in *iterable*. + *too_long* will be called with `n + 1`. + + >>> def too_short(item_count): + ... raise RuntimeError + >>> it = strictly_n('abcd', 6, too_short=too_short) + >>> list(it) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + RuntimeError + + >>> def too_long(item_count): + ... print('The boss is going to hear about this') + >>> it = strictly_n('abcdef', 4, too_long=too_long) + >>> list(it) + The boss is going to hear about this + ['a', 'b', 'c', 'd'] + + """ + if too_short is None: + too_short = lambda item_count: raise_( + ValueError, + 'Too few items in iterable (got {})'.format(item_count), + ) + + if too_long is None: + too_long = lambda item_count: raise_( + ValueError, + 'Too many items in iterable (got at least {})'.format(item_count), + ) + + it = iter(iterable) + for i in range(n): + try: + item = next(it) + except StopIteration: + too_short(i) + return + else: + yield item + + try: + next(it) + except StopIteration: + pass + else: + too_long(n + 1) + + +def distinct_permutations(iterable, r=None): + """Yield successive distinct permutations of the elements in *iterable*. + + >>> sorted(distinct_permutations([1, 0, 1])) + [(0, 1, 1), (1, 0, 1), (1, 1, 0)] + + Equivalent to ``set(permutations(iterable))``, except duplicates are not + generated and thrown away. For larger input sequences this is much more + efficient. + + Duplicate permutations arise when there are duplicated elements in the + input iterable. The number of items returned is + `n! / (x_1! * x_2! * ... * x_n!)`, where `n` is the total number of + items input, and each `x_i` is the count of a distinct item in the input + sequence. + + If *r* is given, only the *r*-length permutations are yielded. + + >>> sorted(distinct_permutations([1, 0, 1], r=2)) + [(0, 1), (1, 0), (1, 1)] + >>> sorted(distinct_permutations(range(3), r=2)) + [(0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)] + + """ + # Algorithm: https://w.wiki/Qai + def _full(A): + while True: + # Yield the permutation we have + yield tuple(A) + + # Find the largest index i such that A[i] < A[i + 1] + for i in range(size - 2, -1, -1): + if A[i] < A[i + 1]: + break + # If no such index exists, this permutation is the last one + else: + return + + # Find the largest index j greater than j such that A[i] < A[j] + for j in range(size - 1, i, -1): + if A[i] < A[j]: + break + + # Swap the value of A[i] with that of A[j], then reverse the + # sequence from A[i + 1] to form the new permutation + A[i], A[j] = A[j], A[i] + A[i + 1 :] = A[: i - size : -1] # A[i + 1:][::-1] + + # Algorithm: modified from the above + def _partial(A, r): + # Split A into the first r items and the last r items + head, tail = A[:r], A[r:] + right_head_indexes = range(r - 1, -1, -1) + left_tail_indexes = range(len(tail)) + + while True: + # Yield the permutation we have + yield tuple(head) + + # Starting from the right, find the first index of the head with + # value smaller than the maximum value of the tail - call it i. + pivot = tail[-1] + for i in right_head_indexes: + if head[i] < pivot: + break + pivot = head[i] + else: + return + + # Starting from the left, find the first value of the tail + # with a value greater than head[i] and swap. + for j in left_tail_indexes: + if tail[j] > head[i]: + head[i], tail[j] = tail[j], head[i] + break + # If we didn't find one, start from the right and find the first + # index of the head with a value greater than head[i] and swap. + else: + for j in right_head_indexes: + if head[j] > head[i]: + head[i], head[j] = head[j], head[i] + break + + # Reverse head[i + 1:] and swap it with tail[:r - (i + 1)] + tail += head[: i - r : -1] # head[i + 1:][::-1] + i += 1 + head[i:], tail[:] = tail[: r - i], tail[r - i :] + + items = sorted(iterable) + + size = len(items) + if r is None: + r = size + + if 0 < r <= size: + return _full(items) if (r == size) else _partial(items, r) + + return iter(() if r else ((),)) + + +def intersperse(e, iterable, n=1): + """Intersperse filler element *e* among the items in *iterable*, leaving + *n* items between each filler element. + + >>> list(intersperse('!', [1, 2, 3, 4, 5])) + [1, '!', 2, '!', 3, '!', 4, '!', 5] + + >>> list(intersperse(None, [1, 2, 3, 4, 5], n=2)) + [1, 2, None, 3, 4, None, 5] + + """ + if n == 0: + raise ValueError('n must be > 0') + elif n == 1: + # interleave(repeat(e), iterable) -> e, x_0, e, x_1, e, x_2... + # islice(..., 1, None) -> x_0, e, x_1, e, x_2... + return islice(interleave(repeat(e), iterable), 1, None) + else: + # interleave(filler, chunks) -> [e], [x_0, x_1], [e], [x_2, x_3]... + # islice(..., 1, None) -> [x_0, x_1], [e], [x_2, x_3]... + # flatten(...) -> x_0, x_1, e, x_2, x_3... + filler = repeat([e]) + chunks = chunked(iterable, n) + return flatten(islice(interleave(filler, chunks), 1, None)) + + +def unique_to_each(*iterables): + """Return the elements from each of the input iterables that aren't in the + other input iterables. + + For example, suppose you have a set of packages, each with a set of + dependencies:: + + {'pkg_1': {'A', 'B'}, 'pkg_2': {'B', 'C'}, 'pkg_3': {'B', 'D'}} + + If you remove one package, which dependencies can also be removed? + + If ``pkg_1`` is removed, then ``A`` is no longer necessary - it is not + associated with ``pkg_2`` or ``pkg_3``. Similarly, ``C`` is only needed for + ``pkg_2``, and ``D`` is only needed for ``pkg_3``:: + + >>> unique_to_each({'A', 'B'}, {'B', 'C'}, {'B', 'D'}) + [['A'], ['C'], ['D']] + + If there are duplicates in one input iterable that aren't in the others + they will be duplicated in the output. Input order is preserved:: + + >>> unique_to_each("mississippi", "missouri") + [['p', 'p'], ['o', 'u', 'r']] + + It is assumed that the elements of each iterable are hashable. + + """ + pool = [list(it) for it in iterables] + counts = Counter(chain.from_iterable(map(set, pool))) + uniques = {element for element in counts if counts[element] == 1} + return [list(filter(uniques.__contains__, it)) for it in pool] + + +def windowed(seq, n, fillvalue=None, step=1): + """Return a sliding window of width *n* over the given iterable. + + >>> all_windows = windowed([1, 2, 3, 4, 5], 3) + >>> list(all_windows) + [(1, 2, 3), (2, 3, 4), (3, 4, 5)] + + When the window is larger than the iterable, *fillvalue* is used in place + of missing values: + + >>> list(windowed([1, 2, 3], 4)) + [(1, 2, 3, None)] + + Each window will advance in increments of *step*: + + >>> list(windowed([1, 2, 3, 4, 5, 6], 3, fillvalue='!', step=2)) + [(1, 2, 3), (3, 4, 5), (5, 6, '!')] + + To slide into the iterable's items, use :func:`chain` to add filler items + to the left: + + >>> iterable = [1, 2, 3, 4] + >>> n = 3 + >>> padding = [None] * (n - 1) + >>> list(windowed(chain(padding, iterable), 3)) + [(None, None, 1), (None, 1, 2), (1, 2, 3), (2, 3, 4)] + """ + if n < 0: + raise ValueError('n must be >= 0') + if n == 0: + yield tuple() + return + if step < 1: + raise ValueError('step must be >= 1') + + window = deque(maxlen=n) + i = n + for _ in map(window.append, seq): + i -= 1 + if not i: + i = step + yield tuple(window) + + size = len(window) + if size < n: + yield tuple(chain(window, repeat(fillvalue, n - size))) + elif 0 < i < min(step, n): + window += (fillvalue,) * i + yield tuple(window) + + +def substrings(iterable): + """Yield all of the substrings of *iterable*. + + >>> [''.join(s) for s in substrings('more')] + ['m', 'o', 'r', 'e', 'mo', 'or', 're', 'mor', 'ore', 'more'] + + Note that non-string iterables can also be subdivided. + + >>> list(substrings([0, 1, 2])) + [(0,), (1,), (2,), (0, 1), (1, 2), (0, 1, 2)] + + """ + # The length-1 substrings + seq = [] + for item in iter(iterable): + seq.append(item) + yield (item,) + seq = tuple(seq) + item_count = len(seq) + + # And the rest + for n in range(2, item_count + 1): + for i in range(item_count - n + 1): + yield seq[i : i + n] + + +def substrings_indexes(seq, reverse=False): + """Yield all substrings and their positions in *seq* + + The items yielded will be a tuple of the form ``(substr, i, j)``, where + ``substr == seq[i:j]``. + + This function only works for iterables that support slicing, such as + ``str`` objects. + + >>> for item in substrings_indexes('more'): + ... print(item) + ('m', 0, 1) + ('o', 1, 2) + ('r', 2, 3) + ('e', 3, 4) + ('mo', 0, 2) + ('or', 1, 3) + ('re', 2, 4) + ('mor', 0, 3) + ('ore', 1, 4) + ('more', 0, 4) + + Set *reverse* to ``True`` to yield the same items in the opposite order. + + + """ + r = range(1, len(seq) + 1) + if reverse: + r = reversed(r) + return ( + (seq[i : i + L], i, i + L) for L in r for i in range(len(seq) - L + 1) + ) + + +class bucket: + """Wrap *iterable* and return an object that buckets it iterable into + child iterables based on a *key* function. + + >>> iterable = ['a1', 'b1', 'c1', 'a2', 'b2', 'c2', 'b3'] + >>> s = bucket(iterable, key=lambda x: x[0]) # Bucket by 1st character + >>> sorted(list(s)) # Get the keys + ['a', 'b', 'c'] + >>> a_iterable = s['a'] + >>> next(a_iterable) + 'a1' + >>> next(a_iterable) + 'a2' + >>> list(s['b']) + ['b1', 'b2', 'b3'] + + The original iterable will be advanced and its items will be cached until + they are used by the child iterables. This may require significant storage. + + By default, attempting to select a bucket to which no items belong will + exhaust the iterable and cache all values. + If you specify a *validator* function, selected buckets will instead be + checked against it. + + >>> from itertools import count + >>> it = count(1, 2) # Infinite sequence of odd numbers + >>> key = lambda x: x % 10 # Bucket by last digit + >>> validator = lambda x: x in {1, 3, 5, 7, 9} # Odd digits only + >>> s = bucket(it, key=key, validator=validator) + >>> 2 in s + False + >>> list(s[2]) + [] + + """ + + def __init__(self, iterable, key, validator=None): + self._it = iter(iterable) + self._key = key + self._cache = defaultdict(deque) + self._validator = validator or (lambda x: True) + + def __contains__(self, value): + if not self._validator(value): + return False + + try: + item = next(self[value]) + except StopIteration: + return False + else: + self._cache[value].appendleft(item) + + return True + + def _get_values(self, value): + """ + Helper to yield items from the parent iterator that match *value*. + Items that don't match are stored in the local cache as they + are encountered. + """ + while True: + # If we've cached some items that match the target value, emit + # the first one and evict it from the cache. + if self._cache[value]: + yield self._cache[value].popleft() + # Otherwise we need to advance the parent iterator to search for + # a matching item, caching the rest. + else: + while True: + try: + item = next(self._it) + except StopIteration: + return + item_value = self._key(item) + if item_value == value: + yield item + break + elif self._validator(item_value): + self._cache[item_value].append(item) + + def __iter__(self): + for item in self._it: + item_value = self._key(item) + if self._validator(item_value): + self._cache[item_value].append(item) + + yield from self._cache.keys() + + def __getitem__(self, value): + if not self._validator(value): + return iter(()) + + return self._get_values(value) + + +def spy(iterable, n=1): + """Return a 2-tuple with a list containing the first *n* elements of + *iterable*, and an iterator with the same items as *iterable*. + This allows you to "look ahead" at the items in the iterable without + advancing it. + + There is one item in the list by default: + + >>> iterable = 'abcdefg' + >>> head, iterable = spy(iterable) + >>> head + ['a'] + >>> list(iterable) + ['a', 'b', 'c', 'd', 'e', 'f', 'g'] + + You may use unpacking to retrieve items instead of lists: + + >>> (head,), iterable = spy('abcdefg') + >>> head + 'a' + >>> (first, second), iterable = spy('abcdefg', 2) + >>> first + 'a' + >>> second + 'b' + + The number of items requested can be larger than the number of items in + the iterable: + + >>> iterable = [1, 2, 3, 4, 5] + >>> head, iterable = spy(iterable, 10) + >>> head + [1, 2, 3, 4, 5] + >>> list(iterable) + [1, 2, 3, 4, 5] + + """ + it = iter(iterable) + head = take(n, it) + + return head.copy(), chain(head, it) + + +def interleave(*iterables): + """Return a new iterable yielding from each iterable in turn, + until the shortest is exhausted. + + >>> list(interleave([1, 2, 3], [4, 5], [6, 7, 8])) + [1, 4, 6, 2, 5, 7] + + For a version that doesn't terminate after the shortest iterable is + exhausted, see :func:`interleave_longest`. + + """ + return chain.from_iterable(zip(*iterables)) + + +def interleave_longest(*iterables): + """Return a new iterable yielding from each iterable in turn, + skipping any that are exhausted. + + >>> list(interleave_longest([1, 2, 3], [4, 5], [6, 7, 8])) + [1, 4, 6, 2, 5, 7, 3, 8] + + This function produces the same output as :func:`roundrobin`, but may + perform better for some inputs (in particular when the number of iterables + is large). + + """ + i = chain.from_iterable(zip_longest(*iterables, fillvalue=_marker)) + return (x for x in i if x is not _marker) + + +def interleave_evenly(iterables, lengths=None): + """ + Interleave multiple iterables so that their elements are evenly distributed + throughout the output sequence. + + >>> iterables = [1, 2, 3, 4, 5], ['a', 'b'] + >>> list(interleave_evenly(iterables)) + [1, 2, 'a', 3, 4, 'b', 5] + + >>> iterables = [[1, 2, 3], [4, 5], [6, 7, 8]] + >>> list(interleave_evenly(iterables)) + [1, 6, 4, 2, 7, 3, 8, 5] + + This function requires iterables of known length. Iterables without + ``__len__()`` can be used by manually specifying lengths with *lengths*: + + >>> from itertools import combinations, repeat + >>> iterables = [combinations(range(4), 2), ['a', 'b', 'c']] + >>> lengths = [4 * (4 - 1) // 2, 3] + >>> list(interleave_evenly(iterables, lengths=lengths)) + [(0, 1), (0, 2), 'a', (0, 3), (1, 2), 'b', (1, 3), (2, 3), 'c'] + + Based on Bresenham's algorithm. + """ + if lengths is None: + try: + lengths = [len(it) for it in iterables] + except TypeError: + raise ValueError( + 'Iterable lengths could not be determined automatically. ' + 'Specify them with the lengths keyword.' + ) + elif len(iterables) != len(lengths): + raise ValueError('Mismatching number of iterables and lengths.') + + dims = len(lengths) + + # sort iterables by length, descending + lengths_permute = sorted( + range(dims), key=lambda i: lengths[i], reverse=True + ) + lengths_desc = [lengths[i] for i in lengths_permute] + iters_desc = [iter(iterables[i]) for i in lengths_permute] + + # the longest iterable is the primary one (Bresenham: the longest + # distance along an axis) + delta_primary, deltas_secondary = lengths_desc[0], lengths_desc[1:] + iter_primary, iters_secondary = iters_desc[0], iters_desc[1:] + errors = [delta_primary // dims] * len(deltas_secondary) + + to_yield = sum(lengths) + while to_yield: + yield next(iter_primary) + to_yield -= 1 + # update errors for each secondary iterable + errors = [e - delta for e, delta in zip(errors, deltas_secondary)] + + # those iterables for which the error is negative are yielded + # ("diagonal step" in Bresenham) + for i, e in enumerate(errors): + if e < 0: + yield next(iters_secondary[i]) + to_yield -= 1 + errors[i] += delta_primary + + +def collapse(iterable, base_type=None, levels=None): + """Flatten an iterable with multiple levels of nesting (e.g., a list of + lists of tuples) into non-iterable types. + + >>> iterable = [(1, 2), ([3, 4], [[5], [6]])] + >>> list(collapse(iterable)) + [1, 2, 3, 4, 5, 6] + + Binary and text strings are not considered iterable and + will not be collapsed. + + To avoid collapsing other types, specify *base_type*: + + >>> iterable = ['ab', ('cd', 'ef'), ['gh', 'ij']] + >>> list(collapse(iterable, base_type=tuple)) + ['ab', ('cd', 'ef'), 'gh', 'ij'] + + Specify *levels* to stop flattening after a certain level: + + >>> iterable = [('a', ['b']), ('c', ['d'])] + >>> list(collapse(iterable)) # Fully flattened + ['a', 'b', 'c', 'd'] + >>> list(collapse(iterable, levels=1)) # Only one level flattened + ['a', ['b'], 'c', ['d']] + + """ + + def walk(node, level): + if ( + ((levels is not None) and (level > levels)) + or isinstance(node, (str, bytes)) + or ((base_type is not None) and isinstance(node, base_type)) + ): + yield node + return + + try: + tree = iter(node) + except TypeError: + yield node + return + else: + for child in tree: + yield from walk(child, level + 1) + + yield from walk(iterable, 0) + + +def side_effect(func, iterable, chunk_size=None, before=None, after=None): + """Invoke *func* on each item in *iterable* (or on each *chunk_size* group + of items) before yielding the item. + + `func` must be a function that takes a single argument. Its return value + will be discarded. + + *before* and *after* are optional functions that take no arguments. They + will be executed before iteration starts and after it ends, respectively. + + `side_effect` can be used for logging, updating progress bars, or anything + that is not functionally "pure." + + Emitting a status message: + + >>> from more_itertools import consume + >>> func = lambda item: print('Received {}'.format(item)) + >>> consume(side_effect(func, range(2))) + Received 0 + Received 1 + + Operating on chunks of items: + + >>> pair_sums = [] + >>> func = lambda chunk: pair_sums.append(sum(chunk)) + >>> list(side_effect(func, [0, 1, 2, 3, 4, 5], 2)) + [0, 1, 2, 3, 4, 5] + >>> list(pair_sums) + [1, 5, 9] + + Writing to a file-like object: + + >>> from io import StringIO + >>> from more_itertools import consume + >>> f = StringIO() + >>> func = lambda x: print(x, file=f) + >>> before = lambda: print(u'HEADER', file=f) + >>> after = f.close + >>> it = [u'a', u'b', u'c'] + >>> consume(side_effect(func, it, before=before, after=after)) + >>> f.closed + True + + """ + try: + if before is not None: + before() + + if chunk_size is None: + for item in iterable: + func(item) + yield item + else: + for chunk in chunked(iterable, chunk_size): + func(chunk) + yield from chunk + finally: + if after is not None: + after() + + +def sliced(seq, n, strict=False): + """Yield slices of length *n* from the sequence *seq*. + + >>> list(sliced((1, 2, 3, 4, 5, 6), 3)) + [(1, 2, 3), (4, 5, 6)] + + By the default, the last yielded slice will have fewer than *n* elements + if the length of *seq* is not divisible by *n*: + + >>> list(sliced((1, 2, 3, 4, 5, 6, 7, 8), 3)) + [(1, 2, 3), (4, 5, 6), (7, 8)] + + If the length of *seq* is not divisible by *n* and *strict* is + ``True``, then ``ValueError`` will be raised before the last + slice is yielded. + + This function will only work for iterables that support slicing. + For non-sliceable iterables, see :func:`chunked`. + + """ + iterator = takewhile(len, (seq[i : i + n] for i in count(0, n))) + if strict: + + def ret(): + for _slice in iterator: + if len(_slice) != n: + raise ValueError("seq is not divisible by n.") + yield _slice + + return iter(ret()) + else: + return iterator + + +def split_at(iterable, pred, maxsplit=-1, keep_separator=False): + """Yield lists of items from *iterable*, where each list is delimited by + an item where callable *pred* returns ``True``. + + >>> list(split_at('abcdcba', lambda x: x == 'b')) + [['a'], ['c', 'd', 'c'], ['a']] + + >>> list(split_at(range(10), lambda n: n % 2 == 1)) + [[0], [2], [4], [6], [8], []] + + At most *maxsplit* splits are done. If *maxsplit* is not specified or -1, + then there is no limit on the number of splits: + + >>> list(split_at(range(10), lambda n: n % 2 == 1, maxsplit=2)) + [[0], [2], [4, 5, 6, 7, 8, 9]] + + By default, the delimiting items are not included in the output. + The include them, set *keep_separator* to ``True``. + + >>> list(split_at('abcdcba', lambda x: x == 'b', keep_separator=True)) + [['a'], ['b'], ['c', 'd', 'c'], ['b'], ['a']] + + """ + if maxsplit == 0: + yield list(iterable) + return + + buf = [] + it = iter(iterable) + for item in it: + if pred(item): + yield buf + if keep_separator: + yield [item] + if maxsplit == 1: + yield list(it) + return + buf = [] + maxsplit -= 1 + else: + buf.append(item) + yield buf + + +def split_before(iterable, pred, maxsplit=-1): + """Yield lists of items from *iterable*, where each list ends just before + an item for which callable *pred* returns ``True``: + + >>> list(split_before('OneTwo', lambda s: s.isupper())) + [['O', 'n', 'e'], ['T', 'w', 'o']] + + >>> list(split_before(range(10), lambda n: n % 3 == 0)) + [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]] + + At most *maxsplit* splits are done. If *maxsplit* is not specified or -1, + then there is no limit on the number of splits: + + >>> list(split_before(range(10), lambda n: n % 3 == 0, maxsplit=2)) + [[0, 1, 2], [3, 4, 5], [6, 7, 8, 9]] + """ + if maxsplit == 0: + yield list(iterable) + return + + buf = [] + it = iter(iterable) + for item in it: + if pred(item) and buf: + yield buf + if maxsplit == 1: + yield [item] + list(it) + return + buf = [] + maxsplit -= 1 + buf.append(item) + if buf: + yield buf + + +def split_after(iterable, pred, maxsplit=-1): + """Yield lists of items from *iterable*, where each list ends with an + item where callable *pred* returns ``True``: + + >>> list(split_after('one1two2', lambda s: s.isdigit())) + [['o', 'n', 'e', '1'], ['t', 'w', 'o', '2']] + + >>> list(split_after(range(10), lambda n: n % 3 == 0)) + [[0], [1, 2, 3], [4, 5, 6], [7, 8, 9]] + + At most *maxsplit* splits are done. If *maxsplit* is not specified or -1, + then there is no limit on the number of splits: + + >>> list(split_after(range(10), lambda n: n % 3 == 0, maxsplit=2)) + [[0], [1, 2, 3], [4, 5, 6, 7, 8, 9]] + + """ + if maxsplit == 0: + yield list(iterable) + return + + buf = [] + it = iter(iterable) + for item in it: + buf.append(item) + if pred(item) and buf: + yield buf + if maxsplit == 1: + yield list(it) + return + buf = [] + maxsplit -= 1 + if buf: + yield buf + + +def split_when(iterable, pred, maxsplit=-1): + """Split *iterable* into pieces based on the output of *pred*. + *pred* should be a function that takes successive pairs of items and + returns ``True`` if the iterable should be split in between them. + + For example, to find runs of increasing numbers, split the iterable when + element ``i`` is larger than element ``i + 1``: + + >>> list(split_when([1, 2, 3, 3, 2, 5, 2, 4, 2], lambda x, y: x > y)) + [[1, 2, 3, 3], [2, 5], [2, 4], [2]] + + At most *maxsplit* splits are done. If *maxsplit* is not specified or -1, + then there is no limit on the number of splits: + + >>> list(split_when([1, 2, 3, 3, 2, 5, 2, 4, 2], + ... lambda x, y: x > y, maxsplit=2)) + [[1, 2, 3, 3], [2, 5], [2, 4, 2]] + + """ + if maxsplit == 0: + yield list(iterable) + return + + it = iter(iterable) + try: + cur_item = next(it) + except StopIteration: + return + + buf = [cur_item] + for next_item in it: + if pred(cur_item, next_item): + yield buf + if maxsplit == 1: + yield [next_item] + list(it) + return + buf = [] + maxsplit -= 1 + + buf.append(next_item) + cur_item = next_item + + yield buf + + +def split_into(iterable, sizes): + """Yield a list of sequential items from *iterable* of length 'n' for each + integer 'n' in *sizes*. + + >>> list(split_into([1,2,3,4,5,6], [1,2,3])) + [[1], [2, 3], [4, 5, 6]] + + If the sum of *sizes* is smaller than the length of *iterable*, then the + remaining items of *iterable* will not be returned. + + >>> list(split_into([1,2,3,4,5,6], [2,3])) + [[1, 2], [3, 4, 5]] + + If the sum of *sizes* is larger than the length of *iterable*, fewer items + will be returned in the iteration that overruns *iterable* and further + lists will be empty: + + >>> list(split_into([1,2,3,4], [1,2,3,4])) + [[1], [2, 3], [4], []] + + When a ``None`` object is encountered in *sizes*, the returned list will + contain items up to the end of *iterable* the same way that itertools.slice + does: + + >>> list(split_into([1,2,3,4,5,6,7,8,9,0], [2,3,None])) + [[1, 2], [3, 4, 5], [6, 7, 8, 9, 0]] + + :func:`split_into` can be useful for grouping a series of items where the + sizes of the groups are not uniform. An example would be where in a row + from a table, multiple columns represent elements of the same feature + (e.g. a point represented by x,y,z) but, the format is not the same for + all columns. + """ + # convert the iterable argument into an iterator so its contents can + # be consumed by islice in case it is a generator + it = iter(iterable) + + for size in sizes: + if size is None: + yield list(it) + return + else: + yield list(islice(it, size)) + + +def padded(iterable, fillvalue=None, n=None, next_multiple=False): + """Yield the elements from *iterable*, followed by *fillvalue*, such that + at least *n* items are emitted. + + >>> list(padded([1, 2, 3], '?', 5)) + [1, 2, 3, '?', '?'] + + If *next_multiple* is ``True``, *fillvalue* will be emitted until the + number of items emitted is a multiple of *n*:: + + >>> list(padded([1, 2, 3, 4], n=3, next_multiple=True)) + [1, 2, 3, 4, None, None] + + If *n* is ``None``, *fillvalue* will be emitted indefinitely. + + """ + it = iter(iterable) + if n is None: + yield from chain(it, repeat(fillvalue)) + elif n < 1: + raise ValueError('n must be at least 1') + else: + item_count = 0 + for item in it: + yield item + item_count += 1 + + remaining = (n - item_count) % n if next_multiple else n - item_count + for _ in range(remaining): + yield fillvalue + + +def repeat_each(iterable, n=2): + """Repeat each element in *iterable* *n* times. + + >>> list(repeat_each('ABC', 3)) + ['A', 'A', 'A', 'B', 'B', 'B', 'C', 'C', 'C'] + """ + return chain.from_iterable(map(repeat, iterable, repeat(n))) + + +def repeat_last(iterable, default=None): + """After the *iterable* is exhausted, keep yielding its last element. + + >>> list(islice(repeat_last(range(3)), 5)) + [0, 1, 2, 2, 2] + + If the iterable is empty, yield *default* forever:: + + >>> list(islice(repeat_last(range(0), 42), 5)) + [42, 42, 42, 42, 42] + + """ + item = _marker + for item in iterable: + yield item + final = default if item is _marker else item + yield from repeat(final) + + +def distribute(n, iterable): + """Distribute the items from *iterable* among *n* smaller iterables. + + >>> group_1, group_2 = distribute(2, [1, 2, 3, 4, 5, 6]) + >>> list(group_1) + [1, 3, 5] + >>> list(group_2) + [2, 4, 6] + + If the length of *iterable* is not evenly divisible by *n*, then the + length of the returned iterables will not be identical: + + >>> children = distribute(3, [1, 2, 3, 4, 5, 6, 7]) + >>> [list(c) for c in children] + [[1, 4, 7], [2, 5], [3, 6]] + + If the length of *iterable* is smaller than *n*, then the last returned + iterables will be empty: + + >>> children = distribute(5, [1, 2, 3]) + >>> [list(c) for c in children] + [[1], [2], [3], [], []] + + This function uses :func:`itertools.tee` and may require significant + storage. If you need the order items in the smaller iterables to match the + original iterable, see :func:`divide`. + + """ + if n < 1: + raise ValueError('n must be at least 1') + + children = tee(iterable, n) + return [islice(it, index, None, n) for index, it in enumerate(children)] + + +def stagger(iterable, offsets=(-1, 0, 1), longest=False, fillvalue=None): + """Yield tuples whose elements are offset from *iterable*. + The amount by which the `i`-th item in each tuple is offset is given by + the `i`-th item in *offsets*. + + >>> list(stagger([0, 1, 2, 3])) + [(None, 0, 1), (0, 1, 2), (1, 2, 3)] + >>> list(stagger(range(8), offsets=(0, 2, 4))) + [(0, 2, 4), (1, 3, 5), (2, 4, 6), (3, 5, 7)] + + By default, the sequence will end when the final element of a tuple is the + last item in the iterable. To continue until the first element of a tuple + is the last item in the iterable, set *longest* to ``True``:: + + >>> list(stagger([0, 1, 2, 3], longest=True)) + [(None, 0, 1), (0, 1, 2), (1, 2, 3), (2, 3, None), (3, None, None)] + + By default, ``None`` will be used to replace offsets beyond the end of the + sequence. Specify *fillvalue* to use some other value. + + """ + children = tee(iterable, len(offsets)) + + return zip_offset( + *children, offsets=offsets, longest=longest, fillvalue=fillvalue + ) + + +class UnequalIterablesError(ValueError): + def __init__(self, details=None): + msg = 'Iterables have different lengths' + if details is not None: + msg += (': index 0 has length {}; index {} has length {}').format( + *details + ) + + super().__init__(msg) + + +def _zip_equal_generator(iterables): + for combo in zip_longest(*iterables, fillvalue=_marker): + for val in combo: + if val is _marker: + raise UnequalIterablesError() + yield combo + + +def _zip_equal(*iterables): + # Check whether the iterables are all the same size. + try: + first_size = len(iterables[0]) + for i, it in enumerate(iterables[1:], 1): + size = len(it) + if size != first_size: + break + else: + # If we didn't break out, we can use the built-in zip. + return zip(*iterables) + + # If we did break out, there was a mismatch. + raise UnequalIterablesError(details=(first_size, i, size)) + # If any one of the iterables didn't have a length, start reading + # them until one runs out. + except TypeError: + return _zip_equal_generator(iterables) + + +def zip_equal(*iterables): + """``zip`` the input *iterables* together, but raise + ``UnequalIterablesError`` if they aren't all the same length. + + >>> it_1 = range(3) + >>> it_2 = iter('abc') + >>> list(zip_equal(it_1, it_2)) + [(0, 'a'), (1, 'b'), (2, 'c')] + + >>> it_1 = range(3) + >>> it_2 = iter('abcd') + >>> list(zip_equal(it_1, it_2)) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + more_itertools.more.UnequalIterablesError: Iterables have different + lengths + + """ + if hexversion >= 0x30A00A6: + warnings.warn( + ( + 'zip_equal will be removed in a future version of ' + 'more-itertools. Use the builtin zip function with ' + 'strict=True instead.' + ), + DeprecationWarning, + ) + + return _zip_equal(*iterables) + + +def zip_offset(*iterables, offsets, longest=False, fillvalue=None): + """``zip`` the input *iterables* together, but offset the `i`-th iterable + by the `i`-th item in *offsets*. + + >>> list(zip_offset('0123', 'abcdef', offsets=(0, 1))) + [('0', 'b'), ('1', 'c'), ('2', 'd'), ('3', 'e')] + + This can be used as a lightweight alternative to SciPy or pandas to analyze + data sets in which some series have a lead or lag relationship. + + By default, the sequence will end when the shortest iterable is exhausted. + To continue until the longest iterable is exhausted, set *longest* to + ``True``. + + >>> list(zip_offset('0123', 'abcdef', offsets=(0, 1), longest=True)) + [('0', 'b'), ('1', 'c'), ('2', 'd'), ('3', 'e'), (None, 'f')] + + By default, ``None`` will be used to replace offsets beyond the end of the + sequence. Specify *fillvalue* to use some other value. + + """ + if len(iterables) != len(offsets): + raise ValueError("Number of iterables and offsets didn't match") + + staggered = [] + for it, n in zip(iterables, offsets): + if n < 0: + staggered.append(chain(repeat(fillvalue, -n), it)) + elif n > 0: + staggered.append(islice(it, n, None)) + else: + staggered.append(it) + + if longest: + return zip_longest(*staggered, fillvalue=fillvalue) + + return zip(*staggered) + + +def sort_together(iterables, key_list=(0,), key=None, reverse=False): + """Return the input iterables sorted together, with *key_list* as the + priority for sorting. All iterables are trimmed to the length of the + shortest one. + + This can be used like the sorting function in a spreadsheet. If each + iterable represents a column of data, the key list determines which + columns are used for sorting. + + By default, all iterables are sorted using the ``0``-th iterable:: + + >>> iterables = [(4, 3, 2, 1), ('a', 'b', 'c', 'd')] + >>> sort_together(iterables) + [(1, 2, 3, 4), ('d', 'c', 'b', 'a')] + + Set a different key list to sort according to another iterable. + Specifying multiple keys dictates how ties are broken:: + + >>> iterables = [(3, 1, 2), (0, 1, 0), ('c', 'b', 'a')] + >>> sort_together(iterables, key_list=(1, 2)) + [(2, 3, 1), (0, 0, 1), ('a', 'c', 'b')] + + To sort by a function of the elements of the iterable, pass a *key* + function. Its arguments are the elements of the iterables corresponding to + the key list:: + + >>> names = ('a', 'b', 'c') + >>> lengths = (1, 2, 3) + >>> widths = (5, 2, 1) + >>> def area(length, width): + ... return length * width + >>> sort_together([names, lengths, widths], key_list=(1, 2), key=area) + [('c', 'b', 'a'), (3, 2, 1), (1, 2, 5)] + + Set *reverse* to ``True`` to sort in descending order. + + >>> sort_together([(1, 2, 3), ('c', 'b', 'a')], reverse=True) + [(3, 2, 1), ('a', 'b', 'c')] + + """ + if key is None: + # if there is no key function, the key argument to sorted is an + # itemgetter + key_argument = itemgetter(*key_list) + else: + # if there is a key function, call it with the items at the offsets + # specified by the key function as arguments + key_list = list(key_list) + if len(key_list) == 1: + # if key_list contains a single item, pass the item at that offset + # as the only argument to the key function + key_offset = key_list[0] + key_argument = lambda zipped_items: key(zipped_items[key_offset]) + else: + # if key_list contains multiple items, use itemgetter to return a + # tuple of items, which we pass as *args to the key function + get_key_items = itemgetter(*key_list) + key_argument = lambda zipped_items: key( + *get_key_items(zipped_items) + ) + + return list( + zip(*sorted(zip(*iterables), key=key_argument, reverse=reverse)) + ) + + +def unzip(iterable): + """The inverse of :func:`zip`, this function disaggregates the elements + of the zipped *iterable*. + + The ``i``-th iterable contains the ``i``-th element from each element + of the zipped iterable. The first element is used to to determine the + length of the remaining elements. + + >>> iterable = [('a', 1), ('b', 2), ('c', 3), ('d', 4)] + >>> letters, numbers = unzip(iterable) + >>> list(letters) + ['a', 'b', 'c', 'd'] + >>> list(numbers) + [1, 2, 3, 4] + + This is similar to using ``zip(*iterable)``, but it avoids reading + *iterable* into memory. Note, however, that this function uses + :func:`itertools.tee` and thus may require significant storage. + + """ + head, iterable = spy(iter(iterable)) + if not head: + # empty iterable, e.g. zip([], [], []) + return () + # spy returns a one-length iterable as head + head = head[0] + iterables = tee(iterable, len(head)) + + def itemgetter(i): + def getter(obj): + try: + return obj[i] + except IndexError: + # basically if we have an iterable like + # iter([(1, 2, 3), (4, 5), (6,)]) + # the second unzipped iterable would fail at the third tuple + # since it would try to access tup[1] + # same with the third unzipped iterable and the second tuple + # to support these "improperly zipped" iterables, + # we create a custom itemgetter + # which just stops the unzipped iterables + # at first length mismatch + raise StopIteration + + return getter + + return tuple(map(itemgetter(i), it) for i, it in enumerate(iterables)) + + +def divide(n, iterable): + """Divide the elements from *iterable* into *n* parts, maintaining + order. + + >>> group_1, group_2 = divide(2, [1, 2, 3, 4, 5, 6]) + >>> list(group_1) + [1, 2, 3] + >>> list(group_2) + [4, 5, 6] + + If the length of *iterable* is not evenly divisible by *n*, then the + length of the returned iterables will not be identical: + + >>> children = divide(3, [1, 2, 3, 4, 5, 6, 7]) + >>> [list(c) for c in children] + [[1, 2, 3], [4, 5], [6, 7]] + + If the length of the iterable is smaller than n, then the last returned + iterables will be empty: + + >>> children = divide(5, [1, 2, 3]) + >>> [list(c) for c in children] + [[1], [2], [3], [], []] + + This function will exhaust the iterable before returning and may require + significant storage. If order is not important, see :func:`distribute`, + which does not first pull the iterable into memory. + + """ + if n < 1: + raise ValueError('n must be at least 1') + + try: + iterable[:0] + except TypeError: + seq = tuple(iterable) + else: + seq = iterable + + q, r = divmod(len(seq), n) + + ret = [] + stop = 0 + for i in range(1, n + 1): + start = stop + stop += q + 1 if i <= r else q + ret.append(iter(seq[start:stop])) + + return ret + + +def always_iterable(obj, base_type=(str, bytes)): + """If *obj* is iterable, return an iterator over its items:: + + >>> obj = (1, 2, 3) + >>> list(always_iterable(obj)) + [1, 2, 3] + + If *obj* is not iterable, return a one-item iterable containing *obj*:: + + >>> obj = 1 + >>> list(always_iterable(obj)) + [1] + + If *obj* is ``None``, return an empty iterable: + + >>> obj = None + >>> list(always_iterable(None)) + [] + + By default, binary and text strings are not considered iterable:: + + >>> obj = 'foo' + >>> list(always_iterable(obj)) + ['foo'] + + If *base_type* is set, objects for which ``isinstance(obj, base_type)`` + returns ``True`` won't be considered iterable. + + >>> obj = {'a': 1} + >>> list(always_iterable(obj)) # Iterate over the dict's keys + ['a'] + >>> list(always_iterable(obj, base_type=dict)) # Treat dicts as a unit + [{'a': 1}] + + Set *base_type* to ``None`` to avoid any special handling and treat objects + Python considers iterable as iterable: + + >>> obj = 'foo' + >>> list(always_iterable(obj, base_type=None)) + ['f', 'o', 'o'] + """ + if obj is None: + return iter(()) + + if (base_type is not None) and isinstance(obj, base_type): + return iter((obj,)) + + try: + return iter(obj) + except TypeError: + return iter((obj,)) + + +def adjacent(predicate, iterable, distance=1): + """Return an iterable over `(bool, item)` tuples where the `item` is + drawn from *iterable* and the `bool` indicates whether + that item satisfies the *predicate* or is adjacent to an item that does. + + For example, to find whether items are adjacent to a ``3``:: + + >>> list(adjacent(lambda x: x == 3, range(6))) + [(False, 0), (False, 1), (True, 2), (True, 3), (True, 4), (False, 5)] + + Set *distance* to change what counts as adjacent. For example, to find + whether items are two places away from a ``3``: + + >>> list(adjacent(lambda x: x == 3, range(6), distance=2)) + [(False, 0), (True, 1), (True, 2), (True, 3), (True, 4), (True, 5)] + + This is useful for contextualizing the results of a search function. + For example, a code comparison tool might want to identify lines that + have changed, but also surrounding lines to give the viewer of the diff + context. + + The predicate function will only be called once for each item in the + iterable. + + See also :func:`groupby_transform`, which can be used with this function + to group ranges of items with the same `bool` value. + + """ + # Allow distance=0 mainly for testing that it reproduces results with map() + if distance < 0: + raise ValueError('distance must be at least 0') + + i1, i2 = tee(iterable) + padding = [False] * distance + selected = chain(padding, map(predicate, i1), padding) + adjacent_to_selected = map(any, windowed(selected, 2 * distance + 1)) + return zip(adjacent_to_selected, i2) + + +def groupby_transform(iterable, keyfunc=None, valuefunc=None, reducefunc=None): + """An extension of :func:`itertools.groupby` that can apply transformations + to the grouped data. + + * *keyfunc* is a function computing a key value for each item in *iterable* + * *valuefunc* is a function that transforms the individual items from + *iterable* after grouping + * *reducefunc* is a function that transforms each group of items + + >>> iterable = 'aAAbBBcCC' + >>> keyfunc = lambda k: k.upper() + >>> valuefunc = lambda v: v.lower() + >>> reducefunc = lambda g: ''.join(g) + >>> list(groupby_transform(iterable, keyfunc, valuefunc, reducefunc)) + [('A', 'aaa'), ('B', 'bbb'), ('C', 'ccc')] + + Each optional argument defaults to an identity function if not specified. + + :func:`groupby_transform` is useful when grouping elements of an iterable + using a separate iterable as the key. To do this, :func:`zip` the iterables + and pass a *keyfunc* that extracts the first element and a *valuefunc* + that extracts the second element:: + + >>> from operator import itemgetter + >>> keys = [0, 0, 1, 1, 1, 2, 2, 2, 3] + >>> values = 'abcdefghi' + >>> iterable = zip(keys, values) + >>> grouper = groupby_transform(iterable, itemgetter(0), itemgetter(1)) + >>> [(k, ''.join(g)) for k, g in grouper] + [(0, 'ab'), (1, 'cde'), (2, 'fgh'), (3, 'i')] + + Note that the order of items in the iterable is significant. + Only adjacent items are grouped together, so if you don't want any + duplicate groups, you should sort the iterable by the key function. + + """ + ret = groupby(iterable, keyfunc) + if valuefunc: + ret = ((k, map(valuefunc, g)) for k, g in ret) + if reducefunc: + ret = ((k, reducefunc(g)) for k, g in ret) + + return ret + + +class numeric_range(abc.Sequence, abc.Hashable): + """An extension of the built-in ``range()`` function whose arguments can + be any orderable numeric type. + + With only *stop* specified, *start* defaults to ``0`` and *step* + defaults to ``1``. The output items will match the type of *stop*: + + >>> list(numeric_range(3.5)) + [0.0, 1.0, 2.0, 3.0] + + With only *start* and *stop* specified, *step* defaults to ``1``. The + output items will match the type of *start*: + + >>> from decimal import Decimal + >>> start = Decimal('2.1') + >>> stop = Decimal('5.1') + >>> list(numeric_range(start, stop)) + [Decimal('2.1'), Decimal('3.1'), Decimal('4.1')] + + With *start*, *stop*, and *step* specified the output items will match + the type of ``start + step``: + + >>> from fractions import Fraction + >>> start = Fraction(1, 2) # Start at 1/2 + >>> stop = Fraction(5, 2) # End at 5/2 + >>> step = Fraction(1, 2) # Count by 1/2 + >>> list(numeric_range(start, stop, step)) + [Fraction(1, 2), Fraction(1, 1), Fraction(3, 2), Fraction(2, 1)] + + If *step* is zero, ``ValueError`` is raised. Negative steps are supported: + + >>> list(numeric_range(3, -1, -1.0)) + [3.0, 2.0, 1.0, 0.0] + + Be aware of the limitations of floating point numbers; the representation + of the yielded numbers may be surprising. + + ``datetime.datetime`` objects can be used for *start* and *stop*, if *step* + is a ``datetime.timedelta`` object: + + >>> import datetime + >>> start = datetime.datetime(2019, 1, 1) + >>> stop = datetime.datetime(2019, 1, 3) + >>> step = datetime.timedelta(days=1) + >>> items = iter(numeric_range(start, stop, step)) + >>> next(items) + datetime.datetime(2019, 1, 1, 0, 0) + >>> next(items) + datetime.datetime(2019, 1, 2, 0, 0) + + """ + + _EMPTY_HASH = hash(range(0, 0)) + + def __init__(self, *args): + argc = len(args) + if argc == 1: + (self._stop,) = args + self._start = type(self._stop)(0) + self._step = type(self._stop - self._start)(1) + elif argc == 2: + self._start, self._stop = args + self._step = type(self._stop - self._start)(1) + elif argc == 3: + self._start, self._stop, self._step = args + elif argc == 0: + raise TypeError( + 'numeric_range expected at least ' + '1 argument, got {}'.format(argc) + ) + else: + raise TypeError( + 'numeric_range expected at most ' + '3 arguments, got {}'.format(argc) + ) + + self._zero = type(self._step)(0) + if self._step == self._zero: + raise ValueError('numeric_range() arg 3 must not be zero') + self._growing = self._step > self._zero + self._init_len() + + def __bool__(self): + if self._growing: + return self._start < self._stop + else: + return self._start > self._stop + + def __contains__(self, elem): + if self._growing: + if self._start <= elem < self._stop: + return (elem - self._start) % self._step == self._zero + else: + if self._start >= elem > self._stop: + return (self._start - elem) % (-self._step) == self._zero + + return False + + def __eq__(self, other): + if isinstance(other, numeric_range): + empty_self = not bool(self) + empty_other = not bool(other) + if empty_self or empty_other: + return empty_self and empty_other # True if both empty + else: + return ( + self._start == other._start + and self._step == other._step + and self._get_by_index(-1) == other._get_by_index(-1) + ) + else: + return False + + def __getitem__(self, key): + if isinstance(key, int): + return self._get_by_index(key) + elif isinstance(key, slice): + step = self._step if key.step is None else key.step * self._step + + if key.start is None or key.start <= -self._len: + start = self._start + elif key.start >= self._len: + start = self._stop + else: # -self._len < key.start < self._len + start = self._get_by_index(key.start) + + if key.stop is None or key.stop >= self._len: + stop = self._stop + elif key.stop <= -self._len: + stop = self._start + else: # -self._len < key.stop < self._len + stop = self._get_by_index(key.stop) + + return numeric_range(start, stop, step) + else: + raise TypeError( + 'numeric range indices must be ' + 'integers or slices, not {}'.format(type(key).__name__) + ) + + def __hash__(self): + if self: + return hash((self._start, self._get_by_index(-1), self._step)) + else: + return self._EMPTY_HASH + + def __iter__(self): + values = (self._start + (n * self._step) for n in count()) + if self._growing: + return takewhile(partial(gt, self._stop), values) + else: + return takewhile(partial(lt, self._stop), values) + + def __len__(self): + return self._len + + def _init_len(self): + if self._growing: + start = self._start + stop = self._stop + step = self._step + else: + start = self._stop + stop = self._start + step = -self._step + distance = stop - start + if distance <= self._zero: + self._len = 0 + else: # distance > 0 and step > 0: regular euclidean division + q, r = divmod(distance, step) + self._len = int(q) + int(r != self._zero) + + def __reduce__(self): + return numeric_range, (self._start, self._stop, self._step) + + def __repr__(self): + if self._step == 1: + return "numeric_range({}, {})".format( + repr(self._start), repr(self._stop) + ) + else: + return "numeric_range({}, {}, {})".format( + repr(self._start), repr(self._stop), repr(self._step) + ) + + def __reversed__(self): + return iter( + numeric_range( + self._get_by_index(-1), self._start - self._step, -self._step + ) + ) + + def count(self, value): + return int(value in self) + + def index(self, value): + if self._growing: + if self._start <= value < self._stop: + q, r = divmod(value - self._start, self._step) + if r == self._zero: + return int(q) + else: + if self._start >= value > self._stop: + q, r = divmod(self._start - value, -self._step) + if r == self._zero: + return int(q) + + raise ValueError("{} is not in numeric range".format(value)) + + def _get_by_index(self, i): + if i < 0: + i += self._len + if i < 0 or i >= self._len: + raise IndexError("numeric range object index out of range") + return self._start + i * self._step + + +def count_cycle(iterable, n=None): + """Cycle through the items from *iterable* up to *n* times, yielding + the number of completed cycles along with each item. If *n* is omitted the + process repeats indefinitely. + + >>> list(count_cycle('AB', 3)) + [(0, 'A'), (0, 'B'), (1, 'A'), (1, 'B'), (2, 'A'), (2, 'B')] + + """ + iterable = tuple(iterable) + if not iterable: + return iter(()) + counter = count() if n is None else range(n) + return ((i, item) for i in counter for item in iterable) + + +def mark_ends(iterable): + """Yield 3-tuples of the form ``(is_first, is_last, item)``. + + >>> list(mark_ends('ABC')) + [(True, False, 'A'), (False, False, 'B'), (False, True, 'C')] + + Use this when looping over an iterable to take special action on its first + and/or last items: + + >>> iterable = ['Header', 100, 200, 'Footer'] + >>> total = 0 + >>> for is_first, is_last, item in mark_ends(iterable): + ... if is_first: + ... continue # Skip the header + ... if is_last: + ... continue # Skip the footer + ... total += item + >>> print(total) + 300 + """ + it = iter(iterable) + + try: + b = next(it) + except StopIteration: + return + + try: + for i in count(): + a = b + b = next(it) + yield i == 0, False, a + + except StopIteration: + yield i == 0, True, a + + +def locate(iterable, pred=bool, window_size=None): + """Yield the index of each item in *iterable* for which *pred* returns + ``True``. + + *pred* defaults to :func:`bool`, which will select truthy items: + + >>> list(locate([0, 1, 1, 0, 1, 0, 0])) + [1, 2, 4] + + Set *pred* to a custom function to, e.g., find the indexes for a particular + item. + + >>> list(locate(['a', 'b', 'c', 'b'], lambda x: x == 'b')) + [1, 3] + + If *window_size* is given, then the *pred* function will be called with + that many items. This enables searching for sub-sequences: + + >>> iterable = [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3] + >>> pred = lambda *args: args == (1, 2, 3) + >>> list(locate(iterable, pred=pred, window_size=3)) + [1, 5, 9] + + Use with :func:`seekable` to find indexes and then retrieve the associated + items: + + >>> from itertools import count + >>> from more_itertools import seekable + >>> source = (3 * n + 1 if (n % 2) else n // 2 for n in count()) + >>> it = seekable(source) + >>> pred = lambda x: x > 100 + >>> indexes = locate(it, pred=pred) + >>> i = next(indexes) + >>> it.seek(i) + >>> next(it) + 106 + + """ + if window_size is None: + return compress(count(), map(pred, iterable)) + + if window_size < 1: + raise ValueError('window size must be at least 1') + + it = windowed(iterable, window_size, fillvalue=_marker) + return compress(count(), starmap(pred, it)) + + +def lstrip(iterable, pred): + """Yield the items from *iterable*, but strip any from the beginning + for which *pred* returns ``True``. + + For example, to remove a set of items from the start of an iterable: + + >>> iterable = (None, False, None, 1, 2, None, 3, False, None) + >>> pred = lambda x: x in {None, False, ''} + >>> list(lstrip(iterable, pred)) + [1, 2, None, 3, False, None] + + This function is analogous to to :func:`str.lstrip`, and is essentially + an wrapper for :func:`itertools.dropwhile`. + + """ + return dropwhile(pred, iterable) + + +def rstrip(iterable, pred): + """Yield the items from *iterable*, but strip any from the end + for which *pred* returns ``True``. + + For example, to remove a set of items from the end of an iterable: + + >>> iterable = (None, False, None, 1, 2, None, 3, False, None) + >>> pred = lambda x: x in {None, False, ''} + >>> list(rstrip(iterable, pred)) + [None, False, None, 1, 2, None, 3] + + This function is analogous to :func:`str.rstrip`. + + """ + cache = [] + cache_append = cache.append + cache_clear = cache.clear + for x in iterable: + if pred(x): + cache_append(x) + else: + yield from cache + cache_clear() + yield x + + +def strip(iterable, pred): + """Yield the items from *iterable*, but strip any from the + beginning and end for which *pred* returns ``True``. + + For example, to remove a set of items from both ends of an iterable: + + >>> iterable = (None, False, None, 1, 2, None, 3, False, None) + >>> pred = lambda x: x in {None, False, ''} + >>> list(strip(iterable, pred)) + [1, 2, None, 3] + + This function is analogous to :func:`str.strip`. + + """ + return rstrip(lstrip(iterable, pred), pred) + + +class islice_extended: + """An extension of :func:`itertools.islice` that supports negative values + for *stop*, *start*, and *step*. + + >>> iterable = iter('abcdefgh') + >>> list(islice_extended(iterable, -4, -1)) + ['e', 'f', 'g'] + + Slices with negative values require some caching of *iterable*, but this + function takes care to minimize the amount of memory required. + + For example, you can use a negative step with an infinite iterator: + + >>> from itertools import count + >>> list(islice_extended(count(), 110, 99, -2)) + [110, 108, 106, 104, 102, 100] + + You can also use slice notation directly: + + >>> iterable = map(str, count()) + >>> it = islice_extended(iterable)[10:20:2] + >>> list(it) + ['10', '12', '14', '16', '18'] + + """ + + def __init__(self, iterable, *args): + it = iter(iterable) + if args: + self._iterable = _islice_helper(it, slice(*args)) + else: + self._iterable = it + + def __iter__(self): + return self + + def __next__(self): + return next(self._iterable) + + def __getitem__(self, key): + if isinstance(key, slice): + return islice_extended(_islice_helper(self._iterable, key)) + + raise TypeError('islice_extended.__getitem__ argument must be a slice') + + +def _islice_helper(it, s): + start = s.start + stop = s.stop + if s.step == 0: + raise ValueError('step argument must be a non-zero integer or None.') + step = s.step or 1 + + if step > 0: + start = 0 if (start is None) else start + + if start < 0: + # Consume all but the last -start items + cache = deque(enumerate(it, 1), maxlen=-start) + len_iter = cache[-1][0] if cache else 0 + + # Adjust start to be positive + i = max(len_iter + start, 0) + + # Adjust stop to be positive + if stop is None: + j = len_iter + elif stop >= 0: + j = min(stop, len_iter) + else: + j = max(len_iter + stop, 0) + + # Slice the cache + n = j - i + if n <= 0: + return + + for index, item in islice(cache, 0, n, step): + yield item + elif (stop is not None) and (stop < 0): + # Advance to the start position + next(islice(it, start, start), None) + + # When stop is negative, we have to carry -stop items while + # iterating + cache = deque(islice(it, -stop), maxlen=-stop) + + for index, item in enumerate(it): + cached_item = cache.popleft() + if index % step == 0: + yield cached_item + cache.append(item) + else: + # When both start and stop are positive we have the normal case + yield from islice(it, start, stop, step) + else: + start = -1 if (start is None) else start + + if (stop is not None) and (stop < 0): + # Consume all but the last items + n = -stop - 1 + cache = deque(enumerate(it, 1), maxlen=n) + len_iter = cache[-1][0] if cache else 0 + + # If start and stop are both negative they are comparable and + # we can just slice. Otherwise we can adjust start to be negative + # and then slice. + if start < 0: + i, j = start, stop + else: + i, j = min(start - len_iter, -1), None + + for index, item in list(cache)[i:j:step]: + yield item + else: + # Advance to the stop position + if stop is not None: + m = stop + 1 + next(islice(it, m, m), None) + + # stop is positive, so if start is negative they are not comparable + # and we need the rest of the items. + if start < 0: + i = start + n = None + # stop is None and start is positive, so we just need items up to + # the start index. + elif stop is None: + i = None + n = start + 1 + # Both stop and start are positive, so they are comparable. + else: + i = None + n = start - stop + if n <= 0: + return + + cache = list(islice(it, n)) + + yield from cache[i::step] + + +def always_reversible(iterable): + """An extension of :func:`reversed` that supports all iterables, not + just those which implement the ``Reversible`` or ``Sequence`` protocols. + + >>> print(*always_reversible(x for x in range(3))) + 2 1 0 + + If the iterable is already reversible, this function returns the + result of :func:`reversed()`. If the iterable is not reversible, + this function will cache the remaining items in the iterable and + yield them in reverse order, which may require significant storage. + """ + try: + return reversed(iterable) + except TypeError: + return reversed(list(iterable)) + + +def consecutive_groups(iterable, ordering=lambda x: x): + """Yield groups of consecutive items using :func:`itertools.groupby`. + The *ordering* function determines whether two items are adjacent by + returning their position. + + By default, the ordering function is the identity function. This is + suitable for finding runs of numbers: + + >>> iterable = [1, 10, 11, 12, 20, 30, 31, 32, 33, 40] + >>> for group in consecutive_groups(iterable): + ... print(list(group)) + [1] + [10, 11, 12] + [20] + [30, 31, 32, 33] + [40] + + For finding runs of adjacent letters, try using the :meth:`index` method + of a string of letters: + + >>> from string import ascii_lowercase + >>> iterable = 'abcdfgilmnop' + >>> ordering = ascii_lowercase.index + >>> for group in consecutive_groups(iterable, ordering): + ... print(list(group)) + ['a', 'b', 'c', 'd'] + ['f', 'g'] + ['i'] + ['l', 'm', 'n', 'o', 'p'] + + Each group of consecutive items is an iterator that shares it source with + *iterable*. When an an output group is advanced, the previous group is + no longer available unless its elements are copied (e.g., into a ``list``). + + >>> iterable = [1, 2, 11, 12, 21, 22] + >>> saved_groups = [] + >>> for group in consecutive_groups(iterable): + ... saved_groups.append(list(group)) # Copy group elements + >>> saved_groups + [[1, 2], [11, 12], [21, 22]] + + """ + for k, g in groupby( + enumerate(iterable), key=lambda x: x[0] - ordering(x[1]) + ): + yield map(itemgetter(1), g) + + +def difference(iterable, func=sub, *, initial=None): + """This function is the inverse of :func:`itertools.accumulate`. By default + it will compute the first difference of *iterable* using + :func:`operator.sub`: + + >>> from itertools import accumulate + >>> iterable = accumulate([0, 1, 2, 3, 4]) # produces 0, 1, 3, 6, 10 + >>> list(difference(iterable)) + [0, 1, 2, 3, 4] + + *func* defaults to :func:`operator.sub`, but other functions can be + specified. They will be applied as follows:: + + A, B, C, D, ... --> A, func(B, A), func(C, B), func(D, C), ... + + For example, to do progressive division: + + >>> iterable = [1, 2, 6, 24, 120] + >>> func = lambda x, y: x // y + >>> list(difference(iterable, func)) + [1, 2, 3, 4, 5] + + If the *initial* keyword is set, the first element will be skipped when + computing successive differences. + + >>> it = [10, 11, 13, 16] # from accumulate([1, 2, 3], initial=10) + >>> list(difference(it, initial=10)) + [1, 2, 3] + + """ + a, b = tee(iterable) + try: + first = [next(b)] + except StopIteration: + return iter([]) + + if initial is not None: + first = [] + + return chain(first, starmap(func, zip(b, a))) + + +class SequenceView(Sequence): + """Return a read-only view of the sequence object *target*. + + :class:`SequenceView` objects are analogous to Python's built-in + "dictionary view" types. They provide a dynamic view of a sequence's items, + meaning that when the sequence updates, so does the view. + + >>> seq = ['0', '1', '2'] + >>> view = SequenceView(seq) + >>> view + SequenceView(['0', '1', '2']) + >>> seq.append('3') + >>> view + SequenceView(['0', '1', '2', '3']) + + Sequence views support indexing, slicing, and length queries. They act + like the underlying sequence, except they don't allow assignment: + + >>> view[1] + '1' + >>> view[1:-1] + ['1', '2'] + >>> len(view) + 4 + + Sequence views are useful as an alternative to copying, as they don't + require (much) extra storage. + + """ + + def __init__(self, target): + if not isinstance(target, Sequence): + raise TypeError + self._target = target + + def __getitem__(self, index): + return self._target[index] + + def __len__(self): + return len(self._target) + + def __repr__(self): + return '{}({})'.format(self.__class__.__name__, repr(self._target)) + + +class seekable: + """Wrap an iterator to allow for seeking backward and forward. This + progressively caches the items in the source iterable so they can be + re-visited. + + Call :meth:`seek` with an index to seek to that position in the source + iterable. + + To "reset" an iterator, seek to ``0``: + + >>> from itertools import count + >>> it = seekable((str(n) for n in count())) + >>> next(it), next(it), next(it) + ('0', '1', '2') + >>> it.seek(0) + >>> next(it), next(it), next(it) + ('0', '1', '2') + >>> next(it) + '3' + + You can also seek forward: + + >>> it = seekable((str(n) for n in range(20))) + >>> it.seek(10) + >>> next(it) + '10' + >>> it.seek(20) # Seeking past the end of the source isn't a problem + >>> list(it) + [] + >>> it.seek(0) # Resetting works even after hitting the end + >>> next(it), next(it), next(it) + ('0', '1', '2') + + Call :meth:`peek` to look ahead one item without advancing the iterator: + + >>> it = seekable('1234') + >>> it.peek() + '1' + >>> list(it) + ['1', '2', '3', '4'] + >>> it.peek(default='empty') + 'empty' + + Before the iterator is at its end, calling :func:`bool` on it will return + ``True``. After it will return ``False``: + + >>> it = seekable('5678') + >>> bool(it) + True + >>> list(it) + ['5', '6', '7', '8'] + >>> bool(it) + False + + You may view the contents of the cache with the :meth:`elements` method. + That returns a :class:`SequenceView`, a view that updates automatically: + + >>> it = seekable((str(n) for n in range(10))) + >>> next(it), next(it), next(it) + ('0', '1', '2') + >>> elements = it.elements() + >>> elements + SequenceView(['0', '1', '2']) + >>> next(it) + '3' + >>> elements + SequenceView(['0', '1', '2', '3']) + + By default, the cache grows as the source iterable progresses, so beware of + wrapping very large or infinite iterables. Supply *maxlen* to limit the + size of the cache (this of course limits how far back you can seek). + + >>> from itertools import count + >>> it = seekable((str(n) for n in count()), maxlen=2) + >>> next(it), next(it), next(it), next(it) + ('0', '1', '2', '3') + >>> list(it.elements()) + ['2', '3'] + >>> it.seek(0) + >>> next(it), next(it), next(it), next(it) + ('2', '3', '4', '5') + >>> next(it) + '6' + + """ + + def __init__(self, iterable, maxlen=None): + self._source = iter(iterable) + if maxlen is None: + self._cache = [] + else: + self._cache = deque([], maxlen) + self._index = None + + def __iter__(self): + return self + + def __next__(self): + if self._index is not None: + try: + item = self._cache[self._index] + except IndexError: + self._index = None + else: + self._index += 1 + return item + + item = next(self._source) + self._cache.append(item) + return item + + def __bool__(self): + try: + self.peek() + except StopIteration: + return False + return True + + def peek(self, default=_marker): + try: + peeked = next(self) + except StopIteration: + if default is _marker: + raise + return default + if self._index is None: + self._index = len(self._cache) + self._index -= 1 + return peeked + + def elements(self): + return SequenceView(self._cache) + + def seek(self, index): + self._index = index + remainder = index - len(self._cache) + if remainder > 0: + consume(self, remainder) + + +class run_length: + """ + :func:`run_length.encode` compresses an iterable with run-length encoding. + It yields groups of repeated items with the count of how many times they + were repeated: + + >>> uncompressed = 'abbcccdddd' + >>> list(run_length.encode(uncompressed)) + [('a', 1), ('b', 2), ('c', 3), ('d', 4)] + + :func:`run_length.decode` decompresses an iterable that was previously + compressed with run-length encoding. It yields the items of the + decompressed iterable: + + >>> compressed = [('a', 1), ('b', 2), ('c', 3), ('d', 4)] + >>> list(run_length.decode(compressed)) + ['a', 'b', 'b', 'c', 'c', 'c', 'd', 'd', 'd', 'd'] + + """ + + @staticmethod + def encode(iterable): + return ((k, ilen(g)) for k, g in groupby(iterable)) + + @staticmethod + def decode(iterable): + return chain.from_iterable(repeat(k, n) for k, n in iterable) + + +def exactly_n(iterable, n, predicate=bool): + """Return ``True`` if exactly ``n`` items in the iterable are ``True`` + according to the *predicate* function. + + >>> exactly_n([True, True, False], 2) + True + >>> exactly_n([True, True, False], 1) + False + >>> exactly_n([0, 1, 2, 3, 4, 5], 3, lambda x: x < 3) + True + + The iterable will be advanced until ``n + 1`` truthy items are encountered, + so avoid calling it on infinite iterables. + + """ + return len(take(n + 1, filter(predicate, iterable))) == n + + +def circular_shifts(iterable): + """Return a list of circular shifts of *iterable*. + + >>> circular_shifts(range(4)) + [(0, 1, 2, 3), (1, 2, 3, 0), (2, 3, 0, 1), (3, 0, 1, 2)] + """ + lst = list(iterable) + return take(len(lst), windowed(cycle(lst), len(lst))) + + +def make_decorator(wrapping_func, result_index=0): + """Return a decorator version of *wrapping_func*, which is a function that + modifies an iterable. *result_index* is the position in that function's + signature where the iterable goes. + + This lets you use itertools on the "production end," i.e. at function + definition. This can augment what the function returns without changing the + function's code. + + For example, to produce a decorator version of :func:`chunked`: + + >>> from more_itertools import chunked + >>> chunker = make_decorator(chunked, result_index=0) + >>> @chunker(3) + ... def iter_range(n): + ... return iter(range(n)) + ... + >>> list(iter_range(9)) + [[0, 1, 2], [3, 4, 5], [6, 7, 8]] + + To only allow truthy items to be returned: + + >>> truth_serum = make_decorator(filter, result_index=1) + >>> @truth_serum(bool) + ... def boolean_test(): + ... return [0, 1, '', ' ', False, True] + ... + >>> list(boolean_test()) + [1, ' ', True] + + The :func:`peekable` and :func:`seekable` wrappers make for practical + decorators: + + >>> from more_itertools import peekable + >>> peekable_function = make_decorator(peekable) + >>> @peekable_function() + ... def str_range(*args): + ... return (str(x) for x in range(*args)) + ... + >>> it = str_range(1, 20, 2) + >>> next(it), next(it), next(it) + ('1', '3', '5') + >>> it.peek() + '7' + >>> next(it) + '7' + + """ + # See https://sites.google.com/site/bbayles/index/decorator_factory for + # notes on how this works. + def decorator(*wrapping_args, **wrapping_kwargs): + def outer_wrapper(f): + def inner_wrapper(*args, **kwargs): + result = f(*args, **kwargs) + wrapping_args_ = list(wrapping_args) + wrapping_args_.insert(result_index, result) + return wrapping_func(*wrapping_args_, **wrapping_kwargs) + + return inner_wrapper + + return outer_wrapper + + return decorator + + +def map_reduce(iterable, keyfunc, valuefunc=None, reducefunc=None): + """Return a dictionary that maps the items in *iterable* to categories + defined by *keyfunc*, transforms them with *valuefunc*, and + then summarizes them by category with *reducefunc*. + + *valuefunc* defaults to the identity function if it is unspecified. + If *reducefunc* is unspecified, no summarization takes place: + + >>> keyfunc = lambda x: x.upper() + >>> result = map_reduce('abbccc', keyfunc) + >>> sorted(result.items()) + [('A', ['a']), ('B', ['b', 'b']), ('C', ['c', 'c', 'c'])] + + Specifying *valuefunc* transforms the categorized items: + + >>> keyfunc = lambda x: x.upper() + >>> valuefunc = lambda x: 1 + >>> result = map_reduce('abbccc', keyfunc, valuefunc) + >>> sorted(result.items()) + [('A', [1]), ('B', [1, 1]), ('C', [1, 1, 1])] + + Specifying *reducefunc* summarizes the categorized items: + + >>> keyfunc = lambda x: x.upper() + >>> valuefunc = lambda x: 1 + >>> reducefunc = sum + >>> result = map_reduce('abbccc', keyfunc, valuefunc, reducefunc) + >>> sorted(result.items()) + [('A', 1), ('B', 2), ('C', 3)] + + You may want to filter the input iterable before applying the map/reduce + procedure: + + >>> all_items = range(30) + >>> items = [x for x in all_items if 10 <= x <= 20] # Filter + >>> keyfunc = lambda x: x % 2 # Evens map to 0; odds to 1 + >>> categories = map_reduce(items, keyfunc=keyfunc) + >>> sorted(categories.items()) + [(0, [10, 12, 14, 16, 18, 20]), (1, [11, 13, 15, 17, 19])] + >>> summaries = map_reduce(items, keyfunc=keyfunc, reducefunc=sum) + >>> sorted(summaries.items()) + [(0, 90), (1, 75)] + + Note that all items in the iterable are gathered into a list before the + summarization step, which may require significant storage. + + The returned object is a :obj:`collections.defaultdict` with the + ``default_factory`` set to ``None``, such that it behaves like a normal + dictionary. + + """ + valuefunc = (lambda x: x) if (valuefunc is None) else valuefunc + + ret = defaultdict(list) + for item in iterable: + key = keyfunc(item) + value = valuefunc(item) + ret[key].append(value) + + if reducefunc is not None: + for key, value_list in ret.items(): + ret[key] = reducefunc(value_list) + + ret.default_factory = None + return ret + + +def rlocate(iterable, pred=bool, window_size=None): + """Yield the index of each item in *iterable* for which *pred* returns + ``True``, starting from the right and moving left. + + *pred* defaults to :func:`bool`, which will select truthy items: + + >>> list(rlocate([0, 1, 1, 0, 1, 0, 0])) # Truthy at 1, 2, and 4 + [4, 2, 1] + + Set *pred* to a custom function to, e.g., find the indexes for a particular + item: + + >>> iterable = iter('abcb') + >>> pred = lambda x: x == 'b' + >>> list(rlocate(iterable, pred)) + [3, 1] + + If *window_size* is given, then the *pred* function will be called with + that many items. This enables searching for sub-sequences: + + >>> iterable = [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3] + >>> pred = lambda *args: args == (1, 2, 3) + >>> list(rlocate(iterable, pred=pred, window_size=3)) + [9, 5, 1] + + Beware, this function won't return anything for infinite iterables. + If *iterable* is reversible, ``rlocate`` will reverse it and search from + the right. Otherwise, it will search from the left and return the results + in reverse order. + + See :func:`locate` to for other example applications. + + """ + if window_size is None: + try: + len_iter = len(iterable) + return (len_iter - i - 1 for i in locate(reversed(iterable), pred)) + except TypeError: + pass + + return reversed(list(locate(iterable, pred, window_size))) + + +def replace(iterable, pred, substitutes, count=None, window_size=1): + """Yield the items from *iterable*, replacing the items for which *pred* + returns ``True`` with the items from the iterable *substitutes*. + + >>> iterable = [1, 1, 0, 1, 1, 0, 1, 1] + >>> pred = lambda x: x == 0 + >>> substitutes = (2, 3) + >>> list(replace(iterable, pred, substitutes)) + [1, 1, 2, 3, 1, 1, 2, 3, 1, 1] + + If *count* is given, the number of replacements will be limited: + + >>> iterable = [1, 1, 0, 1, 1, 0, 1, 1, 0] + >>> pred = lambda x: x == 0 + >>> substitutes = [None] + >>> list(replace(iterable, pred, substitutes, count=2)) + [1, 1, None, 1, 1, None, 1, 1, 0] + + Use *window_size* to control the number of items passed as arguments to + *pred*. This allows for locating and replacing subsequences. + + >>> iterable = [0, 1, 2, 5, 0, 1, 2, 5] + >>> window_size = 3 + >>> pred = lambda *args: args == (0, 1, 2) # 3 items passed to pred + >>> substitutes = [3, 4] # Splice in these items + >>> list(replace(iterable, pred, substitutes, window_size=window_size)) + [3, 4, 5, 3, 4, 5] + + """ + if window_size < 1: + raise ValueError('window_size must be at least 1') + + # Save the substitutes iterable, since it's used more than once + substitutes = tuple(substitutes) + + # Add padding such that the number of windows matches the length of the + # iterable + it = chain(iterable, [_marker] * (window_size - 1)) + windows = windowed(it, window_size) + + n = 0 + for w in windows: + # If the current window matches our predicate (and we haven't hit + # our maximum number of replacements), splice in the substitutes + # and then consume the following windows that overlap with this one. + # For example, if the iterable is (0, 1, 2, 3, 4...) + # and the window size is 2, we have (0, 1), (1, 2), (2, 3)... + # If the predicate matches on (0, 1), we need to zap (0, 1) and (1, 2) + if pred(*w): + if (count is None) or (n < count): + n += 1 + yield from substitutes + consume(windows, window_size - 1) + continue + + # If there was no match (or we've reached the replacement limit), + # yield the first item from the window. + if w and (w[0] is not _marker): + yield w[0] + + +def partitions(iterable): + """Yield all possible order-preserving partitions of *iterable*. + + >>> iterable = 'abc' + >>> for part in partitions(iterable): + ... print([''.join(p) for p in part]) + ['abc'] + ['a', 'bc'] + ['ab', 'c'] + ['a', 'b', 'c'] + + This is unrelated to :func:`partition`. + + """ + sequence = list(iterable) + n = len(sequence) + for i in powerset(range(1, n)): + yield [sequence[i:j] for i, j in zip((0,) + i, i + (n,))] + + +def set_partitions(iterable, k=None): + """ + Yield the set partitions of *iterable* into *k* parts. Set partitions are + not order-preserving. + + >>> iterable = 'abc' + >>> for part in set_partitions(iterable, 2): + ... print([''.join(p) for p in part]) + ['a', 'bc'] + ['ab', 'c'] + ['b', 'ac'] + + + If *k* is not given, every set partition is generated. + + >>> iterable = 'abc' + >>> for part in set_partitions(iterable): + ... print([''.join(p) for p in part]) + ['abc'] + ['a', 'bc'] + ['ab', 'c'] + ['b', 'ac'] + ['a', 'b', 'c'] + + """ + L = list(iterable) + n = len(L) + if k is not None: + if k < 1: + raise ValueError( + "Can't partition in a negative or zero number of groups" + ) + elif k > n: + return + + def set_partitions_helper(L, k): + n = len(L) + if k == 1: + yield [L] + elif n == k: + yield [[s] for s in L] + else: + e, *M = L + for p in set_partitions_helper(M, k - 1): + yield [[e], *p] + for p in set_partitions_helper(M, k): + for i in range(len(p)): + yield p[:i] + [[e] + p[i]] + p[i + 1 :] + + if k is None: + for k in range(1, n + 1): + yield from set_partitions_helper(L, k) + else: + yield from set_partitions_helper(L, k) + + +class time_limited: + """ + Yield items from *iterable* until *limit_seconds* have passed. + If the time limit expires before all items have been yielded, the + ``timed_out`` parameter will be set to ``True``. + + >>> from time import sleep + >>> def generator(): + ... yield 1 + ... yield 2 + ... sleep(0.2) + ... yield 3 + >>> iterable = time_limited(0.1, generator()) + >>> list(iterable) + [1, 2] + >>> iterable.timed_out + True + + Note that the time is checked before each item is yielded, and iteration + stops if the time elapsed is greater than *limit_seconds*. If your time + limit is 1 second, but it takes 2 seconds to generate the first item from + the iterable, the function will run for 2 seconds and not yield anything. + + """ + + def __init__(self, limit_seconds, iterable): + if limit_seconds < 0: + raise ValueError('limit_seconds must be positive') + self.limit_seconds = limit_seconds + self._iterable = iter(iterable) + self._start_time = monotonic() + self.timed_out = False + + def __iter__(self): + return self + + def __next__(self): + item = next(self._iterable) + if monotonic() - self._start_time > self.limit_seconds: + self.timed_out = True + raise StopIteration + + return item + + +def only(iterable, default=None, too_long=None): + """If *iterable* has only one item, return it. + If it has zero items, return *default*. + If it has more than one item, raise the exception given by *too_long*, + which is ``ValueError`` by default. + + >>> only([], default='missing') + 'missing' + >>> only([1]) + 1 + >>> only([1, 2]) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + ValueError: Expected exactly one item in iterable, but got 1, 2, + and perhaps more.' + >>> only([1, 2], too_long=TypeError) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + TypeError + + Note that :func:`only` attempts to advance *iterable* twice to ensure there + is only one item. See :func:`spy` or :func:`peekable` to check + iterable contents less destructively. + """ + it = iter(iterable) + first_value = next(it, default) + + try: + second_value = next(it) + except StopIteration: + pass + else: + msg = ( + 'Expected exactly one item in iterable, but got {!r}, {!r}, ' + 'and perhaps more.'.format(first_value, second_value) + ) + raise too_long or ValueError(msg) + + return first_value + + +def ichunked(iterable, n): + """Break *iterable* into sub-iterables with *n* elements each. + :func:`ichunked` is like :func:`chunked`, but it yields iterables + instead of lists. + + If the sub-iterables are read in order, the elements of *iterable* + won't be stored in memory. + If they are read out of order, :func:`itertools.tee` is used to cache + elements as necessary. + + >>> from itertools import count + >>> all_chunks = ichunked(count(), 4) + >>> c_1, c_2, c_3 = next(all_chunks), next(all_chunks), next(all_chunks) + >>> list(c_2) # c_1's elements have been cached; c_3's haven't been + [4, 5, 6, 7] + >>> list(c_1) + [0, 1, 2, 3] + >>> list(c_3) + [8, 9, 10, 11] + + """ + source = iter(iterable) + + while True: + # Check to see whether we're at the end of the source iterable + item = next(source, _marker) + if item is _marker: + return + + # Clone the source and yield an n-length slice + source, it = tee(chain([item], source)) + yield islice(it, n) + + # Advance the source iterable + consume(source, n) + + +def distinct_combinations(iterable, r): + """Yield the distinct combinations of *r* items taken from *iterable*. + + >>> list(distinct_combinations([0, 0, 1], 2)) + [(0, 0), (0, 1)] + + Equivalent to ``set(combinations(iterable))``, except duplicates are not + generated and thrown away. For larger input sequences this is much more + efficient. + + """ + if r < 0: + raise ValueError('r must be non-negative') + elif r == 0: + yield () + return + pool = tuple(iterable) + generators = [unique_everseen(enumerate(pool), key=itemgetter(1))] + current_combo = [None] * r + level = 0 + while generators: + try: + cur_idx, p = next(generators[-1]) + except StopIteration: + generators.pop() + level -= 1 + continue + current_combo[level] = p + if level + 1 == r: + yield tuple(current_combo) + else: + generators.append( + unique_everseen( + enumerate(pool[cur_idx + 1 :], cur_idx + 1), + key=itemgetter(1), + ) + ) + level += 1 + + +def filter_except(validator, iterable, *exceptions): + """Yield the items from *iterable* for which the *validator* function does + not raise one of the specified *exceptions*. + + *validator* is called for each item in *iterable*. + It should be a function that accepts one argument and raises an exception + if that item is not valid. + + >>> iterable = ['1', '2', 'three', '4', None] + >>> list(filter_except(int, iterable, ValueError, TypeError)) + ['1', '2', '4'] + + If an exception other than one given by *exceptions* is raised by + *validator*, it is raised like normal. + """ + for item in iterable: + try: + validator(item) + except exceptions: + pass + else: + yield item + + +def map_except(function, iterable, *exceptions): + """Transform each item from *iterable* with *function* and yield the + result, unless *function* raises one of the specified *exceptions*. + + *function* is called to transform each item in *iterable*. + It should accept one argument. + + >>> iterable = ['1', '2', 'three', '4', None] + >>> list(map_except(int, iterable, ValueError, TypeError)) + [1, 2, 4] + + If an exception other than one given by *exceptions* is raised by + *function*, it is raised like normal. + """ + for item in iterable: + try: + yield function(item) + except exceptions: + pass + + +def map_if(iterable, pred, func, func_else=lambda x: x): + """Evaluate each item from *iterable* using *pred*. If the result is + equivalent to ``True``, transform the item with *func* and yield it. + Otherwise, transform the item with *func_else* and yield it. + + *pred*, *func*, and *func_else* should each be functions that accept + one argument. By default, *func_else* is the identity function. + + >>> from math import sqrt + >>> iterable = list(range(-5, 5)) + >>> iterable + [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4] + >>> list(map_if(iterable, lambda x: x > 3, lambda x: 'toobig')) + [-5, -4, -3, -2, -1, 0, 1, 2, 3, 'toobig'] + >>> list(map_if(iterable, lambda x: x >= 0, + ... lambda x: f'{sqrt(x):.2f}', lambda x: None)) + [None, None, None, None, None, '0.00', '1.00', '1.41', '1.73', '2.00'] + """ + for item in iterable: + yield func(item) if pred(item) else func_else(item) + + +def _sample_unweighted(iterable, k): + # Implementation of "Algorithm L" from the 1994 paper by Kim-Hung Li: + # "Reservoir-Sampling Algorithms of Time Complexity O(n(1+log(N/n)))". + + # Fill up the reservoir (collection of samples) with the first `k` samples + reservoir = take(k, iterable) + + # Generate random number that's the largest in a sample of k U(0,1) numbers + # Largest order statistic: https://en.wikipedia.org/wiki/Order_statistic + W = exp(log(random()) / k) + + # The number of elements to skip before changing the reservoir is a random + # number with a geometric distribution. Sample it using random() and logs. + next_index = k + floor(log(random()) / log(1 - W)) + + for index, element in enumerate(iterable, k): + + if index == next_index: + reservoir[randrange(k)] = element + # The new W is the largest in a sample of k U(0, `old_W`) numbers + W *= exp(log(random()) / k) + next_index += floor(log(random()) / log(1 - W)) + 1 + + return reservoir + + +def _sample_weighted(iterable, k, weights): + # Implementation of "A-ExpJ" from the 2006 paper by Efraimidis et al. : + # "Weighted random sampling with a reservoir". + + # Log-transform for numerical stability for weights that are small/large + weight_keys = (log(random()) / weight for weight in weights) + + # Fill up the reservoir (collection of samples) with the first `k` + # weight-keys and elements, then heapify the list. + reservoir = take(k, zip(weight_keys, iterable)) + heapify(reservoir) + + # The number of jumps before changing the reservoir is a random variable + # with an exponential distribution. Sample it using random() and logs. + smallest_weight_key, _ = reservoir[0] + weights_to_skip = log(random()) / smallest_weight_key + + for weight, element in zip(weights, iterable): + if weight >= weights_to_skip: + # The notation here is consistent with the paper, but we store + # the weight-keys in log-space for better numerical stability. + smallest_weight_key, _ = reservoir[0] + t_w = exp(weight * smallest_weight_key) + r_2 = uniform(t_w, 1) # generate U(t_w, 1) + weight_key = log(r_2) / weight + heapreplace(reservoir, (weight_key, element)) + smallest_weight_key, _ = reservoir[0] + weights_to_skip = log(random()) / smallest_weight_key + else: + weights_to_skip -= weight + + # Equivalent to [element for weight_key, element in sorted(reservoir)] + return [heappop(reservoir)[1] for _ in range(k)] + + +def sample(iterable, k, weights=None): + """Return a *k*-length list of elements chosen (without replacement) + from the *iterable*. Like :func:`random.sample`, but works on iterables + of unknown length. + + >>> iterable = range(100) + >>> sample(iterable, 5) # doctest: +SKIP + [81, 60, 96, 16, 4] + + An iterable with *weights* may also be given: + + >>> iterable = range(100) + >>> weights = (i * i + 1 for i in range(100)) + >>> sampled = sample(iterable, 5, weights=weights) # doctest: +SKIP + [79, 67, 74, 66, 78] + + The algorithm can also be used to generate weighted random permutations. + The relative weight of each item determines the probability that it + appears late in the permutation. + + >>> data = "abcdefgh" + >>> weights = range(1, len(data) + 1) + >>> sample(data, k=len(data), weights=weights) # doctest: +SKIP + ['c', 'a', 'b', 'e', 'g', 'd', 'h', 'f'] + """ + if k == 0: + return [] + + iterable = iter(iterable) + if weights is None: + return _sample_unweighted(iterable, k) + else: + weights = iter(weights) + return _sample_weighted(iterable, k, weights) + + +def is_sorted(iterable, key=None, reverse=False, strict=False): + """Returns ``True`` if the items of iterable are in sorted order, and + ``False`` otherwise. *key* and *reverse* have the same meaning that they do + in the built-in :func:`sorted` function. + + >>> is_sorted(['1', '2', '3', '4', '5'], key=int) + True + >>> is_sorted([5, 4, 3, 1, 2], reverse=True) + False + + If *strict*, tests for strict sorting, that is, returns ``False`` if equal + elements are found: + + >>> is_sorted([1, 2, 2]) + True + >>> is_sorted([1, 2, 2], strict=True) + False + + The function returns ``False`` after encountering the first out-of-order + item. If there are no out-of-order items, the iterable is exhausted. + """ + + compare = (le if reverse else ge) if strict else (lt if reverse else gt) + it = iterable if key is None else map(key, iterable) + return not any(starmap(compare, pairwise(it))) + + +class AbortThread(BaseException): + pass + + +class callback_iter: + """Convert a function that uses callbacks to an iterator. + + Let *func* be a function that takes a `callback` keyword argument. + For example: + + >>> def func(callback=None): + ... for i, c in [(1, 'a'), (2, 'b'), (3, 'c')]: + ... if callback: + ... callback(i, c) + ... return 4 + + + Use ``with callback_iter(func)`` to get an iterator over the parameters + that are delivered to the callback. + + >>> with callback_iter(func) as it: + ... for args, kwargs in it: + ... print(args) + (1, 'a') + (2, 'b') + (3, 'c') + + The function will be called in a background thread. The ``done`` property + indicates whether it has completed execution. + + >>> it.done + True + + If it completes successfully, its return value will be available + in the ``result`` property. + + >>> it.result + 4 + + Notes: + + * If the function uses some keyword argument besides ``callback``, supply + *callback_kwd*. + * If it finished executing, but raised an exception, accessing the + ``result`` property will raise the same exception. + * If it hasn't finished executing, accessing the ``result`` + property from within the ``with`` block will raise ``RuntimeError``. + * If it hasn't finished executing, accessing the ``result`` property from + outside the ``with`` block will raise a + ``more_itertools.AbortThread`` exception. + * Provide *wait_seconds* to adjust how frequently the it is polled for + output. + + """ + + def __init__(self, func, callback_kwd='callback', wait_seconds=0.1): + self._func = func + self._callback_kwd = callback_kwd + self._aborted = False + self._future = None + self._wait_seconds = wait_seconds + self._executor = __import__("concurrent.futures").futures.ThreadPoolExecutor(max_workers=1) + self._iterator = self._reader() + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self._aborted = True + self._executor.shutdown() + + def __iter__(self): + return self + + def __next__(self): + return next(self._iterator) + + @property + def done(self): + if self._future is None: + return False + return self._future.done() + + @property + def result(self): + if not self.done: + raise RuntimeError('Function has not yet completed') + + return self._future.result() + + def _reader(self): + q = Queue() + + def callback(*args, **kwargs): + if self._aborted: + raise AbortThread('canceled by user') + + q.put((args, kwargs)) + + self._future = self._executor.submit( + self._func, **{self._callback_kwd: callback} + ) + + while True: + try: + item = q.get(timeout=self._wait_seconds) + except Empty: + pass + else: + q.task_done() + yield item + + if self._future.done(): + break + + remaining = [] + while True: + try: + item = q.get_nowait() + except Empty: + break + else: + q.task_done() + remaining.append(item) + q.join() + yield from remaining + + +def windowed_complete(iterable, n): + """ + Yield ``(beginning, middle, end)`` tuples, where: + + * Each ``middle`` has *n* items from *iterable* + * Each ``beginning`` has the items before the ones in ``middle`` + * Each ``end`` has the items after the ones in ``middle`` + + >>> iterable = range(7) + >>> n = 3 + >>> for beginning, middle, end in windowed_complete(iterable, n): + ... print(beginning, middle, end) + () (0, 1, 2) (3, 4, 5, 6) + (0,) (1, 2, 3) (4, 5, 6) + (0, 1) (2, 3, 4) (5, 6) + (0, 1, 2) (3, 4, 5) (6,) + (0, 1, 2, 3) (4, 5, 6) () + + Note that *n* must be at least 0 and most equal to the length of + *iterable*. + + This function will exhaust the iterable and may require significant + storage. + """ + if n < 0: + raise ValueError('n must be >= 0') + + seq = tuple(iterable) + size = len(seq) + + if n > size: + raise ValueError('n must be <= len(seq)') + + for i in range(size - n + 1): + beginning = seq[:i] + middle = seq[i : i + n] + end = seq[i + n :] + yield beginning, middle, end + + +def all_unique(iterable, key=None): + """ + Returns ``True`` if all the elements of *iterable* are unique (no two + elements are equal). + + >>> all_unique('ABCB') + False + + If a *key* function is specified, it will be used to make comparisons. + + >>> all_unique('ABCb') + True + >>> all_unique('ABCb', str.lower) + False + + The function returns as soon as the first non-unique element is + encountered. Iterables with a mix of hashable and unhashable items can + be used, but the function will be slower for unhashable items. + """ + seenset = set() + seenset_add = seenset.add + seenlist = [] + seenlist_add = seenlist.append + for element in map(key, iterable) if key else iterable: + try: + if element in seenset: + return False + seenset_add(element) + except TypeError: + if element in seenlist: + return False + seenlist_add(element) + return True + + +def nth_product(index, *args): + """Equivalent to ``list(product(*args))[index]``. + + The products of *args* can be ordered lexicographically. + :func:`nth_product` computes the product at sort position *index* without + computing the previous products. + + >>> nth_product(8, range(2), range(2), range(2), range(2)) + (1, 0, 0, 0) + + ``IndexError`` will be raised if the given *index* is invalid. + """ + pools = list(map(tuple, reversed(args))) + ns = list(map(len, pools)) + + c = reduce(mul, ns) + + if index < 0: + index += c + + if not 0 <= index < c: + raise IndexError + + result = [] + for pool, n in zip(pools, ns): + result.append(pool[index % n]) + index //= n + + return tuple(reversed(result)) + + +def nth_permutation(iterable, r, index): + """Equivalent to ``list(permutations(iterable, r))[index]``` + + The subsequences of *iterable* that are of length *r* where order is + important can be ordered lexicographically. :func:`nth_permutation` + computes the subsequence at sort position *index* directly, without + computing the previous subsequences. + + >>> nth_permutation('ghijk', 2, 5) + ('h', 'i') + + ``ValueError`` will be raised If *r* is negative or greater than the length + of *iterable*. + ``IndexError`` will be raised if the given *index* is invalid. + """ + pool = list(iterable) + n = len(pool) + + if r is None or r == n: + r, c = n, factorial(n) + elif not 0 <= r < n: + raise ValueError + else: + c = factorial(n) // factorial(n - r) + + if index < 0: + index += c + + if not 0 <= index < c: + raise IndexError + + if c == 0: + return tuple() + + result = [0] * r + q = index * factorial(n) // c if r < n else index + for d in range(1, n + 1): + q, i = divmod(q, d) + if 0 <= n - d < r: + result[n - d] = i + if q == 0: + break + + return tuple(map(pool.pop, result)) + + +def value_chain(*args): + """Yield all arguments passed to the function in the same order in which + they were passed. If an argument itself is iterable then iterate over its + values. + + >>> list(value_chain(1, 2, 3, [4, 5, 6])) + [1, 2, 3, 4, 5, 6] + + Binary and text strings are not considered iterable and are emitted + as-is: + + >>> list(value_chain('12', '34', ['56', '78'])) + ['12', '34', '56', '78'] + + + Multiple levels of nesting are not flattened. + + """ + for value in args: + if isinstance(value, (str, bytes)): + yield value + continue + try: + yield from value + except TypeError: + yield value + + +def product_index(element, *args): + """Equivalent to ``list(product(*args)).index(element)`` + + The products of *args* can be ordered lexicographically. + :func:`product_index` computes the first index of *element* without + computing the previous products. + + >>> product_index([8, 2], range(10), range(5)) + 42 + + ``ValueError`` will be raised if the given *element* isn't in the product + of *args*. + """ + index = 0 + + for x, pool in zip_longest(element, args, fillvalue=_marker): + if x is _marker or pool is _marker: + raise ValueError('element is not a product of args') + + pool = tuple(pool) + index = index * len(pool) + pool.index(x) + + return index + + +def combination_index(element, iterable): + """Equivalent to ``list(combinations(iterable, r)).index(element)`` + + The subsequences of *iterable* that are of length *r* can be ordered + lexicographically. :func:`combination_index` computes the index of the + first *element*, without computing the previous combinations. + + >>> combination_index('adf', 'abcdefg') + 10 + + ``ValueError`` will be raised if the given *element* isn't one of the + combinations of *iterable*. + """ + element = enumerate(element) + k, y = next(element, (None, None)) + if k is None: + return 0 + + indexes = [] + pool = enumerate(iterable) + for n, x in pool: + if x == y: + indexes.append(n) + tmp, y = next(element, (None, None)) + if tmp is None: + break + else: + k = tmp + else: + raise ValueError('element is not a combination of iterable') + + n, _ = last(pool, default=(n, None)) + + # Python versiosn below 3.8 don't have math.comb + index = 1 + for i, j in enumerate(reversed(indexes), start=1): + j = n - j + if i <= j: + index += factorial(j) // (factorial(i) * factorial(j - i)) + + return factorial(n + 1) // (factorial(k + 1) * factorial(n - k)) - index + + +def permutation_index(element, iterable): + """Equivalent to ``list(permutations(iterable, r)).index(element)``` + + The subsequences of *iterable* that are of length *r* where order is + important can be ordered lexicographically. :func:`permutation_index` + computes the index of the first *element* directly, without computing + the previous permutations. + + >>> permutation_index([1, 3, 2], range(5)) + 19 + + ``ValueError`` will be raised if the given *element* isn't one of the + permutations of *iterable*. + """ + index = 0 + pool = list(iterable) + for i, x in zip(range(len(pool), -1, -1), element): + r = pool.index(x) + index = index * i + r + del pool[r] + + return index + + +class countable: + """Wrap *iterable* and keep a count of how many items have been consumed. + + The ``items_seen`` attribute starts at ``0`` and increments as the iterable + is consumed: + + >>> iterable = map(str, range(10)) + >>> it = countable(iterable) + >>> it.items_seen + 0 + >>> next(it), next(it) + ('0', '1') + >>> list(it) + ['2', '3', '4', '5', '6', '7', '8', '9'] + >>> it.items_seen + 10 + """ + + def __init__(self, iterable): + self._it = iter(iterable) + self.items_seen = 0 + + def __iter__(self): + return self + + def __next__(self): + item = next(self._it) + self.items_seen += 1 + + return item + + +def chunked_even(iterable, n): + """Break *iterable* into lists of approximately length *n*. + Items are distributed such the lengths of the lists differ by at most + 1 item. + + >>> iterable = [1, 2, 3, 4, 5, 6, 7] + >>> n = 3 + >>> list(chunked_even(iterable, n)) # List lengths: 3, 2, 2 + [[1, 2, 3], [4, 5], [6, 7]] + >>> list(chunked(iterable, n)) # List lengths: 3, 3, 1 + [[1, 2, 3], [4, 5, 6], [7]] + + """ + + len_method = getattr(iterable, '__len__', None) + + if len_method is None: + return _chunked_even_online(iterable, n) + else: + return _chunked_even_finite(iterable, len_method(), n) + + +def _chunked_even_online(iterable, n): + buffer = [] + maxbuf = n + (n - 2) * (n - 1) + for x in iterable: + buffer.append(x) + if len(buffer) == maxbuf: + yield buffer[:n] + buffer = buffer[n:] + yield from _chunked_even_finite(buffer, len(buffer), n) + + +def _chunked_even_finite(iterable, N, n): + if N < 1: + return + + # Lists are either size `full_size <= n` or `partial_size = full_size - 1` + q, r = divmod(N, n) + num_lists = q + (1 if r > 0 else 0) + q, r = divmod(N, num_lists) + full_size = q + (1 if r > 0 else 0) + partial_size = full_size - 1 + num_full = N - partial_size * num_lists + num_partial = num_lists - num_full + + buffer = [] + iterator = iter(iterable) + + # Yield num_full lists of full_size + for x in iterator: + buffer.append(x) + if len(buffer) == full_size: + yield buffer + buffer = [] + num_full -= 1 + if num_full <= 0: + break + + # Yield num_partial lists of partial_size + for x in iterator: + buffer.append(x) + if len(buffer) == partial_size: + yield buffer + buffer = [] + num_partial -= 1 + + +def zip_broadcast(*objects, scalar_types=(str, bytes), strict=False): + """A version of :func:`zip` that "broadcasts" any scalar + (i.e., non-iterable) items into output tuples. + + >>> iterable_1 = [1, 2, 3] + >>> iterable_2 = ['a', 'b', 'c'] + >>> scalar = '_' + >>> list(zip_broadcast(iterable_1, iterable_2, scalar)) + [(1, 'a', '_'), (2, 'b', '_'), (3, 'c', '_')] + + The *scalar_types* keyword argument determines what types are considered + scalar. It is set to ``(str, bytes)`` by default. Set it to ``None`` to + treat strings and byte strings as iterable: + + >>> list(zip_broadcast('abc', 0, 'xyz', scalar_types=None)) + [('a', 0, 'x'), ('b', 0, 'y'), ('c', 0, 'z')] + + If the *strict* keyword argument is ``True``, then + ``UnequalIterablesError`` will be raised if any of the iterables have + different lengthss. + """ + + def is_scalar(obj): + if scalar_types and isinstance(obj, scalar_types): + return True + try: + iter(obj) + except TypeError: + return True + else: + return False + + size = len(objects) + if not size: + return + + iterables, iterable_positions = [], [] + scalars, scalar_positions = [], [] + for i, obj in enumerate(objects): + if is_scalar(obj): + scalars.append(obj) + scalar_positions.append(i) + else: + iterables.append(iter(obj)) + iterable_positions.append(i) + + if len(scalars) == size: + yield tuple(objects) + return + + zipper = _zip_equal if strict else zip + for item in zipper(*iterables): + new_item = [None] * size + + for i, elem in zip(iterable_positions, item): + new_item[i] = elem + + for i, elem in zip(scalar_positions, scalars): + new_item[i] = elem + + yield tuple(new_item) + + +def unique_in_window(iterable, n, key=None): + """Yield the items from *iterable* that haven't been seen recently. + *n* is the size of the lookback window. + + >>> iterable = [0, 1, 0, 2, 3, 0] + >>> n = 3 + >>> list(unique_in_window(iterable, n)) + [0, 1, 2, 3, 0] + + The *key* function, if provided, will be used to determine uniqueness: + + >>> list(unique_in_window('abAcda', 3, key=lambda x: x.lower())) + ['a', 'b', 'c', 'd', 'a'] + + The items in *iterable* must be hashable. + + """ + if n <= 0: + raise ValueError('n must be greater than 0') + + window = deque(maxlen=n) + uniques = set() + use_key = key is not None + + for item in iterable: + k = key(item) if use_key else item + if k in uniques: + continue + + if len(uniques) == n: + uniques.discard(window[0]) + + uniques.add(k) + window.append(k) + + yield item + + +def duplicates_everseen(iterable, key=None): + """Yield duplicate elements after their first appearance. + + >>> list(duplicates_everseen('mississippi')) + ['s', 'i', 's', 's', 'i', 'p', 'i'] + >>> list(duplicates_everseen('AaaBbbCccAaa', str.lower)) + ['a', 'a', 'b', 'b', 'c', 'c', 'A', 'a', 'a'] + + This function is analagous to :func:`unique_everseen` and is subject to + the same performance considerations. + + """ + seen_set = set() + seen_list = [] + use_key = key is not None + + for element in iterable: + k = key(element) if use_key else element + try: + if k not in seen_set: + seen_set.add(k) + else: + yield element + except TypeError: + if k not in seen_list: + seen_list.append(k) + else: + yield element + + +def duplicates_justseen(iterable, key=None): + """Yields serially-duplicate elements after their first appearance. + + >>> list(duplicates_justseen('mississippi')) + ['s', 's', 'p'] + >>> list(duplicates_justseen('AaaBbbCccAaa', str.lower)) + ['a', 'a', 'b', 'b', 'c', 'c', 'a', 'a'] + + This function is analagous to :func:`unique_justseen`. + + """ + return flatten( + map( + lambda group_tuple: islice_extended(group_tuple[1])[1:], + groupby(iterable, key), + ) + ) + + +def minmax(iterable_or_value, *others, key=None, default=_marker): + """Returns both the smallest and largest items in an iterable + or the largest of two or more arguments. + + >>> minmax([3, 1, 5]) + (1, 5) + + >>> minmax(4, 2, 6) + (2, 6) + + If a *key* function is provided, it will be used to transform the input + items for comparison. + + >>> minmax([5, 30], key=str) # '30' sorts before '5' + (30, 5) + + If a *default* value is provided, it will be returned if there are no + input items. + + >>> minmax([], default=(0, 0)) + (0, 0) + + Otherwise ``ValueError`` is raised. + + This function is based on the + `recipe `__ by + Raymond Hettinger and takes care to minimize the number of comparisons + performed. + """ + iterable = (iterable_or_value, *others) if others else iterable_or_value + + it = iter(iterable) + + try: + lo = hi = next(it) + except StopIteration as e: + if default is _marker: + raise ValueError( + '`minmax()` argument is an empty iterable. ' + 'Provide a `default` value to suppress this error.' + ) from e + return default + + # Different branches depending on the presence of key. This saves a lot + # of unimportant copies which would slow the "key=None" branch + # significantly down. + if key is None: + for x, y in zip_longest(it, it, fillvalue=lo): + if y < x: + x, y = y, x + if x < lo: + lo = x + if hi < y: + hi = y + + else: + lo_key = hi_key = key(lo) + + for x, y in zip_longest(it, it, fillvalue=lo): + + x_key, y_key = key(x), key(y) + + if y_key < x_key: + x, y, x_key, y_key = y, x, y_key, x_key + if x_key < lo_key: + lo, lo_key = x, x_key + if hi_key < y_key: + hi, hi_key = y, y_key + + return lo, hi diff --git a/libs/common/pkg_resources/_vendor/more_itertools/recipes.py b/libs/common/pkg_resources/_vendor/more_itertools/recipes.py new file mode 100644 index 00000000..a2596423 --- /dev/null +++ b/libs/common/pkg_resources/_vendor/more_itertools/recipes.py @@ -0,0 +1,698 @@ +"""Imported from the recipes section of the itertools documentation. + +All functions taken from the recipes section of the itertools library docs +[1]_. +Some backward-compatible usability improvements have been made. + +.. [1] http://docs.python.org/library/itertools.html#recipes + +""" +import warnings +from collections import deque +from itertools import ( + chain, + combinations, + count, + cycle, + groupby, + islice, + repeat, + starmap, + tee, + zip_longest, +) +import operator +from random import randrange, sample, choice + +__all__ = [ + 'all_equal', + 'before_and_after', + 'consume', + 'convolve', + 'dotproduct', + 'first_true', + 'flatten', + 'grouper', + 'iter_except', + 'ncycles', + 'nth', + 'nth_combination', + 'padnone', + 'pad_none', + 'pairwise', + 'partition', + 'powerset', + 'prepend', + 'quantify', + 'random_combination_with_replacement', + 'random_combination', + 'random_permutation', + 'random_product', + 'repeatfunc', + 'roundrobin', + 'sliding_window', + 'tabulate', + 'tail', + 'take', + 'triplewise', + 'unique_everseen', + 'unique_justseen', +] + + +def take(n, iterable): + """Return first *n* items of the iterable as a list. + + >>> take(3, range(10)) + [0, 1, 2] + + If there are fewer than *n* items in the iterable, all of them are + returned. + + >>> take(10, range(3)) + [0, 1, 2] + + """ + return list(islice(iterable, n)) + + +def tabulate(function, start=0): + """Return an iterator over the results of ``func(start)``, + ``func(start + 1)``, ``func(start + 2)``... + + *func* should be a function that accepts one integer argument. + + If *start* is not specified it defaults to 0. It will be incremented each + time the iterator is advanced. + + >>> square = lambda x: x ** 2 + >>> iterator = tabulate(square, -3) + >>> take(4, iterator) + [9, 4, 1, 0] + + """ + return map(function, count(start)) + + +def tail(n, iterable): + """Return an iterator over the last *n* items of *iterable*. + + >>> t = tail(3, 'ABCDEFG') + >>> list(t) + ['E', 'F', 'G'] + + """ + return iter(deque(iterable, maxlen=n)) + + +def consume(iterator, n=None): + """Advance *iterable* by *n* steps. If *n* is ``None``, consume it + entirely. + + Efficiently exhausts an iterator without returning values. Defaults to + consuming the whole iterator, but an optional second argument may be + provided to limit consumption. + + >>> i = (x for x in range(10)) + >>> next(i) + 0 + >>> consume(i, 3) + >>> next(i) + 4 + >>> consume(i) + >>> next(i) + Traceback (most recent call last): + File "", line 1, in + StopIteration + + If the iterator has fewer items remaining than the provided limit, the + whole iterator will be consumed. + + >>> i = (x for x in range(3)) + >>> consume(i, 5) + >>> next(i) + Traceback (most recent call last): + File "", line 1, in + StopIteration + + """ + # Use functions that consume iterators at C speed. + if n is None: + # feed the entire iterator into a zero-length deque + deque(iterator, maxlen=0) + else: + # advance to the empty slice starting at position n + next(islice(iterator, n, n), None) + + +def nth(iterable, n, default=None): + """Returns the nth item or a default value. + + >>> l = range(10) + >>> nth(l, 3) + 3 + >>> nth(l, 20, "zebra") + 'zebra' + + """ + return next(islice(iterable, n, None), default) + + +def all_equal(iterable): + """ + Returns ``True`` if all the elements are equal to each other. + + >>> all_equal('aaaa') + True + >>> all_equal('aaab') + False + + """ + g = groupby(iterable) + return next(g, True) and not next(g, False) + + +def quantify(iterable, pred=bool): + """Return the how many times the predicate is true. + + >>> quantify([True, False, True]) + 2 + + """ + return sum(map(pred, iterable)) + + +def pad_none(iterable): + """Returns the sequence of elements and then returns ``None`` indefinitely. + + >>> take(5, pad_none(range(3))) + [0, 1, 2, None, None] + + Useful for emulating the behavior of the built-in :func:`map` function. + + See also :func:`padded`. + + """ + return chain(iterable, repeat(None)) + + +padnone = pad_none + + +def ncycles(iterable, n): + """Returns the sequence elements *n* times + + >>> list(ncycles(["a", "b"], 3)) + ['a', 'b', 'a', 'b', 'a', 'b'] + + """ + return chain.from_iterable(repeat(tuple(iterable), n)) + + +def dotproduct(vec1, vec2): + """Returns the dot product of the two iterables. + + >>> dotproduct([10, 10], [20, 20]) + 400 + + """ + return sum(map(operator.mul, vec1, vec2)) + + +def flatten(listOfLists): + """Return an iterator flattening one level of nesting in a list of lists. + + >>> list(flatten([[0, 1], [2, 3]])) + [0, 1, 2, 3] + + See also :func:`collapse`, which can flatten multiple levels of nesting. + + """ + return chain.from_iterable(listOfLists) + + +def repeatfunc(func, times=None, *args): + """Call *func* with *args* repeatedly, returning an iterable over the + results. + + If *times* is specified, the iterable will terminate after that many + repetitions: + + >>> from operator import add + >>> times = 4 + >>> args = 3, 5 + >>> list(repeatfunc(add, times, *args)) + [8, 8, 8, 8] + + If *times* is ``None`` the iterable will not terminate: + + >>> from random import randrange + >>> times = None + >>> args = 1, 11 + >>> take(6, repeatfunc(randrange, times, *args)) # doctest:+SKIP + [2, 4, 8, 1, 8, 4] + + """ + if times is None: + return starmap(func, repeat(args)) + return starmap(func, repeat(args, times)) + + +def _pairwise(iterable): + """Returns an iterator of paired items, overlapping, from the original + + >>> take(4, pairwise(count())) + [(0, 1), (1, 2), (2, 3), (3, 4)] + + On Python 3.10 and above, this is an alias for :func:`itertools.pairwise`. + + """ + a, b = tee(iterable) + next(b, None) + yield from zip(a, b) + + +try: + from itertools import pairwise as itertools_pairwise +except ImportError: + pairwise = _pairwise +else: + + def pairwise(iterable): + yield from itertools_pairwise(iterable) + + pairwise.__doc__ = _pairwise.__doc__ + + +def grouper(iterable, n, fillvalue=None): + """Collect data into fixed-length chunks or blocks. + + >>> list(grouper('ABCDEFG', 3, 'x')) + [('A', 'B', 'C'), ('D', 'E', 'F'), ('G', 'x', 'x')] + + """ + if isinstance(iterable, int): + warnings.warn( + "grouper expects iterable as first parameter", DeprecationWarning + ) + n, iterable = iterable, n + args = [iter(iterable)] * n + return zip_longest(fillvalue=fillvalue, *args) + + +def roundrobin(*iterables): + """Yields an item from each iterable, alternating between them. + + >>> list(roundrobin('ABC', 'D', 'EF')) + ['A', 'D', 'E', 'B', 'F', 'C'] + + This function produces the same output as :func:`interleave_longest`, but + may perform better for some inputs (in particular when the number of + iterables is small). + + """ + # Recipe credited to George Sakkis + pending = len(iterables) + nexts = cycle(iter(it).__next__ for it in iterables) + while pending: + try: + for next in nexts: + yield next() + except StopIteration: + pending -= 1 + nexts = cycle(islice(nexts, pending)) + + +def partition(pred, iterable): + """ + Returns a 2-tuple of iterables derived from the input iterable. + The first yields the items that have ``pred(item) == False``. + The second yields the items that have ``pred(item) == True``. + + >>> is_odd = lambda x: x % 2 != 0 + >>> iterable = range(10) + >>> even_items, odd_items = partition(is_odd, iterable) + >>> list(even_items), list(odd_items) + ([0, 2, 4, 6, 8], [1, 3, 5, 7, 9]) + + If *pred* is None, :func:`bool` is used. + + >>> iterable = [0, 1, False, True, '', ' '] + >>> false_items, true_items = partition(None, iterable) + >>> list(false_items), list(true_items) + ([0, False, ''], [1, True, ' ']) + + """ + if pred is None: + pred = bool + + evaluations = ((pred(x), x) for x in iterable) + t1, t2 = tee(evaluations) + return ( + (x for (cond, x) in t1 if not cond), + (x for (cond, x) in t2 if cond), + ) + + +def powerset(iterable): + """Yields all possible subsets of the iterable. + + >>> list(powerset([1, 2, 3])) + [(), (1,), (2,), (3,), (1, 2), (1, 3), (2, 3), (1, 2, 3)] + + :func:`powerset` will operate on iterables that aren't :class:`set` + instances, so repeated elements in the input will produce repeated elements + in the output. Use :func:`unique_everseen` on the input to avoid generating + duplicates: + + >>> seq = [1, 1, 0] + >>> list(powerset(seq)) + [(), (1,), (1,), (0,), (1, 1), (1, 0), (1, 0), (1, 1, 0)] + >>> from more_itertools import unique_everseen + >>> list(powerset(unique_everseen(seq))) + [(), (1,), (0,), (1, 0)] + + """ + s = list(iterable) + return chain.from_iterable(combinations(s, r) for r in range(len(s) + 1)) + + +def unique_everseen(iterable, key=None): + """ + Yield unique elements, preserving order. + + >>> list(unique_everseen('AAAABBBCCDAABBB')) + ['A', 'B', 'C', 'D'] + >>> list(unique_everseen('ABBCcAD', str.lower)) + ['A', 'B', 'C', 'D'] + + Sequences with a mix of hashable and unhashable items can be used. + The function will be slower (i.e., `O(n^2)`) for unhashable items. + + Remember that ``list`` objects are unhashable - you can use the *key* + parameter to transform the list to a tuple (which is hashable) to + avoid a slowdown. + + >>> iterable = ([1, 2], [2, 3], [1, 2]) + >>> list(unique_everseen(iterable)) # Slow + [[1, 2], [2, 3]] + >>> list(unique_everseen(iterable, key=tuple)) # Faster + [[1, 2], [2, 3]] + + Similary, you may want to convert unhashable ``set`` objects with + ``key=frozenset``. For ``dict`` objects, + ``key=lambda x: frozenset(x.items())`` can be used. + + """ + seenset = set() + seenset_add = seenset.add + seenlist = [] + seenlist_add = seenlist.append + use_key = key is not None + + for element in iterable: + k = key(element) if use_key else element + try: + if k not in seenset: + seenset_add(k) + yield element + except TypeError: + if k not in seenlist: + seenlist_add(k) + yield element + + +def unique_justseen(iterable, key=None): + """Yields elements in order, ignoring serial duplicates + + >>> list(unique_justseen('AAAABBBCCDAABBB')) + ['A', 'B', 'C', 'D', 'A', 'B'] + >>> list(unique_justseen('ABBCcAD', str.lower)) + ['A', 'B', 'C', 'A', 'D'] + + """ + return map(next, map(operator.itemgetter(1), groupby(iterable, key))) + + +def iter_except(func, exception, first=None): + """Yields results from a function repeatedly until an exception is raised. + + Converts a call-until-exception interface to an iterator interface. + Like ``iter(func, sentinel)``, but uses an exception instead of a sentinel + to end the loop. + + >>> l = [0, 1, 2] + >>> list(iter_except(l.pop, IndexError)) + [2, 1, 0] + + Multiple exceptions can be specified as a stopping condition: + + >>> l = [1, 2, 3, '...', 4, 5, 6] + >>> list(iter_except(lambda: 1 + l.pop(), (IndexError, TypeError))) + [7, 6, 5] + >>> list(iter_except(lambda: 1 + l.pop(), (IndexError, TypeError))) + [4, 3, 2] + >>> list(iter_except(lambda: 1 + l.pop(), (IndexError, TypeError))) + [] + + """ + try: + if first is not None: + yield first() + while 1: + yield func() + except exception: + pass + + +def first_true(iterable, default=None, pred=None): + """ + Returns the first true value in the iterable. + + If no true value is found, returns *default* + + If *pred* is not None, returns the first item for which + ``pred(item) == True`` . + + >>> first_true(range(10)) + 1 + >>> first_true(range(10), pred=lambda x: x > 5) + 6 + >>> first_true(range(10), default='missing', pred=lambda x: x > 9) + 'missing' + + """ + return next(filter(pred, iterable), default) + + +def random_product(*args, repeat=1): + """Draw an item at random from each of the input iterables. + + >>> random_product('abc', range(4), 'XYZ') # doctest:+SKIP + ('c', 3, 'Z') + + If *repeat* is provided as a keyword argument, that many items will be + drawn from each iterable. + + >>> random_product('abcd', range(4), repeat=2) # doctest:+SKIP + ('a', 2, 'd', 3) + + This equivalent to taking a random selection from + ``itertools.product(*args, **kwarg)``. + + """ + pools = [tuple(pool) for pool in args] * repeat + return tuple(choice(pool) for pool in pools) + + +def random_permutation(iterable, r=None): + """Return a random *r* length permutation of the elements in *iterable*. + + If *r* is not specified or is ``None``, then *r* defaults to the length of + *iterable*. + + >>> random_permutation(range(5)) # doctest:+SKIP + (3, 4, 0, 1, 2) + + This equivalent to taking a random selection from + ``itertools.permutations(iterable, r)``. + + """ + pool = tuple(iterable) + r = len(pool) if r is None else r + return tuple(sample(pool, r)) + + +def random_combination(iterable, r): + """Return a random *r* length subsequence of the elements in *iterable*. + + >>> random_combination(range(5), 3) # doctest:+SKIP + (2, 3, 4) + + This equivalent to taking a random selection from + ``itertools.combinations(iterable, r)``. + + """ + pool = tuple(iterable) + n = len(pool) + indices = sorted(sample(range(n), r)) + return tuple(pool[i] for i in indices) + + +def random_combination_with_replacement(iterable, r): + """Return a random *r* length subsequence of elements in *iterable*, + allowing individual elements to be repeated. + + >>> random_combination_with_replacement(range(3), 5) # doctest:+SKIP + (0, 0, 1, 2, 2) + + This equivalent to taking a random selection from + ``itertools.combinations_with_replacement(iterable, r)``. + + """ + pool = tuple(iterable) + n = len(pool) + indices = sorted(randrange(n) for i in range(r)) + return tuple(pool[i] for i in indices) + + +def nth_combination(iterable, r, index): + """Equivalent to ``list(combinations(iterable, r))[index]``. + + The subsequences of *iterable* that are of length *r* can be ordered + lexicographically. :func:`nth_combination` computes the subsequence at + sort position *index* directly, without computing the previous + subsequences. + + >>> nth_combination(range(5), 3, 5) + (0, 3, 4) + + ``ValueError`` will be raised If *r* is negative or greater than the length + of *iterable*. + ``IndexError`` will be raised if the given *index* is invalid. + """ + pool = tuple(iterable) + n = len(pool) + if (r < 0) or (r > n): + raise ValueError + + c = 1 + k = min(r, n - r) + for i in range(1, k + 1): + c = c * (n - k + i) // i + + if index < 0: + index += c + + if (index < 0) or (index >= c): + raise IndexError + + result = [] + while r: + c, n, r = c * r // n, n - 1, r - 1 + while index >= c: + index -= c + c, n = c * (n - r) // n, n - 1 + result.append(pool[-1 - n]) + + return tuple(result) + + +def prepend(value, iterator): + """Yield *value*, followed by the elements in *iterator*. + + >>> value = '0' + >>> iterator = ['1', '2', '3'] + >>> list(prepend(value, iterator)) + ['0', '1', '2', '3'] + + To prepend multiple values, see :func:`itertools.chain` + or :func:`value_chain`. + + """ + return chain([value], iterator) + + +def convolve(signal, kernel): + """Convolve the iterable *signal* with the iterable *kernel*. + + >>> signal = (1, 2, 3, 4, 5) + >>> kernel = [3, 2, 1] + >>> list(convolve(signal, kernel)) + [3, 8, 14, 20, 26, 14, 5] + + Note: the input arguments are not interchangeable, as the *kernel* + is immediately consumed and stored. + + """ + kernel = tuple(kernel)[::-1] + n = len(kernel) + window = deque([0], maxlen=n) * n + for x in chain(signal, repeat(0, n - 1)): + window.append(x) + yield sum(map(operator.mul, kernel, window)) + + +def before_and_after(predicate, it): + """A variant of :func:`takewhile` that allows complete access to the + remainder of the iterator. + + >>> it = iter('ABCdEfGhI') + >>> all_upper, remainder = before_and_after(str.isupper, it) + >>> ''.join(all_upper) + 'ABC' + >>> ''.join(remainder) # takewhile() would lose the 'd' + 'dEfGhI' + + Note that the first iterator must be fully consumed before the second + iterator can generate valid results. + """ + it = iter(it) + transition = [] + + def true_iterator(): + for elem in it: + if predicate(elem): + yield elem + else: + transition.append(elem) + return + + def remainder_iterator(): + yield from transition + yield from it + + return true_iterator(), remainder_iterator() + + +def triplewise(iterable): + """Return overlapping triplets from *iterable*. + + >>> list(triplewise('ABCDE')) + [('A', 'B', 'C'), ('B', 'C', 'D'), ('C', 'D', 'E')] + + """ + for (a, _), (b, c) in pairwise(pairwise(iterable)): + yield a, b, c + + +def sliding_window(iterable, n): + """Return a sliding window of width *n* over *iterable*. + + >>> list(sliding_window(range(6), 4)) + [(0, 1, 2, 3), (1, 2, 3, 4), (2, 3, 4, 5)] + + If *iterable* has fewer than *n* items, then nothing is yielded: + + >>> list(sliding_window(range(3), 4)) + [] + + For a variant with more features, see :func:`windowed`. + """ + it = iter(iterable) + window = deque(islice(it, n), maxlen=n) + if len(window) == n: + yield tuple(window) + for x in it: + window.append(x) + yield tuple(window) diff --git a/libs/common/pkg_resources/_vendor/packaging/__about__.py b/libs/common/pkg_resources/_vendor/packaging/__about__.py index 95d330ef..3551bc2d 100644 --- a/libs/common/pkg_resources/_vendor/packaging/__about__.py +++ b/libs/common/pkg_resources/_vendor/packaging/__about__.py @@ -1,21 +1,26 @@ # This file is dual licensed under the terms of the Apache License, Version # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. -from __future__ import absolute_import, division, print_function __all__ = [ - "__title__", "__summary__", "__uri__", "__version__", "__author__", - "__email__", "__license__", "__copyright__", + "__title__", + "__summary__", + "__uri__", + "__version__", + "__author__", + "__email__", + "__license__", + "__copyright__", ] __title__ = "packaging" __summary__ = "Core utilities for Python packages" __uri__ = "https://github.com/pypa/packaging" -__version__ = "16.8" +__version__ = "21.3" __author__ = "Donald Stufft and individual contributors" __email__ = "donald@stufft.io" -__license__ = "BSD or Apache License, Version 2.0" -__copyright__ = "Copyright 2014-2016 %s" % __author__ +__license__ = "BSD-2-Clause or Apache-2.0" +__copyright__ = "2014-2019 %s" % __author__ diff --git a/libs/common/pkg_resources/_vendor/packaging/__init__.py b/libs/common/pkg_resources/_vendor/packaging/__init__.py index 5ee62202..3c50c5dc 100644 --- a/libs/common/pkg_resources/_vendor/packaging/__init__.py +++ b/libs/common/pkg_resources/_vendor/packaging/__init__.py @@ -1,14 +1,25 @@ # This file is dual licensed under the terms of the Apache License, Version # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. -from __future__ import absolute_import, division, print_function from .__about__ import ( - __author__, __copyright__, __email__, __license__, __summary__, __title__, - __uri__, __version__ + __author__, + __copyright__, + __email__, + __license__, + __summary__, + __title__, + __uri__, + __version__, ) __all__ = [ - "__title__", "__summary__", "__uri__", "__version__", "__author__", - "__email__", "__license__", "__copyright__", + "__title__", + "__summary__", + "__uri__", + "__version__", + "__author__", + "__email__", + "__license__", + "__copyright__", ] diff --git a/libs/common/pkg_resources/_vendor/packaging/_compat.py b/libs/common/pkg_resources/_vendor/packaging/_compat.py deleted file mode 100644 index 210bb80b..00000000 --- a/libs/common/pkg_resources/_vendor/packaging/_compat.py +++ /dev/null @@ -1,30 +0,0 @@ -# This file is dual licensed under the terms of the Apache License, Version -# 2.0, and the BSD License. See the LICENSE file in the root of this repository -# for complete details. -from __future__ import absolute_import, division, print_function - -import sys - - -PY2 = sys.version_info[0] == 2 -PY3 = sys.version_info[0] == 3 - -# flake8: noqa - -if PY3: - string_types = str, -else: - string_types = basestring, - - -def with_metaclass(meta, *bases): - """ - Create a base class with a metaclass. - """ - # This requires a bit of explanation: the basic idea is to make a dummy - # metaclass for one level of class instantiation that replaces itself with - # the actual metaclass. - class metaclass(meta): - def __new__(cls, name, this_bases, d): - return meta(name, bases, d) - return type.__new__(metaclass, 'temporary_class', (), {}) diff --git a/libs/common/pkg_resources/_vendor/packaging/_manylinux.py b/libs/common/pkg_resources/_vendor/packaging/_manylinux.py new file mode 100644 index 00000000..4c379aa6 --- /dev/null +++ b/libs/common/pkg_resources/_vendor/packaging/_manylinux.py @@ -0,0 +1,301 @@ +import collections +import functools +import os +import re +import struct +import sys +import warnings +from typing import IO, Dict, Iterator, NamedTuple, Optional, Tuple + + +# Python does not provide platform information at sufficient granularity to +# identify the architecture of the running executable in some cases, so we +# determine it dynamically by reading the information from the running +# process. This only applies on Linux, which uses the ELF format. +class _ELFFileHeader: + # https://en.wikipedia.org/wiki/Executable_and_Linkable_Format#File_header + class _InvalidELFFileHeader(ValueError): + """ + An invalid ELF file header was found. + """ + + ELF_MAGIC_NUMBER = 0x7F454C46 + ELFCLASS32 = 1 + ELFCLASS64 = 2 + ELFDATA2LSB = 1 + ELFDATA2MSB = 2 + EM_386 = 3 + EM_S390 = 22 + EM_ARM = 40 + EM_X86_64 = 62 + EF_ARM_ABIMASK = 0xFF000000 + EF_ARM_ABI_VER5 = 0x05000000 + EF_ARM_ABI_FLOAT_HARD = 0x00000400 + + def __init__(self, file: IO[bytes]) -> None: + def unpack(fmt: str) -> int: + try: + data = file.read(struct.calcsize(fmt)) + result: Tuple[int, ...] = struct.unpack(fmt, data) + except struct.error: + raise _ELFFileHeader._InvalidELFFileHeader() + return result[0] + + self.e_ident_magic = unpack(">I") + if self.e_ident_magic != self.ELF_MAGIC_NUMBER: + raise _ELFFileHeader._InvalidELFFileHeader() + self.e_ident_class = unpack("B") + if self.e_ident_class not in {self.ELFCLASS32, self.ELFCLASS64}: + raise _ELFFileHeader._InvalidELFFileHeader() + self.e_ident_data = unpack("B") + if self.e_ident_data not in {self.ELFDATA2LSB, self.ELFDATA2MSB}: + raise _ELFFileHeader._InvalidELFFileHeader() + self.e_ident_version = unpack("B") + self.e_ident_osabi = unpack("B") + self.e_ident_abiversion = unpack("B") + self.e_ident_pad = file.read(7) + format_h = "H" + format_i = "I" + format_q = "Q" + format_p = format_i if self.e_ident_class == self.ELFCLASS32 else format_q + self.e_type = unpack(format_h) + self.e_machine = unpack(format_h) + self.e_version = unpack(format_i) + self.e_entry = unpack(format_p) + self.e_phoff = unpack(format_p) + self.e_shoff = unpack(format_p) + self.e_flags = unpack(format_i) + self.e_ehsize = unpack(format_h) + self.e_phentsize = unpack(format_h) + self.e_phnum = unpack(format_h) + self.e_shentsize = unpack(format_h) + self.e_shnum = unpack(format_h) + self.e_shstrndx = unpack(format_h) + + +def _get_elf_header() -> Optional[_ELFFileHeader]: + try: + with open(sys.executable, "rb") as f: + elf_header = _ELFFileHeader(f) + except (OSError, TypeError, _ELFFileHeader._InvalidELFFileHeader): + return None + return elf_header + + +def _is_linux_armhf() -> bool: + # hard-float ABI can be detected from the ELF header of the running + # process + # https://static.docs.arm.com/ihi0044/g/aaelf32.pdf + elf_header = _get_elf_header() + if elf_header is None: + return False + result = elf_header.e_ident_class == elf_header.ELFCLASS32 + result &= elf_header.e_ident_data == elf_header.ELFDATA2LSB + result &= elf_header.e_machine == elf_header.EM_ARM + result &= ( + elf_header.e_flags & elf_header.EF_ARM_ABIMASK + ) == elf_header.EF_ARM_ABI_VER5 + result &= ( + elf_header.e_flags & elf_header.EF_ARM_ABI_FLOAT_HARD + ) == elf_header.EF_ARM_ABI_FLOAT_HARD + return result + + +def _is_linux_i686() -> bool: + elf_header = _get_elf_header() + if elf_header is None: + return False + result = elf_header.e_ident_class == elf_header.ELFCLASS32 + result &= elf_header.e_ident_data == elf_header.ELFDATA2LSB + result &= elf_header.e_machine == elf_header.EM_386 + return result + + +def _have_compatible_abi(arch: str) -> bool: + if arch == "armv7l": + return _is_linux_armhf() + if arch == "i686": + return _is_linux_i686() + return arch in {"x86_64", "aarch64", "ppc64", "ppc64le", "s390x"} + + +# If glibc ever changes its major version, we need to know what the last +# minor version was, so we can build the complete list of all versions. +# For now, guess what the highest minor version might be, assume it will +# be 50 for testing. Once this actually happens, update the dictionary +# with the actual value. +_LAST_GLIBC_MINOR: Dict[int, int] = collections.defaultdict(lambda: 50) + + +class _GLibCVersion(NamedTuple): + major: int + minor: int + + +def _glibc_version_string_confstr() -> Optional[str]: + """ + Primary implementation of glibc_version_string using os.confstr. + """ + # os.confstr is quite a bit faster than ctypes.DLL. It's also less likely + # to be broken or missing. This strategy is used in the standard library + # platform module. + # https://github.com/python/cpython/blob/fcf1d003bf4f0100c/Lib/platform.py#L175-L183 + try: + # os.confstr("CS_GNU_LIBC_VERSION") returns a string like "glibc 2.17". + version_string = os.confstr("CS_GNU_LIBC_VERSION") + assert version_string is not None + _, version = version_string.split() + except (AssertionError, AttributeError, OSError, ValueError): + # os.confstr() or CS_GNU_LIBC_VERSION not available (or a bad value)... + return None + return version + + +def _glibc_version_string_ctypes() -> Optional[str]: + """ + Fallback implementation of glibc_version_string using ctypes. + """ + try: + import ctypes + except ImportError: + return None + + # ctypes.CDLL(None) internally calls dlopen(NULL), and as the dlopen + # manpage says, "If filename is NULL, then the returned handle is for the + # main program". This way we can let the linker do the work to figure out + # which libc our process is actually using. + # + # We must also handle the special case where the executable is not a + # dynamically linked executable. This can occur when using musl libc, + # for example. In this situation, dlopen() will error, leading to an + # OSError. Interestingly, at least in the case of musl, there is no + # errno set on the OSError. The single string argument used to construct + # OSError comes from libc itself and is therefore not portable to + # hard code here. In any case, failure to call dlopen() means we + # can proceed, so we bail on our attempt. + try: + process_namespace = ctypes.CDLL(None) + except OSError: + return None + + try: + gnu_get_libc_version = process_namespace.gnu_get_libc_version + except AttributeError: + # Symbol doesn't exist -> therefore, we are not linked to + # glibc. + return None + + # Call gnu_get_libc_version, which returns a string like "2.5" + gnu_get_libc_version.restype = ctypes.c_char_p + version_str: str = gnu_get_libc_version() + # py2 / py3 compatibility: + if not isinstance(version_str, str): + version_str = version_str.decode("ascii") + + return version_str + + +def _glibc_version_string() -> Optional[str]: + """Returns glibc version string, or None if not using glibc.""" + return _glibc_version_string_confstr() or _glibc_version_string_ctypes() + + +def _parse_glibc_version(version_str: str) -> Tuple[int, int]: + """Parse glibc version. + + We use a regexp instead of str.split because we want to discard any + random junk that might come after the minor version -- this might happen + in patched/forked versions of glibc (e.g. Linaro's version of glibc + uses version strings like "2.20-2014.11"). See gh-3588. + """ + m = re.match(r"(?P[0-9]+)\.(?P[0-9]+)", version_str) + if not m: + warnings.warn( + "Expected glibc version with 2 components major.minor," + " got: %s" % version_str, + RuntimeWarning, + ) + return -1, -1 + return int(m.group("major")), int(m.group("minor")) + + +@functools.lru_cache() +def _get_glibc_version() -> Tuple[int, int]: + version_str = _glibc_version_string() + if version_str is None: + return (-1, -1) + return _parse_glibc_version(version_str) + + +# From PEP 513, PEP 600 +def _is_compatible(name: str, arch: str, version: _GLibCVersion) -> bool: + sys_glibc = _get_glibc_version() + if sys_glibc < version: + return False + # Check for presence of _manylinux module. + try: + import _manylinux # noqa + except ImportError: + return True + if hasattr(_manylinux, "manylinux_compatible"): + result = _manylinux.manylinux_compatible(version[0], version[1], arch) + if result is not None: + return bool(result) + return True + if version == _GLibCVersion(2, 5): + if hasattr(_manylinux, "manylinux1_compatible"): + return bool(_manylinux.manylinux1_compatible) + if version == _GLibCVersion(2, 12): + if hasattr(_manylinux, "manylinux2010_compatible"): + return bool(_manylinux.manylinux2010_compatible) + if version == _GLibCVersion(2, 17): + if hasattr(_manylinux, "manylinux2014_compatible"): + return bool(_manylinux.manylinux2014_compatible) + return True + + +_LEGACY_MANYLINUX_MAP = { + # CentOS 7 w/ glibc 2.17 (PEP 599) + (2, 17): "manylinux2014", + # CentOS 6 w/ glibc 2.12 (PEP 571) + (2, 12): "manylinux2010", + # CentOS 5 w/ glibc 2.5 (PEP 513) + (2, 5): "manylinux1", +} + + +def platform_tags(linux: str, arch: str) -> Iterator[str]: + if not _have_compatible_abi(arch): + return + # Oldest glibc to be supported regardless of architecture is (2, 17). + too_old_glibc2 = _GLibCVersion(2, 16) + if arch in {"x86_64", "i686"}: + # On x86/i686 also oldest glibc to be supported is (2, 5). + too_old_glibc2 = _GLibCVersion(2, 4) + current_glibc = _GLibCVersion(*_get_glibc_version()) + glibc_max_list = [current_glibc] + # We can assume compatibility across glibc major versions. + # https://sourceware.org/bugzilla/show_bug.cgi?id=24636 + # + # Build a list of maximum glibc versions so that we can + # output the canonical list of all glibc from current_glibc + # down to too_old_glibc2, including all intermediary versions. + for glibc_major in range(current_glibc.major - 1, 1, -1): + glibc_minor = _LAST_GLIBC_MINOR[glibc_major] + glibc_max_list.append(_GLibCVersion(glibc_major, glibc_minor)) + for glibc_max in glibc_max_list: + if glibc_max.major == too_old_glibc2.major: + min_minor = too_old_glibc2.minor + else: + # For other glibc major versions oldest supported is (x, 0). + min_minor = -1 + for glibc_minor in range(glibc_max.minor, min_minor, -1): + glibc_version = _GLibCVersion(glibc_max.major, glibc_minor) + tag = "manylinux_{}_{}".format(*glibc_version) + if _is_compatible(tag, arch, glibc_version): + yield linux.replace("linux", tag) + # Handle the legacy manylinux1, manylinux2010, manylinux2014 tags. + if glibc_version in _LEGACY_MANYLINUX_MAP: + legacy_tag = _LEGACY_MANYLINUX_MAP[glibc_version] + if _is_compatible(legacy_tag, arch, glibc_version): + yield linux.replace("linux", legacy_tag) diff --git a/libs/common/pkg_resources/_vendor/packaging/_musllinux.py b/libs/common/pkg_resources/_vendor/packaging/_musllinux.py new file mode 100644 index 00000000..8ac3059b --- /dev/null +++ b/libs/common/pkg_resources/_vendor/packaging/_musllinux.py @@ -0,0 +1,136 @@ +"""PEP 656 support. + +This module implements logic to detect if the currently running Python is +linked against musl, and what musl version is used. +""" + +import contextlib +import functools +import operator +import os +import re +import struct +import subprocess +import sys +from typing import IO, Iterator, NamedTuple, Optional, Tuple + + +def _read_unpacked(f: IO[bytes], fmt: str) -> Tuple[int, ...]: + return struct.unpack(fmt, f.read(struct.calcsize(fmt))) + + +def _parse_ld_musl_from_elf(f: IO[bytes]) -> Optional[str]: + """Detect musl libc location by parsing the Python executable. + + Based on: https://gist.github.com/lyssdod/f51579ae8d93c8657a5564aefc2ffbca + ELF header: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html + """ + f.seek(0) + try: + ident = _read_unpacked(f, "16B") + except struct.error: + return None + if ident[:4] != tuple(b"\x7fELF"): # Invalid magic, not ELF. + return None + f.seek(struct.calcsize("HHI"), 1) # Skip file type, machine, and version. + + try: + # e_fmt: Format for program header. + # p_fmt: Format for section header. + # p_idx: Indexes to find p_type, p_offset, and p_filesz. + e_fmt, p_fmt, p_idx = { + 1: ("IIIIHHH", "IIIIIIII", (0, 1, 4)), # 32-bit. + 2: ("QQQIHHH", "IIQQQQQQ", (0, 2, 5)), # 64-bit. + }[ident[4]] + except KeyError: + return None + else: + p_get = operator.itemgetter(*p_idx) + + # Find the interpreter section and return its content. + try: + _, e_phoff, _, _, _, e_phentsize, e_phnum = _read_unpacked(f, e_fmt) + except struct.error: + return None + for i in range(e_phnum + 1): + f.seek(e_phoff + e_phentsize * i) + try: + p_type, p_offset, p_filesz = p_get(_read_unpacked(f, p_fmt)) + except struct.error: + return None + if p_type != 3: # Not PT_INTERP. + continue + f.seek(p_offset) + interpreter = os.fsdecode(f.read(p_filesz)).strip("\0") + if "musl" not in interpreter: + return None + return interpreter + return None + + +class _MuslVersion(NamedTuple): + major: int + minor: int + + +def _parse_musl_version(output: str) -> Optional[_MuslVersion]: + lines = [n for n in (n.strip() for n in output.splitlines()) if n] + if len(lines) < 2 or lines[0][:4] != "musl": + return None + m = re.match(r"Version (\d+)\.(\d+)", lines[1]) + if not m: + return None + return _MuslVersion(major=int(m.group(1)), minor=int(m.group(2))) + + +@functools.lru_cache() +def _get_musl_version(executable: str) -> Optional[_MuslVersion]: + """Detect currently-running musl runtime version. + + This is done by checking the specified executable's dynamic linking + information, and invoking the loader to parse its output for a version + string. If the loader is musl, the output would be something like:: + + musl libc (x86_64) + Version 1.2.2 + Dynamic Program Loader + """ + with contextlib.ExitStack() as stack: + try: + f = stack.enter_context(open(executable, "rb")) + except OSError: + return None + ld = _parse_ld_musl_from_elf(f) + if not ld: + return None + proc = subprocess.run([ld], stderr=subprocess.PIPE, universal_newlines=True) + return _parse_musl_version(proc.stderr) + + +def platform_tags(arch: str) -> Iterator[str]: + """Generate musllinux tags compatible to the current platform. + + :param arch: Should be the part of platform tag after the ``linux_`` + prefix, e.g. ``x86_64``. The ``linux_`` prefix is assumed as a + prerequisite for the current platform to be musllinux-compatible. + + :returns: An iterator of compatible musllinux tags. + """ + sys_musl = _get_musl_version(sys.executable) + if sys_musl is None: # Python not dynamically linked against musl. + return + for minor in range(sys_musl.minor, -1, -1): + yield f"musllinux_{sys_musl.major}_{minor}_{arch}" + + +if __name__ == "__main__": # pragma: no cover + import sysconfig + + plat = sysconfig.get_platform() + assert plat.startswith("linux-"), "not linux" + + print("plat:", plat) + print("musl:", _get_musl_version(sys.executable)) + print("tags:", end=" ") + for t in platform_tags(re.sub(r"[.-]", "_", plat.split("-", 1)[-1])): + print(t, end="\n ") diff --git a/libs/common/pkg_resources/_vendor/packaging/_structures.py b/libs/common/pkg_resources/_vendor/packaging/_structures.py index ccc27861..90a6465f 100644 --- a/libs/common/pkg_resources/_vendor/packaging/_structures.py +++ b/libs/common/pkg_resources/_vendor/packaging/_structures.py @@ -1,68 +1,61 @@ # This file is dual licensed under the terms of the Apache License, Version # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. -from __future__ import absolute_import, division, print_function -class Infinity(object): - - def __repr__(self): +class InfinityType: + def __repr__(self) -> str: return "Infinity" - def __hash__(self): + def __hash__(self) -> int: return hash(repr(self)) - def __lt__(self, other): + def __lt__(self, other: object) -> bool: return False - def __le__(self, other): + def __le__(self, other: object) -> bool: return False - def __eq__(self, other): + def __eq__(self, other: object) -> bool: return isinstance(other, self.__class__) - def __ne__(self, other): - return not isinstance(other, self.__class__) - - def __gt__(self, other): + def __gt__(self, other: object) -> bool: return True - def __ge__(self, other): + def __ge__(self, other: object) -> bool: return True - def __neg__(self): + def __neg__(self: object) -> "NegativeInfinityType": return NegativeInfinity -Infinity = Infinity() + +Infinity = InfinityType() -class NegativeInfinity(object): - - def __repr__(self): +class NegativeInfinityType: + def __repr__(self) -> str: return "-Infinity" - def __hash__(self): + def __hash__(self) -> int: return hash(repr(self)) - def __lt__(self, other): + def __lt__(self, other: object) -> bool: return True - def __le__(self, other): + def __le__(self, other: object) -> bool: return True - def __eq__(self, other): + def __eq__(self, other: object) -> bool: return isinstance(other, self.__class__) - def __ne__(self, other): - return not isinstance(other, self.__class__) - - def __gt__(self, other): + def __gt__(self, other: object) -> bool: return False - def __ge__(self, other): + def __ge__(self, other: object) -> bool: return False - def __neg__(self): + def __neg__(self: object) -> InfinityType: return Infinity -NegativeInfinity = NegativeInfinity() + +NegativeInfinity = NegativeInfinityType() diff --git a/libs/common/pkg_resources/_vendor/packaging/markers.py b/libs/common/pkg_resources/_vendor/packaging/markers.py index 892e578e..18769b09 100644 --- a/libs/common/pkg_resources/_vendor/packaging/markers.py +++ b/libs/common/pkg_resources/_vendor/packaging/markers.py @@ -1,26 +1,37 @@ # This file is dual licensed under the terms of the Apache License, Version # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. -from __future__ import absolute_import, division, print_function import operator import os import platform import sys +from typing import Any, Callable, Dict, List, Optional, Tuple, Union -from pkg_resources.extern.pyparsing import ParseException, ParseResults, stringStart, stringEnd -from pkg_resources.extern.pyparsing import ZeroOrMore, Group, Forward, QuotedString -from pkg_resources.extern.pyparsing import Literal as L # noqa - -from ._compat import string_types -from .specifiers import Specifier, InvalidSpecifier +from pkg_resources.extern.pyparsing import ( # noqa: N817 + Forward, + Group, + Literal as L, + ParseException, + ParseResults, + QuotedString, + ZeroOrMore, + stringEnd, + stringStart, +) +from .specifiers import InvalidSpecifier, Specifier __all__ = [ - "InvalidMarker", "UndefinedComparison", "UndefinedEnvironmentName", - "Marker", "default_environment", + "InvalidMarker", + "UndefinedComparison", + "UndefinedEnvironmentName", + "Marker", + "default_environment", ] +Operator = Callable[[str, str], bool] + class InvalidMarker(ValueError): """ @@ -41,78 +52,67 @@ class UndefinedEnvironmentName(ValueError): """ -class Node(object): - - def __init__(self, value): +class Node: + def __init__(self, value: Any) -> None: self.value = value - def __str__(self): + def __str__(self) -> str: return str(self.value) - def __repr__(self): - return "<{0}({1!r})>".format(self.__class__.__name__, str(self)) + def __repr__(self) -> str: + return f"<{self.__class__.__name__}('{self}')>" - def serialize(self): + def serialize(self) -> str: raise NotImplementedError class Variable(Node): - - def serialize(self): + def serialize(self) -> str: return str(self) class Value(Node): - - def serialize(self): - return '"{0}"'.format(self) + def serialize(self) -> str: + return f'"{self}"' class Op(Node): - - def serialize(self): + def serialize(self) -> str: return str(self) VARIABLE = ( - L("implementation_version") | - L("platform_python_implementation") | - L("implementation_name") | - L("python_full_version") | - L("platform_release") | - L("platform_version") | - L("platform_machine") | - L("platform_system") | - L("python_version") | - L("sys_platform") | - L("os_name") | - L("os.name") | # PEP-345 - L("sys.platform") | # PEP-345 - L("platform.version") | # PEP-345 - L("platform.machine") | # PEP-345 - L("platform.python_implementation") | # PEP-345 - L("python_implementation") | # undocumented setuptools legacy - L("extra") + L("implementation_version") + | L("platform_python_implementation") + | L("implementation_name") + | L("python_full_version") + | L("platform_release") + | L("platform_version") + | L("platform_machine") + | L("platform_system") + | L("python_version") + | L("sys_platform") + | L("os_name") + | L("os.name") # PEP-345 + | L("sys.platform") # PEP-345 + | L("platform.version") # PEP-345 + | L("platform.machine") # PEP-345 + | L("platform.python_implementation") # PEP-345 + | L("python_implementation") # undocumented setuptools legacy + | L("extra") # PEP-508 ) ALIASES = { - 'os.name': 'os_name', - 'sys.platform': 'sys_platform', - 'platform.version': 'platform_version', - 'platform.machine': 'platform_machine', - 'platform.python_implementation': 'platform_python_implementation', - 'python_implementation': 'platform_python_implementation' + "os.name": "os_name", + "sys.platform": "sys_platform", + "platform.version": "platform_version", + "platform.machine": "platform_machine", + "platform.python_implementation": "platform_python_implementation", + "python_implementation": "platform_python_implementation", } VARIABLE.setParseAction(lambda s, l, t: Variable(ALIASES.get(t[0], t[0]))) VERSION_CMP = ( - L("===") | - L("==") | - L(">=") | - L("<=") | - L("!=") | - L("~=") | - L(">") | - L("<") + L("===") | L("==") | L(">=") | L("<=") | L("!=") | L("~=") | L(">") | L("<") ) MARKER_OP = VERSION_CMP | L("not in") | L("in") @@ -138,22 +138,28 @@ MARKER_EXPR << MARKER_ATOM + ZeroOrMore(BOOLOP + MARKER_EXPR) MARKER = stringStart + MARKER_EXPR + stringEnd -def _coerce_parse_result(results): +def _coerce_parse_result(results: Union[ParseResults, List[Any]]) -> List[Any]: if isinstance(results, ParseResults): return [_coerce_parse_result(i) for i in results] else: return results -def _format_marker(marker, first=True): - assert isinstance(marker, (list, tuple, string_types)) +def _format_marker( + marker: Union[List[str], Tuple[Node, ...], str], first: Optional[bool] = True +) -> str: + + assert isinstance(marker, (list, tuple, str)) # Sometimes we have a structure like [[...]] which is a single item list # where the single item is itself it's own list. In that case we want skip # the rest of this function so that we don't get extraneous () on the # outside. - if (isinstance(marker, list) and len(marker) == 1 and - isinstance(marker[0], (list, tuple))): + if ( + isinstance(marker, list) + and len(marker) == 1 + and isinstance(marker[0], (list, tuple)) + ): return _format_marker(marker[0]) if isinstance(marker, list): @@ -168,7 +174,7 @@ def _format_marker(marker, first=True): return marker -_operators = { +_operators: Dict[str, Operator] = { "in": lambda lhs, rhs: lhs in rhs, "not in": lambda lhs, rhs: lhs not in rhs, "<": operator.lt, @@ -180,7 +186,7 @@ _operators = { } -def _eval_op(lhs, op, rhs): +def _eval_op(lhs: str, op: Op, rhs: str) -> bool: try: spec = Specifier("".join([op.serialize(), rhs])) except InvalidSpecifier: @@ -188,34 +194,36 @@ def _eval_op(lhs, op, rhs): else: return spec.contains(lhs) - oper = _operators.get(op.serialize()) + oper: Optional[Operator] = _operators.get(op.serialize()) if oper is None: - raise UndefinedComparison( - "Undefined {0!r} on {1!r} and {2!r}.".format(op, lhs, rhs) - ) + raise UndefinedComparison(f"Undefined {op!r} on {lhs!r} and {rhs!r}.") return oper(lhs, rhs) -_undefined = object() +class Undefined: + pass -def _get_env(environment, name): - value = environment.get(name, _undefined) +_undefined = Undefined() - if value is _undefined: + +def _get_env(environment: Dict[str, str], name: str) -> str: + value: Union[str, Undefined] = environment.get(name, _undefined) + + if isinstance(value, Undefined): raise UndefinedEnvironmentName( - "{0!r} does not exist in evaluation environment.".format(name) + f"{name!r} does not exist in evaluation environment." ) return value -def _evaluate_markers(markers, environment): - groups = [[]] +def _evaluate_markers(markers: List[Any], environment: Dict[str, str]) -> bool: + groups: List[List[bool]] = [[]] for marker in markers: - assert isinstance(marker, (list, tuple, string_types)) + assert isinstance(marker, (list, tuple, str)) if isinstance(marker, list): groups[-1].append(_evaluate_markers(marker, environment)) @@ -238,22 +246,17 @@ def _evaluate_markers(markers, environment): return any(all(item) for item in groups) -def format_full_version(info): - version = '{0.major}.{0.minor}.{0.micro}'.format(info) +def format_full_version(info: "sys._version_info") -> str: + version = "{0.major}.{0.minor}.{0.micro}".format(info) kind = info.releaselevel - if kind != 'final': + if kind != "final": version += kind[0] + str(info.serial) return version -def default_environment(): - if hasattr(sys, 'implementation'): - iver = format_full_version(sys.implementation.version) - implementation_name = sys.implementation.name - else: - iver = '0' - implementation_name = '' - +def default_environment() -> Dict[str, str]: + iver = format_full_version(sys.implementation.version) + implementation_name = sys.implementation.name return { "implementation_name": implementation_name, "implementation_version": iver, @@ -264,28 +267,28 @@ def default_environment(): "platform_version": platform.version(), "python_full_version": platform.python_version(), "platform_python_implementation": platform.python_implementation(), - "python_version": platform.python_version()[:3], + "python_version": ".".join(platform.python_version_tuple()[:2]), "sys_platform": sys.platform, } -class Marker(object): - - def __init__(self, marker): +class Marker: + def __init__(self, marker: str) -> None: try: self._markers = _coerce_parse_result(MARKER.parseString(marker)) except ParseException as e: - err_str = "Invalid marker: {0!r}, parse error at {1!r}".format( - marker, marker[e.loc:e.loc + 8]) - raise InvalidMarker(err_str) + raise InvalidMarker( + f"Invalid marker: {marker!r}, parse error at " + f"{marker[e.loc : e.loc + 8]!r}" + ) - def __str__(self): + def __str__(self) -> str: return _format_marker(self._markers) - def __repr__(self): - return "".format(str(self)) + def __repr__(self) -> str: + return f"" - def evaluate(self, environment=None): + def evaluate(self, environment: Optional[Dict[str, str]] = None) -> bool: """Evaluate a marker. Return the boolean from evaluating the given marker against the diff --git a/libs/common/pkg_resources/_vendor/packaging/requirements.py b/libs/common/pkg_resources/_vendor/packaging/requirements.py index 0c8c4a38..6af14ec4 100644 --- a/libs/common/pkg_resources/_vendor/packaging/requirements.py +++ b/libs/common/pkg_resources/_vendor/packaging/requirements.py @@ -1,15 +1,24 @@ # This file is dual licensed under the terms of the Apache License, Version # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. -from __future__ import absolute_import, division, print_function -import string import re +import string +import urllib.parse +from typing import List, Optional as TOptional, Set -from pkg_resources.extern.pyparsing import stringStart, stringEnd, originalTextFor, ParseException -from pkg_resources.extern.pyparsing import ZeroOrMore, Word, Optional, Regex, Combine -from pkg_resources.extern.pyparsing import Literal as L # noqa -from pkg_resources.extern.six.moves.urllib import parse as urlparse +from pkg_resources.extern.pyparsing import ( # noqa + Combine, + Literal as L, + Optional, + ParseException, + Regex, + Word, + ZeroOrMore, + originalTextFor, + stringEnd, + stringStart, +) from .markers import MARKER_EXPR, Marker from .specifiers import LegacySpecifier, Specifier, SpecifierSet @@ -38,8 +47,8 @@ IDENTIFIER = Combine(ALPHANUM + ZeroOrMore(IDENTIFIER_END)) NAME = IDENTIFIER("name") EXTRA = IDENTIFIER -URI = Regex(r'[^ ]+')("url") -URL = (AT + URI) +URI = Regex(r"[^ ]+")("url") +URL = AT + URI EXTRAS_LIST = EXTRA + ZeroOrMore(COMMA + EXTRA) EXTRAS = (LBRACKET + Optional(EXTRAS_LIST) + RBRACKET)("extras") @@ -48,31 +57,34 @@ VERSION_PEP440 = Regex(Specifier._regex_str, re.VERBOSE | re.IGNORECASE) VERSION_LEGACY = Regex(LegacySpecifier._regex_str, re.VERBOSE | re.IGNORECASE) VERSION_ONE = VERSION_PEP440 ^ VERSION_LEGACY -VERSION_MANY = Combine(VERSION_ONE + ZeroOrMore(COMMA + VERSION_ONE), - joinString=",", adjacent=False)("_raw_spec") -_VERSION_SPEC = Optional(((LPAREN + VERSION_MANY + RPAREN) | VERSION_MANY)) -_VERSION_SPEC.setParseAction(lambda s, l, t: t._raw_spec or '') +VERSION_MANY = Combine( + VERSION_ONE + ZeroOrMore(COMMA + VERSION_ONE), joinString=",", adjacent=False +)("_raw_spec") +_VERSION_SPEC = Optional((LPAREN + VERSION_MANY + RPAREN) | VERSION_MANY) +_VERSION_SPEC.setParseAction(lambda s, l, t: t._raw_spec or "") VERSION_SPEC = originalTextFor(_VERSION_SPEC)("specifier") VERSION_SPEC.setParseAction(lambda s, l, t: t[1]) MARKER_EXPR = originalTextFor(MARKER_EXPR())("marker") MARKER_EXPR.setParseAction( - lambda s, l, t: Marker(s[t._original_start:t._original_end]) + lambda s, l, t: Marker(s[t._original_start : t._original_end]) ) -MARKER_SEPERATOR = SEMICOLON -MARKER = MARKER_SEPERATOR + MARKER_EXPR +MARKER_SEPARATOR = SEMICOLON +MARKER = MARKER_SEPARATOR + MARKER_EXPR VERSION_AND_MARKER = VERSION_SPEC + Optional(MARKER) URL_AND_MARKER = URL + Optional(MARKER) -NAMED_REQUIREMENT = \ - NAME + Optional(EXTRAS) + (URL_AND_MARKER | VERSION_AND_MARKER) +NAMED_REQUIREMENT = NAME + Optional(EXTRAS) + (URL_AND_MARKER | VERSION_AND_MARKER) REQUIREMENT = stringStart + NAMED_REQUIREMENT + stringEnd +# pkg_resources.extern.pyparsing isn't thread safe during initialization, so we do it eagerly, see +# issue #104 +REQUIREMENT.parseString("x[]") -class Requirement(object): +class Requirement: """Parse a requirement. Parse a given requirement string into its parts, such as name, specifier, @@ -85,43 +97,50 @@ class Requirement(object): # the thing as well as the version? What about the markers? # TODO: Can we normalize the name and extra name? - def __init__(self, requirement_string): + def __init__(self, requirement_string: str) -> None: try: req = REQUIREMENT.parseString(requirement_string) except ParseException as e: raise InvalidRequirement( - "Invalid requirement, parse error at \"{0!r}\"".format( - requirement_string[e.loc:e.loc + 8])) + f'Parse error at "{ requirement_string[e.loc : e.loc + 8]!r}": {e.msg}' + ) - self.name = req.name + self.name: str = req.name if req.url: - parsed_url = urlparse.urlparse(req.url) - if not (parsed_url.scheme and parsed_url.netloc) or ( - not parsed_url.scheme and not parsed_url.netloc): - raise InvalidRequirement("Invalid URL given") - self.url = req.url + parsed_url = urllib.parse.urlparse(req.url) + if parsed_url.scheme == "file": + if urllib.parse.urlunparse(parsed_url) != req.url: + raise InvalidRequirement("Invalid URL given") + elif not (parsed_url.scheme and parsed_url.netloc) or ( + not parsed_url.scheme and not parsed_url.netloc + ): + raise InvalidRequirement(f"Invalid URL: {req.url}") + self.url: TOptional[str] = req.url else: self.url = None - self.extras = set(req.extras.asList() if req.extras else []) - self.specifier = SpecifierSet(req.specifier) - self.marker = req.marker if req.marker else None + self.extras: Set[str] = set(req.extras.asList() if req.extras else []) + self.specifier: SpecifierSet = SpecifierSet(req.specifier) + self.marker: TOptional[Marker] = req.marker if req.marker else None - def __str__(self): - parts = [self.name] + def __str__(self) -> str: + parts: List[str] = [self.name] if self.extras: - parts.append("[{0}]".format(",".join(sorted(self.extras)))) + formatted_extras = ",".join(sorted(self.extras)) + parts.append(f"[{formatted_extras}]") if self.specifier: parts.append(str(self.specifier)) if self.url: - parts.append("@ {0}".format(self.url)) + parts.append(f"@ {self.url}") + if self.marker: + parts.append(" ") if self.marker: - parts.append("; {0}".format(self.marker)) + parts.append(f"; {self.marker}") return "".join(parts) - def __repr__(self): - return "".format(str(self)) + def __repr__(self) -> str: + return f"" diff --git a/libs/common/pkg_resources/_vendor/packaging/specifiers.py b/libs/common/pkg_resources/_vendor/packaging/specifiers.py index 7f5a76cf..0e218a6f 100644 --- a/libs/common/pkg_resources/_vendor/packaging/specifiers.py +++ b/libs/common/pkg_resources/_vendor/packaging/specifiers.py @@ -1,15 +1,33 @@ # This file is dual licensed under the terms of the Apache License, Version # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. -from __future__ import absolute_import, division, print_function import abc import functools import itertools import re +import warnings +from typing import ( + Callable, + Dict, + Iterable, + Iterator, + List, + Optional, + Pattern, + Set, + Tuple, + TypeVar, + Union, +) -from ._compat import string_types, with_metaclass -from .version import Version, LegacyVersion, parse +from .utils import canonicalize_version +from .version import LegacyVersion, Version, parse + +ParsedVersion = Union[Version, LegacyVersion] +UnparsedVersion = Union[Version, LegacyVersion, str] +VersionTypeVar = TypeVar("VersionTypeVar", bound=UnparsedVersion) +CallableOperator = Callable[[ParsedVersion, str], bool] class InvalidSpecifier(ValueError): @@ -18,57 +36,51 @@ class InvalidSpecifier(ValueError): """ -class BaseSpecifier(with_metaclass(abc.ABCMeta, object)): - +class BaseSpecifier(metaclass=abc.ABCMeta): @abc.abstractmethod - def __str__(self): + def __str__(self) -> str: """ Returns the str representation of this Specifier like object. This should be representative of the Specifier itself. """ @abc.abstractmethod - def __hash__(self): + def __hash__(self) -> int: """ Returns a hash value for this Specifier like object. """ @abc.abstractmethod - def __eq__(self, other): + def __eq__(self, other: object) -> bool: """ Returns a boolean representing whether or not the two Specifier like objects are equal. """ - @abc.abstractmethod - def __ne__(self, other): - """ - Returns a boolean representing whether or not the two Specifier like - objects are not equal. - """ - @abc.abstractproperty - def prereleases(self): + def prereleases(self) -> Optional[bool]: """ Returns whether or not pre-releases as a whole are allowed by this specifier. """ @prereleases.setter - def prereleases(self, value): + def prereleases(self, value: bool) -> None: """ Sets whether or not pre-releases as a whole are allowed by this specifier. """ @abc.abstractmethod - def contains(self, item, prereleases=None): + def contains(self, item: str, prereleases: Optional[bool] = None) -> bool: """ Determines if the given item is contained within this specifier. """ @abc.abstractmethod - def filter(self, iterable, prereleases=None): + def filter( + self, iterable: Iterable[VersionTypeVar], prereleases: Optional[bool] = None + ) -> Iterable[VersionTypeVar]: """ Takes an iterable of items and filters them so that only items which are contained within this specifier are allowed in it. @@ -77,14 +89,15 @@ class BaseSpecifier(with_metaclass(abc.ABCMeta, object)): class _IndividualSpecifier(BaseSpecifier): - _operators = {} + _operators: Dict[str, str] = {} + _regex: Pattern[str] - def __init__(self, spec="", prereleases=None): + def __init__(self, spec: str = "", prereleases: Optional[bool] = None) -> None: match = self._regex.search(spec) if not match: - raise InvalidSpecifier("Invalid specifier: '{0}'".format(spec)) + raise InvalidSpecifier(f"Invalid specifier: '{spec}'") - self._spec = ( + self._spec: Tuple[str, str] = ( match.group("operator").strip(), match.group("version").strip(), ) @@ -92,94 +105,93 @@ class _IndividualSpecifier(BaseSpecifier): # Store whether or not this Specifier should accept prereleases self._prereleases = prereleases - def __repr__(self): + def __repr__(self) -> str: pre = ( - ", prereleases={0!r}".format(self.prereleases) + f", prereleases={self.prereleases!r}" if self._prereleases is not None else "" ) - return "<{0}({1!r}{2})>".format( - self.__class__.__name__, - str(self), - pre, + return f"<{self.__class__.__name__}({str(self)!r}{pre})>" + + def __str__(self) -> str: + return "{}{}".format(*self._spec) + + @property + def _canonical_spec(self) -> Tuple[str, str]: + return self._spec[0], canonicalize_version(self._spec[1]) + + def __hash__(self) -> int: + return hash(self._canonical_spec) + + def __eq__(self, other: object) -> bool: + if isinstance(other, str): + try: + other = self.__class__(str(other)) + except InvalidSpecifier: + return NotImplemented + elif not isinstance(other, self.__class__): + return NotImplemented + + return self._canonical_spec == other._canonical_spec + + def _get_operator(self, op: str) -> CallableOperator: + operator_callable: CallableOperator = getattr( + self, f"_compare_{self._operators[op]}" ) + return operator_callable - def __str__(self): - return "{0}{1}".format(*self._spec) - - def __hash__(self): - return hash(self._spec) - - def __eq__(self, other): - if isinstance(other, string_types): - try: - other = self.__class__(other) - except InvalidSpecifier: - return NotImplemented - elif not isinstance(other, self.__class__): - return NotImplemented - - return self._spec == other._spec - - def __ne__(self, other): - if isinstance(other, string_types): - try: - other = self.__class__(other) - except InvalidSpecifier: - return NotImplemented - elif not isinstance(other, self.__class__): - return NotImplemented - - return self._spec != other._spec - - def _get_operator(self, op): - return getattr(self, "_compare_{0}".format(self._operators[op])) - - def _coerce_version(self, version): + def _coerce_version(self, version: UnparsedVersion) -> ParsedVersion: if not isinstance(version, (LegacyVersion, Version)): version = parse(version) return version @property - def operator(self): + def operator(self) -> str: return self._spec[0] @property - def version(self): + def version(self) -> str: return self._spec[1] @property - def prereleases(self): + def prereleases(self) -> Optional[bool]: return self._prereleases @prereleases.setter - def prereleases(self, value): + def prereleases(self, value: bool) -> None: self._prereleases = value - def __contains__(self, item): + def __contains__(self, item: str) -> bool: return self.contains(item) - def contains(self, item, prereleases=None): + def contains( + self, item: UnparsedVersion, prereleases: Optional[bool] = None + ) -> bool: + # Determine if prereleases are to be allowed or not. if prereleases is None: prereleases = self.prereleases # Normalize item to a Version or LegacyVersion, this allows us to have # a shortcut for ``"2.0" in Specifier(">=2") - item = self._coerce_version(item) + normalized_item = self._coerce_version(item) # Determine if we should be supporting prereleases in this specifier # or not, if we do not support prereleases than we can short circuit # logic if this version is a prereleases. - if item.is_prerelease and not prereleases: + if normalized_item.is_prerelease and not prereleases: return False # Actually do the comparison to determine if this item is contained # within this Specifier or not. - return self._get_operator(self.operator)(item, self.version) + operator_callable: CallableOperator = self._get_operator(self.operator) + return operator_callable(normalized_item, self.version) + + def filter( + self, iterable: Iterable[VersionTypeVar], prereleases: Optional[bool] = None + ) -> Iterable[VersionTypeVar]: - def filter(self, iterable, prereleases=None): yielded = False found_prereleases = [] @@ -192,13 +204,14 @@ class _IndividualSpecifier(BaseSpecifier): if self.contains(parsed_version, **kw): # If our version is a prerelease, and we were not set to allow - # prereleases, then we'll store it for later incase nothing + # prereleases, then we'll store it for later in case nothing # else matches this specifier. - if (parsed_version.is_prerelease and not - (prereleases or self.prereleases)): + if parsed_version.is_prerelease and not ( + prereleases or self.prereleases + ): found_prereleases.append(version) # Either this is not a prerelease, or we should have been - # accepting prereleases from the begining. + # accepting prereleases from the beginning. else: yielded = True yield version @@ -213,8 +226,7 @@ class _IndividualSpecifier(BaseSpecifier): class LegacySpecifier(_IndividualSpecifier): - _regex_str = ( - r""" + _regex_str = r""" (?P(==|!=|<=|>=|<|>)) \s* (?P @@ -225,10 +237,8 @@ class LegacySpecifier(_IndividualSpecifier): # them, and a comma since it's a version separator. ) """ - ) - _regex = re.compile( - r"^\s*" + _regex_str + r"\s*$", re.VERBOSE | re.IGNORECASE) + _regex = re.compile(r"^\s*" + _regex_str + r"\s*$", re.VERBOSE | re.IGNORECASE) _operators = { "==": "equal", @@ -239,43 +249,56 @@ class LegacySpecifier(_IndividualSpecifier): ">": "greater_than", } - def _coerce_version(self, version): + def __init__(self, spec: str = "", prereleases: Optional[bool] = None) -> None: + super().__init__(spec, prereleases) + + warnings.warn( + "Creating a LegacyVersion has been deprecated and will be " + "removed in the next major release", + DeprecationWarning, + ) + + def _coerce_version(self, version: UnparsedVersion) -> LegacyVersion: if not isinstance(version, LegacyVersion): version = LegacyVersion(str(version)) return version - def _compare_equal(self, prospective, spec): + def _compare_equal(self, prospective: LegacyVersion, spec: str) -> bool: return prospective == self._coerce_version(spec) - def _compare_not_equal(self, prospective, spec): + def _compare_not_equal(self, prospective: LegacyVersion, spec: str) -> bool: return prospective != self._coerce_version(spec) - def _compare_less_than_equal(self, prospective, spec): + def _compare_less_than_equal(self, prospective: LegacyVersion, spec: str) -> bool: return prospective <= self._coerce_version(spec) - def _compare_greater_than_equal(self, prospective, spec): + def _compare_greater_than_equal( + self, prospective: LegacyVersion, spec: str + ) -> bool: return prospective >= self._coerce_version(spec) - def _compare_less_than(self, prospective, spec): + def _compare_less_than(self, prospective: LegacyVersion, spec: str) -> bool: return prospective < self._coerce_version(spec) - def _compare_greater_than(self, prospective, spec): + def _compare_greater_than(self, prospective: LegacyVersion, spec: str) -> bool: return prospective > self._coerce_version(spec) -def _require_version_compare(fn): +def _require_version_compare( + fn: Callable[["Specifier", ParsedVersion, str], bool] +) -> Callable[["Specifier", ParsedVersion, str], bool]: @functools.wraps(fn) - def wrapped(self, prospective, spec): + def wrapped(self: "Specifier", prospective: ParsedVersion, spec: str) -> bool: if not isinstance(prospective, Version): return False return fn(self, prospective, spec) + return wrapped class Specifier(_IndividualSpecifier): - _regex_str = ( - r""" + _regex_str = r""" (?P(~=|==|!=|<=|>=|<|>|===)) (?P (?: @@ -367,10 +390,8 @@ class Specifier(_IndividualSpecifier): ) ) """ - ) - _regex = re.compile( - r"^\s*" + _regex_str + r"\s*$", re.VERBOSE | re.IGNORECASE) + _regex = re.compile(r"^\s*" + _regex_str + r"\s*$", re.VERBOSE | re.IGNORECASE) _operators = { "~=": "compatible", @@ -384,7 +405,8 @@ class Specifier(_IndividualSpecifier): } @_require_version_compare - def _compare_compatible(self, prospective, spec): + def _compare_compatible(self, prospective: ParsedVersion, spec: str) -> bool: + # Compatible releases have an equivalent combination of >= and ==. That # is that ~=2.2 is equivalent to >=2.2,==2.*. This allows us to # implement this in terms of the other specifiers instead of @@ -392,76 +414,86 @@ class Specifier(_IndividualSpecifier): # the other specifiers. # We want everything but the last item in the version, but we want to - # ignore post and dev releases and we want to treat the pre-release as - # it's own separate segment. + # ignore suffix segments. prefix = ".".join( - list( - itertools.takewhile( - lambda x: (not x.startswith("post") and not - x.startswith("dev")), - _version_split(spec), - ) - )[:-1] + list(itertools.takewhile(_is_not_suffix, _version_split(spec)))[:-1] ) # Add the prefix notation to the end of our string prefix += ".*" - return (self._get_operator(">=")(prospective, spec) and - self._get_operator("==")(prospective, prefix)) + return self._get_operator(">=")(prospective, spec) and self._get_operator("==")( + prospective, prefix + ) @_require_version_compare - def _compare_equal(self, prospective, spec): + def _compare_equal(self, prospective: ParsedVersion, spec: str) -> bool: + # We need special logic to handle prefix matching if spec.endswith(".*"): # In the case of prefix matching we want to ignore local segment. prospective = Version(prospective.public) # Split the spec out by dots, and pretend that there is an implicit # dot in between a release segment and a pre-release segment. - spec = _version_split(spec[:-2]) # Remove the trailing .* + split_spec = _version_split(spec[:-2]) # Remove the trailing .* # Split the prospective version out by dots, and pretend that there # is an implicit dot in between a release segment and a pre-release # segment. - prospective = _version_split(str(prospective)) + split_prospective = _version_split(str(prospective)) # Shorten the prospective version to be the same length as the spec # so that we can determine if the specifier is a prefix of the # prospective version or not. - prospective = prospective[:len(spec)] + shortened_prospective = split_prospective[: len(split_spec)] # Pad out our two sides with zeros so that they both equal the same # length. - spec, prospective = _pad_version(spec, prospective) + padded_spec, padded_prospective = _pad_version( + split_spec, shortened_prospective + ) + + return padded_prospective == padded_spec else: # Convert our spec string into a Version - spec = Version(spec) + spec_version = Version(spec) # If the specifier does not have a local segment, then we want to # act as if the prospective version also does not have a local # segment. - if not spec.local: + if not spec_version.local: prospective = Version(prospective.public) - return prospective == spec + return prospective == spec_version @_require_version_compare - def _compare_not_equal(self, prospective, spec): + def _compare_not_equal(self, prospective: ParsedVersion, spec: str) -> bool: return not self._compare_equal(prospective, spec) @_require_version_compare - def _compare_less_than_equal(self, prospective, spec): - return prospective <= Version(spec) + def _compare_less_than_equal(self, prospective: ParsedVersion, spec: str) -> bool: + + # NB: Local version identifiers are NOT permitted in the version + # specifier, so local version labels can be universally removed from + # the prospective version. + return Version(prospective.public) <= Version(spec) @_require_version_compare - def _compare_greater_than_equal(self, prospective, spec): - return prospective >= Version(spec) + def _compare_greater_than_equal( + self, prospective: ParsedVersion, spec: str + ) -> bool: + + # NB: Local version identifiers are NOT permitted in the version + # specifier, so local version labels can be universally removed from + # the prospective version. + return Version(prospective.public) >= Version(spec) @_require_version_compare - def _compare_less_than(self, prospective, spec): + def _compare_less_than(self, prospective: ParsedVersion, spec_str: str) -> bool: + # Convert our spec to a Version instance, since we'll want to work with # it as a version. - spec = Version(spec) + spec = Version(spec_str) # Check to see if the prospective version is less than the spec # version. If it's not we can short circuit and just return False now @@ -483,10 +515,11 @@ class Specifier(_IndividualSpecifier): return True @_require_version_compare - def _compare_greater_than(self, prospective, spec): + def _compare_greater_than(self, prospective: ParsedVersion, spec_str: str) -> bool: + # Convert our spec to a Version instance, since we'll want to work with # it as a version. - spec = Version(spec) + spec = Version(spec_str) # Check to see if the prospective version is greater than the spec # version. If it's not we can short circuit and just return False now @@ -503,7 +536,7 @@ class Specifier(_IndividualSpecifier): return False # Ensure that we do not allow a local version of the version mentioned - # in the specifier, which is techincally greater than, to match. + # in the specifier, which is technically greater than, to match. if prospective.local is not None: if Version(prospective.base_version) == Version(spec.base_version): return False @@ -513,11 +546,12 @@ class Specifier(_IndividualSpecifier): # same version in the spec. return True - def _compare_arbitrary(self, prospective, spec): + def _compare_arbitrary(self, prospective: Version, spec: str) -> bool: return str(prospective).lower() == str(spec).lower() @property - def prereleases(self): + def prereleases(self) -> bool: + # If there is an explicit prereleases set for this, then we'll just # blindly use that. if self._prereleases is not None: @@ -541,15 +575,15 @@ class Specifier(_IndividualSpecifier): return False @prereleases.setter - def prereleases(self, value): + def prereleases(self, value: bool) -> None: self._prereleases = value _prefix_regex = re.compile(r"^([0-9]+)((?:a|b|c|rc)[0-9]+)$") -def _version_split(version): - result = [] +def _version_split(version: str) -> List[str]: + result: List[str] = [] for item in version.split("."): match = _prefix_regex.search(item) if match: @@ -559,7 +593,13 @@ def _version_split(version): return result -def _pad_version(left, right): +def _is_not_suffix(segment: str) -> bool: + return not any( + segment.startswith(prefix) for prefix in ("dev", "a", "b", "rc", "post") + ) + + +def _pad_version(left: List[str], right: List[str]) -> Tuple[List[str], List[str]]: left_split, right_split = [], [] # Get the release segment of our versions @@ -567,36 +607,29 @@ def _pad_version(left, right): right_split.append(list(itertools.takewhile(lambda x: x.isdigit(), right))) # Get the rest of our versions - left_split.append(left[len(left_split[0]):]) - right_split.append(right[len(right_split[0]):]) + left_split.append(left[len(left_split[0]) :]) + right_split.append(right[len(right_split[0]) :]) # Insert our padding - left_split.insert( - 1, - ["0"] * max(0, len(right_split[0]) - len(left_split[0])), - ) - right_split.insert( - 1, - ["0"] * max(0, len(left_split[0]) - len(right_split[0])), - ) + left_split.insert(1, ["0"] * max(0, len(right_split[0]) - len(left_split[0]))) + right_split.insert(1, ["0"] * max(0, len(left_split[0]) - len(right_split[0]))) - return ( - list(itertools.chain(*left_split)), - list(itertools.chain(*right_split)), - ) + return (list(itertools.chain(*left_split)), list(itertools.chain(*right_split))) class SpecifierSet(BaseSpecifier): + def __init__( + self, specifiers: str = "", prereleases: Optional[bool] = None + ) -> None: - def __init__(self, specifiers="", prereleases=None): - # Split on , to break each indidivual specifier into it's own item, and + # Split on , to break each individual specifier into it's own item, and # strip each item to remove leading/trailing whitespace. - specifiers = [s.strip() for s in specifiers.split(",") if s.strip()] + split_specifiers = [s.strip() for s in specifiers.split(",") if s.strip()] # Parsed each individual specifier, attempting first to make it a # Specifier and falling back to a LegacySpecifier. - parsed = set() - for specifier in specifiers: + parsed: Set[_IndividualSpecifier] = set() + for specifier in split_specifiers: try: parsed.add(Specifier(specifier)) except InvalidSpecifier: @@ -609,23 +642,23 @@ class SpecifierSet(BaseSpecifier): # we accept prereleases or not. self._prereleases = prereleases - def __repr__(self): + def __repr__(self) -> str: pre = ( - ", prereleases={0!r}".format(self.prereleases) + f", prereleases={self.prereleases!r}" if self._prereleases is not None else "" ) - return "".format(str(self), pre) + return f"" - def __str__(self): + def __str__(self) -> str: return ",".join(sorted(str(s) for s in self._specs)) - def __hash__(self): + def __hash__(self) -> int: return hash(self._specs) - def __and__(self, other): - if isinstance(other, string_types): + def __and__(self, other: Union["SpecifierSet", str]) -> "SpecifierSet": + if isinstance(other, str): other = SpecifierSet(other) elif not isinstance(other, SpecifierSet): return NotImplemented @@ -647,34 +680,23 @@ class SpecifierSet(BaseSpecifier): return specifier - def __eq__(self, other): - if isinstance(other, string_types): - other = SpecifierSet(other) - elif isinstance(other, _IndividualSpecifier): + def __eq__(self, other: object) -> bool: + if isinstance(other, (str, _IndividualSpecifier)): other = SpecifierSet(str(other)) elif not isinstance(other, SpecifierSet): return NotImplemented return self._specs == other._specs - def __ne__(self, other): - if isinstance(other, string_types): - other = SpecifierSet(other) - elif isinstance(other, _IndividualSpecifier): - other = SpecifierSet(str(other)) - elif not isinstance(other, SpecifierSet): - return NotImplemented - - return self._specs != other._specs - - def __len__(self): + def __len__(self) -> int: return len(self._specs) - def __iter__(self): + def __iter__(self) -> Iterator[_IndividualSpecifier]: return iter(self._specs) @property - def prereleases(self): + def prereleases(self) -> Optional[bool]: + # If we have been given an explicit prerelease modifier, then we'll # pass that through here. if self._prereleases is not None: @@ -691,13 +713,16 @@ class SpecifierSet(BaseSpecifier): return any(s.prereleases for s in self._specs) @prereleases.setter - def prereleases(self, value): + def prereleases(self, value: bool) -> None: self._prereleases = value - def __contains__(self, item): + def __contains__(self, item: UnparsedVersion) -> bool: return self.contains(item) - def contains(self, item, prereleases=None): + def contains( + self, item: UnparsedVersion, prereleases: Optional[bool] = None + ) -> bool: + # Ensure that our item is a Version or LegacyVersion instance. if not isinstance(item, (LegacyVersion, Version)): item = parse(item) @@ -721,12 +746,12 @@ class SpecifierSet(BaseSpecifier): # given version is contained within all of them. # Note: This use of all() here means that an empty set of specifiers # will always return True, this is an explicit design decision. - return all( - s.contains(item, prereleases=prereleases) - for s in self._specs - ) + return all(s.contains(item, prereleases=prereleases) for s in self._specs) + + def filter( + self, iterable: Iterable[VersionTypeVar], prereleases: Optional[bool] = None + ) -> Iterable[VersionTypeVar]: - def filter(self, iterable, prereleases=None): # Determine if we're forcing a prerelease or not, if we're not forcing # one for this particular filter call, then we'll use whatever the # SpecifierSet thinks for whether or not we should support prereleases. @@ -744,8 +769,11 @@ class SpecifierSet(BaseSpecifier): # which will filter out any pre-releases, unless there are no final # releases, and which will filter out LegacyVersion in general. else: - filtered = [] - found_prereleases = [] + filtered: List[VersionTypeVar] = [] + found_prereleases: List[VersionTypeVar] = [] + + item: UnparsedVersion + parsed_version: Union[Version, LegacyVersion] for item in iterable: # Ensure that we some kind of Version class for this item. diff --git a/libs/common/pkg_resources/_vendor/packaging/tags.py b/libs/common/pkg_resources/_vendor/packaging/tags.py new file mode 100644 index 00000000..9a3d25a7 --- /dev/null +++ b/libs/common/pkg_resources/_vendor/packaging/tags.py @@ -0,0 +1,487 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. + +import logging +import platform +import sys +import sysconfig +from importlib.machinery import EXTENSION_SUFFIXES +from typing import ( + Dict, + FrozenSet, + Iterable, + Iterator, + List, + Optional, + Sequence, + Tuple, + Union, + cast, +) + +from . import _manylinux, _musllinux + +logger = logging.getLogger(__name__) + +PythonVersion = Sequence[int] +MacVersion = Tuple[int, int] + +INTERPRETER_SHORT_NAMES: Dict[str, str] = { + "python": "py", # Generic. + "cpython": "cp", + "pypy": "pp", + "ironpython": "ip", + "jython": "jy", +} + + +_32_BIT_INTERPRETER = sys.maxsize <= 2 ** 32 + + +class Tag: + """ + A representation of the tag triple for a wheel. + + Instances are considered immutable and thus are hashable. Equality checking + is also supported. + """ + + __slots__ = ["_interpreter", "_abi", "_platform", "_hash"] + + def __init__(self, interpreter: str, abi: str, platform: str) -> None: + self._interpreter = interpreter.lower() + self._abi = abi.lower() + self._platform = platform.lower() + # The __hash__ of every single element in a Set[Tag] will be evaluated each time + # that a set calls its `.disjoint()` method, which may be called hundreds of + # times when scanning a page of links for packages with tags matching that + # Set[Tag]. Pre-computing the value here produces significant speedups for + # downstream consumers. + self._hash = hash((self._interpreter, self._abi, self._platform)) + + @property + def interpreter(self) -> str: + return self._interpreter + + @property + def abi(self) -> str: + return self._abi + + @property + def platform(self) -> str: + return self._platform + + def __eq__(self, other: object) -> bool: + if not isinstance(other, Tag): + return NotImplemented + + return ( + (self._hash == other._hash) # Short-circuit ASAP for perf reasons. + and (self._platform == other._platform) + and (self._abi == other._abi) + and (self._interpreter == other._interpreter) + ) + + def __hash__(self) -> int: + return self._hash + + def __str__(self) -> str: + return f"{self._interpreter}-{self._abi}-{self._platform}" + + def __repr__(self) -> str: + return f"<{self} @ {id(self)}>" + + +def parse_tag(tag: str) -> FrozenSet[Tag]: + """ + Parses the provided tag (e.g. `py3-none-any`) into a frozenset of Tag instances. + + Returning a set is required due to the possibility that the tag is a + compressed tag set. + """ + tags = set() + interpreters, abis, platforms = tag.split("-") + for interpreter in interpreters.split("."): + for abi in abis.split("."): + for platform_ in platforms.split("."): + tags.add(Tag(interpreter, abi, platform_)) + return frozenset(tags) + + +def _get_config_var(name: str, warn: bool = False) -> Union[int, str, None]: + value = sysconfig.get_config_var(name) + if value is None and warn: + logger.debug( + "Config variable '%s' is unset, Python ABI tag may be incorrect", name + ) + return value + + +def _normalize_string(string: str) -> str: + return string.replace(".", "_").replace("-", "_") + + +def _abi3_applies(python_version: PythonVersion) -> bool: + """ + Determine if the Python version supports abi3. + + PEP 384 was first implemented in Python 3.2. + """ + return len(python_version) > 1 and tuple(python_version) >= (3, 2) + + +def _cpython_abis(py_version: PythonVersion, warn: bool = False) -> List[str]: + py_version = tuple(py_version) # To allow for version comparison. + abis = [] + version = _version_nodot(py_version[:2]) + debug = pymalloc = ucs4 = "" + with_debug = _get_config_var("Py_DEBUG", warn) + has_refcount = hasattr(sys, "gettotalrefcount") + # Windows doesn't set Py_DEBUG, so checking for support of debug-compiled + # extension modules is the best option. + # https://github.com/pypa/pip/issues/3383#issuecomment-173267692 + has_ext = "_d.pyd" in EXTENSION_SUFFIXES + if with_debug or (with_debug is None and (has_refcount or has_ext)): + debug = "d" + if py_version < (3, 8): + with_pymalloc = _get_config_var("WITH_PYMALLOC", warn) + if with_pymalloc or with_pymalloc is None: + pymalloc = "m" + if py_version < (3, 3): + unicode_size = _get_config_var("Py_UNICODE_SIZE", warn) + if unicode_size == 4 or ( + unicode_size is None and sys.maxunicode == 0x10FFFF + ): + ucs4 = "u" + elif debug: + # Debug builds can also load "normal" extension modules. + # We can also assume no UCS-4 or pymalloc requirement. + abis.append(f"cp{version}") + abis.insert( + 0, + "cp{version}{debug}{pymalloc}{ucs4}".format( + version=version, debug=debug, pymalloc=pymalloc, ucs4=ucs4 + ), + ) + return abis + + +def cpython_tags( + python_version: Optional[PythonVersion] = None, + abis: Optional[Iterable[str]] = None, + platforms: Optional[Iterable[str]] = None, + *, + warn: bool = False, +) -> Iterator[Tag]: + """ + Yields the tags for a CPython interpreter. + + The tags consist of: + - cp-- + - cp-abi3- + - cp-none- + - cp-abi3- # Older Python versions down to 3.2. + + If python_version only specifies a major version then user-provided ABIs and + the 'none' ABItag will be used. + + If 'abi3' or 'none' are specified in 'abis' then they will be yielded at + their normal position and not at the beginning. + """ + if not python_version: + python_version = sys.version_info[:2] + + interpreter = f"cp{_version_nodot(python_version[:2])}" + + if abis is None: + if len(python_version) > 1: + abis = _cpython_abis(python_version, warn) + else: + abis = [] + abis = list(abis) + # 'abi3' and 'none' are explicitly handled later. + for explicit_abi in ("abi3", "none"): + try: + abis.remove(explicit_abi) + except ValueError: + pass + + platforms = list(platforms or platform_tags()) + for abi in abis: + for platform_ in platforms: + yield Tag(interpreter, abi, platform_) + if _abi3_applies(python_version): + yield from (Tag(interpreter, "abi3", platform_) for platform_ in platforms) + yield from (Tag(interpreter, "none", platform_) for platform_ in platforms) + + if _abi3_applies(python_version): + for minor_version in range(python_version[1] - 1, 1, -1): + for platform_ in platforms: + interpreter = "cp{version}".format( + version=_version_nodot((python_version[0], minor_version)) + ) + yield Tag(interpreter, "abi3", platform_) + + +def _generic_abi() -> Iterator[str]: + abi = sysconfig.get_config_var("SOABI") + if abi: + yield _normalize_string(abi) + + +def generic_tags( + interpreter: Optional[str] = None, + abis: Optional[Iterable[str]] = None, + platforms: Optional[Iterable[str]] = None, + *, + warn: bool = False, +) -> Iterator[Tag]: + """ + Yields the tags for a generic interpreter. + + The tags consist of: + - -- + + The "none" ABI will be added if it was not explicitly provided. + """ + if not interpreter: + interp_name = interpreter_name() + interp_version = interpreter_version(warn=warn) + interpreter = "".join([interp_name, interp_version]) + if abis is None: + abis = _generic_abi() + platforms = list(platforms or platform_tags()) + abis = list(abis) + if "none" not in abis: + abis.append("none") + for abi in abis: + for platform_ in platforms: + yield Tag(interpreter, abi, platform_) + + +def _py_interpreter_range(py_version: PythonVersion) -> Iterator[str]: + """ + Yields Python versions in descending order. + + After the latest version, the major-only version will be yielded, and then + all previous versions of that major version. + """ + if len(py_version) > 1: + yield f"py{_version_nodot(py_version[:2])}" + yield f"py{py_version[0]}" + if len(py_version) > 1: + for minor in range(py_version[1] - 1, -1, -1): + yield f"py{_version_nodot((py_version[0], minor))}" + + +def compatible_tags( + python_version: Optional[PythonVersion] = None, + interpreter: Optional[str] = None, + platforms: Optional[Iterable[str]] = None, +) -> Iterator[Tag]: + """ + Yields the sequence of tags that are compatible with a specific version of Python. + + The tags consist of: + - py*-none- + - -none-any # ... if `interpreter` is provided. + - py*-none-any + """ + if not python_version: + python_version = sys.version_info[:2] + platforms = list(platforms or platform_tags()) + for version in _py_interpreter_range(python_version): + for platform_ in platforms: + yield Tag(version, "none", platform_) + if interpreter: + yield Tag(interpreter, "none", "any") + for version in _py_interpreter_range(python_version): + yield Tag(version, "none", "any") + + +def _mac_arch(arch: str, is_32bit: bool = _32_BIT_INTERPRETER) -> str: + if not is_32bit: + return arch + + if arch.startswith("ppc"): + return "ppc" + + return "i386" + + +def _mac_binary_formats(version: MacVersion, cpu_arch: str) -> List[str]: + formats = [cpu_arch] + if cpu_arch == "x86_64": + if version < (10, 4): + return [] + formats.extend(["intel", "fat64", "fat32"]) + + elif cpu_arch == "i386": + if version < (10, 4): + return [] + formats.extend(["intel", "fat32", "fat"]) + + elif cpu_arch == "ppc64": + # TODO: Need to care about 32-bit PPC for ppc64 through 10.2? + if version > (10, 5) or version < (10, 4): + return [] + formats.append("fat64") + + elif cpu_arch == "ppc": + if version > (10, 6): + return [] + formats.extend(["fat32", "fat"]) + + if cpu_arch in {"arm64", "x86_64"}: + formats.append("universal2") + + if cpu_arch in {"x86_64", "i386", "ppc64", "ppc", "intel"}: + formats.append("universal") + + return formats + + +def mac_platforms( + version: Optional[MacVersion] = None, arch: Optional[str] = None +) -> Iterator[str]: + """ + Yields the platform tags for a macOS system. + + The `version` parameter is a two-item tuple specifying the macOS version to + generate platform tags for. The `arch` parameter is the CPU architecture to + generate platform tags for. Both parameters default to the appropriate value + for the current system. + """ + version_str, _, cpu_arch = platform.mac_ver() + if version is None: + version = cast("MacVersion", tuple(map(int, version_str.split(".")[:2]))) + else: + version = version + if arch is None: + arch = _mac_arch(cpu_arch) + else: + arch = arch + + if (10, 0) <= version and version < (11, 0): + # Prior to Mac OS 11, each yearly release of Mac OS bumped the + # "minor" version number. The major version was always 10. + for minor_version in range(version[1], -1, -1): + compat_version = 10, minor_version + binary_formats = _mac_binary_formats(compat_version, arch) + for binary_format in binary_formats: + yield "macosx_{major}_{minor}_{binary_format}".format( + major=10, minor=minor_version, binary_format=binary_format + ) + + if version >= (11, 0): + # Starting with Mac OS 11, each yearly release bumps the major version + # number. The minor versions are now the midyear updates. + for major_version in range(version[0], 10, -1): + compat_version = major_version, 0 + binary_formats = _mac_binary_formats(compat_version, arch) + for binary_format in binary_formats: + yield "macosx_{major}_{minor}_{binary_format}".format( + major=major_version, minor=0, binary_format=binary_format + ) + + if version >= (11, 0): + # Mac OS 11 on x86_64 is compatible with binaries from previous releases. + # Arm64 support was introduced in 11.0, so no Arm binaries from previous + # releases exist. + # + # However, the "universal2" binary format can have a + # macOS version earlier than 11.0 when the x86_64 part of the binary supports + # that version of macOS. + if arch == "x86_64": + for minor_version in range(16, 3, -1): + compat_version = 10, minor_version + binary_formats = _mac_binary_formats(compat_version, arch) + for binary_format in binary_formats: + yield "macosx_{major}_{minor}_{binary_format}".format( + major=compat_version[0], + minor=compat_version[1], + binary_format=binary_format, + ) + else: + for minor_version in range(16, 3, -1): + compat_version = 10, minor_version + binary_format = "universal2" + yield "macosx_{major}_{minor}_{binary_format}".format( + major=compat_version[0], + minor=compat_version[1], + binary_format=binary_format, + ) + + +def _linux_platforms(is_32bit: bool = _32_BIT_INTERPRETER) -> Iterator[str]: + linux = _normalize_string(sysconfig.get_platform()) + if is_32bit: + if linux == "linux_x86_64": + linux = "linux_i686" + elif linux == "linux_aarch64": + linux = "linux_armv7l" + _, arch = linux.split("_", 1) + yield from _manylinux.platform_tags(linux, arch) + yield from _musllinux.platform_tags(arch) + yield linux + + +def _generic_platforms() -> Iterator[str]: + yield _normalize_string(sysconfig.get_platform()) + + +def platform_tags() -> Iterator[str]: + """ + Provides the platform tags for this installation. + """ + if platform.system() == "Darwin": + return mac_platforms() + elif platform.system() == "Linux": + return _linux_platforms() + else: + return _generic_platforms() + + +def interpreter_name() -> str: + """ + Returns the name of the running interpreter. + """ + name = sys.implementation.name + return INTERPRETER_SHORT_NAMES.get(name) or name + + +def interpreter_version(*, warn: bool = False) -> str: + """ + Returns the version of the running interpreter. + """ + version = _get_config_var("py_version_nodot", warn=warn) + if version: + version = str(version) + else: + version = _version_nodot(sys.version_info[:2]) + return version + + +def _version_nodot(version: PythonVersion) -> str: + return "".join(map(str, version)) + + +def sys_tags(*, warn: bool = False) -> Iterator[Tag]: + """ + Returns the sequence of tag triples for the running interpreter. + + The order of the sequence corresponds to priority order for the + interpreter, from most to least important. + """ + + interp_name = interpreter_name() + if interp_name == "cp": + yield from cpython_tags(warn=warn) + else: + yield from generic_tags() + + if interp_name == "pp": + yield from compatible_tags(interpreter="pp3") + else: + yield from compatible_tags() diff --git a/libs/common/pkg_resources/_vendor/packaging/utils.py b/libs/common/pkg_resources/_vendor/packaging/utils.py index 942387ce..bab11b80 100644 --- a/libs/common/pkg_resources/_vendor/packaging/utils.py +++ b/libs/common/pkg_resources/_vendor/packaging/utils.py @@ -1,14 +1,136 @@ # This file is dual licensed under the terms of the Apache License, Version # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. -from __future__ import absolute_import, division, print_function import re +from typing import FrozenSet, NewType, Tuple, Union, cast + +from .tags import Tag, parse_tag +from .version import InvalidVersion, Version + +BuildTag = Union[Tuple[()], Tuple[int, str]] +NormalizedName = NewType("NormalizedName", str) + + +class InvalidWheelFilename(ValueError): + """ + An invalid wheel filename was found, users should refer to PEP 427. + """ + + +class InvalidSdistFilename(ValueError): + """ + An invalid sdist filename was found, users should refer to the packaging user guide. + """ _canonicalize_regex = re.compile(r"[-_.]+") +# PEP 427: The build number must start with a digit. +_build_tag_regex = re.compile(r"(\d+)(.*)") -def canonicalize_name(name): +def canonicalize_name(name: str) -> NormalizedName: # This is taken from PEP 503. - return _canonicalize_regex.sub("-", name).lower() + value = _canonicalize_regex.sub("-", name).lower() + return cast(NormalizedName, value) + + +def canonicalize_version(version: Union[Version, str]) -> str: + """ + This is very similar to Version.__str__, but has one subtle difference + with the way it handles the release segment. + """ + if isinstance(version, str): + try: + parsed = Version(version) + except InvalidVersion: + # Legacy versions cannot be normalized + return version + else: + parsed = version + + parts = [] + + # Epoch + if parsed.epoch != 0: + parts.append(f"{parsed.epoch}!") + + # Release segment + # NB: This strips trailing '.0's to normalize + parts.append(re.sub(r"(\.0)+$", "", ".".join(str(x) for x in parsed.release))) + + # Pre-release + if parsed.pre is not None: + parts.append("".join(str(x) for x in parsed.pre)) + + # Post-release + if parsed.post is not None: + parts.append(f".post{parsed.post}") + + # Development release + if parsed.dev is not None: + parts.append(f".dev{parsed.dev}") + + # Local version segment + if parsed.local is not None: + parts.append(f"+{parsed.local}") + + return "".join(parts) + + +def parse_wheel_filename( + filename: str, +) -> Tuple[NormalizedName, Version, BuildTag, FrozenSet[Tag]]: + if not filename.endswith(".whl"): + raise InvalidWheelFilename( + f"Invalid wheel filename (extension must be '.whl'): {filename}" + ) + + filename = filename[:-4] + dashes = filename.count("-") + if dashes not in (4, 5): + raise InvalidWheelFilename( + f"Invalid wheel filename (wrong number of parts): {filename}" + ) + + parts = filename.split("-", dashes - 2) + name_part = parts[0] + # See PEP 427 for the rules on escaping the project name + if "__" in name_part or re.match(r"^[\w\d._]*$", name_part, re.UNICODE) is None: + raise InvalidWheelFilename(f"Invalid project name: {filename}") + name = canonicalize_name(name_part) + version = Version(parts[1]) + if dashes == 5: + build_part = parts[2] + build_match = _build_tag_regex.match(build_part) + if build_match is None: + raise InvalidWheelFilename( + f"Invalid build number: {build_part} in '{filename}'" + ) + build = cast(BuildTag, (int(build_match.group(1)), build_match.group(2))) + else: + build = () + tags = parse_tag(parts[-1]) + return (name, version, build, tags) + + +def parse_sdist_filename(filename: str) -> Tuple[NormalizedName, Version]: + if filename.endswith(".tar.gz"): + file_stem = filename[: -len(".tar.gz")] + elif filename.endswith(".zip"): + file_stem = filename[: -len(".zip")] + else: + raise InvalidSdistFilename( + f"Invalid sdist filename (extension must be '.tar.gz' or '.zip'):" + f" {filename}" + ) + + # We are requiring a PEP 440 version, which cannot contain dashes, + # so we split on the last dash. + name_part, sep, version_part = file_stem.rpartition("-") + if not sep: + raise InvalidSdistFilename(f"Invalid sdist filename: {filename}") + + name = canonicalize_name(name_part) + version = Version(version_part) + return (name, version) diff --git a/libs/common/pkg_resources/_vendor/packaging/version.py b/libs/common/pkg_resources/_vendor/packaging/version.py index 83b5ee8c..de9a09a4 100644 --- a/libs/common/pkg_resources/_vendor/packaging/version.py +++ b/libs/common/pkg_resources/_vendor/packaging/version.py @@ -1,27 +1,45 @@ # This file is dual licensed under the terms of the Apache License, Version # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. -from __future__ import absolute_import, division, print_function import collections import itertools import re +import warnings +from typing import Callable, Iterator, List, Optional, SupportsInt, Tuple, Union -from ._structures import Infinity +from ._structures import Infinity, InfinityType, NegativeInfinity, NegativeInfinityType +__all__ = ["parse", "Version", "LegacyVersion", "InvalidVersion", "VERSION_PATTERN"] -__all__ = [ - "parse", "Version", "LegacyVersion", "InvalidVersion", "VERSION_PATTERN" +InfiniteTypes = Union[InfinityType, NegativeInfinityType] +PrePostDevType = Union[InfiniteTypes, Tuple[str, int]] +SubLocalType = Union[InfiniteTypes, int, str] +LocalType = Union[ + NegativeInfinityType, + Tuple[ + Union[ + SubLocalType, + Tuple[SubLocalType, str], + Tuple[NegativeInfinityType, SubLocalType], + ], + ..., + ], +] +CmpKey = Tuple[ + int, Tuple[int, ...], PrePostDevType, PrePostDevType, PrePostDevType, LocalType +] +LegacyCmpKey = Tuple[int, Tuple[str, ...]] +VersionComparisonMethod = Callable[ + [Union[CmpKey, LegacyCmpKey], Union[CmpKey, LegacyCmpKey]], bool ] - _Version = collections.namedtuple( - "_Version", - ["epoch", "release", "dev", "pre", "post", "local"], + "_Version", ["epoch", "release", "dev", "pre", "post", "local"] ) -def parse(version): +def parse(version: str) -> Union["LegacyVersion", "Version"]: """ Parse the given version string and return either a :class:`Version` object or a :class:`LegacyVersion` object depending on if the given version is @@ -39,79 +57,126 @@ class InvalidVersion(ValueError): """ -class _BaseVersion(object): +class _BaseVersion: + _key: Union[CmpKey, LegacyCmpKey] - def __hash__(self): + def __hash__(self) -> int: return hash(self._key) - def __lt__(self, other): - return self._compare(other, lambda s, o: s < o) - - def __le__(self, other): - return self._compare(other, lambda s, o: s <= o) - - def __eq__(self, other): - return self._compare(other, lambda s, o: s == o) - - def __ge__(self, other): - return self._compare(other, lambda s, o: s >= o) - - def __gt__(self, other): - return self._compare(other, lambda s, o: s > o) - - def __ne__(self, other): - return self._compare(other, lambda s, o: s != o) - - def _compare(self, other, method): + # Please keep the duplicated `isinstance` check + # in the six comparisons hereunder + # unless you find a way to avoid adding overhead function calls. + def __lt__(self, other: "_BaseVersion") -> bool: if not isinstance(other, _BaseVersion): return NotImplemented - return method(self._key, other._key) + return self._key < other._key + + def __le__(self, other: "_BaseVersion") -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key <= other._key + + def __eq__(self, other: object) -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key == other._key + + def __ge__(self, other: "_BaseVersion") -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key >= other._key + + def __gt__(self, other: "_BaseVersion") -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key > other._key + + def __ne__(self, other: object) -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key != other._key class LegacyVersion(_BaseVersion): - - def __init__(self, version): + def __init__(self, version: str) -> None: self._version = str(version) self._key = _legacy_cmpkey(self._version) - def __str__(self): + warnings.warn( + "Creating a LegacyVersion has been deprecated and will be " + "removed in the next major release", + DeprecationWarning, + ) + + def __str__(self) -> str: return self._version - def __repr__(self): - return "".format(repr(str(self))) + def __repr__(self) -> str: + return f"" @property - def public(self): + def public(self) -> str: return self._version @property - def base_version(self): + def base_version(self) -> str: return self._version @property - def local(self): + def epoch(self) -> int: + return -1 + + @property + def release(self) -> None: return None @property - def is_prerelease(self): + def pre(self) -> None: + return None + + @property + def post(self) -> None: + return None + + @property + def dev(self) -> None: + return None + + @property + def local(self) -> None: + return None + + @property + def is_prerelease(self) -> bool: return False @property - def is_postrelease(self): + def is_postrelease(self) -> bool: + return False + + @property + def is_devrelease(self) -> bool: return False -_legacy_version_component_re = re.compile( - r"(\d+ | [a-z]+ | \.| -)", re.VERBOSE, -) +_legacy_version_component_re = re.compile(r"(\d+ | [a-z]+ | \.| -)", re.VERBOSE) _legacy_version_replacement_map = { - "pre": "c", "preview": "c", "-": "final-", "rc": "c", "dev": "@", + "pre": "c", + "preview": "c", + "-": "final-", + "rc": "c", + "dev": "@", } -def _parse_version_parts(s): +def _parse_version_parts(s: str) -> Iterator[str]: for part in _legacy_version_component_re.split(s): part = _legacy_version_replacement_map.get(part, part) @@ -128,7 +193,8 @@ def _parse_version_parts(s): yield "*final" -def _legacy_cmpkey(version): +def _legacy_cmpkey(version: str) -> LegacyCmpKey: + # We hardcode an epoch of -1 here. A PEP 440 version can only have a epoch # greater than or equal to 0. This will effectively put the LegacyVersion, # which uses the defacto standard originally implemented by setuptools, @@ -137,7 +203,7 @@ def _legacy_cmpkey(version): # This scheme is taken from pkg_resources.parse_version setuptools prior to # it's adoption of the packaging library. - parts = [] + parts: List[str] = [] for part in _parse_version_parts(version.lower()): if part.startswith("*"): # remove "-" before a prerelease tag @@ -150,9 +216,9 @@ def _legacy_cmpkey(version): parts.pop() parts.append(part) - parts = tuple(parts) - return epoch, parts + return epoch, tuple(parts) + # Deliberately not anchored to the start and end of the string, to make it # easier for 3rd party code to reuse @@ -190,33 +256,24 @@ VERSION_PATTERN = r""" class Version(_BaseVersion): - _regex = re.compile( - r"^\s*" + VERSION_PATTERN + r"\s*$", - re.VERBOSE | re.IGNORECASE, - ) + _regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE) + + def __init__(self, version: str) -> None: - def __init__(self, version): # Validate the version and parse it into pieces match = self._regex.search(version) if not match: - raise InvalidVersion("Invalid version: '{0}'".format(version)) + raise InvalidVersion(f"Invalid version: '{version}'") # Store the parsed out pieces of the version self._version = _Version( epoch=int(match.group("epoch")) if match.group("epoch") else 0, release=tuple(int(i) for i in match.group("release").split(".")), - pre=_parse_letter_version( - match.group("pre_l"), - match.group("pre_n"), - ), + pre=_parse_letter_version(match.group("pre_l"), match.group("pre_n")), post=_parse_letter_version( - match.group("post_l"), - match.group("post_n1") or match.group("post_n2"), - ), - dev=_parse_letter_version( - match.group("dev_l"), - match.group("dev_n"), + match.group("post_l"), match.group("post_n1") or match.group("post_n2") ), + dev=_parse_letter_version(match.group("dev_l"), match.group("dev_n")), local=_parse_local_version(match.group("local")), ) @@ -230,72 +287,113 @@ class Version(_BaseVersion): self._version.local, ) - def __repr__(self): - return "".format(repr(str(self))) + def __repr__(self) -> str: + return f"" - def __str__(self): + def __str__(self) -> str: parts = [] # Epoch - if self._version.epoch != 0: - parts.append("{0}!".format(self._version.epoch)) + if self.epoch != 0: + parts.append(f"{self.epoch}!") # Release segment - parts.append(".".join(str(x) for x in self._version.release)) + parts.append(".".join(str(x) for x in self.release)) # Pre-release - if self._version.pre is not None: - parts.append("".join(str(x) for x in self._version.pre)) + if self.pre is not None: + parts.append("".join(str(x) for x in self.pre)) # Post-release - if self._version.post is not None: - parts.append(".post{0}".format(self._version.post[1])) + if self.post is not None: + parts.append(f".post{self.post}") # Development release - if self._version.dev is not None: - parts.append(".dev{0}".format(self._version.dev[1])) + if self.dev is not None: + parts.append(f".dev{self.dev}") # Local version segment - if self._version.local is not None: - parts.append( - "+{0}".format(".".join(str(x) for x in self._version.local)) - ) + if self.local is not None: + parts.append(f"+{self.local}") return "".join(parts) @property - def public(self): + def epoch(self) -> int: + _epoch: int = self._version.epoch + return _epoch + + @property + def release(self) -> Tuple[int, ...]: + _release: Tuple[int, ...] = self._version.release + return _release + + @property + def pre(self) -> Optional[Tuple[str, int]]: + _pre: Optional[Tuple[str, int]] = self._version.pre + return _pre + + @property + def post(self) -> Optional[int]: + return self._version.post[1] if self._version.post else None + + @property + def dev(self) -> Optional[int]: + return self._version.dev[1] if self._version.dev else None + + @property + def local(self) -> Optional[str]: + if self._version.local: + return ".".join(str(x) for x in self._version.local) + else: + return None + + @property + def public(self) -> str: return str(self).split("+", 1)[0] @property - def base_version(self): + def base_version(self) -> str: parts = [] # Epoch - if self._version.epoch != 0: - parts.append("{0}!".format(self._version.epoch)) + if self.epoch != 0: + parts.append(f"{self.epoch}!") # Release segment - parts.append(".".join(str(x) for x in self._version.release)) + parts.append(".".join(str(x) for x in self.release)) return "".join(parts) @property - def local(self): - version_string = str(self) - if "+" in version_string: - return version_string.split("+", 1)[1] + def is_prerelease(self) -> bool: + return self.dev is not None or self.pre is not None @property - def is_prerelease(self): - return bool(self._version.dev or self._version.pre) + def is_postrelease(self) -> bool: + return self.post is not None @property - def is_postrelease(self): - return bool(self._version.post) + def is_devrelease(self) -> bool: + return self.dev is not None + + @property + def major(self) -> int: + return self.release[0] if len(self.release) >= 1 else 0 + + @property + def minor(self) -> int: + return self.release[1] if len(self.release) >= 2 else 0 + + @property + def micro(self) -> int: + return self.release[2] if len(self.release) >= 3 else 0 -def _parse_letter_version(letter, number): +def _parse_letter_version( + letter: str, number: Union[str, bytes, SupportsInt] +) -> Optional[Tuple[str, int]]: + if letter: # We consider there to be an implicit 0 in a pre-release if there is # not a numeral associated with it. @@ -325,34 +423,40 @@ def _parse_letter_version(letter, number): return letter, int(number) - -_local_version_seperators = re.compile(r"[\._-]") + return None -def _parse_local_version(local): +_local_version_separators = re.compile(r"[\._-]") + + +def _parse_local_version(local: str) -> Optional[LocalType]: """ Takes a string like abc.1.twelve and turns it into ("abc", 1, "twelve"). """ if local is not None: return tuple( part.lower() if not part.isdigit() else int(part) - for part in _local_version_seperators.split(local) + for part in _local_version_separators.split(local) ) + return None -def _cmpkey(epoch, release, pre, post, dev, local): +def _cmpkey( + epoch: int, + release: Tuple[int, ...], + pre: Optional[Tuple[str, int]], + post: Optional[Tuple[str, int]], + dev: Optional[Tuple[str, int]], + local: Optional[Tuple[SubLocalType]], +) -> CmpKey: + # When we compare a release version, we want to compare it with all of the # trailing zeros removed. So we'll use a reverse the list, drop all the now # leading zeros until we come to something non zero, then take the rest # re-reverse it back into the correct order and make it a tuple and use # that for our sorting key. - release = tuple( - reversed(list( - itertools.dropwhile( - lambda x: x == 0, - reversed(release), - ) - )) + _release = tuple( + reversed(list(itertools.dropwhile(lambda x: x == 0, reversed(release)))) ) # We need to "trick" the sorting algorithm to put 1.0.dev0 before 1.0a0. @@ -360,23 +464,31 @@ def _cmpkey(epoch, release, pre, post, dev, local): # if there is not a pre or a post segment. If we have one of those then # the normal sorting rules will handle this case correctly. if pre is None and post is None and dev is not None: - pre = -Infinity + _pre: PrePostDevType = NegativeInfinity # Versions without a pre-release (except as noted above) should sort after # those with one. elif pre is None: - pre = Infinity + _pre = Infinity + else: + _pre = pre # Versions without a post segment should sort before those with one. if post is None: - post = -Infinity + _post: PrePostDevType = NegativeInfinity + + else: + _post = post # Versions without a development segment should sort after those with one. if dev is None: - dev = Infinity + _dev: PrePostDevType = Infinity + + else: + _dev = dev if local is None: # Versions without a local segment should sort before those with one. - local = -Infinity + _local: LocalType = NegativeInfinity else: # Versions with a local segment need that segment parsed to implement # the sorting rules in PEP440. @@ -385,9 +497,8 @@ def _cmpkey(epoch, release, pre, post, dev, local): # - Numeric segments sort numerically # - Shorter versions sort before longer versions when the prefixes # match exactly - local = tuple( - (i, "") if isinstance(i, int) else (-Infinity, i) - for i in local + _local = tuple( + (i, "") if isinstance(i, int) else (NegativeInfinity, i) for i in local ) - return epoch, release, pre, post, dev, local + return epoch, _release, _pre, _post, _dev, _local diff --git a/libs/common/pkg_resources/_vendor/pyparsing.py b/libs/common/pkg_resources/_vendor/pyparsing.py deleted file mode 100644 index 4aa30ee6..00000000 --- a/libs/common/pkg_resources/_vendor/pyparsing.py +++ /dev/null @@ -1,5742 +0,0 @@ -# module pyparsing.py -# -# Copyright (c) 2003-2018 Paul T. McGuire -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# - -__doc__ = \ -""" -pyparsing module - Classes and methods to define and execute parsing grammars -============================================================================= - -The pyparsing module is an alternative approach to creating and executing simple grammars, -vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you -don't need to learn a new syntax for defining grammars or matching expressions - the parsing module -provides a library of classes that you use to construct the grammar directly in Python. - -Here is a program to parse "Hello, World!" (or any greeting of the form -C{", !"}), built up using L{Word}, L{Literal}, and L{And} elements -(L{'+'} operator gives L{And} expressions, strings are auto-converted to -L{Literal} expressions):: - - from pyparsing import Word, alphas - - # define grammar of a greeting - greet = Word(alphas) + "," + Word(alphas) + "!" - - hello = "Hello, World!" - print (hello, "->", greet.parseString(hello)) - -The program outputs the following:: - - Hello, World! -> ['Hello', ',', 'World', '!'] - -The Python representation of the grammar is quite readable, owing to the self-explanatory -class names, and the use of '+', '|' and '^' operators. - -The L{ParseResults} object returned from L{ParserElement.parseString} can be accessed as a nested list, a dictionary, or an -object with named attributes. - -The pyparsing module handles some of the problems that are typically vexing when writing text parsers: - - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.) - - quoted strings - - embedded comments - - -Getting Started - ------------------ -Visit the classes L{ParserElement} and L{ParseResults} to see the base classes that most other pyparsing -classes inherit from. Use the docstrings for examples of how to: - - construct literal match expressions from L{Literal} and L{CaselessLiteral} classes - - construct character word-group expressions using the L{Word} class - - see how to create repetitive expressions using L{ZeroOrMore} and L{OneOrMore} classes - - use L{'+'}, L{'|'}, L{'^'}, and L{'&'} operators to combine simple expressions into more complex ones - - associate names with your parsed results using L{ParserElement.setResultsName} - - find some helpful expression short-cuts like L{delimitedList} and L{oneOf} - - find more useful common expressions in the L{pyparsing_common} namespace class -""" - -__version__ = "2.2.1" -__versionTime__ = "18 Sep 2018 00:49 UTC" -__author__ = "Paul McGuire " - -import string -from weakref import ref as wkref -import copy -import sys -import warnings -import re -import sre_constants -import collections -import pprint -import traceback -import types -from datetime import datetime - -try: - from _thread import RLock -except ImportError: - from threading import RLock - -try: - # Python 3 - from collections.abc import Iterable - from collections.abc import MutableMapping -except ImportError: - # Python 2.7 - from collections import Iterable - from collections import MutableMapping - -try: - from collections import OrderedDict as _OrderedDict -except ImportError: - try: - from ordereddict import OrderedDict as _OrderedDict - except ImportError: - _OrderedDict = None - -#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) - -__all__ = [ -'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', -'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', -'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', -'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', -'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', -'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', -'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', -'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', -'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', -'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums', -'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno', -'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', -'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', -'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', -'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', -'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', -'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass', -'CloseMatch', 'tokenMap', 'pyparsing_common', -] - -system_version = tuple(sys.version_info)[:3] -PY_3 = system_version[0] == 3 -if PY_3: - _MAX_INT = sys.maxsize - basestring = str - unichr = chr - _ustr = str - - # build list of single arg builtins, that can be used as parse actions - singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max] - -else: - _MAX_INT = sys.maxint - range = xrange - - def _ustr(obj): - """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries - str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It - then < returns the unicode object | encodes it with the default encoding | ... >. - """ - if isinstance(obj,unicode): - return obj - - try: - # If this works, then _ustr(obj) has the same behaviour as str(obj), so - # it won't break any existing code. - return str(obj) - - except UnicodeEncodeError: - # Else encode it - ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace') - xmlcharref = Regex(r'&#\d+;') - xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:]) - return xmlcharref.transformString(ret) - - # build list of single arg builtins, tolerant of Python version, that can be used as parse actions - singleArgBuiltins = [] - import __builtin__ - for fname in "sum len sorted reversed list tuple set any all min max".split(): - try: - singleArgBuiltins.append(getattr(__builtin__,fname)) - except AttributeError: - continue - -_generatorType = type((y for y in range(1))) - -def _xml_escape(data): - """Escape &, <, >, ", ', etc. in a string of data.""" - - # ampersand must be replaced first - from_symbols = '&><"\'' - to_symbols = ('&'+s+';' for s in "amp gt lt quot apos".split()) - for from_,to_ in zip(from_symbols, to_symbols): - data = data.replace(from_, to_) - return data - -class _Constants(object): - pass - -alphas = string.ascii_uppercase + string.ascii_lowercase -nums = "0123456789" -hexnums = nums + "ABCDEFabcdef" -alphanums = alphas + nums -_bslash = chr(92) -printables = "".join(c for c in string.printable if c not in string.whitespace) - -class ParseBaseException(Exception): - """base exception class for all parsing runtime exceptions""" - # Performance tuning: we construct a *lot* of these, so keep this - # constructor as small and fast as possible - def __init__( self, pstr, loc=0, msg=None, elem=None ): - self.loc = loc - if msg is None: - self.msg = pstr - self.pstr = "" - else: - self.msg = msg - self.pstr = pstr - self.parserElement = elem - self.args = (pstr, loc, msg) - - @classmethod - def _from_exception(cls, pe): - """ - internal factory method to simplify creating one type of ParseException - from another - avoids having __init__ signature conflicts among subclasses - """ - return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement) - - def __getattr__( self, aname ): - """supported attributes by name are: - - lineno - returns the line number of the exception text - - col - returns the column number of the exception text - - line - returns the line containing the exception text - """ - if( aname == "lineno" ): - return lineno( self.loc, self.pstr ) - elif( aname in ("col", "column") ): - return col( self.loc, self.pstr ) - elif( aname == "line" ): - return line( self.loc, self.pstr ) - else: - raise AttributeError(aname) - - def __str__( self ): - return "%s (at char %d), (line:%d, col:%d)" % \ - ( self.msg, self.loc, self.lineno, self.column ) - def __repr__( self ): - return _ustr(self) - def markInputline( self, markerString = ">!<" ): - """Extracts the exception line from the input string, and marks - the location of the exception with a special symbol. - """ - line_str = self.line - line_column = self.column - 1 - if markerString: - line_str = "".join((line_str[:line_column], - markerString, line_str[line_column:])) - return line_str.strip() - def __dir__(self): - return "lineno col line".split() + dir(type(self)) - -class ParseException(ParseBaseException): - """ - Exception thrown when parse expressions don't match class; - supported attributes by name are: - - lineno - returns the line number of the exception text - - col - returns the column number of the exception text - - line - returns the line containing the exception text - - Example:: - try: - Word(nums).setName("integer").parseString("ABC") - except ParseException as pe: - print(pe) - print("column: {}".format(pe.col)) - - prints:: - Expected integer (at char 0), (line:1, col:1) - column: 1 - """ - pass - -class ParseFatalException(ParseBaseException): - """user-throwable exception thrown when inconsistent parse content - is found; stops all parsing immediately""" - pass - -class ParseSyntaxException(ParseFatalException): - """just like L{ParseFatalException}, but thrown internally when an - L{ErrorStop} ('-' operator) indicates that parsing is to stop - immediately because an unbacktrackable syntax error has been found""" - pass - -#~ class ReparseException(ParseBaseException): - #~ """Experimental class - parse actions can raise this exception to cause - #~ pyparsing to reparse the input string: - #~ - with a modified input string, and/or - #~ - with a modified start location - #~ Set the values of the ReparseException in the constructor, and raise the - #~ exception in a parse action to cause pyparsing to use the new string/location. - #~ Setting the values as None causes no change to be made. - #~ """ - #~ def __init_( self, newstring, restartLoc ): - #~ self.newParseText = newstring - #~ self.reparseLoc = restartLoc - -class RecursiveGrammarException(Exception): - """exception thrown by L{ParserElement.validate} if the grammar could be improperly recursive""" - def __init__( self, parseElementList ): - self.parseElementTrace = parseElementList - - def __str__( self ): - return "RecursiveGrammarException: %s" % self.parseElementTrace - -class _ParseResultsWithOffset(object): - def __init__(self,p1,p2): - self.tup = (p1,p2) - def __getitem__(self,i): - return self.tup[i] - def __repr__(self): - return repr(self.tup[0]) - def setOffset(self,i): - self.tup = (self.tup[0],i) - -class ParseResults(object): - """ - Structured parse results, to provide multiple means of access to the parsed data: - - as a list (C{len(results)}) - - by list index (C{results[0], results[1]}, etc.) - - by attribute (C{results.} - see L{ParserElement.setResultsName}) - - Example:: - integer = Word(nums) - date_str = (integer.setResultsName("year") + '/' - + integer.setResultsName("month") + '/' - + integer.setResultsName("day")) - # equivalent form: - # date_str = integer("year") + '/' + integer("month") + '/' + integer("day") - - # parseString returns a ParseResults object - result = date_str.parseString("1999/12/31") - - def test(s, fn=repr): - print("%s -> %s" % (s, fn(eval(s)))) - test("list(result)") - test("result[0]") - test("result['month']") - test("result.day") - test("'month' in result") - test("'minutes' in result") - test("result.dump()", str) - prints:: - list(result) -> ['1999', '/', '12', '/', '31'] - result[0] -> '1999' - result['month'] -> '12' - result.day -> '31' - 'month' in result -> True - 'minutes' in result -> False - result.dump() -> ['1999', '/', '12', '/', '31'] - - day: 31 - - month: 12 - - year: 1999 - """ - def __new__(cls, toklist=None, name=None, asList=True, modal=True ): - if isinstance(toklist, cls): - return toklist - retobj = object.__new__(cls) - retobj.__doinit = True - return retobj - - # Performance tuning: we construct a *lot* of these, so keep this - # constructor as small and fast as possible - def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ): - if self.__doinit: - self.__doinit = False - self.__name = None - self.__parent = None - self.__accumNames = {} - self.__asList = asList - self.__modal = modal - if toklist is None: - toklist = [] - if isinstance(toklist, list): - self.__toklist = toklist[:] - elif isinstance(toklist, _generatorType): - self.__toklist = list(toklist) - else: - self.__toklist = [toklist] - self.__tokdict = dict() - - if name is not None and name: - if not modal: - self.__accumNames[name] = 0 - if isinstance(name,int): - name = _ustr(name) # will always return a str, but use _ustr for consistency - self.__name = name - if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])): - if isinstance(toklist,basestring): - toklist = [ toklist ] - if asList: - if isinstance(toklist,ParseResults): - self[name] = _ParseResultsWithOffset(toklist.copy(),0) - else: - self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0) - self[name].__name = name - else: - try: - self[name] = toklist[0] - except (KeyError,TypeError,IndexError): - self[name] = toklist - - def __getitem__( self, i ): - if isinstance( i, (int,slice) ): - return self.__toklist[i] - else: - if i not in self.__accumNames: - return self.__tokdict[i][-1][0] - else: - return ParseResults([ v[0] for v in self.__tokdict[i] ]) - - def __setitem__( self, k, v, isinstance=isinstance ): - if isinstance(v,_ParseResultsWithOffset): - self.__tokdict[k] = self.__tokdict.get(k,list()) + [v] - sub = v[0] - elif isinstance(k,(int,slice)): - self.__toklist[k] = v - sub = v - else: - self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)] - sub = v - if isinstance(sub,ParseResults): - sub.__parent = wkref(self) - - def __delitem__( self, i ): - if isinstance(i,(int,slice)): - mylen = len( self.__toklist ) - del self.__toklist[i] - - # convert int to slice - if isinstance(i, int): - if i < 0: - i += mylen - i = slice(i, i+1) - # get removed indices - removed = list(range(*i.indices(mylen))) - removed.reverse() - # fixup indices in token dictionary - for name,occurrences in self.__tokdict.items(): - for j in removed: - for k, (value, position) in enumerate(occurrences): - occurrences[k] = _ParseResultsWithOffset(value, position - (position > j)) - else: - del self.__tokdict[i] - - def __contains__( self, k ): - return k in self.__tokdict - - def __len__( self ): return len( self.__toklist ) - def __bool__(self): return ( not not self.__toklist ) - __nonzero__ = __bool__ - def __iter__( self ): return iter( self.__toklist ) - def __reversed__( self ): return iter( self.__toklist[::-1] ) - def _iterkeys( self ): - if hasattr(self.__tokdict, "iterkeys"): - return self.__tokdict.iterkeys() - else: - return iter(self.__tokdict) - - def _itervalues( self ): - return (self[k] for k in self._iterkeys()) - - def _iteritems( self ): - return ((k, self[k]) for k in self._iterkeys()) - - if PY_3: - keys = _iterkeys - """Returns an iterator of all named result keys (Python 3.x only).""" - - values = _itervalues - """Returns an iterator of all named result values (Python 3.x only).""" - - items = _iteritems - """Returns an iterator of all named result key-value tuples (Python 3.x only).""" - - else: - iterkeys = _iterkeys - """Returns an iterator of all named result keys (Python 2.x only).""" - - itervalues = _itervalues - """Returns an iterator of all named result values (Python 2.x only).""" - - iteritems = _iteritems - """Returns an iterator of all named result key-value tuples (Python 2.x only).""" - - def keys( self ): - """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x).""" - return list(self.iterkeys()) - - def values( self ): - """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x).""" - return list(self.itervalues()) - - def items( self ): - """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x).""" - return list(self.iteritems()) - - def haskeys( self ): - """Since keys() returns an iterator, this method is helpful in bypassing - code that looks for the existence of any defined results names.""" - return bool(self.__tokdict) - - def pop( self, *args, **kwargs): - """ - Removes and returns item at specified index (default=C{last}). - Supports both C{list} and C{dict} semantics for C{pop()}. If passed no - argument or an integer argument, it will use C{list} semantics - and pop tokens from the list of parsed tokens. If passed a - non-integer argument (most likely a string), it will use C{dict} - semantics and pop the corresponding value from any defined - results names. A second default return value argument is - supported, just as in C{dict.pop()}. - - Example:: - def remove_first(tokens): - tokens.pop(0) - print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] - print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321'] - - label = Word(alphas) - patt = label("LABEL") + OneOrMore(Word(nums)) - print(patt.parseString("AAB 123 321").dump()) - - # Use pop() in a parse action to remove named result (note that corresponding value is not - # removed from list form of results) - def remove_LABEL(tokens): - tokens.pop("LABEL") - return tokens - patt.addParseAction(remove_LABEL) - print(patt.parseString("AAB 123 321").dump()) - prints:: - ['AAB', '123', '321'] - - LABEL: AAB - - ['AAB', '123', '321'] - """ - if not args: - args = [-1] - for k,v in kwargs.items(): - if k == 'default': - args = (args[0], v) - else: - raise TypeError("pop() got an unexpected keyword argument '%s'" % k) - if (isinstance(args[0], int) or - len(args) == 1 or - args[0] in self): - index = args[0] - ret = self[index] - del self[index] - return ret - else: - defaultvalue = args[1] - return defaultvalue - - def get(self, key, defaultValue=None): - """ - Returns named result matching the given key, or if there is no - such name, then returns the given C{defaultValue} or C{None} if no - C{defaultValue} is specified. - - Similar to C{dict.get()}. - - Example:: - integer = Word(nums) - date_str = integer("year") + '/' + integer("month") + '/' + integer("day") - - result = date_str.parseString("1999/12/31") - print(result.get("year")) # -> '1999' - print(result.get("hour", "not specified")) # -> 'not specified' - print(result.get("hour")) # -> None - """ - if key in self: - return self[key] - else: - return defaultValue - - def insert( self, index, insStr ): - """ - Inserts new element at location index in the list of parsed tokens. - - Similar to C{list.insert()}. - - Example:: - print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] - - # use a parse action to insert the parse location in the front of the parsed results - def insert_locn(locn, tokens): - tokens.insert(0, locn) - print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321'] - """ - self.__toklist.insert(index, insStr) - # fixup indices in token dictionary - for name,occurrences in self.__tokdict.items(): - for k, (value, position) in enumerate(occurrences): - occurrences[k] = _ParseResultsWithOffset(value, position + (position > index)) - - def append( self, item ): - """ - Add single element to end of ParseResults list of elements. - - Example:: - print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] - - # use a parse action to compute the sum of the parsed integers, and add it to the end - def append_sum(tokens): - tokens.append(sum(map(int, tokens))) - print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444] - """ - self.__toklist.append(item) - - def extend( self, itemseq ): - """ - Add sequence of elements to end of ParseResults list of elements. - - Example:: - patt = OneOrMore(Word(alphas)) - - # use a parse action to append the reverse of the matched strings, to make a palindrome - def make_palindrome(tokens): - tokens.extend(reversed([t[::-1] for t in tokens])) - return ''.join(tokens) - print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl' - """ - if isinstance(itemseq, ParseResults): - self += itemseq - else: - self.__toklist.extend(itemseq) - - def clear( self ): - """ - Clear all elements and results names. - """ - del self.__toklist[:] - self.__tokdict.clear() - - def __getattr__( self, name ): - try: - return self[name] - except KeyError: - return "" - - if name in self.__tokdict: - if name not in self.__accumNames: - return self.__tokdict[name][-1][0] - else: - return ParseResults([ v[0] for v in self.__tokdict[name] ]) - else: - return "" - - def __add__( self, other ): - ret = self.copy() - ret += other - return ret - - def __iadd__( self, other ): - if other.__tokdict: - offset = len(self.__toklist) - addoffset = lambda a: offset if a<0 else a+offset - otheritems = other.__tokdict.items() - otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) ) - for (k,vlist) in otheritems for v in vlist] - for k,v in otherdictitems: - self[k] = v - if isinstance(v[0],ParseResults): - v[0].__parent = wkref(self) - - self.__toklist += other.__toklist - self.__accumNames.update( other.__accumNames ) - return self - - def __radd__(self, other): - if isinstance(other,int) and other == 0: - # useful for merging many ParseResults using sum() builtin - return self.copy() - else: - # this may raise a TypeError - so be it - return other + self - - def __repr__( self ): - return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) ) - - def __str__( self ): - return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']' - - def _asStringList( self, sep='' ): - out = [] - for item in self.__toklist: - if out and sep: - out.append(sep) - if isinstance( item, ParseResults ): - out += item._asStringList() - else: - out.append( _ustr(item) ) - return out - - def asList( self ): - """ - Returns the parse results as a nested list of matching tokens, all converted to strings. - - Example:: - patt = OneOrMore(Word(alphas)) - result = patt.parseString("sldkj lsdkj sldkj") - # even though the result prints in string-like form, it is actually a pyparsing ParseResults - print(type(result), result) # -> ['sldkj', 'lsdkj', 'sldkj'] - - # Use asList() to create an actual list - result_list = result.asList() - print(type(result_list), result_list) # -> ['sldkj', 'lsdkj', 'sldkj'] - """ - return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist] - - def asDict( self ): - """ - Returns the named parse results as a nested dictionary. - - Example:: - integer = Word(nums) - date_str = integer("year") + '/' + integer("month") + '/' + integer("day") - - result = date_str.parseString('12/31/1999') - print(type(result), repr(result)) # -> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]}) - - result_dict = result.asDict() - print(type(result_dict), repr(result_dict)) # -> {'day': '1999', 'year': '12', 'month': '31'} - - # even though a ParseResults supports dict-like access, sometime you just need to have a dict - import json - print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable - print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"} - """ - if PY_3: - item_fn = self.items - else: - item_fn = self.iteritems - - def toItem(obj): - if isinstance(obj, ParseResults): - if obj.haskeys(): - return obj.asDict() - else: - return [toItem(v) for v in obj] - else: - return obj - - return dict((k,toItem(v)) for k,v in item_fn()) - - def copy( self ): - """ - Returns a new copy of a C{ParseResults} object. - """ - ret = ParseResults( self.__toklist ) - ret.__tokdict = self.__tokdict.copy() - ret.__parent = self.__parent - ret.__accumNames.update( self.__accumNames ) - ret.__name = self.__name - return ret - - def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ): - """ - (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names. - """ - nl = "\n" - out = [] - namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items() - for v in vlist) - nextLevelIndent = indent + " " - - # collapse out indents if formatting is not desired - if not formatted: - indent = "" - nextLevelIndent = "" - nl = "" - - selfTag = None - if doctag is not None: - selfTag = doctag - else: - if self.__name: - selfTag = self.__name - - if not selfTag: - if namedItemsOnly: - return "" - else: - selfTag = "ITEM" - - out += [ nl, indent, "<", selfTag, ">" ] - - for i,res in enumerate(self.__toklist): - if isinstance(res,ParseResults): - if i in namedItems: - out += [ res.asXML(namedItems[i], - namedItemsOnly and doctag is None, - nextLevelIndent, - formatted)] - else: - out += [ res.asXML(None, - namedItemsOnly and doctag is None, - nextLevelIndent, - formatted)] - else: - # individual token, see if there is a name for it - resTag = None - if i in namedItems: - resTag = namedItems[i] - if not resTag: - if namedItemsOnly: - continue - else: - resTag = "ITEM" - xmlBodyText = _xml_escape(_ustr(res)) - out += [ nl, nextLevelIndent, "<", resTag, ">", - xmlBodyText, - "" ] - - out += [ nl, indent, "" ] - return "".join(out) - - def __lookup(self,sub): - for k,vlist in self.__tokdict.items(): - for v,loc in vlist: - if sub is v: - return k - return None - - def getName(self): - r""" - Returns the results name for this token expression. Useful when several - different expressions might match at a particular location. - - Example:: - integer = Word(nums) - ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d") - house_number_expr = Suppress('#') + Word(nums, alphanums) - user_data = (Group(house_number_expr)("house_number") - | Group(ssn_expr)("ssn") - | Group(integer)("age")) - user_info = OneOrMore(user_data) - - result = user_info.parseString("22 111-22-3333 #221B") - for item in result: - print(item.getName(), ':', item[0]) - prints:: - age : 22 - ssn : 111-22-3333 - house_number : 221B - """ - if self.__name: - return self.__name - elif self.__parent: - par = self.__parent() - if par: - return par.__lookup(self) - else: - return None - elif (len(self) == 1 and - len(self.__tokdict) == 1 and - next(iter(self.__tokdict.values()))[0][1] in (0,-1)): - return next(iter(self.__tokdict.keys())) - else: - return None - - def dump(self, indent='', depth=0, full=True): - """ - Diagnostic method for listing out the contents of a C{ParseResults}. - Accepts an optional C{indent} argument so that this string can be embedded - in a nested display of other data. - - Example:: - integer = Word(nums) - date_str = integer("year") + '/' + integer("month") + '/' + integer("day") - - result = date_str.parseString('12/31/1999') - print(result.dump()) - prints:: - ['12', '/', '31', '/', '1999'] - - day: 1999 - - month: 31 - - year: 12 - """ - out = [] - NL = '\n' - out.append( indent+_ustr(self.asList()) ) - if full: - if self.haskeys(): - items = sorted((str(k), v) for k,v in self.items()) - for k,v in items: - if out: - out.append(NL) - out.append( "%s%s- %s: " % (indent,(' '*depth), k) ) - if isinstance(v,ParseResults): - if v: - out.append( v.dump(indent,depth+1) ) - else: - out.append(_ustr(v)) - else: - out.append(repr(v)) - elif any(isinstance(vv,ParseResults) for vv in self): - v = self - for i,vv in enumerate(v): - if isinstance(vv,ParseResults): - out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),vv.dump(indent,depth+1) )) - else: - out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),_ustr(vv))) - - return "".join(out) - - def pprint(self, *args, **kwargs): - """ - Pretty-printer for parsed results as a list, using the C{pprint} module. - Accepts additional positional or keyword args as defined for the - C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint}) - - Example:: - ident = Word(alphas, alphanums) - num = Word(nums) - func = Forward() - term = ident | num | Group('(' + func + ')') - func <<= ident + Group(Optional(delimitedList(term))) - result = func.parseString("fna a,b,(fnb c,d,200),100") - result.pprint(width=40) - prints:: - ['fna', - ['a', - 'b', - ['(', 'fnb', ['c', 'd', '200'], ')'], - '100']] - """ - pprint.pprint(self.asList(), *args, **kwargs) - - # add support for pickle protocol - def __getstate__(self): - return ( self.__toklist, - ( self.__tokdict.copy(), - self.__parent is not None and self.__parent() or None, - self.__accumNames, - self.__name ) ) - - def __setstate__(self,state): - self.__toklist = state[0] - (self.__tokdict, - par, - inAccumNames, - self.__name) = state[1] - self.__accumNames = {} - self.__accumNames.update(inAccumNames) - if par is not None: - self.__parent = wkref(par) - else: - self.__parent = None - - def __getnewargs__(self): - return self.__toklist, self.__name, self.__asList, self.__modal - - def __dir__(self): - return (dir(type(self)) + list(self.keys())) - -MutableMapping.register(ParseResults) - -def col (loc,strg): - """Returns current column within a string, counting newlines as line separators. - The first column is number 1. - - Note: the default parsing behavior is to expand tabs in the input string - before starting the parsing process. See L{I{ParserElement.parseString}} for more information - on parsing strings containing C{}s, and suggested methods to maintain a - consistent view of the parsed string, the parse location, and line and column - positions within the parsed string. - """ - s = strg - return 1 if 0} for more information - on parsing strings containing C{}s, and suggested methods to maintain a - consistent view of the parsed string, the parse location, and line and column - positions within the parsed string. - """ - return strg.count("\n",0,loc) + 1 - -def line( loc, strg ): - """Returns the line of text containing loc within a string, counting newlines as line separators. - """ - lastCR = strg.rfind("\n", 0, loc) - nextCR = strg.find("\n", loc) - if nextCR >= 0: - return strg[lastCR+1:nextCR] - else: - return strg[lastCR+1:] - -def _defaultStartDebugAction( instring, loc, expr ): - print (("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))) - -def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ): - print ("Matched " + _ustr(expr) + " -> " + str(toks.asList())) - -def _defaultExceptionDebugAction( instring, loc, expr, exc ): - print ("Exception raised:" + _ustr(exc)) - -def nullDebugAction(*args): - """'Do-nothing' debug action, to suppress debugging output during parsing.""" - pass - -# Only works on Python 3.x - nonlocal is toxic to Python 2 installs -#~ 'decorator to trim function calls to match the arity of the target' -#~ def _trim_arity(func, maxargs=3): - #~ if func in singleArgBuiltins: - #~ return lambda s,l,t: func(t) - #~ limit = 0 - #~ foundArity = False - #~ def wrapper(*args): - #~ nonlocal limit,foundArity - #~ while 1: - #~ try: - #~ ret = func(*args[limit:]) - #~ foundArity = True - #~ return ret - #~ except TypeError: - #~ if limit == maxargs or foundArity: - #~ raise - #~ limit += 1 - #~ continue - #~ return wrapper - -# this version is Python 2.x-3.x cross-compatible -'decorator to trim function calls to match the arity of the target' -def _trim_arity(func, maxargs=2): - if func in singleArgBuiltins: - return lambda s,l,t: func(t) - limit = [0] - foundArity = [False] - - # traceback return data structure changed in Py3.5 - normalize back to plain tuples - if system_version[:2] >= (3,5): - def extract_stack(limit=0): - # special handling for Python 3.5.0 - extra deep call stack by 1 - offset = -3 if system_version == (3,5,0) else -2 - frame_summary = traceback.extract_stack(limit=-offset+limit-1)[offset] - return [frame_summary[:2]] - def extract_tb(tb, limit=0): - frames = traceback.extract_tb(tb, limit=limit) - frame_summary = frames[-1] - return [frame_summary[:2]] - else: - extract_stack = traceback.extract_stack - extract_tb = traceback.extract_tb - - # synthesize what would be returned by traceback.extract_stack at the call to - # user's parse action 'func', so that we don't incur call penalty at parse time - - LINE_DIFF = 6 - # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND - # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!! - this_line = extract_stack(limit=2)[-1] - pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF) - - def wrapper(*args): - while 1: - try: - ret = func(*args[limit[0]:]) - foundArity[0] = True - return ret - except TypeError: - # re-raise TypeErrors if they did not come from our arity testing - if foundArity[0]: - raise - else: - try: - tb = sys.exc_info()[-1] - if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth: - raise - finally: - del tb - - if limit[0] <= maxargs: - limit[0] += 1 - continue - raise - - # copy func name to wrapper for sensible debug output - func_name = "" - try: - func_name = getattr(func, '__name__', - getattr(func, '__class__').__name__) - except Exception: - func_name = str(func) - wrapper.__name__ = func_name - - return wrapper - -class ParserElement(object): - """Abstract base level parser element class.""" - DEFAULT_WHITE_CHARS = " \n\t\r" - verbose_stacktrace = False - - @staticmethod - def setDefaultWhitespaceChars( chars ): - r""" - Overrides the default whitespace chars - - Example:: - # default whitespace chars are space, and newline - OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def', 'ghi', 'jkl'] - - # change to just treat newline as significant - ParserElement.setDefaultWhitespaceChars(" \t") - OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def'] - """ - ParserElement.DEFAULT_WHITE_CHARS = chars - - @staticmethod - def inlineLiteralsUsing(cls): - """ - Set class to be used for inclusion of string literals into a parser. - - Example:: - # default literal class used is Literal - integer = Word(nums) - date_str = integer("year") + '/' + integer("month") + '/' + integer("day") - - date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31'] - - - # change to Suppress - ParserElement.inlineLiteralsUsing(Suppress) - date_str = integer("year") + '/' + integer("month") + '/' + integer("day") - - date_str.parseString("1999/12/31") # -> ['1999', '12', '31'] - """ - ParserElement._literalStringClass = cls - - def __init__( self, savelist=False ): - self.parseAction = list() - self.failAction = None - #~ self.name = "" # don't define self.name, let subclasses try/except upcall - self.strRepr = None - self.resultsName = None - self.saveAsList = savelist - self.skipWhitespace = True - self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS - self.copyDefaultWhiteChars = True - self.mayReturnEmpty = False # used when checking for left-recursion - self.keepTabs = False - self.ignoreExprs = list() - self.debug = False - self.streamlined = False - self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index - self.errmsg = "" - self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all) - self.debugActions = ( None, None, None ) #custom debug actions - self.re = None - self.callPreparse = True # used to avoid redundant calls to preParse - self.callDuringTry = False - - def copy( self ): - """ - Make a copy of this C{ParserElement}. Useful for defining different parse actions - for the same parsing pattern, using copies of the original parse element. - - Example:: - integer = Word(nums).setParseAction(lambda toks: int(toks[0])) - integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K") - integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M") - - print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M")) - prints:: - [5120, 100, 655360, 268435456] - Equivalent form of C{expr.copy()} is just C{expr()}:: - integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M") - """ - cpy = copy.copy( self ) - cpy.parseAction = self.parseAction[:] - cpy.ignoreExprs = self.ignoreExprs[:] - if self.copyDefaultWhiteChars: - cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS - return cpy - - def setName( self, name ): - """ - Define name for this expression, makes debugging and exception messages clearer. - - Example:: - Word(nums).parseString("ABC") # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1) - Word(nums).setName("integer").parseString("ABC") # -> Exception: Expected integer (at char 0), (line:1, col:1) - """ - self.name = name - self.errmsg = "Expected " + self.name - if hasattr(self,"exception"): - self.exception.msg = self.errmsg - return self - - def setResultsName( self, name, listAllMatches=False ): - """ - Define name for referencing matching tokens as a nested attribute - of the returned parse results. - NOTE: this returns a *copy* of the original C{ParserElement} object; - this is so that the client can define a basic element, such as an - integer, and reference it in multiple places with different names. - - You can also set results names using the abbreviated syntax, - C{expr("name")} in place of C{expr.setResultsName("name")} - - see L{I{__call__}<__call__>}. - - Example:: - date_str = (integer.setResultsName("year") + '/' - + integer.setResultsName("month") + '/' - + integer.setResultsName("day")) - - # equivalent form: - date_str = integer("year") + '/' + integer("month") + '/' + integer("day") - """ - newself = self.copy() - if name.endswith("*"): - name = name[:-1] - listAllMatches=True - newself.resultsName = name - newself.modalResults = not listAllMatches - return newself - - def setBreak(self,breakFlag = True): - """Method to invoke the Python pdb debugger when this element is - about to be parsed. Set C{breakFlag} to True to enable, False to - disable. - """ - if breakFlag: - _parseMethod = self._parse - def breaker(instring, loc, doActions=True, callPreParse=True): - import pdb - pdb.set_trace() - return _parseMethod( instring, loc, doActions, callPreParse ) - breaker._originalParseMethod = _parseMethod - self._parse = breaker - else: - if hasattr(self._parse,"_originalParseMethod"): - self._parse = self._parse._originalParseMethod - return self - - def setParseAction( self, *fns, **kwargs ): - """ - Define one or more actions to perform when successfully matching parse element definition. - Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)}, - C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where: - - s = the original string being parsed (see note below) - - loc = the location of the matching substring - - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object - If the functions in fns modify the tokens, they can return them as the return - value from fn, and the modified list of tokens will replace the original. - Otherwise, fn does not need to return any value. - - Optional keyword arguments: - - callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing - - Note: the default parsing behavior is to expand tabs in the input string - before starting the parsing process. See L{I{parseString}} for more information - on parsing strings containing C{}s, and suggested methods to maintain a - consistent view of the parsed string, the parse location, and line and column - positions within the parsed string. - - Example:: - integer = Word(nums) - date_str = integer + '/' + integer + '/' + integer - - date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31'] - - # use parse action to convert to ints at parse time - integer = Word(nums).setParseAction(lambda toks: int(toks[0])) - date_str = integer + '/' + integer + '/' + integer - - # note that integer fields are now ints, not strings - date_str.parseString("1999/12/31") # -> [1999, '/', 12, '/', 31] - """ - self.parseAction = list(map(_trim_arity, list(fns))) - self.callDuringTry = kwargs.get("callDuringTry", False) - return self - - def addParseAction( self, *fns, **kwargs ): - """ - Add one or more parse actions to expression's list of parse actions. See L{I{setParseAction}}. - - See examples in L{I{copy}}. - """ - self.parseAction += list(map(_trim_arity, list(fns))) - self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False) - return self - - def addCondition(self, *fns, **kwargs): - """Add a boolean predicate function to expression's list of parse actions. See - L{I{setParseAction}} for function call signatures. Unlike C{setParseAction}, - functions passed to C{addCondition} need to return boolean success/fail of the condition. - - Optional keyword arguments: - - message = define a custom message to be used in the raised exception - - fatal = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException - - Example:: - integer = Word(nums).setParseAction(lambda toks: int(toks[0])) - year_int = integer.copy() - year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later") - date_str = year_int + '/' + integer + '/' + integer - - result = date_str.parseString("1999/12/31") # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1) - """ - msg = kwargs.get("message", "failed user-defined condition") - exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException - for fn in fns: - def pa(s,l,t): - if not bool(_trim_arity(fn)(s,l,t)): - raise exc_type(s,l,msg) - self.parseAction.append(pa) - self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False) - return self - - def setFailAction( self, fn ): - """Define action to perform if parsing fails at this expression. - Fail acton fn is a callable function that takes the arguments - C{fn(s,loc,expr,err)} where: - - s = string being parsed - - loc = location where expression match was attempted and failed - - expr = the parse expression that failed - - err = the exception thrown - The function returns no value. It may throw C{L{ParseFatalException}} - if it is desired to stop parsing immediately.""" - self.failAction = fn - return self - - def _skipIgnorables( self, instring, loc ): - exprsFound = True - while exprsFound: - exprsFound = False - for e in self.ignoreExprs: - try: - while 1: - loc,dummy = e._parse( instring, loc ) - exprsFound = True - except ParseException: - pass - return loc - - def preParse( self, instring, loc ): - if self.ignoreExprs: - loc = self._skipIgnorables( instring, loc ) - - if self.skipWhitespace: - wt = self.whiteChars - instrlen = len(instring) - while loc < instrlen and instring[loc] in wt: - loc += 1 - - return loc - - def parseImpl( self, instring, loc, doActions=True ): - return loc, [] - - def postParse( self, instring, loc, tokenlist ): - return tokenlist - - #~ @profile - def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ): - debugging = ( self.debug ) #and doActions ) - - if debugging or self.failAction: - #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )) - if (self.debugActions[0] ): - self.debugActions[0]( instring, loc, self ) - if callPreParse and self.callPreparse: - preloc = self.preParse( instring, loc ) - else: - preloc = loc - tokensStart = preloc - try: - try: - loc,tokens = self.parseImpl( instring, preloc, doActions ) - except IndexError: - raise ParseException( instring, len(instring), self.errmsg, self ) - except ParseBaseException as err: - #~ print ("Exception raised:", err) - if self.debugActions[2]: - self.debugActions[2]( instring, tokensStart, self, err ) - if self.failAction: - self.failAction( instring, tokensStart, self, err ) - raise - else: - if callPreParse and self.callPreparse: - preloc = self.preParse( instring, loc ) - else: - preloc = loc - tokensStart = preloc - if self.mayIndexError or preloc >= len(instring): - try: - loc,tokens = self.parseImpl( instring, preloc, doActions ) - except IndexError: - raise ParseException( instring, len(instring), self.errmsg, self ) - else: - loc,tokens = self.parseImpl( instring, preloc, doActions ) - - tokens = self.postParse( instring, loc, tokens ) - - retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults ) - if self.parseAction and (doActions or self.callDuringTry): - if debugging: - try: - for fn in self.parseAction: - tokens = fn( instring, tokensStart, retTokens ) - if tokens is not None: - retTokens = ParseResults( tokens, - self.resultsName, - asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), - modal=self.modalResults ) - except ParseBaseException as err: - #~ print "Exception raised in user parse action:", err - if (self.debugActions[2] ): - self.debugActions[2]( instring, tokensStart, self, err ) - raise - else: - for fn in self.parseAction: - tokens = fn( instring, tokensStart, retTokens ) - if tokens is not None: - retTokens = ParseResults( tokens, - self.resultsName, - asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), - modal=self.modalResults ) - if debugging: - #~ print ("Matched",self,"->",retTokens.asList()) - if (self.debugActions[1] ): - self.debugActions[1]( instring, tokensStart, loc, self, retTokens ) - - return loc, retTokens - - def tryParse( self, instring, loc ): - try: - return self._parse( instring, loc, doActions=False )[0] - except ParseFatalException: - raise ParseException( instring, loc, self.errmsg, self) - - def canParseNext(self, instring, loc): - try: - self.tryParse(instring, loc) - except (ParseException, IndexError): - return False - else: - return True - - class _UnboundedCache(object): - def __init__(self): - cache = {} - self.not_in_cache = not_in_cache = object() - - def get(self, key): - return cache.get(key, not_in_cache) - - def set(self, key, value): - cache[key] = value - - def clear(self): - cache.clear() - - def cache_len(self): - return len(cache) - - self.get = types.MethodType(get, self) - self.set = types.MethodType(set, self) - self.clear = types.MethodType(clear, self) - self.__len__ = types.MethodType(cache_len, self) - - if _OrderedDict is not None: - class _FifoCache(object): - def __init__(self, size): - self.not_in_cache = not_in_cache = object() - - cache = _OrderedDict() - - def get(self, key): - return cache.get(key, not_in_cache) - - def set(self, key, value): - cache[key] = value - while len(cache) > size: - try: - cache.popitem(False) - except KeyError: - pass - - def clear(self): - cache.clear() - - def cache_len(self): - return len(cache) - - self.get = types.MethodType(get, self) - self.set = types.MethodType(set, self) - self.clear = types.MethodType(clear, self) - self.__len__ = types.MethodType(cache_len, self) - - else: - class _FifoCache(object): - def __init__(self, size): - self.not_in_cache = not_in_cache = object() - - cache = {} - key_fifo = collections.deque([], size) - - def get(self, key): - return cache.get(key, not_in_cache) - - def set(self, key, value): - cache[key] = value - while len(key_fifo) > size: - cache.pop(key_fifo.popleft(), None) - key_fifo.append(key) - - def clear(self): - cache.clear() - key_fifo.clear() - - def cache_len(self): - return len(cache) - - self.get = types.MethodType(get, self) - self.set = types.MethodType(set, self) - self.clear = types.MethodType(clear, self) - self.__len__ = types.MethodType(cache_len, self) - - # argument cache for optimizing repeated calls when backtracking through recursive expressions - packrat_cache = {} # this is set later by enabledPackrat(); this is here so that resetCache() doesn't fail - packrat_cache_lock = RLock() - packrat_cache_stats = [0, 0] - - # this method gets repeatedly called during backtracking with the same arguments - - # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression - def _parseCache( self, instring, loc, doActions=True, callPreParse=True ): - HIT, MISS = 0, 1 - lookup = (self, instring, loc, callPreParse, doActions) - with ParserElement.packrat_cache_lock: - cache = ParserElement.packrat_cache - value = cache.get(lookup) - if value is cache.not_in_cache: - ParserElement.packrat_cache_stats[MISS] += 1 - try: - value = self._parseNoCache(instring, loc, doActions, callPreParse) - except ParseBaseException as pe: - # cache a copy of the exception, without the traceback - cache.set(lookup, pe.__class__(*pe.args)) - raise - else: - cache.set(lookup, (value[0], value[1].copy())) - return value - else: - ParserElement.packrat_cache_stats[HIT] += 1 - if isinstance(value, Exception): - raise value - return (value[0], value[1].copy()) - - _parse = _parseNoCache - - @staticmethod - def resetCache(): - ParserElement.packrat_cache.clear() - ParserElement.packrat_cache_stats[:] = [0] * len(ParserElement.packrat_cache_stats) - - _packratEnabled = False - @staticmethod - def enablePackrat(cache_size_limit=128): - """Enables "packrat" parsing, which adds memoizing to the parsing logic. - Repeated parse attempts at the same string location (which happens - often in many complex grammars) can immediately return a cached value, - instead of re-executing parsing/validating code. Memoizing is done of - both valid results and parsing exceptions. - - Parameters: - - cache_size_limit - (default=C{128}) - if an integer value is provided - will limit the size of the packrat cache; if None is passed, then - the cache size will be unbounded; if 0 is passed, the cache will - be effectively disabled. - - This speedup may break existing programs that use parse actions that - have side-effects. For this reason, packrat parsing is disabled when - you first import pyparsing. To activate the packrat feature, your - program must call the class method C{ParserElement.enablePackrat()}. If - your program uses C{psyco} to "compile as you go", you must call - C{enablePackrat} before calling C{psyco.full()}. If you do not do this, - Python will crash. For best results, call C{enablePackrat()} immediately - after importing pyparsing. - - Example:: - import pyparsing - pyparsing.ParserElement.enablePackrat() - """ - if not ParserElement._packratEnabled: - ParserElement._packratEnabled = True - if cache_size_limit is None: - ParserElement.packrat_cache = ParserElement._UnboundedCache() - else: - ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit) - ParserElement._parse = ParserElement._parseCache - - def parseString( self, instring, parseAll=False ): - """ - Execute the parse expression with the given string. - This is the main interface to the client code, once the complete - expression has been built. - - If you want the grammar to require that the entire input string be - successfully parsed, then set C{parseAll} to True (equivalent to ending - the grammar with C{L{StringEnd()}}). - - Note: C{parseString} implicitly calls C{expandtabs()} on the input string, - in order to report proper column numbers in parse actions. - If the input string contains tabs and - the grammar uses parse actions that use the C{loc} argument to index into the - string being parsed, you can ensure you have a consistent view of the input - string by: - - calling C{parseWithTabs} on your grammar before calling C{parseString} - (see L{I{parseWithTabs}}) - - define your parse action using the full C{(s,loc,toks)} signature, and - reference the input string using the parse action's C{s} argument - - explictly expand the tabs in your input string before calling - C{parseString} - - Example:: - Word('a').parseString('aaaaabaaa') # -> ['aaaaa'] - Word('a').parseString('aaaaabaaa', parseAll=True) # -> Exception: Expected end of text - """ - ParserElement.resetCache() - if not self.streamlined: - self.streamline() - #~ self.saveAsList = True - for e in self.ignoreExprs: - e.streamline() - if not self.keepTabs: - instring = instring.expandtabs() - try: - loc, tokens = self._parse( instring, 0 ) - if parseAll: - loc = self.preParse( instring, loc ) - se = Empty() + StringEnd() - se._parse( instring, loc ) - except ParseBaseException as exc: - if ParserElement.verbose_stacktrace: - raise - else: - # catch and re-raise exception from here, clears out pyparsing internal stack trace - raise exc - else: - return tokens - - def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ): - """ - Scan the input string for expression matches. Each match will return the - matching tokens, start location, and end location. May be called with optional - C{maxMatches} argument, to clip scanning after 'n' matches are found. If - C{overlap} is specified, then overlapping matches will be reported. - - Note that the start and end locations are reported relative to the string - being parsed. See L{I{parseString}} for more information on parsing - strings with embedded tabs. - - Example:: - source = "sldjf123lsdjjkf345sldkjf879lkjsfd987" - print(source) - for tokens,start,end in Word(alphas).scanString(source): - print(' '*start + '^'*(end-start)) - print(' '*start + tokens[0]) - - prints:: - - sldjf123lsdjjkf345sldkjf879lkjsfd987 - ^^^^^ - sldjf - ^^^^^^^ - lsdjjkf - ^^^^^^ - sldkjf - ^^^^^^ - lkjsfd - """ - if not self.streamlined: - self.streamline() - for e in self.ignoreExprs: - e.streamline() - - if not self.keepTabs: - instring = _ustr(instring).expandtabs() - instrlen = len(instring) - loc = 0 - preparseFn = self.preParse - parseFn = self._parse - ParserElement.resetCache() - matches = 0 - try: - while loc <= instrlen and matches < maxMatches: - try: - preloc = preparseFn( instring, loc ) - nextLoc,tokens = parseFn( instring, preloc, callPreParse=False ) - except ParseException: - loc = preloc+1 - else: - if nextLoc > loc: - matches += 1 - yield tokens, preloc, nextLoc - if overlap: - nextloc = preparseFn( instring, loc ) - if nextloc > loc: - loc = nextLoc - else: - loc += 1 - else: - loc = nextLoc - else: - loc = preloc+1 - except ParseBaseException as exc: - if ParserElement.verbose_stacktrace: - raise - else: - # catch and re-raise exception from here, clears out pyparsing internal stack trace - raise exc - - def transformString( self, instring ): - """ - Extension to C{L{scanString}}, to modify matching text with modified tokens that may - be returned from a parse action. To use C{transformString}, define a grammar and - attach a parse action to it that modifies the returned token list. - Invoking C{transformString()} on a target string will then scan for matches, - and replace the matched text patterns according to the logic in the parse - action. C{transformString()} returns the resulting transformed string. - - Example:: - wd = Word(alphas) - wd.setParseAction(lambda toks: toks[0].title()) - - print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york.")) - Prints:: - Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York. - """ - out = [] - lastE = 0 - # force preservation of s, to minimize unwanted transformation of string, and to - # keep string locs straight between transformString and scanString - self.keepTabs = True - try: - for t,s,e in self.scanString( instring ): - out.append( instring[lastE:s] ) - if t: - if isinstance(t,ParseResults): - out += t.asList() - elif isinstance(t,list): - out += t - else: - out.append(t) - lastE = e - out.append(instring[lastE:]) - out = [o for o in out if o] - return "".join(map(_ustr,_flatten(out))) - except ParseBaseException as exc: - if ParserElement.verbose_stacktrace: - raise - else: - # catch and re-raise exception from here, clears out pyparsing internal stack trace - raise exc - - def searchString( self, instring, maxMatches=_MAX_INT ): - """ - Another extension to C{L{scanString}}, simplifying the access to the tokens found - to match the given parse expression. May be called with optional - C{maxMatches} argument, to clip searching after 'n' matches are found. - - Example:: - # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters - cap_word = Word(alphas.upper(), alphas.lower()) - - print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")) - - # the sum() builtin can be used to merge results into a single ParseResults object - print(sum(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))) - prints:: - [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']] - ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity'] - """ - try: - return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ]) - except ParseBaseException as exc: - if ParserElement.verbose_stacktrace: - raise - else: - # catch and re-raise exception from here, clears out pyparsing internal stack trace - raise exc - - def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False): - """ - Generator method to split a string using the given expression as a separator. - May be called with optional C{maxsplit} argument, to limit the number of splits; - and the optional C{includeSeparators} argument (default=C{False}), if the separating - matching text should be included in the split results. - - Example:: - punc = oneOf(list(".,;:/-!?")) - print(list(punc.split("This, this?, this sentence, is badly punctuated!"))) - prints:: - ['This', ' this', '', ' this sentence', ' is badly punctuated', ''] - """ - splits = 0 - last = 0 - for t,s,e in self.scanString(instring, maxMatches=maxsplit): - yield instring[last:s] - if includeSeparators: - yield t[0] - last = e - yield instring[last:] - - def __add__(self, other ): - """ - Implementation of + operator - returns C{L{And}}. Adding strings to a ParserElement - converts them to L{Literal}s by default. - - Example:: - greet = Word(alphas) + "," + Word(alphas) + "!" - hello = "Hello, World!" - print (hello, "->", greet.parseString(hello)) - Prints:: - Hello, World! -> ['Hello', ',', 'World', '!'] - """ - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return And( [ self, other ] ) - - def __radd__(self, other ): - """ - Implementation of + operator when left operand is not a C{L{ParserElement}} - """ - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return other + self - - def __sub__(self, other): - """ - Implementation of - operator, returns C{L{And}} with error stop - """ - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return self + And._ErrorStop() + other - - def __rsub__(self, other ): - """ - Implementation of - operator when left operand is not a C{L{ParserElement}} - """ - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return other - self - - def __mul__(self,other): - """ - Implementation of * operator, allows use of C{expr * 3} in place of - C{expr + expr + expr}. Expressions may also me multiplied by a 2-integer - tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples - may also include C{None} as in: - - C{expr*(n,None)} or C{expr*(n,)} is equivalent - to C{expr*n + L{ZeroOrMore}(expr)} - (read as "at least n instances of C{expr}") - - C{expr*(None,n)} is equivalent to C{expr*(0,n)} - (read as "0 to n instances of C{expr}") - - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)} - - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)} - - Note that C{expr*(None,n)} does not raise an exception if - more than n exprs exist in the input stream; that is, - C{expr*(None,n)} does not enforce a maximum number of expr - occurrences. If this behavior is desired, then write - C{expr*(None,n) + ~expr} - """ - if isinstance(other,int): - minElements, optElements = other,0 - elif isinstance(other,tuple): - other = (other + (None, None))[:2] - if other[0] is None: - other = (0, other[1]) - if isinstance(other[0],int) and other[1] is None: - if other[0] == 0: - return ZeroOrMore(self) - if other[0] == 1: - return OneOrMore(self) - else: - return self*other[0] + ZeroOrMore(self) - elif isinstance(other[0],int) and isinstance(other[1],int): - minElements, optElements = other - optElements -= minElements - else: - raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1])) - else: - raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other)) - - if minElements < 0: - raise ValueError("cannot multiply ParserElement by negative value") - if optElements < 0: - raise ValueError("second tuple value must be greater or equal to first tuple value") - if minElements == optElements == 0: - raise ValueError("cannot multiply ParserElement by 0 or (0,0)") - - if (optElements): - def makeOptionalList(n): - if n>1: - return Optional(self + makeOptionalList(n-1)) - else: - return Optional(self) - if minElements: - if minElements == 1: - ret = self + makeOptionalList(optElements) - else: - ret = And([self]*minElements) + makeOptionalList(optElements) - else: - ret = makeOptionalList(optElements) - else: - if minElements == 1: - ret = self - else: - ret = And([self]*minElements) - return ret - - def __rmul__(self, other): - return self.__mul__(other) - - def __or__(self, other ): - """ - Implementation of | operator - returns C{L{MatchFirst}} - """ - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return MatchFirst( [ self, other ] ) - - def __ror__(self, other ): - """ - Implementation of | operator when left operand is not a C{L{ParserElement}} - """ - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return other | self - - def __xor__(self, other ): - """ - Implementation of ^ operator - returns C{L{Or}} - """ - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return Or( [ self, other ] ) - - def __rxor__(self, other ): - """ - Implementation of ^ operator when left operand is not a C{L{ParserElement}} - """ - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return other ^ self - - def __and__(self, other ): - """ - Implementation of & operator - returns C{L{Each}} - """ - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return Each( [ self, other ] ) - - def __rand__(self, other ): - """ - Implementation of & operator when left operand is not a C{L{ParserElement}} - """ - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return other & self - - def __invert__( self ): - """ - Implementation of ~ operator - returns C{L{NotAny}} - """ - return NotAny( self ) - - def __call__(self, name=None): - """ - Shortcut for C{L{setResultsName}}, with C{listAllMatches=False}. - - If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be - passed as C{True}. - - If C{name} is omitted, same as calling C{L{copy}}. - - Example:: - # these are equivalent - userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno") - userdata = Word(alphas)("name") + Word(nums+"-")("socsecno") - """ - if name is not None: - return self.setResultsName(name) - else: - return self.copy() - - def suppress( self ): - """ - Suppresses the output of this C{ParserElement}; useful to keep punctuation from - cluttering up returned output. - """ - return Suppress( self ) - - def leaveWhitespace( self ): - """ - Disables the skipping of whitespace before matching the characters in the - C{ParserElement}'s defined pattern. This is normally only used internally by - the pyparsing module, but may be needed in some whitespace-sensitive grammars. - """ - self.skipWhitespace = False - return self - - def setWhitespaceChars( self, chars ): - """ - Overrides the default whitespace chars - """ - self.skipWhitespace = True - self.whiteChars = chars - self.copyDefaultWhiteChars = False - return self - - def parseWithTabs( self ): - """ - Overrides default behavior to expand C{}s to spaces before parsing the input string. - Must be called before C{parseString} when the input grammar contains elements that - match C{} characters. - """ - self.keepTabs = True - return self - - def ignore( self, other ): - """ - Define expression to be ignored (e.g., comments) while doing pattern - matching; may be called repeatedly, to define multiple comment or other - ignorable patterns. - - Example:: - patt = OneOrMore(Word(alphas)) - patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj'] - - patt.ignore(cStyleComment) - patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd'] - """ - if isinstance(other, basestring): - other = Suppress(other) - - if isinstance( other, Suppress ): - if other not in self.ignoreExprs: - self.ignoreExprs.append(other) - else: - self.ignoreExprs.append( Suppress( other.copy() ) ) - return self - - def setDebugActions( self, startAction, successAction, exceptionAction ): - """ - Enable display of debugging messages while doing pattern matching. - """ - self.debugActions = (startAction or _defaultStartDebugAction, - successAction or _defaultSuccessDebugAction, - exceptionAction or _defaultExceptionDebugAction) - self.debug = True - return self - - def setDebug( self, flag=True ): - """ - Enable display of debugging messages while doing pattern matching. - Set C{flag} to True to enable, False to disable. - - Example:: - wd = Word(alphas).setName("alphaword") - integer = Word(nums).setName("numword") - term = wd | integer - - # turn on debugging for wd - wd.setDebug() - - OneOrMore(term).parseString("abc 123 xyz 890") - - prints:: - Match alphaword at loc 0(1,1) - Matched alphaword -> ['abc'] - Match alphaword at loc 3(1,4) - Exception raised:Expected alphaword (at char 4), (line:1, col:5) - Match alphaword at loc 7(1,8) - Matched alphaword -> ['xyz'] - Match alphaword at loc 11(1,12) - Exception raised:Expected alphaword (at char 12), (line:1, col:13) - Match alphaword at loc 15(1,16) - Exception raised:Expected alphaword (at char 15), (line:1, col:16) - - The output shown is that produced by the default debug actions - custom debug actions can be - specified using L{setDebugActions}. Prior to attempting - to match the C{wd} expression, the debugging message C{"Match at loc (,)"} - is shown. Then if the parse succeeds, a C{"Matched"} message is shown, or an C{"Exception raised"} - message is shown. Also note the use of L{setName} to assign a human-readable name to the expression, - which makes debugging and exception messages easier to understand - for instance, the default - name created for the C{Word} expression without calling C{setName} is C{"W:(ABCD...)"}. - """ - if flag: - self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction ) - else: - self.debug = False - return self - - def __str__( self ): - return self.name - - def __repr__( self ): - return _ustr(self) - - def streamline( self ): - self.streamlined = True - self.strRepr = None - return self - - def checkRecursion( self, parseElementList ): - pass - - def validate( self, validateTrace=[] ): - """ - Check defined expressions for valid structure, check for infinite recursive definitions. - """ - self.checkRecursion( [] ) - - def parseFile( self, file_or_filename, parseAll=False ): - """ - Execute the parse expression on the given file or filename. - If a filename is specified (instead of a file object), - the entire file is opened, read, and closed before parsing. - """ - try: - file_contents = file_or_filename.read() - except AttributeError: - with open(file_or_filename, "r") as f: - file_contents = f.read() - try: - return self.parseString(file_contents, parseAll) - except ParseBaseException as exc: - if ParserElement.verbose_stacktrace: - raise - else: - # catch and re-raise exception from here, clears out pyparsing internal stack trace - raise exc - - def __eq__(self,other): - if isinstance(other, ParserElement): - return self is other or vars(self) == vars(other) - elif isinstance(other, basestring): - return self.matches(other) - else: - return super(ParserElement,self)==other - - def __ne__(self,other): - return not (self == other) - - def __hash__(self): - return hash(id(self)) - - def __req__(self,other): - return self == other - - def __rne__(self,other): - return not (self == other) - - def matches(self, testString, parseAll=True): - """ - Method for quick testing of a parser against a test string. Good for simple - inline microtests of sub expressions while building up larger parser. - - Parameters: - - testString - to test against this expression for a match - - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests - - Example:: - expr = Word(nums) - assert expr.matches("100") - """ - try: - self.parseString(_ustr(testString), parseAll=parseAll) - return True - except ParseBaseException: - return False - - def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False): - """ - Execute the parse expression on a series of test strings, showing each - test, the parsed results or where the parse failed. Quick and easy way to - run a parse expression against a list of sample strings. - - Parameters: - - tests - a list of separate test strings, or a multiline string of test strings - - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests - - comment - (default=C{'#'}) - expression for indicating embedded comments in the test - string; pass None to disable comment filtering - - fullDump - (default=C{True}) - dump results as list followed by results names in nested outline; - if False, only dump nested list - - printResults - (default=C{True}) prints test output to stdout - - failureTests - (default=C{False}) indicates if these tests are expected to fail parsing - - Returns: a (success, results) tuple, where success indicates that all tests succeeded - (or failed if C{failureTests} is True), and the results contain a list of lines of each - test's output - - Example:: - number_expr = pyparsing_common.number.copy() - - result = number_expr.runTests(''' - # unsigned integer - 100 - # negative integer - -100 - # float with scientific notation - 6.02e23 - # integer with scientific notation - 1e-12 - ''') - print("Success" if result[0] else "Failed!") - - result = number_expr.runTests(''' - # stray character - 100Z - # missing leading digit before '.' - -.100 - # too many '.' - 3.14.159 - ''', failureTests=True) - print("Success" if result[0] else "Failed!") - prints:: - # unsigned integer - 100 - [100] - - # negative integer - -100 - [-100] - - # float with scientific notation - 6.02e23 - [6.02e+23] - - # integer with scientific notation - 1e-12 - [1e-12] - - Success - - # stray character - 100Z - ^ - FAIL: Expected end of text (at char 3), (line:1, col:4) - - # missing leading digit before '.' - -.100 - ^ - FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1) - - # too many '.' - 3.14.159 - ^ - FAIL: Expected end of text (at char 4), (line:1, col:5) - - Success - - Each test string must be on a single line. If you want to test a string that spans multiple - lines, create a test like this:: - - expr.runTest(r"this is a test\\n of strings that spans \\n 3 lines") - - (Note that this is a raw string literal, you must include the leading 'r'.) - """ - if isinstance(tests, basestring): - tests = list(map(str.strip, tests.rstrip().splitlines())) - if isinstance(comment, basestring): - comment = Literal(comment) - allResults = [] - comments = [] - success = True - for t in tests: - if comment is not None and comment.matches(t, False) or comments and not t: - comments.append(t) - continue - if not t: - continue - out = ['\n'.join(comments), t] - comments = [] - try: - t = t.replace(r'\n','\n') - result = self.parseString(t, parseAll=parseAll) - out.append(result.dump(full=fullDump)) - success = success and not failureTests - except ParseBaseException as pe: - fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else "" - if '\n' in t: - out.append(line(pe.loc, t)) - out.append(' '*(col(pe.loc,t)-1) + '^' + fatal) - else: - out.append(' '*pe.loc + '^' + fatal) - out.append("FAIL: " + str(pe)) - success = success and failureTests - result = pe - except Exception as exc: - out.append("FAIL-EXCEPTION: " + str(exc)) - success = success and failureTests - result = exc - - if printResults: - if fullDump: - out.append('') - print('\n'.join(out)) - - allResults.append((t, result)) - - return success, allResults - - -class Token(ParserElement): - """ - Abstract C{ParserElement} subclass, for defining atomic matching patterns. - """ - def __init__( self ): - super(Token,self).__init__( savelist=False ) - - -class Empty(Token): - """ - An empty token, will always match. - """ - def __init__( self ): - super(Empty,self).__init__() - self.name = "Empty" - self.mayReturnEmpty = True - self.mayIndexError = False - - -class NoMatch(Token): - """ - A token that will never match. - """ - def __init__( self ): - super(NoMatch,self).__init__() - self.name = "NoMatch" - self.mayReturnEmpty = True - self.mayIndexError = False - self.errmsg = "Unmatchable token" - - def parseImpl( self, instring, loc, doActions=True ): - raise ParseException(instring, loc, self.errmsg, self) - - -class Literal(Token): - """ - Token to exactly match a specified string. - - Example:: - Literal('blah').parseString('blah') # -> ['blah'] - Literal('blah').parseString('blahfooblah') # -> ['blah'] - Literal('blah').parseString('bla') # -> Exception: Expected "blah" - - For case-insensitive matching, use L{CaselessLiteral}. - - For keyword matching (force word break before and after the matched string), - use L{Keyword} or L{CaselessKeyword}. - """ - def __init__( self, matchString ): - super(Literal,self).__init__() - self.match = matchString - self.matchLen = len(matchString) - try: - self.firstMatchChar = matchString[0] - except IndexError: - warnings.warn("null string passed to Literal; use Empty() instead", - SyntaxWarning, stacklevel=2) - self.__class__ = Empty - self.name = '"%s"' % _ustr(self.match) - self.errmsg = "Expected " + self.name - self.mayReturnEmpty = False - self.mayIndexError = False - - # Performance tuning: this routine gets called a *lot* - # if this is a single character match string and the first character matches, - # short-circuit as quickly as possible, and avoid calling startswith - #~ @profile - def parseImpl( self, instring, loc, doActions=True ): - if (instring[loc] == self.firstMatchChar and - (self.matchLen==1 or instring.startswith(self.match,loc)) ): - return loc+self.matchLen, self.match - raise ParseException(instring, loc, self.errmsg, self) -_L = Literal -ParserElement._literalStringClass = Literal - -class Keyword(Token): - """ - Token to exactly match a specified string as a keyword, that is, it must be - immediately followed by a non-keyword character. Compare with C{L{Literal}}: - - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}. - - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'} - Accepts two optional constructor arguments in addition to the keyword string: - - C{identChars} is a string of characters that would be valid identifier characters, - defaulting to all alphanumerics + "_" and "$" - - C{caseless} allows case-insensitive matching, default is C{False}. - - Example:: - Keyword("start").parseString("start") # -> ['start'] - Keyword("start").parseString("starting") # -> Exception - - For case-insensitive matching, use L{CaselessKeyword}. - """ - DEFAULT_KEYWORD_CHARS = alphanums+"_$" - - def __init__( self, matchString, identChars=None, caseless=False ): - super(Keyword,self).__init__() - if identChars is None: - identChars = Keyword.DEFAULT_KEYWORD_CHARS - self.match = matchString - self.matchLen = len(matchString) - try: - self.firstMatchChar = matchString[0] - except IndexError: - warnings.warn("null string passed to Keyword; use Empty() instead", - SyntaxWarning, stacklevel=2) - self.name = '"%s"' % self.match - self.errmsg = "Expected " + self.name - self.mayReturnEmpty = False - self.mayIndexError = False - self.caseless = caseless - if caseless: - self.caselessmatch = matchString.upper() - identChars = identChars.upper() - self.identChars = set(identChars) - - def parseImpl( self, instring, loc, doActions=True ): - if self.caseless: - if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and - (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and - (loc == 0 or instring[loc-1].upper() not in self.identChars) ): - return loc+self.matchLen, self.match - else: - if (instring[loc] == self.firstMatchChar and - (self.matchLen==1 or instring.startswith(self.match,loc)) and - (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and - (loc == 0 or instring[loc-1] not in self.identChars) ): - return loc+self.matchLen, self.match - raise ParseException(instring, loc, self.errmsg, self) - - def copy(self): - c = super(Keyword,self).copy() - c.identChars = Keyword.DEFAULT_KEYWORD_CHARS - return c - - @staticmethod - def setDefaultKeywordChars( chars ): - """Overrides the default Keyword chars - """ - Keyword.DEFAULT_KEYWORD_CHARS = chars - -class CaselessLiteral(Literal): - """ - Token to match a specified string, ignoring case of letters. - Note: the matched results will always be in the case of the given - match string, NOT the case of the input text. - - Example:: - OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD'] - - (Contrast with example for L{CaselessKeyword}.) - """ - def __init__( self, matchString ): - super(CaselessLiteral,self).__init__( matchString.upper() ) - # Preserve the defining literal. - self.returnString = matchString - self.name = "'%s'" % self.returnString - self.errmsg = "Expected " + self.name - - def parseImpl( self, instring, loc, doActions=True ): - if instring[ loc:loc+self.matchLen ].upper() == self.match: - return loc+self.matchLen, self.returnString - raise ParseException(instring, loc, self.errmsg, self) - -class CaselessKeyword(Keyword): - """ - Caseless version of L{Keyword}. - - Example:: - OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD'] - - (Contrast with example for L{CaselessLiteral}.) - """ - def __init__( self, matchString, identChars=None ): - super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True ) - - def parseImpl( self, instring, loc, doActions=True ): - if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and - (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) ): - return loc+self.matchLen, self.match - raise ParseException(instring, loc, self.errmsg, self) - -class CloseMatch(Token): - """ - A variation on L{Literal} which matches "close" matches, that is, - strings with at most 'n' mismatching characters. C{CloseMatch} takes parameters: - - C{match_string} - string to be matched - - C{maxMismatches} - (C{default=1}) maximum number of mismatches allowed to count as a match - - The results from a successful parse will contain the matched text from the input string and the following named results: - - C{mismatches} - a list of the positions within the match_string where mismatches were found - - C{original} - the original match_string used to compare against the input string - - If C{mismatches} is an empty list, then the match was an exact match. - - Example:: - patt = CloseMatch("ATCATCGAATGGA") - patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']}) - patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1) - - # exact match - patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']}) - - # close match allowing up to 2 mismatches - patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2) - patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']}) - """ - def __init__(self, match_string, maxMismatches=1): - super(CloseMatch,self).__init__() - self.name = match_string - self.match_string = match_string - self.maxMismatches = maxMismatches - self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches) - self.mayIndexError = False - self.mayReturnEmpty = False - - def parseImpl( self, instring, loc, doActions=True ): - start = loc - instrlen = len(instring) - maxloc = start + len(self.match_string) - - if maxloc <= instrlen: - match_string = self.match_string - match_stringloc = 0 - mismatches = [] - maxMismatches = self.maxMismatches - - for match_stringloc,s_m in enumerate(zip(instring[loc:maxloc], self.match_string)): - src,mat = s_m - if src != mat: - mismatches.append(match_stringloc) - if len(mismatches) > maxMismatches: - break - else: - loc = match_stringloc + 1 - results = ParseResults([instring[start:loc]]) - results['original'] = self.match_string - results['mismatches'] = mismatches - return loc, results - - raise ParseException(instring, loc, self.errmsg, self) - - -class Word(Token): - """ - Token for matching words composed of allowed character sets. - Defined with string containing all allowed initial characters, - an optional string containing allowed body characters (if omitted, - defaults to the initial character set), and an optional minimum, - maximum, and/or exact length. The default value for C{min} is 1 (a - minimum value < 1 is not valid); the default values for C{max} and C{exact} - are 0, meaning no maximum or exact length restriction. An optional - C{excludeChars} parameter can list characters that might be found in - the input C{bodyChars} string; useful to define a word of all printables - except for one or two characters, for instance. - - L{srange} is useful for defining custom character set strings for defining - C{Word} expressions, using range notation from regular expression character sets. - - A common mistake is to use C{Word} to match a specific literal string, as in - C{Word("Address")}. Remember that C{Word} uses the string argument to define - I{sets} of matchable characters. This expression would match "Add", "AAA", - "dAred", or any other word made up of the characters 'A', 'd', 'r', 'e', and 's'. - To match an exact literal string, use L{Literal} or L{Keyword}. - - pyparsing includes helper strings for building Words: - - L{alphas} - - L{nums} - - L{alphanums} - - L{hexnums} - - L{alphas8bit} (alphabetic characters in ASCII range 128-255 - accented, tilded, umlauted, etc.) - - L{punc8bit} (non-alphabetic characters in ASCII range 128-255 - currency, symbols, superscripts, diacriticals, etc.) - - L{printables} (any non-whitespace character) - - Example:: - # a word composed of digits - integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9")) - - # a word with a leading capital, and zero or more lowercase - capital_word = Word(alphas.upper(), alphas.lower()) - - # hostnames are alphanumeric, with leading alpha, and '-' - hostname = Word(alphas, alphanums+'-') - - # roman numeral (not a strict parser, accepts invalid mix of characters) - roman = Word("IVXLCDM") - - # any string of non-whitespace characters, except for ',' - csv_value = Word(printables, excludeChars=",") - """ - def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ): - super(Word,self).__init__() - if excludeChars: - initChars = ''.join(c for c in initChars if c not in excludeChars) - if bodyChars: - bodyChars = ''.join(c for c in bodyChars if c not in excludeChars) - self.initCharsOrig = initChars - self.initChars = set(initChars) - if bodyChars : - self.bodyCharsOrig = bodyChars - self.bodyChars = set(bodyChars) - else: - self.bodyCharsOrig = initChars - self.bodyChars = set(initChars) - - self.maxSpecified = max > 0 - - if min < 1: - raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted") - - self.minLen = min - - if max > 0: - self.maxLen = max - else: - self.maxLen = _MAX_INT - - if exact > 0: - self.maxLen = exact - self.minLen = exact - - self.name = _ustr(self) - self.errmsg = "Expected " + self.name - self.mayIndexError = False - self.asKeyword = asKeyword - - if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0): - if self.bodyCharsOrig == self.initCharsOrig: - self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig) - elif len(self.initCharsOrig) == 1: - self.reString = "%s[%s]*" % \ - (re.escape(self.initCharsOrig), - _escapeRegexRangeChars(self.bodyCharsOrig),) - else: - self.reString = "[%s][%s]*" % \ - (_escapeRegexRangeChars(self.initCharsOrig), - _escapeRegexRangeChars(self.bodyCharsOrig),) - if self.asKeyword: - self.reString = r"\b"+self.reString+r"\b" - try: - self.re = re.compile( self.reString ) - except Exception: - self.re = None - - def parseImpl( self, instring, loc, doActions=True ): - if self.re: - result = self.re.match(instring,loc) - if not result: - raise ParseException(instring, loc, self.errmsg, self) - - loc = result.end() - return loc, result.group() - - if not(instring[ loc ] in self.initChars): - raise ParseException(instring, loc, self.errmsg, self) - - start = loc - loc += 1 - instrlen = len(instring) - bodychars = self.bodyChars - maxloc = start + self.maxLen - maxloc = min( maxloc, instrlen ) - while loc < maxloc and instring[loc] in bodychars: - loc += 1 - - throwException = False - if loc - start < self.minLen: - throwException = True - if self.maxSpecified and loc < instrlen and instring[loc] in bodychars: - throwException = True - if self.asKeyword: - if (start>0 and instring[start-1] in bodychars) or (loc4: - return s[:4]+"..." - else: - return s - - if ( self.initCharsOrig != self.bodyCharsOrig ): - self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) ) - else: - self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig) - - return self.strRepr - - -class Regex(Token): - r""" - Token for matching strings that match a given regular expression. - Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module. - If the given regex contains named groups (defined using C{(?P...)}), these will be preserved as - named parse results. - - Example:: - realnum = Regex(r"[+-]?\d+\.\d*") - date = Regex(r'(?P\d{4})-(?P\d\d?)-(?P\d\d?)') - # ref: http://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression - roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})") - """ - compiledREtype = type(re.compile("[A-Z]")) - def __init__( self, pattern, flags=0): - """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.""" - super(Regex,self).__init__() - - if isinstance(pattern, basestring): - if not pattern: - warnings.warn("null string passed to Regex; use Empty() instead", - SyntaxWarning, stacklevel=2) - - self.pattern = pattern - self.flags = flags - - try: - self.re = re.compile(self.pattern, self.flags) - self.reString = self.pattern - except sre_constants.error: - warnings.warn("invalid pattern (%s) passed to Regex" % pattern, - SyntaxWarning, stacklevel=2) - raise - - elif isinstance(pattern, Regex.compiledREtype): - self.re = pattern - self.pattern = \ - self.reString = str(pattern) - self.flags = flags - - else: - raise ValueError("Regex may only be constructed with a string or a compiled RE object") - - self.name = _ustr(self) - self.errmsg = "Expected " + self.name - self.mayIndexError = False - self.mayReturnEmpty = True - - def parseImpl( self, instring, loc, doActions=True ): - result = self.re.match(instring,loc) - if not result: - raise ParseException(instring, loc, self.errmsg, self) - - loc = result.end() - d = result.groupdict() - ret = ParseResults(result.group()) - if d: - for k in d: - ret[k] = d[k] - return loc,ret - - def __str__( self ): - try: - return super(Regex,self).__str__() - except Exception: - pass - - if self.strRepr is None: - self.strRepr = "Re:(%s)" % repr(self.pattern) - - return self.strRepr - - -class QuotedString(Token): - r""" - Token for matching strings that are delimited by quoting characters. - - Defined with the following parameters: - - quoteChar - string of one or more characters defining the quote delimiting string - - escChar - character to escape quotes, typically backslash (default=C{None}) - - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=C{None}) - - multiline - boolean indicating whether quotes can span multiple lines (default=C{False}) - - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True}) - - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar) - - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True}) - - Example:: - qs = QuotedString('"') - print(qs.searchString('lsjdf "This is the quote" sldjf')) - complex_qs = QuotedString('{{', endQuoteChar='}}') - print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf')) - sql_qs = QuotedString('"', escQuote='""') - print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf')) - prints:: - [['This is the quote']] - [['This is the "quote"']] - [['This is the quote with "embedded" quotes']] - """ - def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True): - super(QuotedString,self).__init__() - - # remove white space from quote chars - wont work anyway - quoteChar = quoteChar.strip() - if not quoteChar: - warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) - raise SyntaxError() - - if endQuoteChar is None: - endQuoteChar = quoteChar - else: - endQuoteChar = endQuoteChar.strip() - if not endQuoteChar: - warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) - raise SyntaxError() - - self.quoteChar = quoteChar - self.quoteCharLen = len(quoteChar) - self.firstQuoteChar = quoteChar[0] - self.endQuoteChar = endQuoteChar - self.endQuoteCharLen = len(endQuoteChar) - self.escChar = escChar - self.escQuote = escQuote - self.unquoteResults = unquoteResults - self.convertWhitespaceEscapes = convertWhitespaceEscapes - - if multiline: - self.flags = re.MULTILINE | re.DOTALL - self.pattern = r'%s(?:[^%s%s]' % \ - ( re.escape(self.quoteChar), - _escapeRegexRangeChars(self.endQuoteChar[0]), - (escChar is not None and _escapeRegexRangeChars(escChar) or '') ) - else: - self.flags = 0 - self.pattern = r'%s(?:[^%s\n\r%s]' % \ - ( re.escape(self.quoteChar), - _escapeRegexRangeChars(self.endQuoteChar[0]), - (escChar is not None and _escapeRegexRangeChars(escChar) or '') ) - if len(self.endQuoteChar) > 1: - self.pattern += ( - '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]), - _escapeRegexRangeChars(self.endQuoteChar[i])) - for i in range(len(self.endQuoteChar)-1,0,-1)) + ')' - ) - if escQuote: - self.pattern += (r'|(?:%s)' % re.escape(escQuote)) - if escChar: - self.pattern += (r'|(?:%s.)' % re.escape(escChar)) - self.escCharReplacePattern = re.escape(self.escChar)+"(.)" - self.pattern += (r')*%s' % re.escape(self.endQuoteChar)) - - try: - self.re = re.compile(self.pattern, self.flags) - self.reString = self.pattern - except sre_constants.error: - warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern, - SyntaxWarning, stacklevel=2) - raise - - self.name = _ustr(self) - self.errmsg = "Expected " + self.name - self.mayIndexError = False - self.mayReturnEmpty = True - - def parseImpl( self, instring, loc, doActions=True ): - result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None - if not result: - raise ParseException(instring, loc, self.errmsg, self) - - loc = result.end() - ret = result.group() - - if self.unquoteResults: - - # strip off quotes - ret = ret[self.quoteCharLen:-self.endQuoteCharLen] - - if isinstance(ret,basestring): - # replace escaped whitespace - if '\\' in ret and self.convertWhitespaceEscapes: - ws_map = { - r'\t' : '\t', - r'\n' : '\n', - r'\f' : '\f', - r'\r' : '\r', - } - for wslit,wschar in ws_map.items(): - ret = ret.replace(wslit, wschar) - - # replace escaped characters - if self.escChar: - ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret) - - # replace escaped quotes - if self.escQuote: - ret = ret.replace(self.escQuote, self.endQuoteChar) - - return loc, ret - - def __str__( self ): - try: - return super(QuotedString,self).__str__() - except Exception: - pass - - if self.strRepr is None: - self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar) - - return self.strRepr - - -class CharsNotIn(Token): - """ - Token for matching words composed of characters I{not} in a given set (will - include whitespace in matched characters if not listed in the provided exclusion set - see example). - Defined with string containing all disallowed characters, and an optional - minimum, maximum, and/or exact length. The default value for C{min} is 1 (a - minimum value < 1 is not valid); the default values for C{max} and C{exact} - are 0, meaning no maximum or exact length restriction. - - Example:: - # define a comma-separated-value as anything that is not a ',' - csv_value = CharsNotIn(',') - print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213")) - prints:: - ['dkls', 'lsdkjf', 's12 34', '@!#', '213'] - """ - def __init__( self, notChars, min=1, max=0, exact=0 ): - super(CharsNotIn,self).__init__() - self.skipWhitespace = False - self.notChars = notChars - - if min < 1: - raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted") - - self.minLen = min - - if max > 0: - self.maxLen = max - else: - self.maxLen = _MAX_INT - - if exact > 0: - self.maxLen = exact - self.minLen = exact - - self.name = _ustr(self) - self.errmsg = "Expected " + self.name - self.mayReturnEmpty = ( self.minLen == 0 ) - self.mayIndexError = False - - def parseImpl( self, instring, loc, doActions=True ): - if instring[loc] in self.notChars: - raise ParseException(instring, loc, self.errmsg, self) - - start = loc - loc += 1 - notchars = self.notChars - maxlen = min( start+self.maxLen, len(instring) ) - while loc < maxlen and \ - (instring[loc] not in notchars): - loc += 1 - - if loc - start < self.minLen: - raise ParseException(instring, loc, self.errmsg, self) - - return loc, instring[start:loc] - - def __str__( self ): - try: - return super(CharsNotIn, self).__str__() - except Exception: - pass - - if self.strRepr is None: - if len(self.notChars) > 4: - self.strRepr = "!W:(%s...)" % self.notChars[:4] - else: - self.strRepr = "!W:(%s)" % self.notChars - - return self.strRepr - -class White(Token): - """ - Special matching class for matching whitespace. Normally, whitespace is ignored - by pyparsing grammars. This class is included when some whitespace structures - are significant. Define with a string containing the whitespace characters to be - matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments, - as defined for the C{L{Word}} class. - """ - whiteStrs = { - " " : "", - "\t": "", - "\n": "", - "\r": "", - "\f": "", - } - def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0): - super(White,self).__init__() - self.matchWhite = ws - self.setWhitespaceChars( "".join(c for c in self.whiteChars if c not in self.matchWhite) ) - #~ self.leaveWhitespace() - self.name = ("".join(White.whiteStrs[c] for c in self.matchWhite)) - self.mayReturnEmpty = True - self.errmsg = "Expected " + self.name - - self.minLen = min - - if max > 0: - self.maxLen = max - else: - self.maxLen = _MAX_INT - - if exact > 0: - self.maxLen = exact - self.minLen = exact - - def parseImpl( self, instring, loc, doActions=True ): - if not(instring[ loc ] in self.matchWhite): - raise ParseException(instring, loc, self.errmsg, self) - start = loc - loc += 1 - maxloc = start + self.maxLen - maxloc = min( maxloc, len(instring) ) - while loc < maxloc and instring[loc] in self.matchWhite: - loc += 1 - - if loc - start < self.minLen: - raise ParseException(instring, loc, self.errmsg, self) - - return loc, instring[start:loc] - - -class _PositionToken(Token): - def __init__( self ): - super(_PositionToken,self).__init__() - self.name=self.__class__.__name__ - self.mayReturnEmpty = True - self.mayIndexError = False - -class GoToColumn(_PositionToken): - """ - Token to advance to a specific column of input text; useful for tabular report scraping. - """ - def __init__( self, colno ): - super(GoToColumn,self).__init__() - self.col = colno - - def preParse( self, instring, loc ): - if col(loc,instring) != self.col: - instrlen = len(instring) - if self.ignoreExprs: - loc = self._skipIgnorables( instring, loc ) - while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col : - loc += 1 - return loc - - def parseImpl( self, instring, loc, doActions=True ): - thiscol = col( loc, instring ) - if thiscol > self.col: - raise ParseException( instring, loc, "Text not in expected column", self ) - newloc = loc + self.col - thiscol - ret = instring[ loc: newloc ] - return newloc, ret - - -class LineStart(_PositionToken): - """ - Matches if current position is at the beginning of a line within the parse string - - Example:: - - test = '''\ - AAA this line - AAA and this line - AAA but not this one - B AAA and definitely not this one - ''' - - for t in (LineStart() + 'AAA' + restOfLine).searchString(test): - print(t) - - Prints:: - ['AAA', ' this line'] - ['AAA', ' and this line'] - - """ - def __init__( self ): - super(LineStart,self).__init__() - self.errmsg = "Expected start of line" - - def parseImpl( self, instring, loc, doActions=True ): - if col(loc, instring) == 1: - return loc, [] - raise ParseException(instring, loc, self.errmsg, self) - -class LineEnd(_PositionToken): - """ - Matches if current position is at the end of a line within the parse string - """ - def __init__( self ): - super(LineEnd,self).__init__() - self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") ) - self.errmsg = "Expected end of line" - - def parseImpl( self, instring, loc, doActions=True ): - if loc len(instring): - return loc, [] - else: - raise ParseException(instring, loc, self.errmsg, self) - -class WordStart(_PositionToken): - """ - Matches if the current position is at the beginning of a Word, and - is not preceded by any character in a given set of C{wordChars} - (default=C{printables}). To emulate the C{\b} behavior of regular expressions, - use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of - the string being parsed, or at the beginning of a line. - """ - def __init__(self, wordChars = printables): - super(WordStart,self).__init__() - self.wordChars = set(wordChars) - self.errmsg = "Not at the start of a word" - - def parseImpl(self, instring, loc, doActions=True ): - if loc != 0: - if (instring[loc-1] in self.wordChars or - instring[loc] not in self.wordChars): - raise ParseException(instring, loc, self.errmsg, self) - return loc, [] - -class WordEnd(_PositionToken): - """ - Matches if the current position is at the end of a Word, and - is not followed by any character in a given set of C{wordChars} - (default=C{printables}). To emulate the C{\b} behavior of regular expressions, - use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of - the string being parsed, or at the end of a line. - """ - def __init__(self, wordChars = printables): - super(WordEnd,self).__init__() - self.wordChars = set(wordChars) - self.skipWhitespace = False - self.errmsg = "Not at the end of a word" - - def parseImpl(self, instring, loc, doActions=True ): - instrlen = len(instring) - if instrlen>0 and loc maxExcLoc: - maxException = err - maxExcLoc = err.loc - except IndexError: - if len(instring) > maxExcLoc: - maxException = ParseException(instring,len(instring),e.errmsg,self) - maxExcLoc = len(instring) - else: - # save match among all matches, to retry longest to shortest - matches.append((loc2, e)) - - if matches: - matches.sort(key=lambda x: -x[0]) - for _,e in matches: - try: - return e._parse( instring, loc, doActions ) - except ParseException as err: - err.__traceback__ = None - if err.loc > maxExcLoc: - maxException = err - maxExcLoc = err.loc - - if maxException is not None: - maxException.msg = self.errmsg - raise maxException - else: - raise ParseException(instring, loc, "no defined alternatives to match", self) - - - def __ixor__(self, other ): - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - return self.append( other ) #Or( [ self, other ] ) - - def __str__( self ): - if hasattr(self,"name"): - return self.name - - if self.strRepr is None: - self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}" - - return self.strRepr - - def checkRecursion( self, parseElementList ): - subRecCheckList = parseElementList[:] + [ self ] - for e in self.exprs: - e.checkRecursion( subRecCheckList ) - - -class MatchFirst(ParseExpression): - """ - Requires that at least one C{ParseExpression} is found. - If two expressions match, the first one listed is the one that will match. - May be constructed using the C{'|'} operator. - - Example:: - # construct MatchFirst using '|' operator - - # watch the order of expressions to match - number = Word(nums) | Combine(Word(nums) + '.' + Word(nums)) - print(number.searchString("123 3.1416 789")) # Fail! -> [['123'], ['3'], ['1416'], ['789']] - - # put more selective expression first - number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums) - print(number.searchString("123 3.1416 789")) # Better -> [['123'], ['3.1416'], ['789']] - """ - def __init__( self, exprs, savelist = False ): - super(MatchFirst,self).__init__(exprs, savelist) - if self.exprs: - self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) - else: - self.mayReturnEmpty = True - - def parseImpl( self, instring, loc, doActions=True ): - maxExcLoc = -1 - maxException = None - for e in self.exprs: - try: - ret = e._parse( instring, loc, doActions ) - return ret - except ParseException as err: - if err.loc > maxExcLoc: - maxException = err - maxExcLoc = err.loc - except IndexError: - if len(instring) > maxExcLoc: - maxException = ParseException(instring,len(instring),e.errmsg,self) - maxExcLoc = len(instring) - - # only got here if no expression matched, raise exception for match that made it the furthest - else: - if maxException is not None: - maxException.msg = self.errmsg - raise maxException - else: - raise ParseException(instring, loc, "no defined alternatives to match", self) - - def __ior__(self, other ): - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - return self.append( other ) #MatchFirst( [ self, other ] ) - - def __str__( self ): - if hasattr(self,"name"): - return self.name - - if self.strRepr is None: - self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}" - - return self.strRepr - - def checkRecursion( self, parseElementList ): - subRecCheckList = parseElementList[:] + [ self ] - for e in self.exprs: - e.checkRecursion( subRecCheckList ) - - -class Each(ParseExpression): - """ - Requires all given C{ParseExpression}s to be found, but in any order. - Expressions may be separated by whitespace. - May be constructed using the C{'&'} operator. - - Example:: - color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN") - shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON") - integer = Word(nums) - shape_attr = "shape:" + shape_type("shape") - posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn") - color_attr = "color:" + color("color") - size_attr = "size:" + integer("size") - - # use Each (using operator '&') to accept attributes in any order - # (shape and posn are required, color and size are optional) - shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr) - - shape_spec.runTests(''' - shape: SQUARE color: BLACK posn: 100, 120 - shape: CIRCLE size: 50 color: BLUE posn: 50,80 - color:GREEN size:20 shape:TRIANGLE posn:20,40 - ''' - ) - prints:: - shape: SQUARE color: BLACK posn: 100, 120 - ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']] - - color: BLACK - - posn: ['100', ',', '120'] - - x: 100 - - y: 120 - - shape: SQUARE - - - shape: CIRCLE size: 50 color: BLUE posn: 50,80 - ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']] - - color: BLUE - - posn: ['50', ',', '80'] - - x: 50 - - y: 80 - - shape: CIRCLE - - size: 50 - - - color: GREEN size: 20 shape: TRIANGLE posn: 20,40 - ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']] - - color: GREEN - - posn: ['20', ',', '40'] - - x: 20 - - y: 40 - - shape: TRIANGLE - - size: 20 - """ - def __init__( self, exprs, savelist = True ): - super(Each,self).__init__(exprs, savelist) - self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) - self.skipWhitespace = True - self.initExprGroups = True - - def parseImpl( self, instring, loc, doActions=True ): - if self.initExprGroups: - self.opt1map = dict((id(e.expr),e) for e in self.exprs if isinstance(e,Optional)) - opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ] - opt2 = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional)] - self.optionals = opt1 + opt2 - self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ] - self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ] - self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ] - self.required += self.multirequired - self.initExprGroups = False - tmpLoc = loc - tmpReqd = self.required[:] - tmpOpt = self.optionals[:] - matchOrder = [] - - keepMatching = True - while keepMatching: - tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired - failed = [] - for e in tmpExprs: - try: - tmpLoc = e.tryParse( instring, tmpLoc ) - except ParseException: - failed.append(e) - else: - matchOrder.append(self.opt1map.get(id(e),e)) - if e in tmpReqd: - tmpReqd.remove(e) - elif e in tmpOpt: - tmpOpt.remove(e) - if len(failed) == len(tmpExprs): - keepMatching = False - - if tmpReqd: - missing = ", ".join(_ustr(e) for e in tmpReqd) - raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing ) - - # add any unmatched Optionals, in case they have default values defined - matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt] - - resultlist = [] - for e in matchOrder: - loc,results = e._parse(instring,loc,doActions) - resultlist.append(results) - - finalResults = sum(resultlist, ParseResults([])) - return loc, finalResults - - def __str__( self ): - if hasattr(self,"name"): - return self.name - - if self.strRepr is None: - self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}" - - return self.strRepr - - def checkRecursion( self, parseElementList ): - subRecCheckList = parseElementList[:] + [ self ] - for e in self.exprs: - e.checkRecursion( subRecCheckList ) - - -class ParseElementEnhance(ParserElement): - """ - Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens. - """ - def __init__( self, expr, savelist=False ): - super(ParseElementEnhance,self).__init__(savelist) - if isinstance( expr, basestring ): - if issubclass(ParserElement._literalStringClass, Token): - expr = ParserElement._literalStringClass(expr) - else: - expr = ParserElement._literalStringClass(Literal(expr)) - self.expr = expr - self.strRepr = None - if expr is not None: - self.mayIndexError = expr.mayIndexError - self.mayReturnEmpty = expr.mayReturnEmpty - self.setWhitespaceChars( expr.whiteChars ) - self.skipWhitespace = expr.skipWhitespace - self.saveAsList = expr.saveAsList - self.callPreparse = expr.callPreparse - self.ignoreExprs.extend(expr.ignoreExprs) - - def parseImpl( self, instring, loc, doActions=True ): - if self.expr is not None: - return self.expr._parse( instring, loc, doActions, callPreParse=False ) - else: - raise ParseException("",loc,self.errmsg,self) - - def leaveWhitespace( self ): - self.skipWhitespace = False - self.expr = self.expr.copy() - if self.expr is not None: - self.expr.leaveWhitespace() - return self - - def ignore( self, other ): - if isinstance( other, Suppress ): - if other not in self.ignoreExprs: - super( ParseElementEnhance, self).ignore( other ) - if self.expr is not None: - self.expr.ignore( self.ignoreExprs[-1] ) - else: - super( ParseElementEnhance, self).ignore( other ) - if self.expr is not None: - self.expr.ignore( self.ignoreExprs[-1] ) - return self - - def streamline( self ): - super(ParseElementEnhance,self).streamline() - if self.expr is not None: - self.expr.streamline() - return self - - def checkRecursion( self, parseElementList ): - if self in parseElementList: - raise RecursiveGrammarException( parseElementList+[self] ) - subRecCheckList = parseElementList[:] + [ self ] - if self.expr is not None: - self.expr.checkRecursion( subRecCheckList ) - - def validate( self, validateTrace=[] ): - tmp = validateTrace[:]+[self] - if self.expr is not None: - self.expr.validate(tmp) - self.checkRecursion( [] ) - - def __str__( self ): - try: - return super(ParseElementEnhance,self).__str__() - except Exception: - pass - - if self.strRepr is None and self.expr is not None: - self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) ) - return self.strRepr - - -class FollowedBy(ParseElementEnhance): - """ - Lookahead matching of the given parse expression. C{FollowedBy} - does I{not} advance the parsing position within the input string, it only - verifies that the specified parse expression matches at the current - position. C{FollowedBy} always returns a null token list. - - Example:: - # use FollowedBy to match a label only if it is followed by a ':' - data_word = Word(alphas) - label = data_word + FollowedBy(':') - attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) - - OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint() - prints:: - [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']] - """ - def __init__( self, expr ): - super(FollowedBy,self).__init__(expr) - self.mayReturnEmpty = True - - def parseImpl( self, instring, loc, doActions=True ): - self.expr.tryParse( instring, loc ) - return loc, [] - - -class NotAny(ParseElementEnhance): - """ - Lookahead to disallow matching with the given parse expression. C{NotAny} - does I{not} advance the parsing position within the input string, it only - verifies that the specified parse expression does I{not} match at the current - position. Also, C{NotAny} does I{not} skip over leading whitespace. C{NotAny} - always returns a null token list. May be constructed using the '~' operator. - - Example:: - - """ - def __init__( self, expr ): - super(NotAny,self).__init__(expr) - #~ self.leaveWhitespace() - self.skipWhitespace = False # do NOT use self.leaveWhitespace(), don't want to propagate to exprs - self.mayReturnEmpty = True - self.errmsg = "Found unwanted token, "+_ustr(self.expr) - - def parseImpl( self, instring, loc, doActions=True ): - if self.expr.canParseNext(instring, loc): - raise ParseException(instring, loc, self.errmsg, self) - return loc, [] - - def __str__( self ): - if hasattr(self,"name"): - return self.name - - if self.strRepr is None: - self.strRepr = "~{" + _ustr(self.expr) + "}" - - return self.strRepr - -class _MultipleMatch(ParseElementEnhance): - def __init__( self, expr, stopOn=None): - super(_MultipleMatch, self).__init__(expr) - self.saveAsList = True - ender = stopOn - if isinstance(ender, basestring): - ender = ParserElement._literalStringClass(ender) - self.not_ender = ~ender if ender is not None else None - - def parseImpl( self, instring, loc, doActions=True ): - self_expr_parse = self.expr._parse - self_skip_ignorables = self._skipIgnorables - check_ender = self.not_ender is not None - if check_ender: - try_not_ender = self.not_ender.tryParse - - # must be at least one (but first see if we are the stopOn sentinel; - # if so, fail) - if check_ender: - try_not_ender(instring, loc) - loc, tokens = self_expr_parse( instring, loc, doActions, callPreParse=False ) - try: - hasIgnoreExprs = (not not self.ignoreExprs) - while 1: - if check_ender: - try_not_ender(instring, loc) - if hasIgnoreExprs: - preloc = self_skip_ignorables( instring, loc ) - else: - preloc = loc - loc, tmptokens = self_expr_parse( instring, preloc, doActions ) - if tmptokens or tmptokens.haskeys(): - tokens += tmptokens - except (ParseException,IndexError): - pass - - return loc, tokens - -class OneOrMore(_MultipleMatch): - """ - Repetition of one or more of the given expression. - - Parameters: - - expr - expression that must match one or more times - - stopOn - (default=C{None}) - expression for a terminating sentinel - (only required if the sentinel would ordinarily match the repetition - expression) - - Example:: - data_word = Word(alphas) - label = data_word + FollowedBy(':') - attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join)) - - text = "shape: SQUARE posn: upper left color: BLACK" - OneOrMore(attr_expr).parseString(text).pprint() # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']] - - # use stopOn attribute for OneOrMore to avoid reading label string as part of the data - attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) - OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']] - - # could also be written as - (attr_expr * (1,)).parseString(text).pprint() - """ - - def __str__( self ): - if hasattr(self,"name"): - return self.name - - if self.strRepr is None: - self.strRepr = "{" + _ustr(self.expr) + "}..." - - return self.strRepr - -class ZeroOrMore(_MultipleMatch): - """ - Optional repetition of zero or more of the given expression. - - Parameters: - - expr - expression that must match zero or more times - - stopOn - (default=C{None}) - expression for a terminating sentinel - (only required if the sentinel would ordinarily match the repetition - expression) - - Example: similar to L{OneOrMore} - """ - def __init__( self, expr, stopOn=None): - super(ZeroOrMore,self).__init__(expr, stopOn=stopOn) - self.mayReturnEmpty = True - - def parseImpl( self, instring, loc, doActions=True ): - try: - return super(ZeroOrMore, self).parseImpl(instring, loc, doActions) - except (ParseException,IndexError): - return loc, [] - - def __str__( self ): - if hasattr(self,"name"): - return self.name - - if self.strRepr is None: - self.strRepr = "[" + _ustr(self.expr) + "]..." - - return self.strRepr - -class _NullToken(object): - def __bool__(self): - return False - __nonzero__ = __bool__ - def __str__(self): - return "" - -_optionalNotMatched = _NullToken() -class Optional(ParseElementEnhance): - """ - Optional matching of the given expression. - - Parameters: - - expr - expression that must match zero or more times - - default (optional) - value to be returned if the optional expression is not found. - - Example:: - # US postal code can be a 5-digit zip, plus optional 4-digit qualifier - zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4))) - zip.runTests(''' - # traditional ZIP code - 12345 - - # ZIP+4 form - 12101-0001 - - # invalid ZIP - 98765- - ''') - prints:: - # traditional ZIP code - 12345 - ['12345'] - - # ZIP+4 form - 12101-0001 - ['12101-0001'] - - # invalid ZIP - 98765- - ^ - FAIL: Expected end of text (at char 5), (line:1, col:6) - """ - def __init__( self, expr, default=_optionalNotMatched ): - super(Optional,self).__init__( expr, savelist=False ) - self.saveAsList = self.expr.saveAsList - self.defaultValue = default - self.mayReturnEmpty = True - - def parseImpl( self, instring, loc, doActions=True ): - try: - loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False ) - except (ParseException,IndexError): - if self.defaultValue is not _optionalNotMatched: - if self.expr.resultsName: - tokens = ParseResults([ self.defaultValue ]) - tokens[self.expr.resultsName] = self.defaultValue - else: - tokens = [ self.defaultValue ] - else: - tokens = [] - return loc, tokens - - def __str__( self ): - if hasattr(self,"name"): - return self.name - - if self.strRepr is None: - self.strRepr = "[" + _ustr(self.expr) + "]" - - return self.strRepr - -class SkipTo(ParseElementEnhance): - """ - Token for skipping over all undefined text until the matched expression is found. - - Parameters: - - expr - target expression marking the end of the data to be skipped - - include - (default=C{False}) if True, the target expression is also parsed - (the skipped text and target expression are returned as a 2-element list). - - ignore - (default=C{None}) used to define grammars (typically quoted strings and - comments) that might contain false matches to the target expression - - failOn - (default=C{None}) define expressions that are not allowed to be - included in the skipped test; if found before the target expression is found, - the SkipTo is not a match - - Example:: - report = ''' - Outstanding Issues Report - 1 Jan 2000 - - # | Severity | Description | Days Open - -----+----------+-------------------------------------------+----------- - 101 | Critical | Intermittent system crash | 6 - 94 | Cosmetic | Spelling error on Login ('log|n') | 14 - 79 | Minor | System slow when running too many reports | 47 - ''' - integer = Word(nums) - SEP = Suppress('|') - # use SkipTo to simply match everything up until the next SEP - # - ignore quoted strings, so that a '|' character inside a quoted string does not match - # - parse action will call token.strip() for each matched token, i.e., the description body - string_data = SkipTo(SEP, ignore=quotedString) - string_data.setParseAction(tokenMap(str.strip)) - ticket_expr = (integer("issue_num") + SEP - + string_data("sev") + SEP - + string_data("desc") + SEP - + integer("days_open")) - - for tkt in ticket_expr.searchString(report): - print tkt.dump() - prints:: - ['101', 'Critical', 'Intermittent system crash', '6'] - - days_open: 6 - - desc: Intermittent system crash - - issue_num: 101 - - sev: Critical - ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14'] - - days_open: 14 - - desc: Spelling error on Login ('log|n') - - issue_num: 94 - - sev: Cosmetic - ['79', 'Minor', 'System slow when running too many reports', '47'] - - days_open: 47 - - desc: System slow when running too many reports - - issue_num: 79 - - sev: Minor - """ - def __init__( self, other, include=False, ignore=None, failOn=None ): - super( SkipTo, self ).__init__( other ) - self.ignoreExpr = ignore - self.mayReturnEmpty = True - self.mayIndexError = False - self.includeMatch = include - self.asList = False - if isinstance(failOn, basestring): - self.failOn = ParserElement._literalStringClass(failOn) - else: - self.failOn = failOn - self.errmsg = "No match found for "+_ustr(self.expr) - - def parseImpl( self, instring, loc, doActions=True ): - startloc = loc - instrlen = len(instring) - expr = self.expr - expr_parse = self.expr._parse - self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is not None else None - self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None - - tmploc = loc - while tmploc <= instrlen: - if self_failOn_canParseNext is not None: - # break if failOn expression matches - if self_failOn_canParseNext(instring, tmploc): - break - - if self_ignoreExpr_tryParse is not None: - # advance past ignore expressions - while 1: - try: - tmploc = self_ignoreExpr_tryParse(instring, tmploc) - except ParseBaseException: - break - - try: - expr_parse(instring, tmploc, doActions=False, callPreParse=False) - except (ParseException, IndexError): - # no match, advance loc in string - tmploc += 1 - else: - # matched skipto expr, done - break - - else: - # ran off the end of the input string without matching skipto expr, fail - raise ParseException(instring, loc, self.errmsg, self) - - # build up return values - loc = tmploc - skiptext = instring[startloc:loc] - skipresult = ParseResults(skiptext) - - if self.includeMatch: - loc, mat = expr_parse(instring,loc,doActions,callPreParse=False) - skipresult += mat - - return loc, skipresult - -class Forward(ParseElementEnhance): - """ - Forward declaration of an expression to be defined later - - used for recursive grammars, such as algebraic infix notation. - When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator. - - Note: take care when assigning to C{Forward} not to overlook precedence of operators. - Specifically, '|' has a lower precedence than '<<', so that:: - fwdExpr << a | b | c - will actually be evaluated as:: - (fwdExpr << a) | b | c - thereby leaving b and c out as parseable alternatives. It is recommended that you - explicitly group the values inserted into the C{Forward}:: - fwdExpr << (a | b | c) - Converting to use the '<<=' operator instead will avoid this problem. - - See L{ParseResults.pprint} for an example of a recursive parser created using - C{Forward}. - """ - def __init__( self, other=None ): - super(Forward,self).__init__( other, savelist=False ) - - def __lshift__( self, other ): - if isinstance( other, basestring ): - other = ParserElement._literalStringClass(other) - self.expr = other - self.strRepr = None - self.mayIndexError = self.expr.mayIndexError - self.mayReturnEmpty = self.expr.mayReturnEmpty - self.setWhitespaceChars( self.expr.whiteChars ) - self.skipWhitespace = self.expr.skipWhitespace - self.saveAsList = self.expr.saveAsList - self.ignoreExprs.extend(self.expr.ignoreExprs) - return self - - def __ilshift__(self, other): - return self << other - - def leaveWhitespace( self ): - self.skipWhitespace = False - return self - - def streamline( self ): - if not self.streamlined: - self.streamlined = True - if self.expr is not None: - self.expr.streamline() - return self - - def validate( self, validateTrace=[] ): - if self not in validateTrace: - tmp = validateTrace[:]+[self] - if self.expr is not None: - self.expr.validate(tmp) - self.checkRecursion([]) - - def __str__( self ): - if hasattr(self,"name"): - return self.name - return self.__class__.__name__ + ": ..." - - # stubbed out for now - creates awful memory and perf issues - self._revertClass = self.__class__ - self.__class__ = _ForwardNoRecurse - try: - if self.expr is not None: - retString = _ustr(self.expr) - else: - retString = "None" - finally: - self.__class__ = self._revertClass - return self.__class__.__name__ + ": " + retString - - def copy(self): - if self.expr is not None: - return super(Forward,self).copy() - else: - ret = Forward() - ret <<= self - return ret - -class _ForwardNoRecurse(Forward): - def __str__( self ): - return "..." - -class TokenConverter(ParseElementEnhance): - """ - Abstract subclass of C{ParseExpression}, for converting parsed results. - """ - def __init__( self, expr, savelist=False ): - super(TokenConverter,self).__init__( expr )#, savelist ) - self.saveAsList = False - -class Combine(TokenConverter): - """ - Converter to concatenate all matching tokens to a single string. - By default, the matching patterns must also be contiguous in the input string; - this can be disabled by specifying C{'adjacent=False'} in the constructor. - - Example:: - real = Word(nums) + '.' + Word(nums) - print(real.parseString('3.1416')) # -> ['3', '.', '1416'] - # will also erroneously match the following - print(real.parseString('3. 1416')) # -> ['3', '.', '1416'] - - real = Combine(Word(nums) + '.' + Word(nums)) - print(real.parseString('3.1416')) # -> ['3.1416'] - # no match when there are internal spaces - print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...) - """ - def __init__( self, expr, joinString="", adjacent=True ): - super(Combine,self).__init__( expr ) - # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself - if adjacent: - self.leaveWhitespace() - self.adjacent = adjacent - self.skipWhitespace = True - self.joinString = joinString - self.callPreparse = True - - def ignore( self, other ): - if self.adjacent: - ParserElement.ignore(self, other) - else: - super( Combine, self).ignore( other ) - return self - - def postParse( self, instring, loc, tokenlist ): - retToks = tokenlist.copy() - del retToks[:] - retToks += ParseResults([ "".join(tokenlist._asStringList(self.joinString)) ], modal=self.modalResults) - - if self.resultsName and retToks.haskeys(): - return [ retToks ] - else: - return retToks - -class Group(TokenConverter): - """ - Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions. - - Example:: - ident = Word(alphas) - num = Word(nums) - term = ident | num - func = ident + Optional(delimitedList(term)) - print(func.parseString("fn a,b,100")) # -> ['fn', 'a', 'b', '100'] - - func = ident + Group(Optional(delimitedList(term))) - print(func.parseString("fn a,b,100")) # -> ['fn', ['a', 'b', '100']] - """ - def __init__( self, expr ): - super(Group,self).__init__( expr ) - self.saveAsList = True - - def postParse( self, instring, loc, tokenlist ): - return [ tokenlist ] - -class Dict(TokenConverter): - """ - Converter to return a repetitive expression as a list, but also as a dictionary. - Each element can also be referenced using the first token in the expression as its key. - Useful for tabular report scraping when the first column can be used as a item key. - - Example:: - data_word = Word(alphas) - label = data_word + FollowedBy(':') - attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join)) - - text = "shape: SQUARE posn: upper left color: light blue texture: burlap" - attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) - - # print attributes as plain groups - print(OneOrMore(attr_expr).parseString(text).dump()) - - # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names - result = Dict(OneOrMore(Group(attr_expr))).parseString(text) - print(result.dump()) - - # access named fields as dict entries, or output as dict - print(result['shape']) - print(result.asDict()) - prints:: - ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap'] - - [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']] - - color: light blue - - posn: upper left - - shape: SQUARE - - texture: burlap - SQUARE - {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'} - See more examples at L{ParseResults} of accessing fields by results name. - """ - def __init__( self, expr ): - super(Dict,self).__init__( expr ) - self.saveAsList = True - - def postParse( self, instring, loc, tokenlist ): - for i,tok in enumerate(tokenlist): - if len(tok) == 0: - continue - ikey = tok[0] - if isinstance(ikey,int): - ikey = _ustr(tok[0]).strip() - if len(tok)==1: - tokenlist[ikey] = _ParseResultsWithOffset("",i) - elif len(tok)==2 and not isinstance(tok[1],ParseResults): - tokenlist[ikey] = _ParseResultsWithOffset(tok[1],i) - else: - dictvalue = tok.copy() #ParseResults(i) - del dictvalue[0] - if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.haskeys()): - tokenlist[ikey] = _ParseResultsWithOffset(dictvalue,i) - else: - tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0],i) - - if self.resultsName: - return [ tokenlist ] - else: - return tokenlist - - -class Suppress(TokenConverter): - """ - Converter for ignoring the results of a parsed expression. - - Example:: - source = "a, b, c,d" - wd = Word(alphas) - wd_list1 = wd + ZeroOrMore(',' + wd) - print(wd_list1.parseString(source)) - - # often, delimiters that are useful during parsing are just in the - # way afterward - use Suppress to keep them out of the parsed output - wd_list2 = wd + ZeroOrMore(Suppress(',') + wd) - print(wd_list2.parseString(source)) - prints:: - ['a', ',', 'b', ',', 'c', ',', 'd'] - ['a', 'b', 'c', 'd'] - (See also L{delimitedList}.) - """ - def postParse( self, instring, loc, tokenlist ): - return [] - - def suppress( self ): - return self - - -class OnlyOnce(object): - """ - Wrapper for parse actions, to ensure they are only called once. - """ - def __init__(self, methodCall): - self.callable = _trim_arity(methodCall) - self.called = False - def __call__(self,s,l,t): - if not self.called: - results = self.callable(s,l,t) - self.called = True - return results - raise ParseException(s,l,"") - def reset(self): - self.called = False - -def traceParseAction(f): - """ - Decorator for debugging parse actions. - - When the parse action is called, this decorator will print C{">> entering I{method-name}(line:I{current_source_line}, I{parse_location}, I{matched_tokens})".} - When the parse action completes, the decorator will print C{"<<"} followed by the returned value, or any exception that the parse action raised. - - Example:: - wd = Word(alphas) - - @traceParseAction - def remove_duplicate_chars(tokens): - return ''.join(sorted(set(''.join(tokens)))) - - wds = OneOrMore(wd).setParseAction(remove_duplicate_chars) - print(wds.parseString("slkdjs sld sldd sdlf sdljf")) - prints:: - >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {})) - <3: - thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc - sys.stderr.write( ">>entering %s(line: '%s', %d, %r)\n" % (thisFunc,line(l,s),l,t) ) - try: - ret = f(*paArgs) - except Exception as exc: - sys.stderr.write( "< ['aa', 'bb', 'cc'] - delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE'] - """ - dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..." - if combine: - return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName) - else: - return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName) - -def countedArray( expr, intExpr=None ): - """ - Helper to define a counted list of expressions. - This helper defines a pattern of the form:: - integer expr expr expr... - where the leading integer tells how many expr expressions follow. - The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed. - - If C{intExpr} is specified, it should be a pyparsing expression that produces an integer value. - - Example:: - countedArray(Word(alphas)).parseString('2 ab cd ef') # -> ['ab', 'cd'] - - # in this parser, the leading integer value is given in binary, - # '10' indicating that 2 values are in the array - binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2)) - countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef') # -> ['ab', 'cd'] - """ - arrayExpr = Forward() - def countFieldParseAction(s,l,t): - n = t[0] - arrayExpr << (n and Group(And([expr]*n)) or Group(empty)) - return [] - if intExpr is None: - intExpr = Word(nums).setParseAction(lambda t:int(t[0])) - else: - intExpr = intExpr.copy() - intExpr.setName("arrayLen") - intExpr.addParseAction(countFieldParseAction, callDuringTry=True) - return ( intExpr + arrayExpr ).setName('(len) ' + _ustr(expr) + '...') - -def _flatten(L): - ret = [] - for i in L: - if isinstance(i,list): - ret.extend(_flatten(i)) - else: - ret.append(i) - return ret - -def matchPreviousLiteral(expr): - """ - Helper to define an expression that is indirectly defined from - the tokens matched in a previous expression, that is, it looks - for a 'repeat' of a previous expression. For example:: - first = Word(nums) - second = matchPreviousLiteral(first) - matchExpr = first + ":" + second - will match C{"1:1"}, but not C{"1:2"}. Because this matches a - previous literal, will also match the leading C{"1:1"} in C{"1:10"}. - If this is not desired, use C{matchPreviousExpr}. - Do I{not} use with packrat parsing enabled. - """ - rep = Forward() - def copyTokenToRepeater(s,l,t): - if t: - if len(t) == 1: - rep << t[0] - else: - # flatten t tokens - tflat = _flatten(t.asList()) - rep << And(Literal(tt) for tt in tflat) - else: - rep << Empty() - expr.addParseAction(copyTokenToRepeater, callDuringTry=True) - rep.setName('(prev) ' + _ustr(expr)) - return rep - -def matchPreviousExpr(expr): - """ - Helper to define an expression that is indirectly defined from - the tokens matched in a previous expression, that is, it looks - for a 'repeat' of a previous expression. For example:: - first = Word(nums) - second = matchPreviousExpr(first) - matchExpr = first + ":" + second - will match C{"1:1"}, but not C{"1:2"}. Because this matches by - expressions, will I{not} match the leading C{"1:1"} in C{"1:10"}; - the expressions are evaluated first, and then compared, so - C{"1"} is compared with C{"10"}. - Do I{not} use with packrat parsing enabled. - """ - rep = Forward() - e2 = expr.copy() - rep <<= e2 - def copyTokenToRepeater(s,l,t): - matchTokens = _flatten(t.asList()) - def mustMatchTheseTokens(s,l,t): - theseTokens = _flatten(t.asList()) - if theseTokens != matchTokens: - raise ParseException("",0,"") - rep.setParseAction( mustMatchTheseTokens, callDuringTry=True ) - expr.addParseAction(copyTokenToRepeater, callDuringTry=True) - rep.setName('(prev) ' + _ustr(expr)) - return rep - -def _escapeRegexRangeChars(s): - #~ escape these chars: ^-] - for c in r"\^-]": - s = s.replace(c,_bslash+c) - s = s.replace("\n",r"\n") - s = s.replace("\t",r"\t") - return _ustr(s) - -def oneOf( strs, caseless=False, useRegex=True ): - """ - Helper to quickly define a set of alternative Literals, and makes sure to do - longest-first testing when there is a conflict, regardless of the input order, - but returns a C{L{MatchFirst}} for best performance. - - Parameters: - - strs - a string of space-delimited literals, or a collection of string literals - - caseless - (default=C{False}) - treat all literals as caseless - - useRegex - (default=C{True}) - as an optimization, will generate a Regex - object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or - if creating a C{Regex} raises an exception) - - Example:: - comp_oper = oneOf("< = > <= >= !=") - var = Word(alphas) - number = Word(nums) - term = var | number - comparison_expr = term + comp_oper + term - print(comparison_expr.searchString("B = 12 AA=23 B<=AA AA>12")) - prints:: - [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']] - """ - if caseless: - isequal = ( lambda a,b: a.upper() == b.upper() ) - masks = ( lambda a,b: b.upper().startswith(a.upper()) ) - parseElementClass = CaselessLiteral - else: - isequal = ( lambda a,b: a == b ) - masks = ( lambda a,b: b.startswith(a) ) - parseElementClass = Literal - - symbols = [] - if isinstance(strs,basestring): - symbols = strs.split() - elif isinstance(strs, Iterable): - symbols = list(strs) - else: - warnings.warn("Invalid argument to oneOf, expected string or iterable", - SyntaxWarning, stacklevel=2) - if not symbols: - return NoMatch() - - i = 0 - while i < len(symbols)-1: - cur = symbols[i] - for j,other in enumerate(symbols[i+1:]): - if ( isequal(other, cur) ): - del symbols[i+j+1] - break - elif ( masks(cur, other) ): - del symbols[i+j+1] - symbols.insert(i,other) - cur = other - break - else: - i += 1 - - if not caseless and useRegex: - #~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] )) - try: - if len(symbols)==len("".join(symbols)): - return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols)) - else: - return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols)) - except Exception: - warnings.warn("Exception creating Regex for oneOf, building MatchFirst", - SyntaxWarning, stacklevel=2) - - - # last resort, just use MatchFirst - return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols)) - -def dictOf( key, value ): - """ - Helper to easily and clearly define a dictionary by specifying the respective patterns - for the key and value. Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens - in the proper order. The key pattern can include delimiting markers or punctuation, - as long as they are suppressed, thereby leaving the significant key text. The value - pattern can include named results, so that the C{Dict} results can include named token - fields. - - Example:: - text = "shape: SQUARE posn: upper left color: light blue texture: burlap" - attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) - print(OneOrMore(attr_expr).parseString(text).dump()) - - attr_label = label - attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join) - - # similar to Dict, but simpler call format - result = dictOf(attr_label, attr_value).parseString(text) - print(result.dump()) - print(result['shape']) - print(result.shape) # object attribute access works too - print(result.asDict()) - prints:: - [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']] - - color: light blue - - posn: upper left - - shape: SQUARE - - texture: burlap - SQUARE - SQUARE - {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'} - """ - return Dict( ZeroOrMore( Group ( key + value ) ) ) - -def originalTextFor(expr, asString=True): - """ - Helper to return the original, untokenized text for a given expression. Useful to - restore the parsed fields of an HTML start tag into the raw tag text itself, or to - revert separate tokens with intervening whitespace back to the original matching - input text. By default, returns astring containing the original parsed text. - - If the optional C{asString} argument is passed as C{False}, then the return value is a - C{L{ParseResults}} containing any results names that were originally matched, and a - single token containing the original matched text from the input string. So if - the expression passed to C{L{originalTextFor}} contains expressions with defined - results names, you must set C{asString} to C{False} if you want to preserve those - results name values. - - Example:: - src = "this is test bold text normal text " - for tag in ("b","i"): - opener,closer = makeHTMLTags(tag) - patt = originalTextFor(opener + SkipTo(closer) + closer) - print(patt.searchString(src)[0]) - prints:: - [' bold text '] - ['text'] - """ - locMarker = Empty().setParseAction(lambda s,loc,t: loc) - endlocMarker = locMarker.copy() - endlocMarker.callPreparse = False - matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end") - if asString: - extractText = lambda s,l,t: s[t._original_start:t._original_end] - else: - def extractText(s,l,t): - t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]] - matchExpr.setParseAction(extractText) - matchExpr.ignoreExprs = expr.ignoreExprs - return matchExpr - -def ungroup(expr): - """ - Helper to undo pyparsing's default grouping of And expressions, even - if all but one are non-empty. - """ - return TokenConverter(expr).setParseAction(lambda t:t[0]) - -def locatedExpr(expr): - """ - Helper to decorate a returned token with its starting and ending locations in the input string. - This helper adds the following results names: - - locn_start = location where matched expression begins - - locn_end = location where matched expression ends - - value = the actual parsed results - - Be careful if the input text contains C{} characters, you may want to call - C{L{ParserElement.parseWithTabs}} - - Example:: - wd = Word(alphas) - for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"): - print(match) - prints:: - [[0, 'ljsdf', 5]] - [[8, 'lksdjjf', 15]] - [[18, 'lkkjj', 23]] - """ - locator = Empty().setParseAction(lambda s,l,t: l) - return Group(locator("locn_start") + expr("value") + locator.copy().leaveWhitespace()("locn_end")) - - -# convenience constants for positional expressions -empty = Empty().setName("empty") -lineStart = LineStart().setName("lineStart") -lineEnd = LineEnd().setName("lineEnd") -stringStart = StringStart().setName("stringStart") -stringEnd = StringEnd().setName("stringEnd") - -_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1]) -_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lstrip(r'\0x'),16))) -_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8))) -_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r'\]', exact=1) -_charRange = Group(_singleChar + Suppress("-") + _singleChar) -_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]" - -def srange(s): - r""" - Helper to easily define string ranges for use in Word construction. Borrows - syntax from regexp '[]' string range definitions:: - srange("[0-9]") -> "0123456789" - srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz" - srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_" - The input string must be enclosed in []'s, and the returned string is the expanded - character set joined into a single string. - The values enclosed in the []'s may be: - - a single character - - an escaped character with a leading backslash (such as C{\-} or C{\]}) - - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character) - (C{\0x##} is also supported for backwards compatibility) - - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character) - - a range of any of the above, separated by a dash (C{'a-z'}, etc.) - - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.) - """ - _expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1)) - try: - return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body) - except Exception: - return "" - -def matchOnlyAtCol(n): - """ - Helper method for defining parse actions that require matching at a specific - column in the input text. - """ - def verifyCol(strg,locn,toks): - if col(locn,strg) != n: - raise ParseException(strg,locn,"matched token not at column %d" % n) - return verifyCol - -def replaceWith(replStr): - """ - Helper method for common parse actions that simply return a literal value. Especially - useful when used with C{L{transformString}()}. - - Example:: - num = Word(nums).setParseAction(lambda toks: int(toks[0])) - na = oneOf("N/A NA").setParseAction(replaceWith(math.nan)) - term = na | num - - OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234] - """ - return lambda s,l,t: [replStr] - -def removeQuotes(s,l,t): - """ - Helper parse action for removing quotation marks from parsed quoted strings. - - Example:: - # by default, quotation marks are included in parsed results - quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"] - - # use removeQuotes to strip quotation marks from parsed results - quotedString.setParseAction(removeQuotes) - quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"] - """ - return t[0][1:-1] - -def tokenMap(func, *args): - """ - Helper to define a parse action by mapping a function to all elements of a ParseResults list.If any additional - args are passed, they are forwarded to the given function as additional arguments after - the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which will convert the - parsed data to an integer using base 16. - - Example (compare the last to example in L{ParserElement.transformString}:: - hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16)) - hex_ints.runTests(''' - 00 11 22 aa FF 0a 0d 1a - ''') - - upperword = Word(alphas).setParseAction(tokenMap(str.upper)) - OneOrMore(upperword).runTests(''' - my kingdom for a horse - ''') - - wd = Word(alphas).setParseAction(tokenMap(str.title)) - OneOrMore(wd).setParseAction(' '.join).runTests(''' - now is the winter of our discontent made glorious summer by this sun of york - ''') - prints:: - 00 11 22 aa FF 0a 0d 1a - [0, 17, 34, 170, 255, 10, 13, 26] - - my kingdom for a horse - ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE'] - - now is the winter of our discontent made glorious summer by this sun of york - ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York'] - """ - def pa(s,l,t): - return [func(tokn, *args) for tokn in t] - - try: - func_name = getattr(func, '__name__', - getattr(func, '__class__').__name__) - except Exception: - func_name = str(func) - pa.__name__ = func_name - - return pa - -upcaseTokens = tokenMap(lambda t: _ustr(t).upper()) -"""(Deprecated) Helper parse action to convert tokens to upper case. Deprecated in favor of L{pyparsing_common.upcaseTokens}""" - -downcaseTokens = tokenMap(lambda t: _ustr(t).lower()) -"""(Deprecated) Helper parse action to convert tokens to lower case. Deprecated in favor of L{pyparsing_common.downcaseTokens}""" - -def _makeTags(tagStr, xml): - """Internal helper to construct opening and closing tag expressions, given a tag name""" - if isinstance(tagStr,basestring): - resname = tagStr - tagStr = Keyword(tagStr, caseless=not xml) - else: - resname = tagStr.name - - tagAttrName = Word(alphas,alphanums+"_-:") - if (xml): - tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes ) - openTag = Suppress("<") + tagStr("tag") + \ - Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \ - Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">") - else: - printablesLessRAbrack = "".join(c for c in printables if c not in ">") - tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack) - openTag = Suppress("<") + tagStr("tag") + \ - Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \ - Optional( Suppress("=") + tagAttrValue ) ))) + \ - Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">") - closeTag = Combine(_L("") - - openTag = openTag.setResultsName("start"+"".join(resname.replace(":"," ").title().split())).setName("<%s>" % resname) - closeTag = closeTag.setResultsName("end"+"".join(resname.replace(":"," ").title().split())).setName("" % resname) - openTag.tag = resname - closeTag.tag = resname - return openTag, closeTag - -def makeHTMLTags(tagStr): - """ - Helper to construct opening and closing tag expressions for HTML, given a tag name. Matches - tags in either upper or lower case, attributes with namespaces and with quoted or unquoted values. - - Example:: - text = 'More info at the pyparsing wiki page' - # makeHTMLTags returns pyparsing expressions for the opening and closing tags as a 2-tuple - a,a_end = makeHTMLTags("A") - link_expr = a + SkipTo(a_end)("link_text") + a_end - - for link in link_expr.searchString(text): - # attributes in the tag (like "href" shown here) are also accessible as named results - print(link.link_text, '->', link.href) - prints:: - pyparsing -> http://pyparsing.wikispaces.com - """ - return _makeTags( tagStr, False ) - -def makeXMLTags(tagStr): - """ - Helper to construct opening and closing tag expressions for XML, given a tag name. Matches - tags only in the given upper/lower case. - - Example: similar to L{makeHTMLTags} - """ - return _makeTags( tagStr, True ) - -def withAttribute(*args,**attrDict): - """ - Helper to create a validating parse action to be used with start tags created - with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag - with a required attribute value, to avoid false matches on common tags such as - C{} or C{
}. - - Call C{withAttribute} with a series of attribute names and values. Specify the list - of filter attributes names and values as: - - keyword arguments, as in C{(align="right")}, or - - as an explicit dict with C{**} operator, when an attribute name is also a Python - reserved word, as in C{**{"class":"Customer", "align":"right"}} - - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") ) - For attribute names with a namespace prefix, you must use the second form. Attribute - names are matched insensitive to upper/lower case. - - If just testing for C{class} (with or without a namespace), use C{L{withClass}}. - - To verify that the attribute exists, but without specifying a value, pass - C{withAttribute.ANY_VALUE} as the value. - - Example:: - html = ''' -
- Some text -
1 4 0 1 0
-
1,3 2,3 1,1
-
this has no type
-
- - ''' - div,div_end = makeHTMLTags("div") - - # only match div tag having a type attribute with value "grid" - div_grid = div().setParseAction(withAttribute(type="grid")) - grid_expr = div_grid + SkipTo(div | div_end)("body") - for grid_header in grid_expr.searchString(html): - print(grid_header.body) - - # construct a match with any div tag having a type attribute, regardless of the value - div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE)) - div_expr = div_any_type + SkipTo(div | div_end)("body") - for div_header in div_expr.searchString(html): - print(div_header.body) - prints:: - 1 4 0 1 0 - - 1 4 0 1 0 - 1,3 2,3 1,1 - """ - if args: - attrs = args[:] - else: - attrs = attrDict.items() - attrs = [(k,v) for k,v in attrs] - def pa(s,l,tokens): - for attrName,attrValue in attrs: - if attrName not in tokens: - raise ParseException(s,l,"no matching attribute " + attrName) - if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue: - raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" % - (attrName, tokens[attrName], attrValue)) - return pa -withAttribute.ANY_VALUE = object() - -def withClass(classname, namespace=''): - """ - Simplified version of C{L{withAttribute}} when matching on a div class - made - difficult because C{class} is a reserved word in Python. - - Example:: - html = ''' -
- Some text -
1 4 0 1 0
-
1,3 2,3 1,1
-
this <div> has no class
-
- - ''' - div,div_end = makeHTMLTags("div") - div_grid = div().setParseAction(withClass("grid")) - - grid_expr = div_grid + SkipTo(div | div_end)("body") - for grid_header in grid_expr.searchString(html): - print(grid_header.body) - - div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE)) - div_expr = div_any_type + SkipTo(div | div_end)("body") - for div_header in div_expr.searchString(html): - print(div_header.body) - prints:: - 1 4 0 1 0 - - 1 4 0 1 0 - 1,3 2,3 1,1 - """ - classattr = "%s:class" % namespace if namespace else "class" - return withAttribute(**{classattr : classname}) - -opAssoc = _Constants() -opAssoc.LEFT = object() -opAssoc.RIGHT = object() - -def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ): - """ - Helper method for constructing grammars of expressions made up of - operators working in a precedence hierarchy. Operators may be unary or - binary, left- or right-associative. Parse actions can also be attached - to operator expressions. The generated parser will also recognize the use - of parentheses to override operator precedences (see example below). - - Note: if you define a deep operator list, you may see performance issues - when using infixNotation. See L{ParserElement.enablePackrat} for a - mechanism to potentially improve your parser performance. - - Parameters: - - baseExpr - expression representing the most basic element for the nested - - opList - list of tuples, one for each operator precedence level in the - expression grammar; each tuple is of the form - (opExpr, numTerms, rightLeftAssoc, parseAction), where: - - opExpr is the pyparsing expression for the operator; - may also be a string, which will be converted to a Literal; - if numTerms is 3, opExpr is a tuple of two expressions, for the - two operators separating the 3 terms - - numTerms is the number of terms for this operator (must - be 1, 2, or 3) - - rightLeftAssoc is the indicator whether the operator is - right or left associative, using the pyparsing-defined - constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}. - - parseAction is the parse action to be associated with - expressions matching this operator expression (the - parse action tuple member may be omitted); if the parse action - is passed a tuple or list of functions, this is equivalent to - calling C{setParseAction(*fn)} (L{ParserElement.setParseAction}) - - lpar - expression for matching left-parentheses (default=C{Suppress('(')}) - - rpar - expression for matching right-parentheses (default=C{Suppress(')')}) - - Example:: - # simple example of four-function arithmetic with ints and variable names - integer = pyparsing_common.signed_integer - varname = pyparsing_common.identifier - - arith_expr = infixNotation(integer | varname, - [ - ('-', 1, opAssoc.RIGHT), - (oneOf('* /'), 2, opAssoc.LEFT), - (oneOf('+ -'), 2, opAssoc.LEFT), - ]) - - arith_expr.runTests(''' - 5+3*6 - (5+3)*6 - -2--11 - ''', fullDump=False) - prints:: - 5+3*6 - [[5, '+', [3, '*', 6]]] - - (5+3)*6 - [[[5, '+', 3], '*', 6]] - - -2--11 - [[['-', 2], '-', ['-', 11]]] - """ - ret = Forward() - lastExpr = baseExpr | ( lpar + ret + rpar ) - for i,operDef in enumerate(opList): - opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4] - termName = "%s term" % opExpr if arity < 3 else "%s%s term" % opExpr - if arity == 3: - if opExpr is None or len(opExpr) != 2: - raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions") - opExpr1, opExpr2 = opExpr - thisExpr = Forward().setName(termName) - if rightLeftAssoc == opAssoc.LEFT: - if arity == 1: - matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) ) - elif arity == 2: - if opExpr is not None: - matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) ) - else: - matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) ) - elif arity == 3: - matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \ - Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr ) - else: - raise ValueError("operator must be unary (1), binary (2), or ternary (3)") - elif rightLeftAssoc == opAssoc.RIGHT: - if arity == 1: - # try to avoid LR with this extra test - if not isinstance(opExpr, Optional): - opExpr = Optional(opExpr) - matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr ) - elif arity == 2: - if opExpr is not None: - matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) ) - else: - matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) ) - elif arity == 3: - matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \ - Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr ) - else: - raise ValueError("operator must be unary (1), binary (2), or ternary (3)") - else: - raise ValueError("operator must indicate right or left associativity") - if pa: - if isinstance(pa, (tuple, list)): - matchExpr.setParseAction(*pa) - else: - matchExpr.setParseAction(pa) - thisExpr <<= ( matchExpr.setName(termName) | lastExpr ) - lastExpr = thisExpr - ret <<= lastExpr - return ret - -operatorPrecedence = infixNotation -"""(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release.""" - -dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"').setName("string enclosed in double quotes") -sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("string enclosed in single quotes") -quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"'| - Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("quotedString using single or double quotes") -unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal") - -def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()): - """ - Helper method for defining nested lists enclosed in opening and closing - delimiters ("(" and ")" are the default). - - Parameters: - - opener - opening character for a nested list (default=C{"("}); can also be a pyparsing expression - - closer - closing character for a nested list (default=C{")"}); can also be a pyparsing expression - - content - expression for items within the nested lists (default=C{None}) - - ignoreExpr - expression for ignoring opening and closing delimiters (default=C{quotedString}) - - If an expression is not provided for the content argument, the nested - expression will capture all whitespace-delimited content between delimiters - as a list of separate values. - - Use the C{ignoreExpr} argument to define expressions that may contain - opening or closing characters that should not be treated as opening - or closing characters for nesting, such as quotedString or a comment - expression. Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}. - The default is L{quotedString}, but if no expressions are to be ignored, - then pass C{None} for this argument. - - Example:: - data_type = oneOf("void int short long char float double") - decl_data_type = Combine(data_type + Optional(Word('*'))) - ident = Word(alphas+'_', alphanums+'_') - number = pyparsing_common.number - arg = Group(decl_data_type + ident) - LPAR,RPAR = map(Suppress, "()") - - code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment)) - - c_function = (decl_data_type("type") - + ident("name") - + LPAR + Optional(delimitedList(arg), [])("args") + RPAR - + code_body("body")) - c_function.ignore(cStyleComment) - - source_code = ''' - int is_odd(int x) { - return (x%2); - } - - int dec_to_hex(char hchar) { - if (hchar >= '0' && hchar <= '9') { - return (ord(hchar)-ord('0')); - } else { - return (10+ord(hchar)-ord('A')); - } - } - ''' - for func in c_function.searchString(source_code): - print("%(name)s (%(type)s) args: %(args)s" % func) - - prints:: - is_odd (int) args: [['int', 'x']] - dec_to_hex (int) args: [['char', 'hchar']] - """ - if opener == closer: - raise ValueError("opening and closing strings cannot be the same") - if content is None: - if isinstance(opener,basestring) and isinstance(closer,basestring): - if len(opener) == 1 and len(closer)==1: - if ignoreExpr is not None: - content = (Combine(OneOrMore(~ignoreExpr + - CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1)) - ).setParseAction(lambda t:t[0].strip())) - else: - content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS - ).setParseAction(lambda t:t[0].strip())) - else: - if ignoreExpr is not None: - content = (Combine(OneOrMore(~ignoreExpr + - ~Literal(opener) + ~Literal(closer) + - CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1)) - ).setParseAction(lambda t:t[0].strip())) - else: - content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) + - CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1)) - ).setParseAction(lambda t:t[0].strip())) - else: - raise ValueError("opening and closing arguments must be strings if no content expression is given") - ret = Forward() - if ignoreExpr is not None: - ret <<= Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) ) - else: - ret <<= Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) ) - ret.setName('nested %s%s expression' % (opener,closer)) - return ret - -def indentedBlock(blockStatementExpr, indentStack, indent=True): - """ - Helper method for defining space-delimited indentation blocks, such as - those used to define block statements in Python source code. - - Parameters: - - blockStatementExpr - expression defining syntax of statement that - is repeated within the indented block - - indentStack - list created by caller to manage indentation stack - (multiple statementWithIndentedBlock expressions within a single grammar - should share a common indentStack) - - indent - boolean indicating whether block must be indented beyond the - the current level; set to False for block of left-most statements - (default=C{True}) - - A valid block must contain at least one C{blockStatement}. - - Example:: - data = ''' - def A(z): - A1 - B = 100 - G = A2 - A2 - A3 - B - def BB(a,b,c): - BB1 - def BBA(): - bba1 - bba2 - bba3 - C - D - def spam(x,y): - def eggs(z): - pass - ''' - - - indentStack = [1] - stmt = Forward() - - identifier = Word(alphas, alphanums) - funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":") - func_body = indentedBlock(stmt, indentStack) - funcDef = Group( funcDecl + func_body ) - - rvalue = Forward() - funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")") - rvalue << (funcCall | identifier | Word(nums)) - assignment = Group(identifier + "=" + rvalue) - stmt << ( funcDef | assignment | identifier ) - - module_body = OneOrMore(stmt) - - parseTree = module_body.parseString(data) - parseTree.pprint() - prints:: - [['def', - 'A', - ['(', 'z', ')'], - ':', - [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]], - 'B', - ['def', - 'BB', - ['(', 'a', 'b', 'c', ')'], - ':', - [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]], - 'C', - 'D', - ['def', - 'spam', - ['(', 'x', 'y', ')'], - ':', - [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]] - """ - def checkPeerIndent(s,l,t): - if l >= len(s): return - curCol = col(l,s) - if curCol != indentStack[-1]: - if curCol > indentStack[-1]: - raise ParseFatalException(s,l,"illegal nesting") - raise ParseException(s,l,"not a peer entry") - - def checkSubIndent(s,l,t): - curCol = col(l,s) - if curCol > indentStack[-1]: - indentStack.append( curCol ) - else: - raise ParseException(s,l,"not a subentry") - - def checkUnindent(s,l,t): - if l >= len(s): return - curCol = col(l,s) - if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]): - raise ParseException(s,l,"not an unindent") - indentStack.pop() - - NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress()) - INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT') - PEER = Empty().setParseAction(checkPeerIndent).setName('') - UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT') - if indent: - smExpr = Group( Optional(NL) + - #~ FollowedBy(blockStatementExpr) + - INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT) - else: - smExpr = Group( Optional(NL) + - (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) ) - blockStatementExpr.ignore(_bslash + LineEnd()) - return smExpr.setName('indented block') - -alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]") -punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]") - -anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag')) -_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\'')) -commonHTMLEntity = Regex('&(?P' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity") -def replaceHTMLEntity(t): - """Helper parser action to replace common HTML entities with their special characters""" - return _htmlEntityMap.get(t.entity) - -# it's easy to get these comment structures wrong - they're very common, so may as well make them available -cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment") -"Comment of the form C{/* ... */}" - -htmlComment = Regex(r"").setName("HTML comment") -"Comment of the form C{}" - -restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line") -dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment") -"Comment of the form C{// ... (to end of line)}" - -cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'| dblSlashComment).setName("C++ style comment") -"Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}" - -javaStyleComment = cppStyleComment -"Same as C{L{cppStyleComment}}" - -pythonStyleComment = Regex(r"#.*").setName("Python style comment") -"Comment of the form C{# ... (to end of line)}" - -_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') + - Optional( Word(" \t") + - ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem") -commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList") -"""(Deprecated) Predefined expression of 1 or more printable words or quoted strings, separated by commas. - This expression is deprecated in favor of L{pyparsing_common.comma_separated_list}.""" - -# some other useful expressions - using lower-case class name since we are really using this as a namespace -class pyparsing_common: - """ - Here are some common low-level expressions that may be useful in jump-starting parser development: - - numeric forms (L{integers}, L{reals}, L{scientific notation}) - - common L{programming identifiers} - - network addresses (L{MAC}, L{IPv4}, L{IPv6}) - - ISO8601 L{dates} and L{datetime} - - L{UUID} - - L{comma-separated list} - Parse actions: - - C{L{convertToInteger}} - - C{L{convertToFloat}} - - C{L{convertToDate}} - - C{L{convertToDatetime}} - - C{L{stripHTMLTags}} - - C{L{upcaseTokens}} - - C{L{downcaseTokens}} - - Example:: - pyparsing_common.number.runTests(''' - # any int or real number, returned as the appropriate type - 100 - -100 - +100 - 3.14159 - 6.02e23 - 1e-12 - ''') - - pyparsing_common.fnumber.runTests(''' - # any int or real number, returned as float - 100 - -100 - +100 - 3.14159 - 6.02e23 - 1e-12 - ''') - - pyparsing_common.hex_integer.runTests(''' - # hex numbers - 100 - FF - ''') - - pyparsing_common.fraction.runTests(''' - # fractions - 1/2 - -3/4 - ''') - - pyparsing_common.mixed_integer.runTests(''' - # mixed fractions - 1 - 1/2 - -3/4 - 1-3/4 - ''') - - import uuid - pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID)) - pyparsing_common.uuid.runTests(''' - # uuid - 12345678-1234-5678-1234-567812345678 - ''') - prints:: - # any int or real number, returned as the appropriate type - 100 - [100] - - -100 - [-100] - - +100 - [100] - - 3.14159 - [3.14159] - - 6.02e23 - [6.02e+23] - - 1e-12 - [1e-12] - - # any int or real number, returned as float - 100 - [100.0] - - -100 - [-100.0] - - +100 - [100.0] - - 3.14159 - [3.14159] - - 6.02e23 - [6.02e+23] - - 1e-12 - [1e-12] - - # hex numbers - 100 - [256] - - FF - [255] - - # fractions - 1/2 - [0.5] - - -3/4 - [-0.75] - - # mixed fractions - 1 - [1] - - 1/2 - [0.5] - - -3/4 - [-0.75] - - 1-3/4 - [1.75] - - # uuid - 12345678-1234-5678-1234-567812345678 - [UUID('12345678-1234-5678-1234-567812345678')] - """ - - convertToInteger = tokenMap(int) - """ - Parse action for converting parsed integers to Python int - """ - - convertToFloat = tokenMap(float) - """ - Parse action for converting parsed numbers to Python float - """ - - integer = Word(nums).setName("integer").setParseAction(convertToInteger) - """expression that parses an unsigned integer, returns an int""" - - hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16)) - """expression that parses a hexadecimal integer, returns an int""" - - signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger) - """expression that parses an integer with optional leading sign, returns an int""" - - fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction") - """fractional expression of an integer divided by an integer, returns a float""" - fraction.addParseAction(lambda t: t[0]/t[-1]) - - mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction") - """mixed integer of the form 'integer - fraction', with optional leading integer, returns float""" - mixed_integer.addParseAction(sum) - - real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat) - """expression that parses a floating point number and returns a float""" - - sci_real = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat) - """expression that parses a floating point number with optional scientific notation and returns a float""" - - # streamlining this expression makes the docs nicer-looking - number = (sci_real | real | signed_integer).streamline() - """any numeric expression, returns the corresponding Python type""" - - fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat) - """any int or real number, returned as float""" - - identifier = Word(alphas+'_', alphanums+'_').setName("identifier") - """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')""" - - ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address") - "IPv4 address (C{0.0.0.0 - 255.255.255.255})" - - _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer") - _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address") - _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address") - _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8) - _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address") - ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address") - "IPv6 address (long, short, or mixed form)" - - mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address") - "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)" - - @staticmethod - def convertToDate(fmt="%Y-%m-%d"): - """ - Helper to create a parse action for converting parsed date string to Python datetime.date - - Params - - - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"}) - - Example:: - date_expr = pyparsing_common.iso8601_date.copy() - date_expr.setParseAction(pyparsing_common.convertToDate()) - print(date_expr.parseString("1999-12-31")) - prints:: - [datetime.date(1999, 12, 31)] - """ - def cvt_fn(s,l,t): - try: - return datetime.strptime(t[0], fmt).date() - except ValueError as ve: - raise ParseException(s, l, str(ve)) - return cvt_fn - - @staticmethod - def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"): - """ - Helper to create a parse action for converting parsed datetime string to Python datetime.datetime - - Params - - - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"}) - - Example:: - dt_expr = pyparsing_common.iso8601_datetime.copy() - dt_expr.setParseAction(pyparsing_common.convertToDatetime()) - print(dt_expr.parseString("1999-12-31T23:59:59.999")) - prints:: - [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)] - """ - def cvt_fn(s,l,t): - try: - return datetime.strptime(t[0], fmt) - except ValueError as ve: - raise ParseException(s, l, str(ve)) - return cvt_fn - - iso8601_date = Regex(r'(?P\d{4})(?:-(?P\d\d)(?:-(?P\d\d))?)?').setName("ISO8601 date") - "ISO8601 date (C{yyyy-mm-dd})" - - iso8601_datetime = Regex(r'(?P\d{4})-(?P\d\d)-(?P\d\d)[T ](?P\d\d):(?P\d\d)(:(?P\d\d(\.\d*)?)?)?(?PZ|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime") - "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}" - - uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID") - "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})" - - _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress() - @staticmethod - def stripHTMLTags(s, l, tokens): - """ - Parse action to remove HTML tags from web page HTML source - - Example:: - # strip HTML links from normal text - text = 'More info at the
pyparsing wiki page' - td,td_end = makeHTMLTags("TD") - table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end - - print(table_text.parseString(text).body) # -> 'More info at the pyparsing wiki page' - """ - return pyparsing_common._html_stripper.transformString(tokens[0]) - - _commasepitem = Combine(OneOrMore(~Literal(",") + ~LineEnd() + Word(printables, excludeChars=',') - + Optional( White(" \t") ) ) ).streamline().setName("commaItem") - comma_separated_list = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("comma separated list") - """Predefined expression of 1 or more printable words or quoted strings, separated by commas.""" - - upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper())) - """Parse action to convert tokens to upper case.""" - - downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower())) - """Parse action to convert tokens to lower case.""" - - -if __name__ == "__main__": - - selectToken = CaselessLiteral("select") - fromToken = CaselessLiteral("from") - - ident = Word(alphas, alphanums + "_$") - - columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens) - columnNameList = Group(delimitedList(columnName)).setName("columns") - columnSpec = ('*' | columnNameList) - - tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens) - tableNameList = Group(delimitedList(tableName)).setName("tables") - - simpleSQL = selectToken("command") + columnSpec("columns") + fromToken + tableNameList("tables") - - # demo runTests method, including embedded comments in test string - simpleSQL.runTests(""" - # '*' as column list and dotted table name - select * from SYS.XYZZY - - # caseless match on "SELECT", and casts back to "select" - SELECT * from XYZZY, ABC - - # list of column names, and mixed case SELECT keyword - Select AA,BB,CC from Sys.dual - - # multiple tables - Select A, B, C from Sys.dual, Table2 - - # invalid SELECT keyword - should fail - Xelect A, B, C from Sys.dual - - # incomplete command - should fail - Select - - # invalid column name - should fail - Select ^^^ frox Sys.dual - - """) - - pyparsing_common.number.runTests(""" - 100 - -100 - +100 - 3.14159 - 6.02e23 - 1e-12 - """) - - # any int or real number, returned as float - pyparsing_common.fnumber.runTests(""" - 100 - -100 - +100 - 3.14159 - 6.02e23 - 1e-12 - """) - - pyparsing_common.hex_integer.runTests(""" - 100 - FF - """) - - import uuid - pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID)) - pyparsing_common.uuid.runTests(""" - 12345678-1234-5678-1234-567812345678 - """) diff --git a/libs/common/pkg_resources/_vendor/pyparsing/__init__.py b/libs/common/pkg_resources/_vendor/pyparsing/__init__.py new file mode 100644 index 00000000..7802ff15 --- /dev/null +++ b/libs/common/pkg_resources/_vendor/pyparsing/__init__.py @@ -0,0 +1,331 @@ +# module pyparsing.py +# +# Copyright (c) 2003-2022 Paul T. McGuire +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# + +__doc__ = """ +pyparsing module - Classes and methods to define and execute parsing grammars +============================================================================= + +The pyparsing module is an alternative approach to creating and +executing simple grammars, vs. the traditional lex/yacc approach, or the +use of regular expressions. With pyparsing, you don't need to learn +a new syntax for defining grammars or matching expressions - the parsing +module provides a library of classes that you use to construct the +grammar directly in Python. + +Here is a program to parse "Hello, World!" (or any greeting of the form +``", !"``), built up using :class:`Word`, +:class:`Literal`, and :class:`And` elements +(the :meth:`'+'` operators create :class:`And` expressions, +and the strings are auto-converted to :class:`Literal` expressions):: + + from pyparsing import Word, alphas + + # define grammar of a greeting + greet = Word(alphas) + "," + Word(alphas) + "!" + + hello = "Hello, World!" + print(hello, "->", greet.parse_string(hello)) + +The program outputs the following:: + + Hello, World! -> ['Hello', ',', 'World', '!'] + +The Python representation of the grammar is quite readable, owing to the +self-explanatory class names, and the use of :class:`'+'`, +:class:`'|'`, :class:`'^'` and :class:`'&'` operators. + +The :class:`ParseResults` object returned from +:class:`ParserElement.parseString` can be +accessed as a nested list, a dictionary, or an object with named +attributes. + +The pyparsing module handles some of the problems that are typically +vexing when writing text parsers: + + - extra or missing whitespace (the above program will also handle + "Hello,World!", "Hello , World !", etc.) + - quoted strings + - embedded comments + + +Getting Started - +----------------- +Visit the classes :class:`ParserElement` and :class:`ParseResults` to +see the base classes that most other pyparsing +classes inherit from. Use the docstrings for examples of how to: + + - construct literal match expressions from :class:`Literal` and + :class:`CaselessLiteral` classes + - construct character word-group expressions using the :class:`Word` + class + - see how to create repetitive expressions using :class:`ZeroOrMore` + and :class:`OneOrMore` classes + - use :class:`'+'`, :class:`'|'`, :class:`'^'`, + and :class:`'&'` operators to combine simple expressions into + more complex ones + - associate names with your parsed results using + :class:`ParserElement.setResultsName` + - access the parsed data, which is returned as a :class:`ParseResults` + object + - find some helpful expression short-cuts like :class:`delimitedList` + and :class:`oneOf` + - find more useful common expressions in the :class:`pyparsing_common` + namespace class +""" +from typing import NamedTuple + + +class version_info(NamedTuple): + major: int + minor: int + micro: int + releaselevel: str + serial: int + + @property + def __version__(self): + return ( + "{}.{}.{}".format(self.major, self.minor, self.micro) + + ( + "{}{}{}".format( + "r" if self.releaselevel[0] == "c" else "", + self.releaselevel[0], + self.serial, + ), + "", + )[self.releaselevel == "final"] + ) + + def __str__(self): + return "{} {} / {}".format(__name__, self.__version__, __version_time__) + + def __repr__(self): + return "{}.{}({})".format( + __name__, + type(self).__name__, + ", ".join("{}={!r}".format(*nv) for nv in zip(self._fields, self)), + ) + + +__version_info__ = version_info(3, 0, 9, "final", 0) +__version_time__ = "05 May 2022 07:02 UTC" +__version__ = __version_info__.__version__ +__versionTime__ = __version_time__ +__author__ = "Paul McGuire " + +from .util import * +from .exceptions import * +from .actions import * +from .core import __diag__, __compat__ +from .results import * +from .core import * +from .core import _builtin_exprs as core_builtin_exprs +from .helpers import * +from .helpers import _builtin_exprs as helper_builtin_exprs + +from .unicode import unicode_set, UnicodeRangeList, pyparsing_unicode as unicode +from .testing import pyparsing_test as testing +from .common import ( + pyparsing_common as common, + _builtin_exprs as common_builtin_exprs, +) + +# define backward compat synonyms +if "pyparsing_unicode" not in globals(): + pyparsing_unicode = unicode +if "pyparsing_common" not in globals(): + pyparsing_common = common +if "pyparsing_test" not in globals(): + pyparsing_test = testing + +core_builtin_exprs += common_builtin_exprs + helper_builtin_exprs + + +__all__ = [ + "__version__", + "__version_time__", + "__author__", + "__compat__", + "__diag__", + "And", + "AtLineStart", + "AtStringStart", + "CaselessKeyword", + "CaselessLiteral", + "CharsNotIn", + "Combine", + "Dict", + "Each", + "Empty", + "FollowedBy", + "Forward", + "GoToColumn", + "Group", + "IndentedBlock", + "Keyword", + "LineEnd", + "LineStart", + "Literal", + "Located", + "PrecededBy", + "MatchFirst", + "NoMatch", + "NotAny", + "OneOrMore", + "OnlyOnce", + "OpAssoc", + "Opt", + "Optional", + "Or", + "ParseBaseException", + "ParseElementEnhance", + "ParseException", + "ParseExpression", + "ParseFatalException", + "ParseResults", + "ParseSyntaxException", + "ParserElement", + "PositionToken", + "QuotedString", + "RecursiveGrammarException", + "Regex", + "SkipTo", + "StringEnd", + "StringStart", + "Suppress", + "Token", + "TokenConverter", + "White", + "Word", + "WordEnd", + "WordStart", + "ZeroOrMore", + "Char", + "alphanums", + "alphas", + "alphas8bit", + "any_close_tag", + "any_open_tag", + "c_style_comment", + "col", + "common_html_entity", + "counted_array", + "cpp_style_comment", + "dbl_quoted_string", + "dbl_slash_comment", + "delimited_list", + "dict_of", + "empty", + "hexnums", + "html_comment", + "identchars", + "identbodychars", + "java_style_comment", + "line", + "line_end", + "line_start", + "lineno", + "make_html_tags", + "make_xml_tags", + "match_only_at_col", + "match_previous_expr", + "match_previous_literal", + "nested_expr", + "null_debug_action", + "nums", + "one_of", + "printables", + "punc8bit", + "python_style_comment", + "quoted_string", + "remove_quotes", + "replace_with", + "replace_html_entity", + "rest_of_line", + "sgl_quoted_string", + "srange", + "string_end", + "string_start", + "trace_parse_action", + "unicode_string", + "with_attribute", + "indentedBlock", + "original_text_for", + "ungroup", + "infix_notation", + "locatedExpr", + "with_class", + "CloseMatch", + "token_map", + "pyparsing_common", + "pyparsing_unicode", + "unicode_set", + "condition_as_parse_action", + "pyparsing_test", + # pre-PEP8 compatibility names + "__versionTime__", + "anyCloseTag", + "anyOpenTag", + "cStyleComment", + "commonHTMLEntity", + "countedArray", + "cppStyleComment", + "dblQuotedString", + "dblSlashComment", + "delimitedList", + "dictOf", + "htmlComment", + "javaStyleComment", + "lineEnd", + "lineStart", + "makeHTMLTags", + "makeXMLTags", + "matchOnlyAtCol", + "matchPreviousExpr", + "matchPreviousLiteral", + "nestedExpr", + "nullDebugAction", + "oneOf", + "opAssoc", + "pythonStyleComment", + "quotedString", + "removeQuotes", + "replaceHTMLEntity", + "replaceWith", + "restOfLine", + "sglQuotedString", + "stringEnd", + "stringStart", + "traceParseAction", + "unicodeString", + "withAttribute", + "indentedBlock", + "originalTextFor", + "infixNotation", + "locatedExpr", + "withClass", + "tokenMap", + "conditionAsParseAction", + "autoname_elements", +] diff --git a/libs/common/pkg_resources/_vendor/pyparsing/actions.py b/libs/common/pkg_resources/_vendor/pyparsing/actions.py new file mode 100644 index 00000000..f72c66e7 --- /dev/null +++ b/libs/common/pkg_resources/_vendor/pyparsing/actions.py @@ -0,0 +1,207 @@ +# actions.py + +from .exceptions import ParseException +from .util import col + + +class OnlyOnce: + """ + Wrapper for parse actions, to ensure they are only called once. + """ + + def __init__(self, method_call): + from .core import _trim_arity + + self.callable = _trim_arity(method_call) + self.called = False + + def __call__(self, s, l, t): + if not self.called: + results = self.callable(s, l, t) + self.called = True + return results + raise ParseException(s, l, "OnlyOnce obj called multiple times w/out reset") + + def reset(self): + """ + Allow the associated parse action to be called once more. + """ + + self.called = False + + +def match_only_at_col(n): + """ + Helper method for defining parse actions that require matching at + a specific column in the input text. + """ + + def verify_col(strg, locn, toks): + if col(locn, strg) != n: + raise ParseException(strg, locn, "matched token not at column {}".format(n)) + + return verify_col + + +def replace_with(repl_str): + """ + Helper method for common parse actions that simply return + a literal value. Especially useful when used with + :class:`transform_string` (). + + Example:: + + num = Word(nums).set_parse_action(lambda toks: int(toks[0])) + na = one_of("N/A NA").set_parse_action(replace_with(math.nan)) + term = na | num + + term[1, ...].parse_string("324 234 N/A 234") # -> [324, 234, nan, 234] + """ + return lambda s, l, t: [repl_str] + + +def remove_quotes(s, l, t): + """ + Helper parse action for removing quotation marks from parsed + quoted strings. + + Example:: + + # by default, quotation marks are included in parsed results + quoted_string.parse_string("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"] + + # use remove_quotes to strip quotation marks from parsed results + quoted_string.set_parse_action(remove_quotes) + quoted_string.parse_string("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"] + """ + return t[0][1:-1] + + +def with_attribute(*args, **attr_dict): + """ + Helper to create a validating parse action to be used with start + tags created with :class:`make_xml_tags` or + :class:`make_html_tags`. Use ``with_attribute`` to qualify + a starting tag with a required attribute value, to avoid false + matches on common tags such as ```` or ``
``. + + Call ``with_attribute`` with a series of attribute names and + values. Specify the list of filter attributes names and values as: + + - keyword arguments, as in ``(align="right")``, or + - as an explicit dict with ``**`` operator, when an attribute + name is also a Python reserved word, as in ``**{"class":"Customer", "align":"right"}`` + - a list of name-value tuples, as in ``(("ns1:class", "Customer"), ("ns2:align", "right"))`` + + For attribute names with a namespace prefix, you must use the second + form. Attribute names are matched insensitive to upper/lower case. + + If just testing for ``class`` (with or without a namespace), use + :class:`with_class`. + + To verify that the attribute exists, but without specifying a value, + pass ``with_attribute.ANY_VALUE`` as the value. + + Example:: + + html = ''' +
+ Some text +
1 4 0 1 0
+
1,3 2,3 1,1
+
this has no type
+
+ + ''' + div,div_end = make_html_tags("div") + + # only match div tag having a type attribute with value "grid" + div_grid = div().set_parse_action(with_attribute(type="grid")) + grid_expr = div_grid + SkipTo(div | div_end)("body") + for grid_header in grid_expr.search_string(html): + print(grid_header.body) + + # construct a match with any div tag having a type attribute, regardless of the value + div_any_type = div().set_parse_action(with_attribute(type=with_attribute.ANY_VALUE)) + div_expr = div_any_type + SkipTo(div | div_end)("body") + for div_header in div_expr.search_string(html): + print(div_header.body) + + prints:: + + 1 4 0 1 0 + + 1 4 0 1 0 + 1,3 2,3 1,1 + """ + if args: + attrs = args[:] + else: + attrs = attr_dict.items() + attrs = [(k, v) for k, v in attrs] + + def pa(s, l, tokens): + for attrName, attrValue in attrs: + if attrName not in tokens: + raise ParseException(s, l, "no matching attribute " + attrName) + if attrValue != with_attribute.ANY_VALUE and tokens[attrName] != attrValue: + raise ParseException( + s, + l, + "attribute {!r} has value {!r}, must be {!r}".format( + attrName, tokens[attrName], attrValue + ), + ) + + return pa + + +with_attribute.ANY_VALUE = object() + + +def with_class(classname, namespace=""): + """ + Simplified version of :class:`with_attribute` when + matching on a div class - made difficult because ``class`` is + a reserved word in Python. + + Example:: + + html = ''' +
+ Some text +
1 4 0 1 0
+
1,3 2,3 1,1
+
this <div> has no class
+
+ + ''' + div,div_end = make_html_tags("div") + div_grid = div().set_parse_action(with_class("grid")) + + grid_expr = div_grid + SkipTo(div | div_end)("body") + for grid_header in grid_expr.search_string(html): + print(grid_header.body) + + div_any_type = div().set_parse_action(with_class(withAttribute.ANY_VALUE)) + div_expr = div_any_type + SkipTo(div | div_end)("body") + for div_header in div_expr.search_string(html): + print(div_header.body) + + prints:: + + 1 4 0 1 0 + + 1 4 0 1 0 + 1,3 2,3 1,1 + """ + classattr = "{}:class".format(namespace) if namespace else "class" + return with_attribute(**{classattr: classname}) + + +# pre-PEP8 compatibility symbols +replaceWith = replace_with +removeQuotes = remove_quotes +withAttribute = with_attribute +withClass = with_class +matchOnlyAtCol = match_only_at_col diff --git a/libs/common/pkg_resources/_vendor/pyparsing/common.py b/libs/common/pkg_resources/_vendor/pyparsing/common.py new file mode 100644 index 00000000..1859fb79 --- /dev/null +++ b/libs/common/pkg_resources/_vendor/pyparsing/common.py @@ -0,0 +1,424 @@ +# common.py +from .core import * +from .helpers import delimited_list, any_open_tag, any_close_tag +from datetime import datetime + + +# some other useful expressions - using lower-case class name since we are really using this as a namespace +class pyparsing_common: + """Here are some common low-level expressions that may be useful in + jump-starting parser development: + + - numeric forms (:class:`integers`, :class:`reals`, + :class:`scientific notation`) + - common :class:`programming identifiers` + - network addresses (:class:`MAC`, + :class:`IPv4`, :class:`IPv6`) + - ISO8601 :class:`dates` and + :class:`datetime` + - :class:`UUID` + - :class:`comma-separated list` + - :class:`url` + + Parse actions: + + - :class:`convertToInteger` + - :class:`convertToFloat` + - :class:`convertToDate` + - :class:`convertToDatetime` + - :class:`stripHTMLTags` + - :class:`upcaseTokens` + - :class:`downcaseTokens` + + Example:: + + pyparsing_common.number.runTests(''' + # any int or real number, returned as the appropriate type + 100 + -100 + +100 + 3.14159 + 6.02e23 + 1e-12 + ''') + + pyparsing_common.fnumber.runTests(''' + # any int or real number, returned as float + 100 + -100 + +100 + 3.14159 + 6.02e23 + 1e-12 + ''') + + pyparsing_common.hex_integer.runTests(''' + # hex numbers + 100 + FF + ''') + + pyparsing_common.fraction.runTests(''' + # fractions + 1/2 + -3/4 + ''') + + pyparsing_common.mixed_integer.runTests(''' + # mixed fractions + 1 + 1/2 + -3/4 + 1-3/4 + ''') + + import uuid + pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID)) + pyparsing_common.uuid.runTests(''' + # uuid + 12345678-1234-5678-1234-567812345678 + ''') + + prints:: + + # any int or real number, returned as the appropriate type + 100 + [100] + + -100 + [-100] + + +100 + [100] + + 3.14159 + [3.14159] + + 6.02e23 + [6.02e+23] + + 1e-12 + [1e-12] + + # any int or real number, returned as float + 100 + [100.0] + + -100 + [-100.0] + + +100 + [100.0] + + 3.14159 + [3.14159] + + 6.02e23 + [6.02e+23] + + 1e-12 + [1e-12] + + # hex numbers + 100 + [256] + + FF + [255] + + # fractions + 1/2 + [0.5] + + -3/4 + [-0.75] + + # mixed fractions + 1 + [1] + + 1/2 + [0.5] + + -3/4 + [-0.75] + + 1-3/4 + [1.75] + + # uuid + 12345678-1234-5678-1234-567812345678 + [UUID('12345678-1234-5678-1234-567812345678')] + """ + + convert_to_integer = token_map(int) + """ + Parse action for converting parsed integers to Python int + """ + + convert_to_float = token_map(float) + """ + Parse action for converting parsed numbers to Python float + """ + + integer = Word(nums).set_name("integer").set_parse_action(convert_to_integer) + """expression that parses an unsigned integer, returns an int""" + + hex_integer = ( + Word(hexnums).set_name("hex integer").set_parse_action(token_map(int, 16)) + ) + """expression that parses a hexadecimal integer, returns an int""" + + signed_integer = ( + Regex(r"[+-]?\d+") + .set_name("signed integer") + .set_parse_action(convert_to_integer) + ) + """expression that parses an integer with optional leading sign, returns an int""" + + fraction = ( + signed_integer().set_parse_action(convert_to_float) + + "/" + + signed_integer().set_parse_action(convert_to_float) + ).set_name("fraction") + """fractional expression of an integer divided by an integer, returns a float""" + fraction.add_parse_action(lambda tt: tt[0] / tt[-1]) + + mixed_integer = ( + fraction | signed_integer + Opt(Opt("-").suppress() + fraction) + ).set_name("fraction or mixed integer-fraction") + """mixed integer of the form 'integer - fraction', with optional leading integer, returns float""" + mixed_integer.add_parse_action(sum) + + real = ( + Regex(r"[+-]?(?:\d+\.\d*|\.\d+)") + .set_name("real number") + .set_parse_action(convert_to_float) + ) + """expression that parses a floating point number and returns a float""" + + sci_real = ( + Regex(r"[+-]?(?:\d+(?:[eE][+-]?\d+)|(?:\d+\.\d*|\.\d+)(?:[eE][+-]?\d+)?)") + .set_name("real number with scientific notation") + .set_parse_action(convert_to_float) + ) + """expression that parses a floating point number with optional + scientific notation and returns a float""" + + # streamlining this expression makes the docs nicer-looking + number = (sci_real | real | signed_integer).setName("number").streamline() + """any numeric expression, returns the corresponding Python type""" + + fnumber = ( + Regex(r"[+-]?\d+\.?\d*([eE][+-]?\d+)?") + .set_name("fnumber") + .set_parse_action(convert_to_float) + ) + """any int or real number, returned as float""" + + identifier = Word(identchars, identbodychars).set_name("identifier") + """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')""" + + ipv4_address = Regex( + r"(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}" + ).set_name("IPv4 address") + "IPv4 address (``0.0.0.0 - 255.255.255.255``)" + + _ipv6_part = Regex(r"[0-9a-fA-F]{1,4}").set_name("hex_integer") + _full_ipv6_address = (_ipv6_part + (":" + _ipv6_part) * 7).set_name( + "full IPv6 address" + ) + _short_ipv6_address = ( + Opt(_ipv6_part + (":" + _ipv6_part) * (0, 6)) + + "::" + + Opt(_ipv6_part + (":" + _ipv6_part) * (0, 6)) + ).set_name("short IPv6 address") + _short_ipv6_address.add_condition( + lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8 + ) + _mixed_ipv6_address = ("::ffff:" + ipv4_address).set_name("mixed IPv6 address") + ipv6_address = Combine( + (_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).set_name( + "IPv6 address" + ) + ).set_name("IPv6 address") + "IPv6 address (long, short, or mixed form)" + + mac_address = Regex( + r"[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}" + ).set_name("MAC address") + "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)" + + @staticmethod + def convert_to_date(fmt: str = "%Y-%m-%d"): + """ + Helper to create a parse action for converting parsed date string to Python datetime.date + + Params - + - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%d"``) + + Example:: + + date_expr = pyparsing_common.iso8601_date.copy() + date_expr.setParseAction(pyparsing_common.convertToDate()) + print(date_expr.parseString("1999-12-31")) + + prints:: + + [datetime.date(1999, 12, 31)] + """ + + def cvt_fn(ss, ll, tt): + try: + return datetime.strptime(tt[0], fmt).date() + except ValueError as ve: + raise ParseException(ss, ll, str(ve)) + + return cvt_fn + + @staticmethod + def convert_to_datetime(fmt: str = "%Y-%m-%dT%H:%M:%S.%f"): + """Helper to create a parse action for converting parsed + datetime string to Python datetime.datetime + + Params - + - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%dT%H:%M:%S.%f"``) + + Example:: + + dt_expr = pyparsing_common.iso8601_datetime.copy() + dt_expr.setParseAction(pyparsing_common.convertToDatetime()) + print(dt_expr.parseString("1999-12-31T23:59:59.999")) + + prints:: + + [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)] + """ + + def cvt_fn(s, l, t): + try: + return datetime.strptime(t[0], fmt) + except ValueError as ve: + raise ParseException(s, l, str(ve)) + + return cvt_fn + + iso8601_date = Regex( + r"(?P\d{4})(?:-(?P\d\d)(?:-(?P\d\d))?)?" + ).set_name("ISO8601 date") + "ISO8601 date (``yyyy-mm-dd``)" + + iso8601_datetime = Regex( + r"(?P\d{4})-(?P\d\d)-(?P\d\d)[T ](?P\d\d):(?P\d\d)(:(?P\d\d(\.\d*)?)?)?(?PZ|[+-]\d\d:?\d\d)?" + ).set_name("ISO8601 datetime") + "ISO8601 datetime (``yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)``) - trailing seconds, milliseconds, and timezone optional; accepts separating ``'T'`` or ``' '``" + + uuid = Regex(r"[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}").set_name("UUID") + "UUID (``xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx``)" + + _html_stripper = any_open_tag.suppress() | any_close_tag.suppress() + + @staticmethod + def strip_html_tags(s: str, l: int, tokens: ParseResults): + """Parse action to remove HTML tags from web page HTML source + + Example:: + + # strip HTML links from normal text + text = 'More info at the pyparsing wiki page' + td, td_end = makeHTMLTags("TD") + table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end + print(table_text.parseString(text).body) + + Prints:: + + More info at the pyparsing wiki page + """ + return pyparsing_common._html_stripper.transform_string(tokens[0]) + + _commasepitem = ( + Combine( + OneOrMore( + ~Literal(",") + + ~LineEnd() + + Word(printables, exclude_chars=",") + + Opt(White(" \t") + ~FollowedBy(LineEnd() | ",")) + ) + ) + .streamline() + .set_name("commaItem") + ) + comma_separated_list = delimited_list( + Opt(quoted_string.copy() | _commasepitem, default="") + ).set_name("comma separated list") + """Predefined expression of 1 or more printable words or quoted strings, separated by commas.""" + + upcase_tokens = staticmethod(token_map(lambda t: t.upper())) + """Parse action to convert tokens to upper case.""" + + downcase_tokens = staticmethod(token_map(lambda t: t.lower())) + """Parse action to convert tokens to lower case.""" + + # fmt: off + url = Regex( + # https://mathiasbynens.be/demo/url-regex + # https://gist.github.com/dperini/729294 + r"^" + + # protocol identifier (optional) + # short syntax // still required + r"(?:(?:(?Phttps?|ftp):)?\/\/)" + + # user:pass BasicAuth (optional) + r"(?:(?P\S+(?::\S*)?)@)?" + + r"(?P" + + # IP address exclusion + # private & local networks + r"(?!(?:10|127)(?:\.\d{1,3}){3})" + + r"(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})" + + r"(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})" + + # IP address dotted notation octets + # excludes loopback network 0.0.0.0 + # excludes reserved space >= 224.0.0.0 + # excludes network & broadcast addresses + # (first & last IP address of each class) + r"(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])" + + r"(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}" + + r"(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))" + + r"|" + + # host & domain names, may end with dot + # can be replaced by a shortest alternative + # (?![-_])(?:[-\w\u00a1-\uffff]{0,63}[^-_]\.)+ + r"(?:" + + r"(?:" + + r"[a-z0-9\u00a1-\uffff]" + + r"[a-z0-9\u00a1-\uffff_-]{0,62}" + + r")?" + + r"[a-z0-9\u00a1-\uffff]\." + + r")+" + + # TLD identifier name, may end with dot + r"(?:[a-z\u00a1-\uffff]{2,}\.?)" + + r")" + + # port number (optional) + r"(:(?P\d{2,5}))?" + + # resource path (optional) + r"(?P\/[^?# ]*)?" + + # query string (optional) + r"(\?(?P[^#]*))?" + + # fragment (optional) + r"(#(?P\S*))?" + + r"$" + ).set_name("url") + # fmt: on + + # pre-PEP8 compatibility names + convertToInteger = convert_to_integer + convertToFloat = convert_to_float + convertToDate = convert_to_date + convertToDatetime = convert_to_datetime + stripHTMLTags = strip_html_tags + upcaseTokens = upcase_tokens + downcaseTokens = downcase_tokens + + +_builtin_exprs = [ + v for v in vars(pyparsing_common).values() if isinstance(v, ParserElement) +] diff --git a/libs/common/pkg_resources/_vendor/pyparsing/core.py b/libs/common/pkg_resources/_vendor/pyparsing/core.py new file mode 100644 index 00000000..9acba3f3 --- /dev/null +++ b/libs/common/pkg_resources/_vendor/pyparsing/core.py @@ -0,0 +1,5814 @@ +# +# core.py +# +import os +import typing +from typing import ( + NamedTuple, + Union, + Callable, + Any, + Generator, + Tuple, + List, + TextIO, + Set, + Sequence, +) +from abc import ABC, abstractmethod +from enum import Enum +import string +import copy +import warnings +import re +import sys +from collections.abc import Iterable +import traceback +import types +from operator import itemgetter +from functools import wraps +from threading import RLock +from pathlib import Path + +from .util import ( + _FifoCache, + _UnboundedCache, + __config_flags, + _collapse_string_to_ranges, + _escape_regex_range_chars, + _bslash, + _flatten, + LRUMemo as _LRUMemo, + UnboundedMemo as _UnboundedMemo, +) +from .exceptions import * +from .actions import * +from .results import ParseResults, _ParseResultsWithOffset +from .unicode import pyparsing_unicode + +_MAX_INT = sys.maxsize +str_type: Tuple[type, ...] = (str, bytes) + +# +# Copyright (c) 2003-2022 Paul T. McGuire +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# + + +if sys.version_info >= (3, 8): + from functools import cached_property +else: + + class cached_property: + def __init__(self, func): + self._func = func + + def __get__(self, instance, owner=None): + ret = instance.__dict__[self._func.__name__] = self._func(instance) + return ret + + +class __compat__(__config_flags): + """ + A cross-version compatibility configuration for pyparsing features that will be + released in a future version. By setting values in this configuration to True, + those features can be enabled in prior versions for compatibility development + and testing. + + - ``collect_all_And_tokens`` - flag to enable fix for Issue #63 that fixes erroneous grouping + of results names when an :class:`And` expression is nested within an :class:`Or` or :class:`MatchFirst`; + maintained for compatibility, but setting to ``False`` no longer restores pre-2.3.1 + behavior + """ + + _type_desc = "compatibility" + + collect_all_And_tokens = True + + _all_names = [__ for __ in locals() if not __.startswith("_")] + _fixed_names = """ + collect_all_And_tokens + """.split() + + +class __diag__(__config_flags): + _type_desc = "diagnostic" + + warn_multiple_tokens_in_named_alternation = False + warn_ungrouped_named_tokens_in_collection = False + warn_name_set_on_empty_Forward = False + warn_on_parse_using_empty_Forward = False + warn_on_assignment_to_Forward = False + warn_on_multiple_string_args_to_oneof = False + warn_on_match_first_with_lshift_operator = False + enable_debug_on_named_expressions = False + + _all_names = [__ for __ in locals() if not __.startswith("_")] + _warning_names = [name for name in _all_names if name.startswith("warn")] + _debug_names = [name for name in _all_names if name.startswith("enable_debug")] + + @classmethod + def enable_all_warnings(cls) -> None: + for name in cls._warning_names: + cls.enable(name) + + +class Diagnostics(Enum): + """ + Diagnostic configuration (all default to disabled) + - ``warn_multiple_tokens_in_named_alternation`` - flag to enable warnings when a results + name is defined on a :class:`MatchFirst` or :class:`Or` expression with one or more :class:`And` subexpressions + - ``warn_ungrouped_named_tokens_in_collection`` - flag to enable warnings when a results + name is defined on a containing expression with ungrouped subexpressions that also + have results names + - ``warn_name_set_on_empty_Forward`` - flag to enable warnings when a :class:`Forward` is defined + with a results name, but has no contents defined + - ``warn_on_parse_using_empty_Forward`` - flag to enable warnings when a :class:`Forward` is + defined in a grammar but has never had an expression attached to it + - ``warn_on_assignment_to_Forward`` - flag to enable warnings when a :class:`Forward` is defined + but is overwritten by assigning using ``'='`` instead of ``'<<='`` or ``'<<'`` + - ``warn_on_multiple_string_args_to_oneof`` - flag to enable warnings when :class:`one_of` is + incorrectly called with multiple str arguments + - ``enable_debug_on_named_expressions`` - flag to auto-enable debug on all subsequent + calls to :class:`ParserElement.set_name` + + Diagnostics are enabled/disabled by calling :class:`enable_diag` and :class:`disable_diag`. + All warnings can be enabled by calling :class:`enable_all_warnings`. + """ + + warn_multiple_tokens_in_named_alternation = 0 + warn_ungrouped_named_tokens_in_collection = 1 + warn_name_set_on_empty_Forward = 2 + warn_on_parse_using_empty_Forward = 3 + warn_on_assignment_to_Forward = 4 + warn_on_multiple_string_args_to_oneof = 5 + warn_on_match_first_with_lshift_operator = 6 + enable_debug_on_named_expressions = 7 + + +def enable_diag(diag_enum: Diagnostics) -> None: + """ + Enable a global pyparsing diagnostic flag (see :class:`Diagnostics`). + """ + __diag__.enable(diag_enum.name) + + +def disable_diag(diag_enum: Diagnostics) -> None: + """ + Disable a global pyparsing diagnostic flag (see :class:`Diagnostics`). + """ + __diag__.disable(diag_enum.name) + + +def enable_all_warnings() -> None: + """ + Enable all global pyparsing diagnostic warnings (see :class:`Diagnostics`). + """ + __diag__.enable_all_warnings() + + +# hide abstract class +del __config_flags + + +def _should_enable_warnings( + cmd_line_warn_options: typing.Iterable[str], warn_env_var: typing.Optional[str] +) -> bool: + enable = bool(warn_env_var) + for warn_opt in cmd_line_warn_options: + w_action, w_message, w_category, w_module, w_line = (warn_opt + "::::").split( + ":" + )[:5] + if not w_action.lower().startswith("i") and ( + not (w_message or w_category or w_module) or w_module == "pyparsing" + ): + enable = True + elif w_action.lower().startswith("i") and w_module in ("pyparsing", ""): + enable = False + return enable + + +if _should_enable_warnings( + sys.warnoptions, os.environ.get("PYPARSINGENABLEALLWARNINGS") +): + enable_all_warnings() + + +# build list of single arg builtins, that can be used as parse actions +_single_arg_builtins = { + sum, + len, + sorted, + reversed, + list, + tuple, + set, + any, + all, + min, + max, +} + +_generatorType = types.GeneratorType +ParseAction = Union[ + Callable[[], Any], + Callable[[ParseResults], Any], + Callable[[int, ParseResults], Any], + Callable[[str, int, ParseResults], Any], +] +ParseCondition = Union[ + Callable[[], bool], + Callable[[ParseResults], bool], + Callable[[int, ParseResults], bool], + Callable[[str, int, ParseResults], bool], +] +ParseFailAction = Callable[[str, int, "ParserElement", Exception], None] +DebugStartAction = Callable[[str, int, "ParserElement", bool], None] +DebugSuccessAction = Callable[ + [str, int, int, "ParserElement", ParseResults, bool], None +] +DebugExceptionAction = Callable[[str, int, "ParserElement", Exception, bool], None] + + +alphas = string.ascii_uppercase + string.ascii_lowercase +identchars = pyparsing_unicode.Latin1.identchars +identbodychars = pyparsing_unicode.Latin1.identbodychars +nums = "0123456789" +hexnums = nums + "ABCDEFabcdef" +alphanums = alphas + nums +printables = "".join([c for c in string.printable if c not in string.whitespace]) + +_trim_arity_call_line: traceback.StackSummary = None + + +def _trim_arity(func, max_limit=3): + """decorator to trim function calls to match the arity of the target""" + global _trim_arity_call_line + + if func in _single_arg_builtins: + return lambda s, l, t: func(t) + + limit = 0 + found_arity = False + + def extract_tb(tb, limit=0): + frames = traceback.extract_tb(tb, limit=limit) + frame_summary = frames[-1] + return [frame_summary[:2]] + + # synthesize what would be returned by traceback.extract_stack at the call to + # user's parse action 'func', so that we don't incur call penalty at parse time + + # fmt: off + LINE_DIFF = 7 + # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND + # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!! + _trim_arity_call_line = (_trim_arity_call_line or traceback.extract_stack(limit=2)[-1]) + pa_call_line_synth = (_trim_arity_call_line[0], _trim_arity_call_line[1] + LINE_DIFF) + + def wrapper(*args): + nonlocal found_arity, limit + while 1: + try: + ret = func(*args[limit:]) + found_arity = True + return ret + except TypeError as te: + # re-raise TypeErrors if they did not come from our arity testing + if found_arity: + raise + else: + tb = te.__traceback__ + trim_arity_type_error = ( + extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth + ) + del tb + + if trim_arity_type_error: + if limit < max_limit: + limit += 1 + continue + + raise + # fmt: on + + # copy func name to wrapper for sensible debug output + # (can't use functools.wraps, since that messes with function signature) + func_name = getattr(func, "__name__", getattr(func, "__class__").__name__) + wrapper.__name__ = func_name + wrapper.__doc__ = func.__doc__ + + return wrapper + + +def condition_as_parse_action( + fn: ParseCondition, message: str = None, fatal: bool = False +) -> ParseAction: + """ + Function to convert a simple predicate function that returns ``True`` or ``False`` + into a parse action. Can be used in places when a parse action is required + and :class:`ParserElement.add_condition` cannot be used (such as when adding a condition + to an operator level in :class:`infix_notation`). + + Optional keyword arguments: + + - ``message`` - define a custom message to be used in the raised exception + - ``fatal`` - if True, will raise :class:`ParseFatalException` to stop parsing immediately; + otherwise will raise :class:`ParseException` + + """ + msg = message if message is not None else "failed user-defined condition" + exc_type = ParseFatalException if fatal else ParseException + fn = _trim_arity(fn) + + @wraps(fn) + def pa(s, l, t): + if not bool(fn(s, l, t)): + raise exc_type(s, l, msg) + + return pa + + +def _default_start_debug_action( + instring: str, loc: int, expr: "ParserElement", cache_hit: bool = False +): + cache_hit_str = "*" if cache_hit else "" + print( + ( + "{}Match {} at loc {}({},{})\n {}\n {}^".format( + cache_hit_str, + expr, + loc, + lineno(loc, instring), + col(loc, instring), + line(loc, instring), + " " * (col(loc, instring) - 1), + ) + ) + ) + + +def _default_success_debug_action( + instring: str, + startloc: int, + endloc: int, + expr: "ParserElement", + toks: ParseResults, + cache_hit: bool = False, +): + cache_hit_str = "*" if cache_hit else "" + print("{}Matched {} -> {}".format(cache_hit_str, expr, toks.as_list())) + + +def _default_exception_debug_action( + instring: str, + loc: int, + expr: "ParserElement", + exc: Exception, + cache_hit: bool = False, +): + cache_hit_str = "*" if cache_hit else "" + print( + "{}Match {} failed, {} raised: {}".format( + cache_hit_str, expr, type(exc).__name__, exc + ) + ) + + +def null_debug_action(*args): + """'Do-nothing' debug action, to suppress debugging output during parsing.""" + + +class ParserElement(ABC): + """Abstract base level parser element class.""" + + DEFAULT_WHITE_CHARS: str = " \n\t\r" + verbose_stacktrace: bool = False + _literalStringClass: typing.Optional[type] = None + + @staticmethod + def set_default_whitespace_chars(chars: str) -> None: + r""" + Overrides the default whitespace chars + + Example:: + + # default whitespace chars are space, and newline + Word(alphas)[1, ...].parse_string("abc def\nghi jkl") # -> ['abc', 'def', 'ghi', 'jkl'] + + # change to just treat newline as significant + ParserElement.set_default_whitespace_chars(" \t") + Word(alphas)[1, ...].parse_string("abc def\nghi jkl") # -> ['abc', 'def'] + """ + ParserElement.DEFAULT_WHITE_CHARS = chars + + # update whitespace all parse expressions defined in this module + for expr in _builtin_exprs: + if expr.copyDefaultWhiteChars: + expr.whiteChars = set(chars) + + @staticmethod + def inline_literals_using(cls: type) -> None: + """ + Set class to be used for inclusion of string literals into a parser. + + Example:: + + # default literal class used is Literal + integer = Word(nums) + date_str = integer("year") + '/' + integer("month") + '/' + integer("day") + + date_str.parse_string("1999/12/31") # -> ['1999', '/', '12', '/', '31'] + + + # change to Suppress + ParserElement.inline_literals_using(Suppress) + date_str = integer("year") + '/' + integer("month") + '/' + integer("day") + + date_str.parse_string("1999/12/31") # -> ['1999', '12', '31'] + """ + ParserElement._literalStringClass = cls + + class DebugActions(NamedTuple): + debug_try: typing.Optional[DebugStartAction] + debug_match: typing.Optional[DebugSuccessAction] + debug_fail: typing.Optional[DebugExceptionAction] + + def __init__(self, savelist: bool = False): + self.parseAction: List[ParseAction] = list() + self.failAction: typing.Optional[ParseFailAction] = None + self.customName = None + self._defaultName = None + self.resultsName = None + self.saveAsList = savelist + self.skipWhitespace = True + self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS) + self.copyDefaultWhiteChars = True + # used when checking for left-recursion + self.mayReturnEmpty = False + self.keepTabs = False + self.ignoreExprs: List["ParserElement"] = list() + self.debug = False + self.streamlined = False + # optimize exception handling for subclasses that don't advance parse index + self.mayIndexError = True + self.errmsg = "" + # mark results names as modal (report only last) or cumulative (list all) + self.modalResults = True + # custom debug actions + self.debugActions = self.DebugActions(None, None, None) + # avoid redundant calls to preParse + self.callPreparse = True + self.callDuringTry = False + self.suppress_warnings_: List[Diagnostics] = [] + + def suppress_warning(self, warning_type: Diagnostics) -> "ParserElement": + """ + Suppress warnings emitted for a particular diagnostic on this expression. + + Example:: + + base = pp.Forward() + base.suppress_warning(Diagnostics.warn_on_parse_using_empty_Forward) + + # statement would normally raise a warning, but is now suppressed + print(base.parseString("x")) + + """ + self.suppress_warnings_.append(warning_type) + return self + + def copy(self) -> "ParserElement": + """ + Make a copy of this :class:`ParserElement`. Useful for defining + different parse actions for the same parsing pattern, using copies of + the original parse element. + + Example:: + + integer = Word(nums).set_parse_action(lambda toks: int(toks[0])) + integerK = integer.copy().add_parse_action(lambda toks: toks[0] * 1024) + Suppress("K") + integerM = integer.copy().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M") + + print((integerK | integerM | integer)[1, ...].parse_string("5K 100 640K 256M")) + + prints:: + + [5120, 100, 655360, 268435456] + + Equivalent form of ``expr.copy()`` is just ``expr()``:: + + integerM = integer().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M") + """ + cpy = copy.copy(self) + cpy.parseAction = self.parseAction[:] + cpy.ignoreExprs = self.ignoreExprs[:] + if self.copyDefaultWhiteChars: + cpy.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS) + return cpy + + def set_results_name( + self, name: str, list_all_matches: bool = False, *, listAllMatches: bool = False + ) -> "ParserElement": + """ + Define name for referencing matching tokens as a nested attribute + of the returned parse results. + + Normally, results names are assigned as you would assign keys in a dict: + any existing value is overwritten by later values. If it is necessary to + keep all values captured for a particular results name, call ``set_results_name`` + with ``list_all_matches`` = True. + + NOTE: ``set_results_name`` returns a *copy* of the original :class:`ParserElement` object; + this is so that the client can define a basic element, such as an + integer, and reference it in multiple places with different names. + + You can also set results names using the abbreviated syntax, + ``expr("name")`` in place of ``expr.set_results_name("name")`` + - see :class:`__call__`. If ``list_all_matches`` is required, use + ``expr("name*")``. + + Example:: + + date_str = (integer.set_results_name("year") + '/' + + integer.set_results_name("month") + '/' + + integer.set_results_name("day")) + + # equivalent form: + date_str = integer("year") + '/' + integer("month") + '/' + integer("day") + """ + listAllMatches = listAllMatches or list_all_matches + return self._setResultsName(name, listAllMatches) + + def _setResultsName(self, name, listAllMatches=False): + if name is None: + return self + newself = self.copy() + if name.endswith("*"): + name = name[:-1] + listAllMatches = True + newself.resultsName = name + newself.modalResults = not listAllMatches + return newself + + def set_break(self, break_flag: bool = True) -> "ParserElement": + """ + Method to invoke the Python pdb debugger when this element is + about to be parsed. Set ``break_flag`` to ``True`` to enable, ``False`` to + disable. + """ + if break_flag: + _parseMethod = self._parse + + def breaker(instring, loc, doActions=True, callPreParse=True): + import pdb + + # this call to pdb.set_trace() is intentional, not a checkin error + pdb.set_trace() + return _parseMethod(instring, loc, doActions, callPreParse) + + breaker._originalParseMethod = _parseMethod + self._parse = breaker + else: + if hasattr(self._parse, "_originalParseMethod"): + self._parse = self._parse._originalParseMethod + return self + + def set_parse_action(self, *fns: ParseAction, **kwargs) -> "ParserElement": + """ + Define one or more actions to perform when successfully matching parse element definition. + + Parse actions can be called to perform data conversions, do extra validation, + update external data structures, or enhance or replace the parsed tokens. + Each parse action ``fn`` is a callable method with 0-3 arguments, called as + ``fn(s, loc, toks)`` , ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where: + + - s = the original string being parsed (see note below) + - loc = the location of the matching substring + - toks = a list of the matched tokens, packaged as a :class:`ParseResults` object + + The parsed tokens are passed to the parse action as ParseResults. They can be + modified in place using list-style append, extend, and pop operations to update + the parsed list elements; and with dictionary-style item set and del operations + to add, update, or remove any named results. If the tokens are modified in place, + it is not necessary to return them with a return statement. + + Parse actions can also completely replace the given tokens, with another ``ParseResults`` + object, or with some entirely different object (common for parse actions that perform data + conversions). A convenient way to build a new parse result is to define the values + using a dict, and then create the return value using :class:`ParseResults.from_dict`. + + If None is passed as the ``fn`` parse action, all previously added parse actions for this + expression are cleared. + + Optional keyword arguments: + + - call_during_try = (default= ``False``) indicate if parse action should be run during + lookaheads and alternate testing. For parse actions that have side effects, it is + important to only call the parse action once it is determined that it is being + called as part of a successful parse. For parse actions that perform additional + validation, then call_during_try should be passed as True, so that the validation + code is included in the preliminary "try" parses. + + Note: the default parsing behavior is to expand tabs in the input string + before starting the parsing process. See :class:`parse_string` for more + information on parsing strings containing ```` s, and suggested + methods to maintain a consistent view of the parsed string, the parse + location, and line and column positions within the parsed string. + + Example:: + + # parse dates in the form YYYY/MM/DD + + # use parse action to convert toks from str to int at parse time + def convert_to_int(toks): + return int(toks[0]) + + # use a parse action to verify that the date is a valid date + def is_valid_date(instring, loc, toks): + from datetime import date + year, month, day = toks[::2] + try: + date(year, month, day) + except ValueError: + raise ParseException(instring, loc, "invalid date given") + + integer = Word(nums) + date_str = integer + '/' + integer + '/' + integer + + # add parse actions + integer.set_parse_action(convert_to_int) + date_str.set_parse_action(is_valid_date) + + # note that integer fields are now ints, not strings + date_str.run_tests(''' + # successful parse - note that integer fields were converted to ints + 1999/12/31 + + # fail - invalid date + 1999/13/31 + ''') + """ + if list(fns) == [None]: + self.parseAction = [] + else: + if not all(callable(fn) for fn in fns): + raise TypeError("parse actions must be callable") + self.parseAction = [_trim_arity(fn) for fn in fns] + self.callDuringTry = kwargs.get( + "call_during_try", kwargs.get("callDuringTry", False) + ) + return self + + def add_parse_action(self, *fns: ParseAction, **kwargs) -> "ParserElement": + """ + Add one or more parse actions to expression's list of parse actions. See :class:`set_parse_action`. + + See examples in :class:`copy`. + """ + self.parseAction += [_trim_arity(fn) for fn in fns] + self.callDuringTry = self.callDuringTry or kwargs.get( + "call_during_try", kwargs.get("callDuringTry", False) + ) + return self + + def add_condition(self, *fns: ParseCondition, **kwargs) -> "ParserElement": + """Add a boolean predicate function to expression's list of parse actions. See + :class:`set_parse_action` for function call signatures. Unlike ``set_parse_action``, + functions passed to ``add_condition`` need to return boolean success/fail of the condition. + + Optional keyword arguments: + + - message = define a custom message to be used in the raised exception + - fatal = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise + ParseException + - call_during_try = boolean to indicate if this method should be called during internal tryParse calls, + default=False + + Example:: + + integer = Word(nums).set_parse_action(lambda toks: int(toks[0])) + year_int = integer.copy() + year_int.add_condition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later") + date_str = year_int + '/' + integer + '/' + integer + + result = date_str.parse_string("1999/12/31") # -> Exception: Only support years 2000 and later (at char 0), + (line:1, col:1) + """ + for fn in fns: + self.parseAction.append( + condition_as_parse_action( + fn, message=kwargs.get("message"), fatal=kwargs.get("fatal", False) + ) + ) + + self.callDuringTry = self.callDuringTry or kwargs.get( + "call_during_try", kwargs.get("callDuringTry", False) + ) + return self + + def set_fail_action(self, fn: ParseFailAction) -> "ParserElement": + """ + Define action to perform if parsing fails at this expression. + Fail acton fn is a callable function that takes the arguments + ``fn(s, loc, expr, err)`` where: + + - s = string being parsed + - loc = location where expression match was attempted and failed + - expr = the parse expression that failed + - err = the exception thrown + + The function returns no value. It may throw :class:`ParseFatalException` + if it is desired to stop parsing immediately.""" + self.failAction = fn + return self + + def _skipIgnorables(self, instring, loc): + exprsFound = True + while exprsFound: + exprsFound = False + for e in self.ignoreExprs: + try: + while 1: + loc, dummy = e._parse(instring, loc) + exprsFound = True + except ParseException: + pass + return loc + + def preParse(self, instring, loc): + if self.ignoreExprs: + loc = self._skipIgnorables(instring, loc) + + if self.skipWhitespace: + instrlen = len(instring) + white_chars = self.whiteChars + while loc < instrlen and instring[loc] in white_chars: + loc += 1 + + return loc + + def parseImpl(self, instring, loc, doActions=True): + return loc, [] + + def postParse(self, instring, loc, tokenlist): + return tokenlist + + # @profile + def _parseNoCache( + self, instring, loc, doActions=True, callPreParse=True + ) -> Tuple[int, ParseResults]: + TRY, MATCH, FAIL = 0, 1, 2 + debugging = self.debug # and doActions) + len_instring = len(instring) + + if debugging or self.failAction: + # print("Match {} at loc {}({}, {})".format(self, loc, lineno(loc, instring), col(loc, instring))) + try: + if callPreParse and self.callPreparse: + pre_loc = self.preParse(instring, loc) + else: + pre_loc = loc + tokens_start = pre_loc + if self.debugActions.debug_try: + self.debugActions.debug_try(instring, tokens_start, self, False) + if self.mayIndexError or pre_loc >= len_instring: + try: + loc, tokens = self.parseImpl(instring, pre_loc, doActions) + except IndexError: + raise ParseException(instring, len_instring, self.errmsg, self) + else: + loc, tokens = self.parseImpl(instring, pre_loc, doActions) + except Exception as err: + # print("Exception raised:", err) + if self.debugActions.debug_fail: + self.debugActions.debug_fail( + instring, tokens_start, self, err, False + ) + if self.failAction: + self.failAction(instring, tokens_start, self, err) + raise + else: + if callPreParse and self.callPreparse: + pre_loc = self.preParse(instring, loc) + else: + pre_loc = loc + tokens_start = pre_loc + if self.mayIndexError or pre_loc >= len_instring: + try: + loc, tokens = self.parseImpl(instring, pre_loc, doActions) + except IndexError: + raise ParseException(instring, len_instring, self.errmsg, self) + else: + loc, tokens = self.parseImpl(instring, pre_loc, doActions) + + tokens = self.postParse(instring, loc, tokens) + + ret_tokens = ParseResults( + tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults + ) + if self.parseAction and (doActions or self.callDuringTry): + if debugging: + try: + for fn in self.parseAction: + try: + tokens = fn(instring, tokens_start, ret_tokens) + except IndexError as parse_action_exc: + exc = ParseException("exception raised in parse action") + raise exc from parse_action_exc + + if tokens is not None and tokens is not ret_tokens: + ret_tokens = ParseResults( + tokens, + self.resultsName, + asList=self.saveAsList + and isinstance(tokens, (ParseResults, list)), + modal=self.modalResults, + ) + except Exception as err: + # print "Exception raised in user parse action:", err + if self.debugActions.debug_fail: + self.debugActions.debug_fail( + instring, tokens_start, self, err, False + ) + raise + else: + for fn in self.parseAction: + try: + tokens = fn(instring, tokens_start, ret_tokens) + except IndexError as parse_action_exc: + exc = ParseException("exception raised in parse action") + raise exc from parse_action_exc + + if tokens is not None and tokens is not ret_tokens: + ret_tokens = ParseResults( + tokens, + self.resultsName, + asList=self.saveAsList + and isinstance(tokens, (ParseResults, list)), + modal=self.modalResults, + ) + if debugging: + # print("Matched", self, "->", ret_tokens.as_list()) + if self.debugActions.debug_match: + self.debugActions.debug_match( + instring, tokens_start, loc, self, ret_tokens, False + ) + + return loc, ret_tokens + + def try_parse(self, instring: str, loc: int, raise_fatal: bool = False) -> int: + try: + return self._parse(instring, loc, doActions=False)[0] + except ParseFatalException: + if raise_fatal: + raise + raise ParseException(instring, loc, self.errmsg, self) + + def can_parse_next(self, instring: str, loc: int) -> bool: + try: + self.try_parse(instring, loc) + except (ParseException, IndexError): + return False + else: + return True + + # cache for left-recursion in Forward references + recursion_lock = RLock() + recursion_memos: typing.Dict[ + Tuple[int, "Forward", bool], Tuple[int, Union[ParseResults, Exception]] + ] = {} + + # argument cache for optimizing repeated calls when backtracking through recursive expressions + packrat_cache = ( + {} + ) # this is set later by enabled_packrat(); this is here so that reset_cache() doesn't fail + packrat_cache_lock = RLock() + packrat_cache_stats = [0, 0] + + # this method gets repeatedly called during backtracking with the same arguments - + # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression + def _parseCache( + self, instring, loc, doActions=True, callPreParse=True + ) -> Tuple[int, ParseResults]: + HIT, MISS = 0, 1 + TRY, MATCH, FAIL = 0, 1, 2 + lookup = (self, instring, loc, callPreParse, doActions) + with ParserElement.packrat_cache_lock: + cache = ParserElement.packrat_cache + value = cache.get(lookup) + if value is cache.not_in_cache: + ParserElement.packrat_cache_stats[MISS] += 1 + try: + value = self._parseNoCache(instring, loc, doActions, callPreParse) + except ParseBaseException as pe: + # cache a copy of the exception, without the traceback + cache.set(lookup, pe.__class__(*pe.args)) + raise + else: + cache.set(lookup, (value[0], value[1].copy(), loc)) + return value + else: + ParserElement.packrat_cache_stats[HIT] += 1 + if self.debug and self.debugActions.debug_try: + try: + self.debugActions.debug_try(instring, loc, self, cache_hit=True) + except TypeError: + pass + if isinstance(value, Exception): + if self.debug and self.debugActions.debug_fail: + try: + self.debugActions.debug_fail( + instring, loc, self, value, cache_hit=True + ) + except TypeError: + pass + raise value + + loc_, result, endloc = value[0], value[1].copy(), value[2] + if self.debug and self.debugActions.debug_match: + try: + self.debugActions.debug_match( + instring, loc_, endloc, self, result, cache_hit=True + ) + except TypeError: + pass + + return loc_, result + + _parse = _parseNoCache + + @staticmethod + def reset_cache() -> None: + ParserElement.packrat_cache.clear() + ParserElement.packrat_cache_stats[:] = [0] * len( + ParserElement.packrat_cache_stats + ) + ParserElement.recursion_memos.clear() + + _packratEnabled = False + _left_recursion_enabled = False + + @staticmethod + def disable_memoization() -> None: + """ + Disables active Packrat or Left Recursion parsing and their memoization + + This method also works if neither Packrat nor Left Recursion are enabled. + This makes it safe to call before activating Packrat nor Left Recursion + to clear any previous settings. + """ + ParserElement.reset_cache() + ParserElement._left_recursion_enabled = False + ParserElement._packratEnabled = False + ParserElement._parse = ParserElement._parseNoCache + + @staticmethod + def enable_left_recursion( + cache_size_limit: typing.Optional[int] = None, *, force=False + ) -> None: + """ + Enables "bounded recursion" parsing, which allows for both direct and indirect + left-recursion. During parsing, left-recursive :class:`Forward` elements are + repeatedly matched with a fixed recursion depth that is gradually increased + until finding the longest match. + + Example:: + + import pyparsing as pp + pp.ParserElement.enable_left_recursion() + + E = pp.Forward("E") + num = pp.Word(pp.nums) + # match `num`, or `num '+' num`, or `num '+' num '+' num`, ... + E <<= E + '+' - num | num + + print(E.parse_string("1+2+3")) + + Recursion search naturally memoizes matches of ``Forward`` elements and may + thus skip reevaluation of parse actions during backtracking. This may break + programs with parse actions which rely on strict ordering of side-effects. + + Parameters: + + - cache_size_limit - (default=``None``) - memoize at most this many + ``Forward`` elements during matching; if ``None`` (the default), + memoize all ``Forward`` elements. + + Bounded Recursion parsing works similar but not identical to Packrat parsing, + thus the two cannot be used together. Use ``force=True`` to disable any + previous, conflicting settings. + """ + if force: + ParserElement.disable_memoization() + elif ParserElement._packratEnabled: + raise RuntimeError("Packrat and Bounded Recursion are not compatible") + if cache_size_limit is None: + ParserElement.recursion_memos = _UnboundedMemo() + elif cache_size_limit > 0: + ParserElement.recursion_memos = _LRUMemo(capacity=cache_size_limit) + else: + raise NotImplementedError("Memo size of %s" % cache_size_limit) + ParserElement._left_recursion_enabled = True + + @staticmethod + def enable_packrat(cache_size_limit: int = 128, *, force: bool = False) -> None: + """ + Enables "packrat" parsing, which adds memoizing to the parsing logic. + Repeated parse attempts at the same string location (which happens + often in many complex grammars) can immediately return a cached value, + instead of re-executing parsing/validating code. Memoizing is done of + both valid results and parsing exceptions. + + Parameters: + + - cache_size_limit - (default= ``128``) - if an integer value is provided + will limit the size of the packrat cache; if None is passed, then + the cache size will be unbounded; if 0 is passed, the cache will + be effectively disabled. + + This speedup may break existing programs that use parse actions that + have side-effects. For this reason, packrat parsing is disabled when + you first import pyparsing. To activate the packrat feature, your + program must call the class method :class:`ParserElement.enable_packrat`. + For best results, call ``enable_packrat()`` immediately after + importing pyparsing. + + Example:: + + import pyparsing + pyparsing.ParserElement.enable_packrat() + + Packrat parsing works similar but not identical to Bounded Recursion parsing, + thus the two cannot be used together. Use ``force=True`` to disable any + previous, conflicting settings. + """ + if force: + ParserElement.disable_memoization() + elif ParserElement._left_recursion_enabled: + raise RuntimeError("Packrat and Bounded Recursion are not compatible") + if not ParserElement._packratEnabled: + ParserElement._packratEnabled = True + if cache_size_limit is None: + ParserElement.packrat_cache = _UnboundedCache() + else: + ParserElement.packrat_cache = _FifoCache(cache_size_limit) + ParserElement._parse = ParserElement._parseCache + + def parse_string( + self, instring: str, parse_all: bool = False, *, parseAll: bool = False + ) -> ParseResults: + """ + Parse a string with respect to the parser definition. This function is intended as the primary interface to the + client code. + + :param instring: The input string to be parsed. + :param parse_all: If set, the entire input string must match the grammar. + :param parseAll: retained for pre-PEP8 compatibility, will be removed in a future release. + :raises ParseException: Raised if ``parse_all`` is set and the input string does not match the whole grammar. + :returns: the parsed data as a :class:`ParseResults` object, which may be accessed as a `list`, a `dict`, or + an object with attributes if the given parser includes results names. + + If the input string is required to match the entire grammar, ``parse_all`` flag must be set to ``True``. This + is also equivalent to ending the grammar with :class:`StringEnd`(). + + To report proper column numbers, ``parse_string`` operates on a copy of the input string where all tabs are + converted to spaces (8 spaces per tab, as per the default in ``string.expandtabs``). If the input string + contains tabs and the grammar uses parse actions that use the ``loc`` argument to index into the string + being parsed, one can ensure a consistent view of the input string by doing one of the following: + + - calling ``parse_with_tabs`` on your grammar before calling ``parse_string`` (see :class:`parse_with_tabs`), + - define your parse action using the full ``(s,loc,toks)`` signature, and reference the input string using the + parse action's ``s`` argument, or + - explicitly expand the tabs in your input string before calling ``parse_string``. + + Examples: + + By default, partial matches are OK. + + >>> res = Word('a').parse_string('aaaaabaaa') + >>> print(res) + ['aaaaa'] + + The parsing behavior varies by the inheriting class of this abstract class. Please refer to the children + directly to see more examples. + + It raises an exception if parse_all flag is set and instring does not match the whole grammar. + + >>> res = Word('a').parse_string('aaaaabaaa', parse_all=True) + Traceback (most recent call last): + ... + pyparsing.ParseException: Expected end of text, found 'b' (at char 5), (line:1, col:6) + """ + parseAll = parse_all or parseAll + + ParserElement.reset_cache() + if not self.streamlined: + self.streamline() + for e in self.ignoreExprs: + e.streamline() + if not self.keepTabs: + instring = instring.expandtabs() + try: + loc, tokens = self._parse(instring, 0) + if parseAll: + loc = self.preParse(instring, loc) + se = Empty() + StringEnd() + se._parse(instring, loc) + except ParseBaseException as exc: + if ParserElement.verbose_stacktrace: + raise + else: + # catch and re-raise exception from here, clearing out pyparsing internal stack trace + raise exc.with_traceback(None) + else: + return tokens + + def scan_string( + self, + instring: str, + max_matches: int = _MAX_INT, + overlap: bool = False, + *, + debug: bool = False, + maxMatches: int = _MAX_INT, + ) -> Generator[Tuple[ParseResults, int, int], None, None]: + """ + Scan the input string for expression matches. Each match will return the + matching tokens, start location, and end location. May be called with optional + ``max_matches`` argument, to clip scanning after 'n' matches are found. If + ``overlap`` is specified, then overlapping matches will be reported. + + Note that the start and end locations are reported relative to the string + being parsed. See :class:`parse_string` for more information on parsing + strings with embedded tabs. + + Example:: + + source = "sldjf123lsdjjkf345sldkjf879lkjsfd987" + print(source) + for tokens, start, end in Word(alphas).scan_string(source): + print(' '*start + '^'*(end-start)) + print(' '*start + tokens[0]) + + prints:: + + sldjf123lsdjjkf345sldkjf879lkjsfd987 + ^^^^^ + sldjf + ^^^^^^^ + lsdjjkf + ^^^^^^ + sldkjf + ^^^^^^ + lkjsfd + """ + maxMatches = min(maxMatches, max_matches) + if not self.streamlined: + self.streamline() + for e in self.ignoreExprs: + e.streamline() + + if not self.keepTabs: + instring = str(instring).expandtabs() + instrlen = len(instring) + loc = 0 + preparseFn = self.preParse + parseFn = self._parse + ParserElement.resetCache() + matches = 0 + try: + while loc <= instrlen and matches < maxMatches: + try: + preloc = preparseFn(instring, loc) + nextLoc, tokens = parseFn(instring, preloc, callPreParse=False) + except ParseException: + loc = preloc + 1 + else: + if nextLoc > loc: + matches += 1 + if debug: + print( + { + "tokens": tokens.asList(), + "start": preloc, + "end": nextLoc, + } + ) + yield tokens, preloc, nextLoc + if overlap: + nextloc = preparseFn(instring, loc) + if nextloc > loc: + loc = nextLoc + else: + loc += 1 + else: + loc = nextLoc + else: + loc = preloc + 1 + except ParseBaseException as exc: + if ParserElement.verbose_stacktrace: + raise + else: + # catch and re-raise exception from here, clears out pyparsing internal stack trace + raise exc.with_traceback(None) + + def transform_string(self, instring: str, *, debug: bool = False) -> str: + """ + Extension to :class:`scan_string`, to modify matching text with modified tokens that may + be returned from a parse action. To use ``transform_string``, define a grammar and + attach a parse action to it that modifies the returned token list. + Invoking ``transform_string()`` on a target string will then scan for matches, + and replace the matched text patterns according to the logic in the parse + action. ``transform_string()`` returns the resulting transformed string. + + Example:: + + wd = Word(alphas) + wd.set_parse_action(lambda toks: toks[0].title()) + + print(wd.transform_string("now is the winter of our discontent made glorious summer by this sun of york.")) + + prints:: + + Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York. + """ + out: List[str] = [] + lastE = 0 + # force preservation of s, to minimize unwanted transformation of string, and to + # keep string locs straight between transform_string and scan_string + self.keepTabs = True + try: + for t, s, e in self.scan_string(instring, debug=debug): + out.append(instring[lastE:s]) + if t: + if isinstance(t, ParseResults): + out += t.as_list() + elif isinstance(t, Iterable) and not isinstance(t, str_type): + out.extend(t) + else: + out.append(t) + lastE = e + out.append(instring[lastE:]) + out = [o for o in out if o] + return "".join([str(s) for s in _flatten(out)]) + except ParseBaseException as exc: + if ParserElement.verbose_stacktrace: + raise + else: + # catch and re-raise exception from here, clears out pyparsing internal stack trace + raise exc.with_traceback(None) + + def search_string( + self, + instring: str, + max_matches: int = _MAX_INT, + *, + debug: bool = False, + maxMatches: int = _MAX_INT, + ) -> ParseResults: + """ + Another extension to :class:`scan_string`, simplifying the access to the tokens found + to match the given parse expression. May be called with optional + ``max_matches`` argument, to clip searching after 'n' matches are found. + + Example:: + + # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters + cap_word = Word(alphas.upper(), alphas.lower()) + + print(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity")) + + # the sum() builtin can be used to merge results into a single ParseResults object + print(sum(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity"))) + + prints:: + + [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']] + ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity'] + """ + maxMatches = min(maxMatches, max_matches) + try: + return ParseResults( + [t for t, s, e in self.scan_string(instring, maxMatches, debug=debug)] + ) + except ParseBaseException as exc: + if ParserElement.verbose_stacktrace: + raise + else: + # catch and re-raise exception from here, clears out pyparsing internal stack trace + raise exc.with_traceback(None) + + def split( + self, + instring: str, + maxsplit: int = _MAX_INT, + include_separators: bool = False, + *, + includeSeparators=False, + ) -> Generator[str, None, None]: + """ + Generator method to split a string using the given expression as a separator. + May be called with optional ``maxsplit`` argument, to limit the number of splits; + and the optional ``include_separators`` argument (default= ``False``), if the separating + matching text should be included in the split results. + + Example:: + + punc = one_of(list(".,;:/-!?")) + print(list(punc.split("This, this?, this sentence, is badly punctuated!"))) + + prints:: + + ['This', ' this', '', ' this sentence', ' is badly punctuated', ''] + """ + includeSeparators = includeSeparators or include_separators + last = 0 + for t, s, e in self.scan_string(instring, max_matches=maxsplit): + yield instring[last:s] + if includeSeparators: + yield t[0] + last = e + yield instring[last:] + + def __add__(self, other) -> "ParserElement": + """ + Implementation of ``+`` operator - returns :class:`And`. Adding strings to a :class:`ParserElement` + converts them to :class:`Literal`s by default. + + Example:: + + greet = Word(alphas) + "," + Word(alphas) + "!" + hello = "Hello, World!" + print(hello, "->", greet.parse_string(hello)) + + prints:: + + Hello, World! -> ['Hello', ',', 'World', '!'] + + ``...`` may be used as a parse expression as a short form of :class:`SkipTo`. + + Literal('start') + ... + Literal('end') + + is equivalent to: + + Literal('start') + SkipTo('end')("_skipped*") + Literal('end') + + Note that the skipped text is returned with '_skipped' as a results name, + and to support having multiple skips in the same parser, the value returned is + a list of all skipped text. + """ + if other is Ellipsis: + return _PendingSkip(self) + + if isinstance(other, str_type): + other = self._literalStringClass(other) + if not isinstance(other, ParserElement): + raise TypeError( + "Cannot combine element of type {} with ParserElement".format( + type(other).__name__ + ) + ) + return And([self, other]) + + def __radd__(self, other) -> "ParserElement": + """ + Implementation of ``+`` operator when left operand is not a :class:`ParserElement` + """ + if other is Ellipsis: + return SkipTo(self)("_skipped*") + self + + if isinstance(other, str_type): + other = self._literalStringClass(other) + if not isinstance(other, ParserElement): + raise TypeError( + "Cannot combine element of type {} with ParserElement".format( + type(other).__name__ + ) + ) + return other + self + + def __sub__(self, other) -> "ParserElement": + """ + Implementation of ``-`` operator, returns :class:`And` with error stop + """ + if isinstance(other, str_type): + other = self._literalStringClass(other) + if not isinstance(other, ParserElement): + raise TypeError( + "Cannot combine element of type {} with ParserElement".format( + type(other).__name__ + ) + ) + return self + And._ErrorStop() + other + + def __rsub__(self, other) -> "ParserElement": + """ + Implementation of ``-`` operator when left operand is not a :class:`ParserElement` + """ + if isinstance(other, str_type): + other = self._literalStringClass(other) + if not isinstance(other, ParserElement): + raise TypeError( + "Cannot combine element of type {} with ParserElement".format( + type(other).__name__ + ) + ) + return other - self + + def __mul__(self, other) -> "ParserElement": + """ + Implementation of ``*`` operator, allows use of ``expr * 3`` in place of + ``expr + expr + expr``. Expressions may also be multiplied by a 2-integer + tuple, similar to ``{min, max}`` multipliers in regular expressions. Tuples + may also include ``None`` as in: + - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent + to ``expr*n + ZeroOrMore(expr)`` + (read as "at least n instances of ``expr``") + - ``expr*(None, n)`` is equivalent to ``expr*(0, n)`` + (read as "0 to n instances of ``expr``") + - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)`` + - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)`` + + Note that ``expr*(None, n)`` does not raise an exception if + more than n exprs exist in the input stream; that is, + ``expr*(None, n)`` does not enforce a maximum number of expr + occurrences. If this behavior is desired, then write + ``expr*(None, n) + ~expr`` + """ + if other is Ellipsis: + other = (0, None) + elif isinstance(other, tuple) and other[:1] == (Ellipsis,): + other = ((0,) + other[1:] + (None,))[:2] + + if isinstance(other, int): + minElements, optElements = other, 0 + elif isinstance(other, tuple): + other = tuple(o if o is not Ellipsis else None for o in other) + other = (other + (None, None))[:2] + if other[0] is None: + other = (0, other[1]) + if isinstance(other[0], int) and other[1] is None: + if other[0] == 0: + return ZeroOrMore(self) + if other[0] == 1: + return OneOrMore(self) + else: + return self * other[0] + ZeroOrMore(self) + elif isinstance(other[0], int) and isinstance(other[1], int): + minElements, optElements = other + optElements -= minElements + else: + raise TypeError( + "cannot multiply ParserElement and ({}) objects".format( + ",".join(type(item).__name__ for item in other) + ) + ) + else: + raise TypeError( + "cannot multiply ParserElement and {} objects".format( + type(other).__name__ + ) + ) + + if minElements < 0: + raise ValueError("cannot multiply ParserElement by negative value") + if optElements < 0: + raise ValueError( + "second tuple value must be greater or equal to first tuple value" + ) + if minElements == optElements == 0: + return And([]) + + if optElements: + + def makeOptionalList(n): + if n > 1: + return Opt(self + makeOptionalList(n - 1)) + else: + return Opt(self) + + if minElements: + if minElements == 1: + ret = self + makeOptionalList(optElements) + else: + ret = And([self] * minElements) + makeOptionalList(optElements) + else: + ret = makeOptionalList(optElements) + else: + if minElements == 1: + ret = self + else: + ret = And([self] * minElements) + return ret + + def __rmul__(self, other) -> "ParserElement": + return self.__mul__(other) + + def __or__(self, other) -> "ParserElement": + """ + Implementation of ``|`` operator - returns :class:`MatchFirst` + """ + if other is Ellipsis: + return _PendingSkip(self, must_skip=True) + + if isinstance(other, str_type): + other = self._literalStringClass(other) + if not isinstance(other, ParserElement): + raise TypeError( + "Cannot combine element of type {} with ParserElement".format( + type(other).__name__ + ) + ) + return MatchFirst([self, other]) + + def __ror__(self, other) -> "ParserElement": + """ + Implementation of ``|`` operator when left operand is not a :class:`ParserElement` + """ + if isinstance(other, str_type): + other = self._literalStringClass(other) + if not isinstance(other, ParserElement): + raise TypeError( + "Cannot combine element of type {} with ParserElement".format( + type(other).__name__ + ) + ) + return other | self + + def __xor__(self, other) -> "ParserElement": + """ + Implementation of ``^`` operator - returns :class:`Or` + """ + if isinstance(other, str_type): + other = self._literalStringClass(other) + if not isinstance(other, ParserElement): + raise TypeError( + "Cannot combine element of type {} with ParserElement".format( + type(other).__name__ + ) + ) + return Or([self, other]) + + def __rxor__(self, other) -> "ParserElement": + """ + Implementation of ``^`` operator when left operand is not a :class:`ParserElement` + """ + if isinstance(other, str_type): + other = self._literalStringClass(other) + if not isinstance(other, ParserElement): + raise TypeError( + "Cannot combine element of type {} with ParserElement".format( + type(other).__name__ + ) + ) + return other ^ self + + def __and__(self, other) -> "ParserElement": + """ + Implementation of ``&`` operator - returns :class:`Each` + """ + if isinstance(other, str_type): + other = self._literalStringClass(other) + if not isinstance(other, ParserElement): + raise TypeError( + "Cannot combine element of type {} with ParserElement".format( + type(other).__name__ + ) + ) + return Each([self, other]) + + def __rand__(self, other) -> "ParserElement": + """ + Implementation of ``&`` operator when left operand is not a :class:`ParserElement` + """ + if isinstance(other, str_type): + other = self._literalStringClass(other) + if not isinstance(other, ParserElement): + raise TypeError( + "Cannot combine element of type {} with ParserElement".format( + type(other).__name__ + ) + ) + return other & self + + def __invert__(self) -> "ParserElement": + """ + Implementation of ``~`` operator - returns :class:`NotAny` + """ + return NotAny(self) + + # disable __iter__ to override legacy use of sequential access to __getitem__ to + # iterate over a sequence + __iter__ = None + + def __getitem__(self, key): + """ + use ``[]`` indexing notation as a short form for expression repetition: + + - ``expr[n]`` is equivalent to ``expr*n`` + - ``expr[m, n]`` is equivalent to ``expr*(m, n)`` + - ``expr[n, ...]`` or ``expr[n,]`` is equivalent + to ``expr*n + ZeroOrMore(expr)`` + (read as "at least n instances of ``expr``") + - ``expr[..., n]`` is equivalent to ``expr*(0, n)`` + (read as "0 to n instances of ``expr``") + - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)`` + - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)`` + + ``None`` may be used in place of ``...``. + + Note that ``expr[..., n]`` and ``expr[m, n]``do not raise an exception + if more than ``n`` ``expr``s exist in the input stream. If this behavior is + desired, then write ``expr[..., n] + ~expr``. + """ + + # convert single arg keys to tuples + try: + if isinstance(key, str_type): + key = (key,) + iter(key) + except TypeError: + key = (key, key) + + if len(key) > 2: + raise TypeError( + "only 1 or 2 index arguments supported ({}{})".format( + key[:5], "... [{}]".format(len(key)) if len(key) > 5 else "" + ) + ) + + # clip to 2 elements + ret = self * tuple(key[:2]) + return ret + + def __call__(self, name: str = None) -> "ParserElement": + """ + Shortcut for :class:`set_results_name`, with ``list_all_matches=False``. + + If ``name`` is given with a trailing ``'*'`` character, then ``list_all_matches`` will be + passed as ``True``. + + If ``name` is omitted, same as calling :class:`copy`. + + Example:: + + # these are equivalent + userdata = Word(alphas).set_results_name("name") + Word(nums + "-").set_results_name("socsecno") + userdata = Word(alphas)("name") + Word(nums + "-")("socsecno") + """ + if name is not None: + return self._setResultsName(name) + else: + return self.copy() + + def suppress(self) -> "ParserElement": + """ + Suppresses the output of this :class:`ParserElement`; useful to keep punctuation from + cluttering up returned output. + """ + return Suppress(self) + + def ignore_whitespace(self, recursive: bool = True) -> "ParserElement": + """ + Enables the skipping of whitespace before matching the characters in the + :class:`ParserElement`'s defined pattern. + + :param recursive: If ``True`` (the default), also enable whitespace skipping in child elements (if any) + """ + self.skipWhitespace = True + return self + + def leave_whitespace(self, recursive: bool = True) -> "ParserElement": + """ + Disables the skipping of whitespace before matching the characters in the + :class:`ParserElement`'s defined pattern. This is normally only used internally by + the pyparsing module, but may be needed in some whitespace-sensitive grammars. + + :param recursive: If true (the default), also disable whitespace skipping in child elements (if any) + """ + self.skipWhitespace = False + return self + + def set_whitespace_chars( + self, chars: Union[Set[str], str], copy_defaults: bool = False + ) -> "ParserElement": + """ + Overrides the default whitespace chars + """ + self.skipWhitespace = True + self.whiteChars = set(chars) + self.copyDefaultWhiteChars = copy_defaults + return self + + def parse_with_tabs(self) -> "ParserElement": + """ + Overrides default behavior to expand ```` s to spaces before parsing the input string. + Must be called before ``parse_string`` when the input grammar contains elements that + match ```` characters. + """ + self.keepTabs = True + return self + + def ignore(self, other: "ParserElement") -> "ParserElement": + """ + Define expression to be ignored (e.g., comments) while doing pattern + matching; may be called repeatedly, to define multiple comment or other + ignorable patterns. + + Example:: + + patt = Word(alphas)[1, ...] + patt.parse_string('ablaj /* comment */ lskjd') + # -> ['ablaj'] + + patt.ignore(c_style_comment) + patt.parse_string('ablaj /* comment */ lskjd') + # -> ['ablaj', 'lskjd'] + """ + import typing + + if isinstance(other, str_type): + other = Suppress(other) + + if isinstance(other, Suppress): + if other not in self.ignoreExprs: + self.ignoreExprs.append(other) + else: + self.ignoreExprs.append(Suppress(other.copy())) + return self + + def set_debug_actions( + self, + start_action: DebugStartAction, + success_action: DebugSuccessAction, + exception_action: DebugExceptionAction, + ) -> "ParserElement": + """ + Customize display of debugging messages while doing pattern matching: + + - ``start_action`` - method to be called when an expression is about to be parsed; + should have the signature ``fn(input_string: str, location: int, expression: ParserElement, cache_hit: bool)`` + + - ``success_action`` - method to be called when an expression has successfully parsed; + should have the signature ``fn(input_string: str, start_location: int, end_location: int, expression: ParserELement, parsed_tokens: ParseResults, cache_hit: bool)`` + + - ``exception_action`` - method to be called when expression fails to parse; + should have the signature ``fn(input_string: str, location: int, expression: ParserElement, exception: Exception, cache_hit: bool)`` + """ + self.debugActions = self.DebugActions( + start_action or _default_start_debug_action, + success_action or _default_success_debug_action, + exception_action or _default_exception_debug_action, + ) + self.debug = True + return self + + def set_debug(self, flag: bool = True) -> "ParserElement": + """ + Enable display of debugging messages while doing pattern matching. + Set ``flag`` to ``True`` to enable, ``False`` to disable. + + Example:: + + wd = Word(alphas).set_name("alphaword") + integer = Word(nums).set_name("numword") + term = wd | integer + + # turn on debugging for wd + wd.set_debug() + + term[1, ...].parse_string("abc 123 xyz 890") + + prints:: + + Match alphaword at loc 0(1,1) + Matched alphaword -> ['abc'] + Match alphaword at loc 3(1,4) + Exception raised:Expected alphaword (at char 4), (line:1, col:5) + Match alphaword at loc 7(1,8) + Matched alphaword -> ['xyz'] + Match alphaword at loc 11(1,12) + Exception raised:Expected alphaword (at char 12), (line:1, col:13) + Match alphaword at loc 15(1,16) + Exception raised:Expected alphaword (at char 15), (line:1, col:16) + + The output shown is that produced by the default debug actions - custom debug actions can be + specified using :class:`set_debug_actions`. Prior to attempting + to match the ``wd`` expression, the debugging message ``"Match at loc (,)"`` + is shown. Then if the parse succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"`` + message is shown. Also note the use of :class:`set_name` to assign a human-readable name to the expression, + which makes debugging and exception messages easier to understand - for instance, the default + name created for the :class:`Word` expression without calling ``set_name`` is ``"W:(A-Za-z)"``. + """ + if flag: + self.set_debug_actions( + _default_start_debug_action, + _default_success_debug_action, + _default_exception_debug_action, + ) + else: + self.debug = False + return self + + @property + def default_name(self) -> str: + if self._defaultName is None: + self._defaultName = self._generateDefaultName() + return self._defaultName + + @abstractmethod + def _generateDefaultName(self): + """ + Child classes must define this method, which defines how the ``default_name`` is set. + """ + + def set_name(self, name: str) -> "ParserElement": + """ + Define name for this expression, makes debugging and exception messages clearer. + Example:: + Word(nums).parse_string("ABC") # -> Exception: Expected W:(0-9) (at char 0), (line:1, col:1) + Word(nums).set_name("integer").parse_string("ABC") # -> Exception: Expected integer (at char 0), (line:1, col:1) + """ + self.customName = name + self.errmsg = "Expected " + self.name + if __diag__.enable_debug_on_named_expressions: + self.set_debug() + return self + + @property + def name(self) -> str: + # This will use a user-defined name if available, but otherwise defaults back to the auto-generated name + return self.customName if self.customName is not None else self.default_name + + def __str__(self) -> str: + return self.name + + def __repr__(self) -> str: + return str(self) + + def streamline(self) -> "ParserElement": + self.streamlined = True + self._defaultName = None + return self + + def recurse(self) -> Sequence["ParserElement"]: + return [] + + def _checkRecursion(self, parseElementList): + subRecCheckList = parseElementList[:] + [self] + for e in self.recurse(): + e._checkRecursion(subRecCheckList) + + def validate(self, validateTrace=None) -> None: + """ + Check defined expressions for valid structure, check for infinite recursive definitions. + """ + self._checkRecursion([]) + + def parse_file( + self, + file_or_filename: Union[str, Path, TextIO], + encoding: str = "utf-8", + parse_all: bool = False, + *, + parseAll: bool = False, + ) -> ParseResults: + """ + Execute the parse expression on the given file or filename. + If a filename is specified (instead of a file object), + the entire file is opened, read, and closed before parsing. + """ + parseAll = parseAll or parse_all + try: + file_contents = file_or_filename.read() + except AttributeError: + with open(file_or_filename, "r", encoding=encoding) as f: + file_contents = f.read() + try: + return self.parse_string(file_contents, parseAll) + except ParseBaseException as exc: + if ParserElement.verbose_stacktrace: + raise + else: + # catch and re-raise exception from here, clears out pyparsing internal stack trace + raise exc.with_traceback(None) + + def __eq__(self, other): + if self is other: + return True + elif isinstance(other, str_type): + return self.matches(other, parse_all=True) + elif isinstance(other, ParserElement): + return vars(self) == vars(other) + return False + + def __hash__(self): + return id(self) + + def matches( + self, test_string: str, parse_all: bool = True, *, parseAll: bool = True + ) -> bool: + """ + Method for quick testing of a parser against a test string. Good for simple + inline microtests of sub expressions while building up larger parser. + + Parameters: + - ``test_string`` - to test against this expression for a match + - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests + + Example:: + + expr = Word(nums) + assert expr.matches("100") + """ + parseAll = parseAll and parse_all + try: + self.parse_string(str(test_string), parse_all=parseAll) + return True + except ParseBaseException: + return False + + def run_tests( + self, + tests: Union[str, List[str]], + parse_all: bool = True, + comment: typing.Optional[Union["ParserElement", str]] = "#", + full_dump: bool = True, + print_results: bool = True, + failure_tests: bool = False, + post_parse: Callable[[str, ParseResults], str] = None, + file: typing.Optional[TextIO] = None, + with_line_numbers: bool = False, + *, + parseAll: bool = True, + fullDump: bool = True, + printResults: bool = True, + failureTests: bool = False, + postParse: Callable[[str, ParseResults], str] = None, + ) -> Tuple[bool, List[Tuple[str, Union[ParseResults, Exception]]]]: + """ + Execute the parse expression on a series of test strings, showing each + test, the parsed results or where the parse failed. Quick and easy way to + run a parse expression against a list of sample strings. + + Parameters: + - ``tests`` - a list of separate test strings, or a multiline string of test strings + - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests + - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test + string; pass None to disable comment filtering + - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline; + if False, only dump nested list + - ``print_results`` - (default= ``True``) prints test output to stdout + - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing + - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as + `fn(test_string, parse_results)` and returns a string to be added to the test output + - ``file`` - (default= ``None``) optional file-like object to which test output will be written; + if None, will default to ``sys.stdout`` + - ``with_line_numbers`` - default= ``False``) show test strings with line and column numbers + + Returns: a (success, results) tuple, where success indicates that all tests succeeded + (or failed if ``failure_tests`` is True), and the results contain a list of lines of each + test's output + + Example:: + + number_expr = pyparsing_common.number.copy() + + result = number_expr.run_tests(''' + # unsigned integer + 100 + # negative integer + -100 + # float with scientific notation + 6.02e23 + # integer with scientific notation + 1e-12 + ''') + print("Success" if result[0] else "Failed!") + + result = number_expr.run_tests(''' + # stray character + 100Z + # missing leading digit before '.' + -.100 + # too many '.' + 3.14.159 + ''', failure_tests=True) + print("Success" if result[0] else "Failed!") + + prints:: + + # unsigned integer + 100 + [100] + + # negative integer + -100 + [-100] + + # float with scientific notation + 6.02e23 + [6.02e+23] + + # integer with scientific notation + 1e-12 + [1e-12] + + Success + + # stray character + 100Z + ^ + FAIL: Expected end of text (at char 3), (line:1, col:4) + + # missing leading digit before '.' + -.100 + ^ + FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1) + + # too many '.' + 3.14.159 + ^ + FAIL: Expected end of text (at char 4), (line:1, col:5) + + Success + + Each test string must be on a single line. If you want to test a string that spans multiple + lines, create a test like this:: + + expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines") + + (Note that this is a raw string literal, you must include the leading ``'r'``.) + """ + from .testing import pyparsing_test + + parseAll = parseAll and parse_all + fullDump = fullDump and full_dump + printResults = printResults and print_results + failureTests = failureTests or failure_tests + postParse = postParse or post_parse + if isinstance(tests, str_type): + line_strip = type(tests).strip + tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()] + if isinstance(comment, str_type): + comment = Literal(comment) + if file is None: + file = sys.stdout + print_ = file.write + + result: Union[ParseResults, Exception] + allResults = [] + comments = [] + success = True + NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string) + BOM = "\ufeff" + for t in tests: + if comment is not None and comment.matches(t, False) or comments and not t: + comments.append( + pyparsing_test.with_line_numbers(t) if with_line_numbers else t + ) + continue + if not t: + continue + out = [ + "\n" + "\n".join(comments) if comments else "", + pyparsing_test.with_line_numbers(t) if with_line_numbers else t, + ] + comments = [] + try: + # convert newline marks to actual newlines, and strip leading BOM if present + t = NL.transform_string(t.lstrip(BOM)) + result = self.parse_string(t, parse_all=parseAll) + except ParseBaseException as pe: + fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else "" + out.append(pe.explain()) + out.append("FAIL: " + str(pe)) + if ParserElement.verbose_stacktrace: + out.extend(traceback.format_tb(pe.__traceback__)) + success = success and failureTests + result = pe + except Exception as exc: + out.append("FAIL-EXCEPTION: {}: {}".format(type(exc).__name__, exc)) + if ParserElement.verbose_stacktrace: + out.extend(traceback.format_tb(exc.__traceback__)) + success = success and failureTests + result = exc + else: + success = success and not failureTests + if postParse is not None: + try: + pp_value = postParse(t, result) + if pp_value is not None: + if isinstance(pp_value, ParseResults): + out.append(pp_value.dump()) + else: + out.append(str(pp_value)) + else: + out.append(result.dump()) + except Exception as e: + out.append(result.dump(full=fullDump)) + out.append( + "{} failed: {}: {}".format( + postParse.__name__, type(e).__name__, e + ) + ) + else: + out.append(result.dump(full=fullDump)) + out.append("") + + if printResults: + print_("\n".join(out)) + + allResults.append((t, result)) + + return success, allResults + + def create_diagram( + self, + output_html: Union[TextIO, Path, str], + vertical: int = 3, + show_results_names: bool = False, + show_groups: bool = False, + **kwargs, + ) -> None: + """ + Create a railroad diagram for the parser. + + Parameters: + - output_html (str or file-like object) - output target for generated + diagram HTML + - vertical (int) - threshold for formatting multiple alternatives vertically + instead of horizontally (default=3) + - show_results_names - bool flag whether diagram should show annotations for + defined results names + - show_groups - bool flag whether groups should be highlighted with an unlabeled surrounding box + Additional diagram-formatting keyword arguments can also be included; + see railroad.Diagram class. + """ + + try: + from .diagram import to_railroad, railroad_to_html + except ImportError as ie: + raise Exception( + "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams" + ) from ie + + self.streamline() + + railroad = to_railroad( + self, + vertical=vertical, + show_results_names=show_results_names, + show_groups=show_groups, + diagram_kwargs=kwargs, + ) + if isinstance(output_html, (str, Path)): + with open(output_html, "w", encoding="utf-8") as diag_file: + diag_file.write(railroad_to_html(railroad)) + else: + # we were passed a file-like object, just write to it + output_html.write(railroad_to_html(railroad)) + + setDefaultWhitespaceChars = set_default_whitespace_chars + inlineLiteralsUsing = inline_literals_using + setResultsName = set_results_name + setBreak = set_break + setParseAction = set_parse_action + addParseAction = add_parse_action + addCondition = add_condition + setFailAction = set_fail_action + tryParse = try_parse + canParseNext = can_parse_next + resetCache = reset_cache + enableLeftRecursion = enable_left_recursion + enablePackrat = enable_packrat + parseString = parse_string + scanString = scan_string + searchString = search_string + transformString = transform_string + setWhitespaceChars = set_whitespace_chars + parseWithTabs = parse_with_tabs + setDebugActions = set_debug_actions + setDebug = set_debug + defaultName = default_name + setName = set_name + parseFile = parse_file + runTests = run_tests + ignoreWhitespace = ignore_whitespace + leaveWhitespace = leave_whitespace + + +class _PendingSkip(ParserElement): + # internal placeholder class to hold a place were '...' is added to a parser element, + # once another ParserElement is added, this placeholder will be replaced with a SkipTo + def __init__(self, expr: ParserElement, must_skip: bool = False): + super().__init__() + self.anchor = expr + self.must_skip = must_skip + + def _generateDefaultName(self): + return str(self.anchor + Empty()).replace("Empty", "...") + + def __add__(self, other) -> "ParserElement": + skipper = SkipTo(other).set_name("...")("_skipped*") + if self.must_skip: + + def must_skip(t): + if not t._skipped or t._skipped.as_list() == [""]: + del t[0] + t.pop("_skipped", None) + + def show_skip(t): + if t._skipped.as_list()[-1:] == [""]: + t.pop("_skipped") + t["_skipped"] = "missing <" + repr(self.anchor) + ">" + + return ( + self.anchor + skipper().add_parse_action(must_skip) + | skipper().add_parse_action(show_skip) + ) + other + + return self.anchor + skipper + other + + def __repr__(self): + return self.defaultName + + def parseImpl(self, *args): + raise Exception( + "use of `...` expression without following SkipTo target expression" + ) + + +class Token(ParserElement): + """Abstract :class:`ParserElement` subclass, for defining atomic + matching patterns. + """ + + def __init__(self): + super().__init__(savelist=False) + + def _generateDefaultName(self): + return type(self).__name__ + + +class Empty(Token): + """ + An empty token, will always match. + """ + + def __init__(self): + super().__init__() + self.mayReturnEmpty = True + self.mayIndexError = False + + +class NoMatch(Token): + """ + A token that will never match. + """ + + def __init__(self): + super().__init__() + self.mayReturnEmpty = True + self.mayIndexError = False + self.errmsg = "Unmatchable token" + + def parseImpl(self, instring, loc, doActions=True): + raise ParseException(instring, loc, self.errmsg, self) + + +class Literal(Token): + """ + Token to exactly match a specified string. + + Example:: + + Literal('blah').parse_string('blah') # -> ['blah'] + Literal('blah').parse_string('blahfooblah') # -> ['blah'] + Literal('blah').parse_string('bla') # -> Exception: Expected "blah" + + For case-insensitive matching, use :class:`CaselessLiteral`. + + For keyword matching (force word break before and after the matched string), + use :class:`Keyword` or :class:`CaselessKeyword`. + """ + + def __init__(self, match_string: str = "", *, matchString: str = ""): + super().__init__() + match_string = matchString or match_string + self.match = match_string + self.matchLen = len(match_string) + try: + self.firstMatchChar = match_string[0] + except IndexError: + raise ValueError("null string passed to Literal; use Empty() instead") + self.errmsg = "Expected " + self.name + self.mayReturnEmpty = False + self.mayIndexError = False + + # Performance tuning: modify __class__ to select + # a parseImpl optimized for single-character check + if self.matchLen == 1 and type(self) is Literal: + self.__class__ = _SingleCharLiteral + + def _generateDefaultName(self): + return repr(self.match) + + def parseImpl(self, instring, loc, doActions=True): + if instring[loc] == self.firstMatchChar and instring.startswith( + self.match, loc + ): + return loc + self.matchLen, self.match + raise ParseException(instring, loc, self.errmsg, self) + + +class _SingleCharLiteral(Literal): + def parseImpl(self, instring, loc, doActions=True): + if instring[loc] == self.firstMatchChar: + return loc + 1, self.match + raise ParseException(instring, loc, self.errmsg, self) + + +ParserElement._literalStringClass = Literal + + +class Keyword(Token): + """ + Token to exactly match a specified string as a keyword, that is, + it must be immediately followed by a non-keyword character. Compare + with :class:`Literal`: + + - ``Literal("if")`` will match the leading ``'if'`` in + ``'ifAndOnlyIf'``. + - ``Keyword("if")`` will not; it will only match the leading + ``'if'`` in ``'if x=1'``, or ``'if(y==2)'`` + + Accepts two optional constructor arguments in addition to the + keyword string: + + - ``identChars`` is a string of characters that would be valid + identifier characters, defaulting to all alphanumerics + "_" and + "$" + - ``caseless`` allows case-insensitive matching, default is ``False``. + + Example:: + + Keyword("start").parse_string("start") # -> ['start'] + Keyword("start").parse_string("starting") # -> Exception + + For case-insensitive matching, use :class:`CaselessKeyword`. + """ + + DEFAULT_KEYWORD_CHARS = alphanums + "_$" + + def __init__( + self, + match_string: str = "", + ident_chars: typing.Optional[str] = None, + caseless: bool = False, + *, + matchString: str = "", + identChars: typing.Optional[str] = None, + ): + super().__init__() + identChars = identChars or ident_chars + if identChars is None: + identChars = Keyword.DEFAULT_KEYWORD_CHARS + match_string = matchString or match_string + self.match = match_string + self.matchLen = len(match_string) + try: + self.firstMatchChar = match_string[0] + except IndexError: + raise ValueError("null string passed to Keyword; use Empty() instead") + self.errmsg = "Expected {} {}".format(type(self).__name__, self.name) + self.mayReturnEmpty = False + self.mayIndexError = False + self.caseless = caseless + if caseless: + self.caselessmatch = match_string.upper() + identChars = identChars.upper() + self.identChars = set(identChars) + + def _generateDefaultName(self): + return repr(self.match) + + def parseImpl(self, instring, loc, doActions=True): + errmsg = self.errmsg + errloc = loc + if self.caseless: + if instring[loc : loc + self.matchLen].upper() == self.caselessmatch: + if loc == 0 or instring[loc - 1].upper() not in self.identChars: + if ( + loc >= len(instring) - self.matchLen + or instring[loc + self.matchLen].upper() not in self.identChars + ): + return loc + self.matchLen, self.match + else: + # followed by keyword char + errmsg += ", was immediately followed by keyword character" + errloc = loc + self.matchLen + else: + # preceded by keyword char + errmsg += ", keyword was immediately preceded by keyword character" + errloc = loc - 1 + # else no match just raise plain exception + + else: + if ( + instring[loc] == self.firstMatchChar + and self.matchLen == 1 + or instring.startswith(self.match, loc) + ): + if loc == 0 or instring[loc - 1] not in self.identChars: + if ( + loc >= len(instring) - self.matchLen + or instring[loc + self.matchLen] not in self.identChars + ): + return loc + self.matchLen, self.match + else: + # followed by keyword char + errmsg += ( + ", keyword was immediately followed by keyword character" + ) + errloc = loc + self.matchLen + else: + # preceded by keyword char + errmsg += ", keyword was immediately preceded by keyword character" + errloc = loc - 1 + # else no match just raise plain exception + + raise ParseException(instring, errloc, errmsg, self) + + @staticmethod + def set_default_keyword_chars(chars) -> None: + """ + Overrides the default characters used by :class:`Keyword` expressions. + """ + Keyword.DEFAULT_KEYWORD_CHARS = chars + + setDefaultKeywordChars = set_default_keyword_chars + + +class CaselessLiteral(Literal): + """ + Token to match a specified string, ignoring case of letters. + Note: the matched results will always be in the case of the given + match string, NOT the case of the input text. + + Example:: + + CaselessLiteral("CMD")[1, ...].parse_string("cmd CMD Cmd10") + # -> ['CMD', 'CMD', 'CMD'] + + (Contrast with example for :class:`CaselessKeyword`.) + """ + + def __init__(self, match_string: str = "", *, matchString: str = ""): + match_string = matchString or match_string + super().__init__(match_string.upper()) + # Preserve the defining literal. + self.returnString = match_string + self.errmsg = "Expected " + self.name + + def parseImpl(self, instring, loc, doActions=True): + if instring[loc : loc + self.matchLen].upper() == self.match: + return loc + self.matchLen, self.returnString + raise ParseException(instring, loc, self.errmsg, self) + + +class CaselessKeyword(Keyword): + """ + Caseless version of :class:`Keyword`. + + Example:: + + CaselessKeyword("CMD")[1, ...].parse_string("cmd CMD Cmd10") + # -> ['CMD', 'CMD'] + + (Contrast with example for :class:`CaselessLiteral`.) + """ + + def __init__( + self, + match_string: str = "", + ident_chars: typing.Optional[str] = None, + *, + matchString: str = "", + identChars: typing.Optional[str] = None, + ): + identChars = identChars or ident_chars + match_string = matchString or match_string + super().__init__(match_string, identChars, caseless=True) + + +class CloseMatch(Token): + """A variation on :class:`Literal` which matches "close" matches, + that is, strings with at most 'n' mismatching characters. + :class:`CloseMatch` takes parameters: + + - ``match_string`` - string to be matched + - ``caseless`` - a boolean indicating whether to ignore casing when comparing characters + - ``max_mismatches`` - (``default=1``) maximum number of + mismatches allowed to count as a match + + The results from a successful parse will contain the matched text + from the input string and the following named results: + + - ``mismatches`` - a list of the positions within the + match_string where mismatches were found + - ``original`` - the original match_string used to compare + against the input string + + If ``mismatches`` is an empty list, then the match was an exact + match. + + Example:: + + patt = CloseMatch("ATCATCGAATGGA") + patt.parse_string("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']}) + patt.parse_string("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1) + + # exact match + patt.parse_string("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']}) + + # close match allowing up to 2 mismatches + patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2) + patt.parse_string("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']}) + """ + + def __init__( + self, + match_string: str, + max_mismatches: int = None, + *, + maxMismatches: int = 1, + caseless=False, + ): + maxMismatches = max_mismatches if max_mismatches is not None else maxMismatches + super().__init__() + self.match_string = match_string + self.maxMismatches = maxMismatches + self.errmsg = "Expected {!r} (with up to {} mismatches)".format( + self.match_string, self.maxMismatches + ) + self.caseless = caseless + self.mayIndexError = False + self.mayReturnEmpty = False + + def _generateDefaultName(self): + return "{}:{!r}".format(type(self).__name__, self.match_string) + + def parseImpl(self, instring, loc, doActions=True): + start = loc + instrlen = len(instring) + maxloc = start + len(self.match_string) + + if maxloc <= instrlen: + match_string = self.match_string + match_stringloc = 0 + mismatches = [] + maxMismatches = self.maxMismatches + + for match_stringloc, s_m in enumerate( + zip(instring[loc:maxloc], match_string) + ): + src, mat = s_m + if self.caseless: + src, mat = src.lower(), mat.lower() + + if src != mat: + mismatches.append(match_stringloc) + if len(mismatches) > maxMismatches: + break + else: + loc = start + match_stringloc + 1 + results = ParseResults([instring[start:loc]]) + results["original"] = match_string + results["mismatches"] = mismatches + return loc, results + + raise ParseException(instring, loc, self.errmsg, self) + + +class Word(Token): + """Token for matching words composed of allowed character sets. + Parameters: + - ``init_chars`` - string of all characters that should be used to + match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.; + if ``body_chars`` is also specified, then this is the string of + initial characters + - ``body_chars`` - string of characters that + can be used for matching after a matched initial character as + given in ``init_chars``; if omitted, same as the initial characters + (default=``None``) + - ``min`` - minimum number of characters to match (default=1) + - ``max`` - maximum number of characters to match (default=0) + - ``exact`` - exact number of characters to match (default=0) + - ``as_keyword`` - match as a keyword (default=``False``) + - ``exclude_chars`` - characters that might be + found in the input ``body_chars`` string but which should not be + accepted for matching ;useful to define a word of all + printables except for one or two characters, for instance + (default=``None``) + + :class:`srange` is useful for defining custom character set strings + for defining :class:`Word` expressions, using range notation from + regular expression character sets. + + A common mistake is to use :class:`Word` to match a specific literal + string, as in ``Word("Address")``. Remember that :class:`Word` + uses the string argument to define *sets* of matchable characters. + This expression would match "Add", "AAA", "dAred", or any other word + made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an + exact literal string, use :class:`Literal` or :class:`Keyword`. + + pyparsing includes helper strings for building Words: + + - :class:`alphas` + - :class:`nums` + - :class:`alphanums` + - :class:`hexnums` + - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255 + - accented, tilded, umlauted, etc.) + - :class:`punc8bit` (non-alphabetic characters in ASCII range + 128-255 - currency, symbols, superscripts, diacriticals, etc.) + - :class:`printables` (any non-whitespace character) + + ``alphas``, ``nums``, and ``printables`` are also defined in several + Unicode sets - see :class:`pyparsing_unicode``. + + Example:: + + # a word composed of digits + integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9")) + + # a word with a leading capital, and zero or more lowercase + capital_word = Word(alphas.upper(), alphas.lower()) + + # hostnames are alphanumeric, with leading alpha, and '-' + hostname = Word(alphas, alphanums + '-') + + # roman numeral (not a strict parser, accepts invalid mix of characters) + roman = Word("IVXLCDM") + + # any string of non-whitespace characters, except for ',' + csv_value = Word(printables, exclude_chars=",") + """ + + def __init__( + self, + init_chars: str = "", + body_chars: typing.Optional[str] = None, + min: int = 1, + max: int = 0, + exact: int = 0, + as_keyword: bool = False, + exclude_chars: typing.Optional[str] = None, + *, + initChars: typing.Optional[str] = None, + bodyChars: typing.Optional[str] = None, + asKeyword: bool = False, + excludeChars: typing.Optional[str] = None, + ): + initChars = initChars or init_chars + bodyChars = bodyChars or body_chars + asKeyword = asKeyword or as_keyword + excludeChars = excludeChars or exclude_chars + super().__init__() + if not initChars: + raise ValueError( + "invalid {}, initChars cannot be empty string".format( + type(self).__name__ + ) + ) + + initChars = set(initChars) + self.initChars = initChars + if excludeChars: + excludeChars = set(excludeChars) + initChars -= excludeChars + if bodyChars: + bodyChars = set(bodyChars) - excludeChars + self.initCharsOrig = "".join(sorted(initChars)) + + if bodyChars: + self.bodyCharsOrig = "".join(sorted(bodyChars)) + self.bodyChars = set(bodyChars) + else: + self.bodyCharsOrig = "".join(sorted(initChars)) + self.bodyChars = set(initChars) + + self.maxSpecified = max > 0 + + if min < 1: + raise ValueError( + "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted" + ) + + self.minLen = min + + if max > 0: + self.maxLen = max + else: + self.maxLen = _MAX_INT + + if exact > 0: + self.maxLen = exact + self.minLen = exact + + self.errmsg = "Expected " + self.name + self.mayIndexError = False + self.asKeyword = asKeyword + + # see if we can make a regex for this Word + if " " not in self.initChars | self.bodyChars and (min == 1 and exact == 0): + if self.bodyChars == self.initChars: + if max == 0: + repeat = "+" + elif max == 1: + repeat = "" + else: + repeat = "{{{},{}}}".format( + self.minLen, "" if self.maxLen == _MAX_INT else self.maxLen + ) + self.reString = "[{}]{}".format( + _collapse_string_to_ranges(self.initChars), + repeat, + ) + elif len(self.initChars) == 1: + if max == 0: + repeat = "*" + else: + repeat = "{{0,{}}}".format(max - 1) + self.reString = "{}[{}]{}".format( + re.escape(self.initCharsOrig), + _collapse_string_to_ranges(self.bodyChars), + repeat, + ) + else: + if max == 0: + repeat = "*" + elif max == 2: + repeat = "" + else: + repeat = "{{0,{}}}".format(max - 1) + self.reString = "[{}][{}]{}".format( + _collapse_string_to_ranges(self.initChars), + _collapse_string_to_ranges(self.bodyChars), + repeat, + ) + if self.asKeyword: + self.reString = r"\b" + self.reString + r"\b" + + try: + self.re = re.compile(self.reString) + except re.error: + self.re = None + else: + self.re_match = self.re.match + self.__class__ = _WordRegex + + def _generateDefaultName(self): + def charsAsStr(s): + max_repr_len = 16 + s = _collapse_string_to_ranges(s, re_escape=False) + if len(s) > max_repr_len: + return s[: max_repr_len - 3] + "..." + else: + return s + + if self.initChars != self.bodyChars: + base = "W:({}, {})".format( + charsAsStr(self.initChars), charsAsStr(self.bodyChars) + ) + else: + base = "W:({})".format(charsAsStr(self.initChars)) + + # add length specification + if self.minLen > 1 or self.maxLen != _MAX_INT: + if self.minLen == self.maxLen: + if self.minLen == 1: + return base[2:] + else: + return base + "{{{}}}".format(self.minLen) + elif self.maxLen == _MAX_INT: + return base + "{{{},...}}".format(self.minLen) + else: + return base + "{{{},{}}}".format(self.minLen, self.maxLen) + return base + + def parseImpl(self, instring, loc, doActions=True): + if instring[loc] not in self.initChars: + raise ParseException(instring, loc, self.errmsg, self) + + start = loc + loc += 1 + instrlen = len(instring) + bodychars = self.bodyChars + maxloc = start + self.maxLen + maxloc = min(maxloc, instrlen) + while loc < maxloc and instring[loc] in bodychars: + loc += 1 + + throwException = False + if loc - start < self.minLen: + throwException = True + elif self.maxSpecified and loc < instrlen and instring[loc] in bodychars: + throwException = True + elif self.asKeyword: + if ( + start > 0 + and instring[start - 1] in bodychars + or loc < instrlen + and instring[loc] in bodychars + ): + throwException = True + + if throwException: + raise ParseException(instring, loc, self.errmsg, self) + + return loc, instring[start:loc] + + +class _WordRegex(Word): + def parseImpl(self, instring, loc, doActions=True): + result = self.re_match(instring, loc) + if not result: + raise ParseException(instring, loc, self.errmsg, self) + + loc = result.end() + return loc, result.group() + + +class Char(_WordRegex): + """A short-cut class for defining :class:`Word` ``(characters, exact=1)``, + when defining a match of any single character in a string of + characters. + """ + + def __init__( + self, + charset: str, + as_keyword: bool = False, + exclude_chars: typing.Optional[str] = None, + *, + asKeyword: bool = False, + excludeChars: typing.Optional[str] = None, + ): + asKeyword = asKeyword or as_keyword + excludeChars = excludeChars or exclude_chars + super().__init__( + charset, exact=1, asKeyword=asKeyword, excludeChars=excludeChars + ) + self.reString = "[{}]".format(_collapse_string_to_ranges(self.initChars)) + if asKeyword: + self.reString = r"\b{}\b".format(self.reString) + self.re = re.compile(self.reString) + self.re_match = self.re.match + + +class Regex(Token): + r"""Token for matching strings that match a given regular + expression. Defined with string specifying the regular expression in + a form recognized by the stdlib Python `re module `_. + If the given regex contains named groups (defined using ``(?P...)``), + these will be preserved as named :class:`ParseResults`. + + If instead of the Python stdlib ``re`` module you wish to use a different RE module + (such as the ``regex`` module), you can do so by building your ``Regex`` object with + a compiled RE that was compiled using ``regex``. + + Example:: + + realnum = Regex(r"[+-]?\d+\.\d*") + # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression + roman = Regex(r"M{0,4}(CM|CD|D?{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})") + + # named fields in a regex will be returned as named results + date = Regex(r'(?P\d{4})-(?P\d\d?)-(?P\d\d?)') + + # the Regex class will accept re's compiled using the regex module + import regex + parser = pp.Regex(regex.compile(r'[0-9]')) + """ + + def __init__( + self, + pattern: Any, + flags: Union[re.RegexFlag, int] = 0, + as_group_list: bool = False, + as_match: bool = False, + *, + asGroupList: bool = False, + asMatch: bool = False, + ): + """The parameters ``pattern`` and ``flags`` are passed + to the ``re.compile()`` function as-is. See the Python + `re module `_ module for an + explanation of the acceptable patterns and flags. + """ + super().__init__() + asGroupList = asGroupList or as_group_list + asMatch = asMatch or as_match + + if isinstance(pattern, str_type): + if not pattern: + raise ValueError("null string passed to Regex; use Empty() instead") + + self._re = None + self.reString = self.pattern = pattern + self.flags = flags + + elif hasattr(pattern, "pattern") and hasattr(pattern, "match"): + self._re = pattern + self.pattern = self.reString = pattern.pattern + self.flags = flags + + else: + raise TypeError( + "Regex may only be constructed with a string or a compiled RE object" + ) + + self.errmsg = "Expected " + self.name + self.mayIndexError = False + self.asGroupList = asGroupList + self.asMatch = asMatch + if self.asGroupList: + self.parseImpl = self.parseImplAsGroupList + if self.asMatch: + self.parseImpl = self.parseImplAsMatch + + @cached_property + def re(self): + if self._re: + return self._re + else: + try: + return re.compile(self.pattern, self.flags) + except re.error: + raise ValueError( + "invalid pattern ({!r}) passed to Regex".format(self.pattern) + ) + + @cached_property + def re_match(self): + return self.re.match + + @cached_property + def mayReturnEmpty(self): + return self.re_match("") is not None + + def _generateDefaultName(self): + return "Re:({})".format(repr(self.pattern).replace("\\\\", "\\")) + + def parseImpl(self, instring, loc, doActions=True): + result = self.re_match(instring, loc) + if not result: + raise ParseException(instring, loc, self.errmsg, self) + + loc = result.end() + ret = ParseResults(result.group()) + d = result.groupdict() + if d: + for k, v in d.items(): + ret[k] = v + return loc, ret + + def parseImplAsGroupList(self, instring, loc, doActions=True): + result = self.re_match(instring, loc) + if not result: + raise ParseException(instring, loc, self.errmsg, self) + + loc = result.end() + ret = result.groups() + return loc, ret + + def parseImplAsMatch(self, instring, loc, doActions=True): + result = self.re_match(instring, loc) + if not result: + raise ParseException(instring, loc, self.errmsg, self) + + loc = result.end() + ret = result + return loc, ret + + def sub(self, repl: str) -> ParserElement: + r""" + Return :class:`Regex` with an attached parse action to transform the parsed + result as if called using `re.sub(expr, repl, string) `_. + + Example:: + + make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2") + print(make_html.transform_string("h1:main title:")) + # prints "

main title

" + """ + if self.asGroupList: + raise TypeError("cannot use sub() with Regex(asGroupList=True)") + + if self.asMatch and callable(repl): + raise TypeError("cannot use sub() with a callable with Regex(asMatch=True)") + + if self.asMatch: + + def pa(tokens): + return tokens[0].expand(repl) + + else: + + def pa(tokens): + return self.re.sub(repl, tokens[0]) + + return self.add_parse_action(pa) + + +class QuotedString(Token): + r""" + Token for matching strings that are delimited by quoting characters. + + Defined with the following parameters: + + - ``quote_char`` - string of one or more characters defining the + quote delimiting string + - ``esc_char`` - character to re_escape quotes, typically backslash + (default= ``None``) + - ``esc_quote`` - special quote sequence to re_escape an embedded quote + string (such as SQL's ``""`` to re_escape an embedded ``"``) + (default= ``None``) + - ``multiline`` - boolean indicating whether quotes can span + multiple lines (default= ``False``) + - ``unquote_results`` - boolean indicating whether the matched text + should be unquoted (default= ``True``) + - ``end_quote_char`` - string of one or more characters defining the + end of the quote delimited string (default= ``None`` => same as + quote_char) + - ``convert_whitespace_escapes`` - convert escaped whitespace + (``'\t'``, ``'\n'``, etc.) to actual whitespace + (default= ``True``) + + Example:: + + qs = QuotedString('"') + print(qs.search_string('lsjdf "This is the quote" sldjf')) + complex_qs = QuotedString('{{', end_quote_char='}}') + print(complex_qs.search_string('lsjdf {{This is the "quote"}} sldjf')) + sql_qs = QuotedString('"', esc_quote='""') + print(sql_qs.search_string('lsjdf "This is the quote with ""embedded"" quotes" sldjf')) + + prints:: + + [['This is the quote']] + [['This is the "quote"']] + [['This is the quote with "embedded" quotes']] + """ + ws_map = ((r"\t", "\t"), (r"\n", "\n"), (r"\f", "\f"), (r"\r", "\r")) + + def __init__( + self, + quote_char: str = "", + esc_char: typing.Optional[str] = None, + esc_quote: typing.Optional[str] = None, + multiline: bool = False, + unquote_results: bool = True, + end_quote_char: typing.Optional[str] = None, + convert_whitespace_escapes: bool = True, + *, + quoteChar: str = "", + escChar: typing.Optional[str] = None, + escQuote: typing.Optional[str] = None, + unquoteResults: bool = True, + endQuoteChar: typing.Optional[str] = None, + convertWhitespaceEscapes: bool = True, + ): + super().__init__() + escChar = escChar or esc_char + escQuote = escQuote or esc_quote + unquoteResults = unquoteResults and unquote_results + endQuoteChar = endQuoteChar or end_quote_char + convertWhitespaceEscapes = ( + convertWhitespaceEscapes and convert_whitespace_escapes + ) + quote_char = quoteChar or quote_char + + # remove white space from quote chars - wont work anyway + quote_char = quote_char.strip() + if not quote_char: + raise ValueError("quote_char cannot be the empty string") + + if endQuoteChar is None: + endQuoteChar = quote_char + else: + endQuoteChar = endQuoteChar.strip() + if not endQuoteChar: + raise ValueError("endQuoteChar cannot be the empty string") + + self.quoteChar = quote_char + self.quoteCharLen = len(quote_char) + self.firstQuoteChar = quote_char[0] + self.endQuoteChar = endQuoteChar + self.endQuoteCharLen = len(endQuoteChar) + self.escChar = escChar + self.escQuote = escQuote + self.unquoteResults = unquoteResults + self.convertWhitespaceEscapes = convertWhitespaceEscapes + + sep = "" + inner_pattern = "" + + if escQuote: + inner_pattern += r"{}(?:{})".format(sep, re.escape(escQuote)) + sep = "|" + + if escChar: + inner_pattern += r"{}(?:{}.)".format(sep, re.escape(escChar)) + sep = "|" + self.escCharReplacePattern = re.escape(self.escChar) + "(.)" + + if len(self.endQuoteChar) > 1: + inner_pattern += ( + "{}(?:".format(sep) + + "|".join( + "(?:{}(?!{}))".format( + re.escape(self.endQuoteChar[:i]), + re.escape(self.endQuoteChar[i:]), + ) + for i in range(len(self.endQuoteChar) - 1, 0, -1) + ) + + ")" + ) + sep = "|" + + if multiline: + self.flags = re.MULTILINE | re.DOTALL + inner_pattern += r"{}(?:[^{}{}])".format( + sep, + _escape_regex_range_chars(self.endQuoteChar[0]), + (_escape_regex_range_chars(escChar) if escChar is not None else ""), + ) + else: + self.flags = 0 + inner_pattern += r"{}(?:[^{}\n\r{}])".format( + sep, + _escape_regex_range_chars(self.endQuoteChar[0]), + (_escape_regex_range_chars(escChar) if escChar is not None else ""), + ) + + self.pattern = "".join( + [ + re.escape(self.quoteChar), + "(?:", + inner_pattern, + ")*", + re.escape(self.endQuoteChar), + ] + ) + + try: + self.re = re.compile(self.pattern, self.flags) + self.reString = self.pattern + self.re_match = self.re.match + except re.error: + raise ValueError( + "invalid pattern {!r} passed to Regex".format(self.pattern) + ) + + self.errmsg = "Expected " + self.name + self.mayIndexError = False + self.mayReturnEmpty = True + + def _generateDefaultName(self): + if self.quoteChar == self.endQuoteChar and isinstance(self.quoteChar, str_type): + return "string enclosed in {!r}".format(self.quoteChar) + + return "quoted string, starting with {} ending with {}".format( + self.quoteChar, self.endQuoteChar + ) + + def parseImpl(self, instring, loc, doActions=True): + result = ( + instring[loc] == self.firstQuoteChar + and self.re_match(instring, loc) + or None + ) + if not result: + raise ParseException(instring, loc, self.errmsg, self) + + loc = result.end() + ret = result.group() + + if self.unquoteResults: + + # strip off quotes + ret = ret[self.quoteCharLen : -self.endQuoteCharLen] + + if isinstance(ret, str_type): + # replace escaped whitespace + if "\\" in ret and self.convertWhitespaceEscapes: + for wslit, wschar in self.ws_map: + ret = ret.replace(wslit, wschar) + + # replace escaped characters + if self.escChar: + ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret) + + # replace escaped quotes + if self.escQuote: + ret = ret.replace(self.escQuote, self.endQuoteChar) + + return loc, ret + + +class CharsNotIn(Token): + """Token for matching words composed of characters *not* in a given + set (will include whitespace in matched characters if not listed in + the provided exclusion set - see example). Defined with string + containing all disallowed characters, and an optional minimum, + maximum, and/or exact length. The default value for ``min`` is + 1 (a minimum value < 1 is not valid); the default values for + ``max`` and ``exact`` are 0, meaning no maximum or exact + length restriction. + + Example:: + + # define a comma-separated-value as anything that is not a ',' + csv_value = CharsNotIn(',') + print(delimited_list(csv_value).parse_string("dkls,lsdkjf,s12 34,@!#,213")) + + prints:: + + ['dkls', 'lsdkjf', 's12 34', '@!#', '213'] + """ + + def __init__( + self, + not_chars: str = "", + min: int = 1, + max: int = 0, + exact: int = 0, + *, + notChars: str = "", + ): + super().__init__() + self.skipWhitespace = False + self.notChars = not_chars or notChars + self.notCharsSet = set(self.notChars) + + if min < 1: + raise ValueError( + "cannot specify a minimum length < 1; use " + "Opt(CharsNotIn()) if zero-length char group is permitted" + ) + + self.minLen = min + + if max > 0: + self.maxLen = max + else: + self.maxLen = _MAX_INT + + if exact > 0: + self.maxLen = exact + self.minLen = exact + + self.errmsg = "Expected " + self.name + self.mayReturnEmpty = self.minLen == 0 + self.mayIndexError = False + + def _generateDefaultName(self): + not_chars_str = _collapse_string_to_ranges(self.notChars) + if len(not_chars_str) > 16: + return "!W:({}...)".format(self.notChars[: 16 - 3]) + else: + return "!W:({})".format(self.notChars) + + def parseImpl(self, instring, loc, doActions=True): + notchars = self.notCharsSet + if instring[loc] in notchars: + raise ParseException(instring, loc, self.errmsg, self) + + start = loc + loc += 1 + maxlen = min(start + self.maxLen, len(instring)) + while loc < maxlen and instring[loc] not in notchars: + loc += 1 + + if loc - start < self.minLen: + raise ParseException(instring, loc, self.errmsg, self) + + return loc, instring[start:loc] + + +class White(Token): + """Special matching class for matching whitespace. Normally, + whitespace is ignored by pyparsing grammars. This class is included + when some whitespace structures are significant. Define with + a string containing the whitespace characters to be matched; default + is ``" \\t\\r\\n"``. Also takes optional ``min``, + ``max``, and ``exact`` arguments, as defined for the + :class:`Word` class. + """ + + whiteStrs = { + " ": "", + "\t": "", + "\n": "", + "\r": "", + "\f": "", + "\u00A0": "", + "\u1680": "", + "\u180E": "", + "\u2000": "", + "\u2001": "", + "\u2002": "", + "\u2003": "", + "\u2004": "", + "\u2005": "", + "\u2006": "", + "\u2007": "", + "\u2008": "", + "\u2009": "", + "\u200A": "", + "\u200B": "", + "\u202F": "", + "\u205F": "", + "\u3000": "", + } + + def __init__(self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0): + super().__init__() + self.matchWhite = ws + self.set_whitespace_chars( + "".join(c for c in self.whiteStrs if c not in self.matchWhite), + copy_defaults=True, + ) + # self.leave_whitespace() + self.mayReturnEmpty = True + self.errmsg = "Expected " + self.name + + self.minLen = min + + if max > 0: + self.maxLen = max + else: + self.maxLen = _MAX_INT + + if exact > 0: + self.maxLen = exact + self.minLen = exact + + def _generateDefaultName(self): + return "".join(White.whiteStrs[c] for c in self.matchWhite) + + def parseImpl(self, instring, loc, doActions=True): + if instring[loc] not in self.matchWhite: + raise ParseException(instring, loc, self.errmsg, self) + start = loc + loc += 1 + maxloc = start + self.maxLen + maxloc = min(maxloc, len(instring)) + while loc < maxloc and instring[loc] in self.matchWhite: + loc += 1 + + if loc - start < self.minLen: + raise ParseException(instring, loc, self.errmsg, self) + + return loc, instring[start:loc] + + +class PositionToken(Token): + def __init__(self): + super().__init__() + self.mayReturnEmpty = True + self.mayIndexError = False + + +class GoToColumn(PositionToken): + """Token to advance to a specific column of input text; useful for + tabular report scraping. + """ + + def __init__(self, colno: int): + super().__init__() + self.col = colno + + def preParse(self, instring, loc): + if col(loc, instring) != self.col: + instrlen = len(instring) + if self.ignoreExprs: + loc = self._skipIgnorables(instring, loc) + while ( + loc < instrlen + and instring[loc].isspace() + and col(loc, instring) != self.col + ): + loc += 1 + return loc + + def parseImpl(self, instring, loc, doActions=True): + thiscol = col(loc, instring) + if thiscol > self.col: + raise ParseException(instring, loc, "Text not in expected column", self) + newloc = loc + self.col - thiscol + ret = instring[loc:newloc] + return newloc, ret + + +class LineStart(PositionToken): + r"""Matches if current position is at the beginning of a line within + the parse string + + Example:: + + test = '''\ + AAA this line + AAA and this line + AAA but not this one + B AAA and definitely not this one + ''' + + for t in (LineStart() + 'AAA' + restOfLine).search_string(test): + print(t) + + prints:: + + ['AAA', ' this line'] + ['AAA', ' and this line'] + + """ + + def __init__(self): + super().__init__() + self.leave_whitespace() + self.orig_whiteChars = set() | self.whiteChars + self.whiteChars.discard("\n") + self.skipper = Empty().set_whitespace_chars(self.whiteChars) + self.errmsg = "Expected start of line" + + def preParse(self, instring, loc): + if loc == 0: + return loc + else: + ret = self.skipper.preParse(instring, loc) + if "\n" in self.orig_whiteChars: + while instring[ret : ret + 1] == "\n": + ret = self.skipper.preParse(instring, ret + 1) + return ret + + def parseImpl(self, instring, loc, doActions=True): + if col(loc, instring) == 1: + return loc, [] + raise ParseException(instring, loc, self.errmsg, self) + + +class LineEnd(PositionToken): + """Matches if current position is at the end of a line within the + parse string + """ + + def __init__(self): + super().__init__() + self.whiteChars.discard("\n") + self.set_whitespace_chars(self.whiteChars, copy_defaults=False) + self.errmsg = "Expected end of line" + + def parseImpl(self, instring, loc, doActions=True): + if loc < len(instring): + if instring[loc] == "\n": + return loc + 1, "\n" + else: + raise ParseException(instring, loc, self.errmsg, self) + elif loc == len(instring): + return loc + 1, [] + else: + raise ParseException(instring, loc, self.errmsg, self) + + +class StringStart(PositionToken): + """Matches if current position is at the beginning of the parse + string + """ + + def __init__(self): + super().__init__() + self.errmsg = "Expected start of text" + + def parseImpl(self, instring, loc, doActions=True): + if loc != 0: + # see if entire string up to here is just whitespace and ignoreables + if loc != self.preParse(instring, 0): + raise ParseException(instring, loc, self.errmsg, self) + return loc, [] + + +class StringEnd(PositionToken): + """ + Matches if current position is at the end of the parse string + """ + + def __init__(self): + super().__init__() + self.errmsg = "Expected end of text" + + def parseImpl(self, instring, loc, doActions=True): + if loc < len(instring): + raise ParseException(instring, loc, self.errmsg, self) + elif loc == len(instring): + return loc + 1, [] + elif loc > len(instring): + return loc, [] + else: + raise ParseException(instring, loc, self.errmsg, self) + + +class WordStart(PositionToken): + """Matches if the current position is at the beginning of a + :class:`Word`, and is not preceded by any character in a given + set of ``word_chars`` (default= ``printables``). To emulate the + ``\b`` behavior of regular expressions, use + ``WordStart(alphanums)``. ``WordStart`` will also match at + the beginning of the string being parsed, or at the beginning of + a line. + """ + + def __init__(self, word_chars: str = printables, *, wordChars: str = printables): + wordChars = word_chars if wordChars == printables else wordChars + super().__init__() + self.wordChars = set(wordChars) + self.errmsg = "Not at the start of a word" + + def parseImpl(self, instring, loc, doActions=True): + if loc != 0: + if ( + instring[loc - 1] in self.wordChars + or instring[loc] not in self.wordChars + ): + raise ParseException(instring, loc, self.errmsg, self) + return loc, [] + + +class WordEnd(PositionToken): + """Matches if the current position is at the end of a :class:`Word`, + and is not followed by any character in a given set of ``word_chars`` + (default= ``printables``). To emulate the ``\b`` behavior of + regular expressions, use ``WordEnd(alphanums)``. ``WordEnd`` + will also match at the end of the string being parsed, or at the end + of a line. + """ + + def __init__(self, word_chars: str = printables, *, wordChars: str = printables): + wordChars = word_chars if wordChars == printables else wordChars + super().__init__() + self.wordChars = set(wordChars) + self.skipWhitespace = False + self.errmsg = "Not at the end of a word" + + def parseImpl(self, instring, loc, doActions=True): + instrlen = len(instring) + if instrlen > 0 and loc < instrlen: + if ( + instring[loc] in self.wordChars + or instring[loc - 1] not in self.wordChars + ): + raise ParseException(instring, loc, self.errmsg, self) + return loc, [] + + +class ParseExpression(ParserElement): + """Abstract subclass of ParserElement, for combining and + post-processing parsed tokens. + """ + + def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False): + super().__init__(savelist) + self.exprs: List[ParserElement] + if isinstance(exprs, _generatorType): + exprs = list(exprs) + + if isinstance(exprs, str_type): + self.exprs = [self._literalStringClass(exprs)] + elif isinstance(exprs, ParserElement): + self.exprs = [exprs] + elif isinstance(exprs, Iterable): + exprs = list(exprs) + # if sequence of strings provided, wrap with Literal + if any(isinstance(expr, str_type) for expr in exprs): + exprs = ( + self._literalStringClass(e) if isinstance(e, str_type) else e + for e in exprs + ) + self.exprs = list(exprs) + else: + try: + self.exprs = list(exprs) + except TypeError: + self.exprs = [exprs] + self.callPreparse = False + + def recurse(self) -> Sequence[ParserElement]: + return self.exprs[:] + + def append(self, other) -> ParserElement: + self.exprs.append(other) + self._defaultName = None + return self + + def leave_whitespace(self, recursive: bool = True) -> ParserElement: + """ + Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on + all contained expressions. + """ + super().leave_whitespace(recursive) + + if recursive: + self.exprs = [e.copy() for e in self.exprs] + for e in self.exprs: + e.leave_whitespace(recursive) + return self + + def ignore_whitespace(self, recursive: bool = True) -> ParserElement: + """ + Extends ``ignore_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on + all contained expressions. + """ + super().ignore_whitespace(recursive) + if recursive: + self.exprs = [e.copy() for e in self.exprs] + for e in self.exprs: + e.ignore_whitespace(recursive) + return self + + def ignore(self, other) -> ParserElement: + if isinstance(other, Suppress): + if other not in self.ignoreExprs: + super().ignore(other) + for e in self.exprs: + e.ignore(self.ignoreExprs[-1]) + else: + super().ignore(other) + for e in self.exprs: + e.ignore(self.ignoreExprs[-1]) + return self + + def _generateDefaultName(self): + return "{}:({})".format(self.__class__.__name__, str(self.exprs)) + + def streamline(self) -> ParserElement: + if self.streamlined: + return self + + super().streamline() + + for e in self.exprs: + e.streamline() + + # collapse nested :class:`And`'s of the form ``And(And(And(a, b), c), d)`` to ``And(a, b, c, d)`` + # but only if there are no parse actions or resultsNames on the nested And's + # (likewise for :class:`Or`'s and :class:`MatchFirst`'s) + if len(self.exprs) == 2: + other = self.exprs[0] + if ( + isinstance(other, self.__class__) + and not other.parseAction + and other.resultsName is None + and not other.debug + ): + self.exprs = other.exprs[:] + [self.exprs[1]] + self._defaultName = None + self.mayReturnEmpty |= other.mayReturnEmpty + self.mayIndexError |= other.mayIndexError + + other = self.exprs[-1] + if ( + isinstance(other, self.__class__) + and not other.parseAction + and other.resultsName is None + and not other.debug + ): + self.exprs = self.exprs[:-1] + other.exprs[:] + self._defaultName = None + self.mayReturnEmpty |= other.mayReturnEmpty + self.mayIndexError |= other.mayIndexError + + self.errmsg = "Expected " + str(self) + + return self + + def validate(self, validateTrace=None) -> None: + tmp = (validateTrace if validateTrace is not None else [])[:] + [self] + for e in self.exprs: + e.validate(tmp) + self._checkRecursion([]) + + def copy(self) -> ParserElement: + ret = super().copy() + ret.exprs = [e.copy() for e in self.exprs] + return ret + + def _setResultsName(self, name, listAllMatches=False): + if ( + __diag__.warn_ungrouped_named_tokens_in_collection + and Diagnostics.warn_ungrouped_named_tokens_in_collection + not in self.suppress_warnings_ + ): + for e in self.exprs: + if ( + isinstance(e, ParserElement) + and e.resultsName + and Diagnostics.warn_ungrouped_named_tokens_in_collection + not in e.suppress_warnings_ + ): + warnings.warn( + "{}: setting results name {!r} on {} expression " + "collides with {!r} on contained expression".format( + "warn_ungrouped_named_tokens_in_collection", + name, + type(self).__name__, + e.resultsName, + ), + stacklevel=3, + ) + + return super()._setResultsName(name, listAllMatches) + + ignoreWhitespace = ignore_whitespace + leaveWhitespace = leave_whitespace + + +class And(ParseExpression): + """ + Requires all given :class:`ParseExpression` s to be found in the given order. + Expressions may be separated by whitespace. + May be constructed using the ``'+'`` operator. + May also be constructed using the ``'-'`` operator, which will + suppress backtracking. + + Example:: + + integer = Word(nums) + name_expr = Word(alphas)[1, ...] + + expr = And([integer("id"), name_expr("name"), integer("age")]) + # more easily written as: + expr = integer("id") + name_expr("name") + integer("age") + """ + + class _ErrorStop(Empty): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.leave_whitespace() + + def _generateDefaultName(self): + return "-" + + def __init__( + self, exprs_arg: typing.Iterable[ParserElement], savelist: bool = True + ): + exprs: List[ParserElement] = list(exprs_arg) + if exprs and Ellipsis in exprs: + tmp = [] + for i, expr in enumerate(exprs): + if expr is Ellipsis: + if i < len(exprs) - 1: + skipto_arg: ParserElement = (Empty() + exprs[i + 1]).exprs[-1] + tmp.append(SkipTo(skipto_arg)("_skipped*")) + else: + raise Exception( + "cannot construct And with sequence ending in ..." + ) + else: + tmp.append(expr) + exprs[:] = tmp + super().__init__(exprs, savelist) + if self.exprs: + self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) + if not isinstance(self.exprs[0], White): + self.set_whitespace_chars( + self.exprs[0].whiteChars, + copy_defaults=self.exprs[0].copyDefaultWhiteChars, + ) + self.skipWhitespace = self.exprs[0].skipWhitespace + else: + self.skipWhitespace = False + else: + self.mayReturnEmpty = True + self.callPreparse = True + + def streamline(self) -> ParserElement: + # collapse any _PendingSkip's + if self.exprs: + if any( + isinstance(e, ParseExpression) + and e.exprs + and isinstance(e.exprs[-1], _PendingSkip) + for e in self.exprs[:-1] + ): + for i, e in enumerate(self.exprs[:-1]): + if e is None: + continue + if ( + isinstance(e, ParseExpression) + and e.exprs + and isinstance(e.exprs[-1], _PendingSkip) + ): + e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1] + self.exprs[i + 1] = None + self.exprs = [e for e in self.exprs if e is not None] + + super().streamline() + + # link any IndentedBlocks to the prior expression + for prev, cur in zip(self.exprs, self.exprs[1:]): + # traverse cur or any first embedded expr of cur looking for an IndentedBlock + # (but watch out for recursive grammar) + seen = set() + while cur: + if id(cur) in seen: + break + seen.add(id(cur)) + if isinstance(cur, IndentedBlock): + prev.add_parse_action( + lambda s, l, t, cur_=cur: setattr( + cur_, "parent_anchor", col(l, s) + ) + ) + break + subs = cur.recurse() + cur = next(iter(subs), None) + + self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) + return self + + def parseImpl(self, instring, loc, doActions=True): + # pass False as callPreParse arg to _parse for first element, since we already + # pre-parsed the string as part of our And pre-parsing + loc, resultlist = self.exprs[0]._parse( + instring, loc, doActions, callPreParse=False + ) + errorStop = False + for e in self.exprs[1:]: + # if isinstance(e, And._ErrorStop): + if type(e) is And._ErrorStop: + errorStop = True + continue + if errorStop: + try: + loc, exprtokens = e._parse(instring, loc, doActions) + except ParseSyntaxException: + raise + except ParseBaseException as pe: + pe.__traceback__ = None + raise ParseSyntaxException._from_exception(pe) + except IndexError: + raise ParseSyntaxException( + instring, len(instring), self.errmsg, self + ) + else: + loc, exprtokens = e._parse(instring, loc, doActions) + if exprtokens or exprtokens.haskeys(): + resultlist += exprtokens + return loc, resultlist + + def __iadd__(self, other): + if isinstance(other, str_type): + other = self._literalStringClass(other) + return self.append(other) # And([self, other]) + + def _checkRecursion(self, parseElementList): + subRecCheckList = parseElementList[:] + [self] + for e in self.exprs: + e._checkRecursion(subRecCheckList) + if not e.mayReturnEmpty: + break + + def _generateDefaultName(self): + inner = " ".join(str(e) for e in self.exprs) + # strip off redundant inner {}'s + while len(inner) > 1 and inner[0 :: len(inner) - 1] == "{}": + inner = inner[1:-1] + return "{" + inner + "}" + + +class Or(ParseExpression): + """Requires that at least one :class:`ParseExpression` is found. If + two expressions match, the expression that matches the longest + string will be used. May be constructed using the ``'^'`` + operator. + + Example:: + + # construct Or using '^' operator + + number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums)) + print(number.search_string("123 3.1416 789")) + + prints:: + + [['123'], ['3.1416'], ['789']] + """ + + def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False): + super().__init__(exprs, savelist) + if self.exprs: + self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) + self.skipWhitespace = all(e.skipWhitespace for e in self.exprs) + else: + self.mayReturnEmpty = True + + def streamline(self) -> ParserElement: + super().streamline() + if self.exprs: + self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) + self.saveAsList = any(e.saveAsList for e in self.exprs) + self.skipWhitespace = all( + e.skipWhitespace and not isinstance(e, White) for e in self.exprs + ) + else: + self.saveAsList = False + return self + + def parseImpl(self, instring, loc, doActions=True): + maxExcLoc = -1 + maxException = None + matches = [] + fatals = [] + if all(e.callPreparse for e in self.exprs): + loc = self.preParse(instring, loc) + for e in self.exprs: + try: + loc2 = e.try_parse(instring, loc, raise_fatal=True) + except ParseFatalException as pfe: + pfe.__traceback__ = None + pfe.parserElement = e + fatals.append(pfe) + maxException = None + maxExcLoc = -1 + except ParseException as err: + if not fatals: + err.__traceback__ = None + if err.loc > maxExcLoc: + maxException = err + maxExcLoc = err.loc + except IndexError: + if len(instring) > maxExcLoc: + maxException = ParseException( + instring, len(instring), e.errmsg, self + ) + maxExcLoc = len(instring) + else: + # save match among all matches, to retry longest to shortest + matches.append((loc2, e)) + + if matches: + # re-evaluate all matches in descending order of length of match, in case attached actions + # might change whether or how much they match of the input. + matches.sort(key=itemgetter(0), reverse=True) + + if not doActions: + # no further conditions or parse actions to change the selection of + # alternative, so the first match will be the best match + best_expr = matches[0][1] + return best_expr._parse(instring, loc, doActions) + + longest = -1, None + for loc1, expr1 in matches: + if loc1 <= longest[0]: + # already have a longer match than this one will deliver, we are done + return longest + + try: + loc2, toks = expr1._parse(instring, loc, doActions) + except ParseException as err: + err.__traceback__ = None + if err.loc > maxExcLoc: + maxException = err + maxExcLoc = err.loc + else: + if loc2 >= loc1: + return loc2, toks + # didn't match as much as before + elif loc2 > longest[0]: + longest = loc2, toks + + if longest != (-1, None): + return longest + + if fatals: + if len(fatals) > 1: + fatals.sort(key=lambda e: -e.loc) + if fatals[0].loc == fatals[1].loc: + fatals.sort(key=lambda e: (-e.loc, -len(str(e.parserElement)))) + max_fatal = fatals[0] + raise max_fatal + + if maxException is not None: + maxException.msg = self.errmsg + raise maxException + else: + raise ParseException( + instring, loc, "no defined alternatives to match", self + ) + + def __ixor__(self, other): + if isinstance(other, str_type): + other = self._literalStringClass(other) + return self.append(other) # Or([self, other]) + + def _generateDefaultName(self): + return "{" + " ^ ".join(str(e) for e in self.exprs) + "}" + + def _setResultsName(self, name, listAllMatches=False): + if ( + __diag__.warn_multiple_tokens_in_named_alternation + and Diagnostics.warn_multiple_tokens_in_named_alternation + not in self.suppress_warnings_ + ): + if any( + isinstance(e, And) + and Diagnostics.warn_multiple_tokens_in_named_alternation + not in e.suppress_warnings_ + for e in self.exprs + ): + warnings.warn( + "{}: setting results name {!r} on {} expression " + "will return a list of all parsed tokens in an And alternative, " + "in prior versions only the first token was returned; enclose " + "contained argument in Group".format( + "warn_multiple_tokens_in_named_alternation", + name, + type(self).__name__, + ), + stacklevel=3, + ) + + return super()._setResultsName(name, listAllMatches) + + +class MatchFirst(ParseExpression): + """Requires that at least one :class:`ParseExpression` is found. If + more than one expression matches, the first one listed is the one that will + match. May be constructed using the ``'|'`` operator. + + Example:: + + # construct MatchFirst using '|' operator + + # watch the order of expressions to match + number = Word(nums) | Combine(Word(nums) + '.' + Word(nums)) + print(number.search_string("123 3.1416 789")) # Fail! -> [['123'], ['3'], ['1416'], ['789']] + + # put more selective expression first + number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums) + print(number.search_string("123 3.1416 789")) # Better -> [['123'], ['3.1416'], ['789']] + """ + + def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False): + super().__init__(exprs, savelist) + if self.exprs: + self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) + self.skipWhitespace = all(e.skipWhitespace for e in self.exprs) + else: + self.mayReturnEmpty = True + + def streamline(self) -> ParserElement: + if self.streamlined: + return self + + super().streamline() + if self.exprs: + self.saveAsList = any(e.saveAsList for e in self.exprs) + self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) + self.skipWhitespace = all( + e.skipWhitespace and not isinstance(e, White) for e in self.exprs + ) + else: + self.saveAsList = False + self.mayReturnEmpty = True + return self + + def parseImpl(self, instring, loc, doActions=True): + maxExcLoc = -1 + maxException = None + + for e in self.exprs: + try: + return e._parse( + instring, + loc, + doActions, + ) + except ParseFatalException as pfe: + pfe.__traceback__ = None + pfe.parserElement = e + raise + except ParseException as err: + if err.loc > maxExcLoc: + maxException = err + maxExcLoc = err.loc + except IndexError: + if len(instring) > maxExcLoc: + maxException = ParseException( + instring, len(instring), e.errmsg, self + ) + maxExcLoc = len(instring) + + if maxException is not None: + maxException.msg = self.errmsg + raise maxException + else: + raise ParseException( + instring, loc, "no defined alternatives to match", self + ) + + def __ior__(self, other): + if isinstance(other, str_type): + other = self._literalStringClass(other) + return self.append(other) # MatchFirst([self, other]) + + def _generateDefaultName(self): + return "{" + " | ".join(str(e) for e in self.exprs) + "}" + + def _setResultsName(self, name, listAllMatches=False): + if ( + __diag__.warn_multiple_tokens_in_named_alternation + and Diagnostics.warn_multiple_tokens_in_named_alternation + not in self.suppress_warnings_ + ): + if any( + isinstance(e, And) + and Diagnostics.warn_multiple_tokens_in_named_alternation + not in e.suppress_warnings_ + for e in self.exprs + ): + warnings.warn( + "{}: setting results name {!r} on {} expression " + "will return a list of all parsed tokens in an And alternative, " + "in prior versions only the first token was returned; enclose " + "contained argument in Group".format( + "warn_multiple_tokens_in_named_alternation", + name, + type(self).__name__, + ), + stacklevel=3, + ) + + return super()._setResultsName(name, listAllMatches) + + +class Each(ParseExpression): + """Requires all given :class:`ParseExpression` s to be found, but in + any order. Expressions may be separated by whitespace. + + May be constructed using the ``'&'`` operator. + + Example:: + + color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN") + shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON") + integer = Word(nums) + shape_attr = "shape:" + shape_type("shape") + posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn") + color_attr = "color:" + color("color") + size_attr = "size:" + integer("size") + + # use Each (using operator '&') to accept attributes in any order + # (shape and posn are required, color and size are optional) + shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr) + + shape_spec.run_tests(''' + shape: SQUARE color: BLACK posn: 100, 120 + shape: CIRCLE size: 50 color: BLUE posn: 50,80 + color:GREEN size:20 shape:TRIANGLE posn:20,40 + ''' + ) + + prints:: + + shape: SQUARE color: BLACK posn: 100, 120 + ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']] + - color: BLACK + - posn: ['100', ',', '120'] + - x: 100 + - y: 120 + - shape: SQUARE + + + shape: CIRCLE size: 50 color: BLUE posn: 50,80 + ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']] + - color: BLUE + - posn: ['50', ',', '80'] + - x: 50 + - y: 80 + - shape: CIRCLE + - size: 50 + + + color: GREEN size: 20 shape: TRIANGLE posn: 20,40 + ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']] + - color: GREEN + - posn: ['20', ',', '40'] + - x: 20 + - y: 40 + - shape: TRIANGLE + - size: 20 + """ + + def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = True): + super().__init__(exprs, savelist) + if self.exprs: + self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) + else: + self.mayReturnEmpty = True + self.skipWhitespace = True + self.initExprGroups = True + self.saveAsList = True + + def streamline(self) -> ParserElement: + super().streamline() + if self.exprs: + self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) + else: + self.mayReturnEmpty = True + return self + + def parseImpl(self, instring, loc, doActions=True): + if self.initExprGroups: + self.opt1map = dict( + (id(e.expr), e) for e in self.exprs if isinstance(e, Opt) + ) + opt1 = [e.expr for e in self.exprs if isinstance(e, Opt)] + opt2 = [ + e + for e in self.exprs + if e.mayReturnEmpty and not isinstance(e, (Opt, Regex, ZeroOrMore)) + ] + self.optionals = opt1 + opt2 + self.multioptionals = [ + e.expr.set_results_name(e.resultsName, list_all_matches=True) + for e in self.exprs + if isinstance(e, _MultipleMatch) + ] + self.multirequired = [ + e.expr.set_results_name(e.resultsName, list_all_matches=True) + for e in self.exprs + if isinstance(e, OneOrMore) + ] + self.required = [ + e for e in self.exprs if not isinstance(e, (Opt, ZeroOrMore, OneOrMore)) + ] + self.required += self.multirequired + self.initExprGroups = False + + tmpLoc = loc + tmpReqd = self.required[:] + tmpOpt = self.optionals[:] + multis = self.multioptionals[:] + matchOrder = [] + + keepMatching = True + failed = [] + fatals = [] + while keepMatching: + tmpExprs = tmpReqd + tmpOpt + multis + failed.clear() + fatals.clear() + for e in tmpExprs: + try: + tmpLoc = e.try_parse(instring, tmpLoc, raise_fatal=True) + except ParseFatalException as pfe: + pfe.__traceback__ = None + pfe.parserElement = e + fatals.append(pfe) + failed.append(e) + except ParseException: + failed.append(e) + else: + matchOrder.append(self.opt1map.get(id(e), e)) + if e in tmpReqd: + tmpReqd.remove(e) + elif e in tmpOpt: + tmpOpt.remove(e) + if len(failed) == len(tmpExprs): + keepMatching = False + + # look for any ParseFatalExceptions + if fatals: + if len(fatals) > 1: + fatals.sort(key=lambda e: -e.loc) + if fatals[0].loc == fatals[1].loc: + fatals.sort(key=lambda e: (-e.loc, -len(str(e.parserElement)))) + max_fatal = fatals[0] + raise max_fatal + + if tmpReqd: + missing = ", ".join([str(e) for e in tmpReqd]) + raise ParseException( + instring, + loc, + "Missing one or more required elements ({})".format(missing), + ) + + # add any unmatched Opts, in case they have default values defined + matchOrder += [e for e in self.exprs if isinstance(e, Opt) and e.expr in tmpOpt] + + total_results = ParseResults([]) + for e in matchOrder: + loc, results = e._parse(instring, loc, doActions) + total_results += results + + return loc, total_results + + def _generateDefaultName(self): + return "{" + " & ".join(str(e) for e in self.exprs) + "}" + + +class ParseElementEnhance(ParserElement): + """Abstract subclass of :class:`ParserElement`, for combining and + post-processing parsed tokens. + """ + + def __init__(self, expr: Union[ParserElement, str], savelist: bool = False): + super().__init__(savelist) + if isinstance(expr, str_type): + if issubclass(self._literalStringClass, Token): + expr = self._literalStringClass(expr) + elif issubclass(type(self), self._literalStringClass): + expr = Literal(expr) + else: + expr = self._literalStringClass(Literal(expr)) + self.expr = expr + if expr is not None: + self.mayIndexError = expr.mayIndexError + self.mayReturnEmpty = expr.mayReturnEmpty + self.set_whitespace_chars( + expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars + ) + self.skipWhitespace = expr.skipWhitespace + self.saveAsList = expr.saveAsList + self.callPreparse = expr.callPreparse + self.ignoreExprs.extend(expr.ignoreExprs) + + def recurse(self) -> Sequence[ParserElement]: + return [self.expr] if self.expr is not None else [] + + def parseImpl(self, instring, loc, doActions=True): + if self.expr is not None: + return self.expr._parse(instring, loc, doActions, callPreParse=False) + else: + raise ParseException(instring, loc, "No expression defined", self) + + def leave_whitespace(self, recursive: bool = True) -> ParserElement: + super().leave_whitespace(recursive) + + if recursive: + self.expr = self.expr.copy() + if self.expr is not None: + self.expr.leave_whitespace(recursive) + return self + + def ignore_whitespace(self, recursive: bool = True) -> ParserElement: + super().ignore_whitespace(recursive) + + if recursive: + self.expr = self.expr.copy() + if self.expr is not None: + self.expr.ignore_whitespace(recursive) + return self + + def ignore(self, other) -> ParserElement: + if isinstance(other, Suppress): + if other not in self.ignoreExprs: + super().ignore(other) + if self.expr is not None: + self.expr.ignore(self.ignoreExprs[-1]) + else: + super().ignore(other) + if self.expr is not None: + self.expr.ignore(self.ignoreExprs[-1]) + return self + + def streamline(self) -> ParserElement: + super().streamline() + if self.expr is not None: + self.expr.streamline() + return self + + def _checkRecursion(self, parseElementList): + if self in parseElementList: + raise RecursiveGrammarException(parseElementList + [self]) + subRecCheckList = parseElementList[:] + [self] + if self.expr is not None: + self.expr._checkRecursion(subRecCheckList) + + def validate(self, validateTrace=None) -> None: + if validateTrace is None: + validateTrace = [] + tmp = validateTrace[:] + [self] + if self.expr is not None: + self.expr.validate(tmp) + self._checkRecursion([]) + + def _generateDefaultName(self): + return "{}:({})".format(self.__class__.__name__, str(self.expr)) + + ignoreWhitespace = ignore_whitespace + leaveWhitespace = leave_whitespace + + +class IndentedBlock(ParseElementEnhance): + """ + Expression to match one or more expressions at a given indentation level. + Useful for parsing text where structure is implied by indentation (like Python source code). + """ + + class _Indent(Empty): + def __init__(self, ref_col: int): + super().__init__() + self.errmsg = "expected indent at column {}".format(ref_col) + self.add_condition(lambda s, l, t: col(l, s) == ref_col) + + class _IndentGreater(Empty): + def __init__(self, ref_col: int): + super().__init__() + self.errmsg = "expected indent at column greater than {}".format(ref_col) + self.add_condition(lambda s, l, t: col(l, s) > ref_col) + + def __init__( + self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True + ): + super().__init__(expr, savelist=True) + # if recursive: + # raise NotImplementedError("IndentedBlock with recursive is not implemented") + self._recursive = recursive + self._grouped = grouped + self.parent_anchor = 1 + + def parseImpl(self, instring, loc, doActions=True): + # advance parse position to non-whitespace by using an Empty() + # this should be the column to be used for all subsequent indented lines + anchor_loc = Empty().preParse(instring, loc) + + # see if self.expr matches at the current location - if not it will raise an exception + # and no further work is necessary + self.expr.try_parse(instring, anchor_loc, doActions) + + indent_col = col(anchor_loc, instring) + peer_detect_expr = self._Indent(indent_col) + + inner_expr = Empty() + peer_detect_expr + self.expr + if self._recursive: + sub_indent = self._IndentGreater(indent_col) + nested_block = IndentedBlock( + self.expr, recursive=self._recursive, grouped=self._grouped + ) + nested_block.set_debug(self.debug) + nested_block.parent_anchor = indent_col + inner_expr += Opt(sub_indent + nested_block) + + inner_expr.set_name(f"inner {hex(id(inner_expr))[-4:].upper()}@{indent_col}") + block = OneOrMore(inner_expr) + + trailing_undent = self._Indent(self.parent_anchor) | StringEnd() + + if self._grouped: + wrapper = Group + else: + wrapper = lambda expr: expr + return (wrapper(block) + Optional(trailing_undent)).parseImpl( + instring, anchor_loc, doActions + ) + + +class AtStringStart(ParseElementEnhance): + """Matches if expression matches at the beginning of the parse + string:: + + AtStringStart(Word(nums)).parse_string("123") + # prints ["123"] + + AtStringStart(Word(nums)).parse_string(" 123") + # raises ParseException + """ + + def __init__(self, expr: Union[ParserElement, str]): + super().__init__(expr) + self.callPreparse = False + + def parseImpl(self, instring, loc, doActions=True): + if loc != 0: + raise ParseException(instring, loc, "not found at string start") + return super().parseImpl(instring, loc, doActions) + + +class AtLineStart(ParseElementEnhance): + r"""Matches if an expression matches at the beginning of a line within + the parse string + + Example:: + + test = '''\ + AAA this line + AAA and this line + AAA but not this one + B AAA and definitely not this one + ''' + + for t in (AtLineStart('AAA') + restOfLine).search_string(test): + print(t) + + prints:: + + ['AAA', ' this line'] + ['AAA', ' and this line'] + + """ + + def __init__(self, expr: Union[ParserElement, str]): + super().__init__(expr) + self.callPreparse = False + + def parseImpl(self, instring, loc, doActions=True): + if col(loc, instring) != 1: + raise ParseException(instring, loc, "not found at line start") + return super().parseImpl(instring, loc, doActions) + + +class FollowedBy(ParseElementEnhance): + """Lookahead matching of the given parse expression. + ``FollowedBy`` does *not* advance the parsing position within + the input string, it only verifies that the specified parse + expression matches at the current position. ``FollowedBy`` + always returns a null token list. If any results names are defined + in the lookahead expression, those *will* be returned for access by + name. + + Example:: + + # use FollowedBy to match a label only if it is followed by a ':' + data_word = Word(alphas) + label = data_word + FollowedBy(':') + attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)) + + attr_expr[1, ...].parse_string("shape: SQUARE color: BLACK posn: upper left").pprint() + + prints:: + + [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']] + """ + + def __init__(self, expr: Union[ParserElement, str]): + super().__init__(expr) + self.mayReturnEmpty = True + + def parseImpl(self, instring, loc, doActions=True): + # by using self._expr.parse and deleting the contents of the returned ParseResults list + # we keep any named results that were defined in the FollowedBy expression + _, ret = self.expr._parse(instring, loc, doActions=doActions) + del ret[:] + + return loc, ret + + +class PrecededBy(ParseElementEnhance): + """Lookbehind matching of the given parse expression. + ``PrecededBy`` does not advance the parsing position within the + input string, it only verifies that the specified parse expression + matches prior to the current position. ``PrecededBy`` always + returns a null token list, but if a results name is defined on the + given expression, it is returned. + + Parameters: + + - expr - expression that must match prior to the current parse + location + - retreat - (default= ``None``) - (int) maximum number of characters + to lookbehind prior to the current parse location + + If the lookbehind expression is a string, :class:`Literal`, + :class:`Keyword`, or a :class:`Word` or :class:`CharsNotIn` + with a specified exact or maximum length, then the retreat + parameter is not required. Otherwise, retreat must be specified to + give a maximum number of characters to look back from + the current parse position for a lookbehind match. + + Example:: + + # VB-style variable names with type prefixes + int_var = PrecededBy("#") + pyparsing_common.identifier + str_var = PrecededBy("$") + pyparsing_common.identifier + + """ + + def __init__( + self, expr: Union[ParserElement, str], retreat: typing.Optional[int] = None + ): + super().__init__(expr) + self.expr = self.expr().leave_whitespace() + self.mayReturnEmpty = True + self.mayIndexError = False + self.exact = False + if isinstance(expr, str_type): + retreat = len(expr) + self.exact = True + elif isinstance(expr, (Literal, Keyword)): + retreat = expr.matchLen + self.exact = True + elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT: + retreat = expr.maxLen + self.exact = True + elif isinstance(expr, PositionToken): + retreat = 0 + self.exact = True + self.retreat = retreat + self.errmsg = "not preceded by " + str(expr) + self.skipWhitespace = False + self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None))) + + def parseImpl(self, instring, loc=0, doActions=True): + if self.exact: + if loc < self.retreat: + raise ParseException(instring, loc, self.errmsg) + start = loc - self.retreat + _, ret = self.expr._parse(instring, start) + else: + # retreat specified a maximum lookbehind window, iterate + test_expr = self.expr + StringEnd() + instring_slice = instring[max(0, loc - self.retreat) : loc] + last_expr = ParseException(instring, loc, self.errmsg) + for offset in range(1, min(loc, self.retreat + 1) + 1): + try: + # print('trying', offset, instring_slice, repr(instring_slice[loc - offset:])) + _, ret = test_expr._parse( + instring_slice, len(instring_slice) - offset + ) + except ParseBaseException as pbe: + last_expr = pbe + else: + break + else: + raise last_expr + return loc, ret + + +class Located(ParseElementEnhance): + """ + Decorates a returned token with its starting and ending + locations in the input string. + + This helper adds the following results names: + + - ``locn_start`` - location where matched expression begins + - ``locn_end`` - location where matched expression ends + - ``value`` - the actual parsed results + + Be careful if the input text contains ```` characters, you + may want to call :class:`ParserElement.parse_with_tabs` + + Example:: + + wd = Word(alphas) + for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"): + print(match) + + prints:: + + [0, ['ljsdf'], 5] + [8, ['lksdjjf'], 15] + [18, ['lkkjj'], 23] + + """ + + def parseImpl(self, instring, loc, doActions=True): + start = loc + loc, tokens = self.expr._parse(instring, start, doActions, callPreParse=False) + ret_tokens = ParseResults([start, tokens, loc]) + ret_tokens["locn_start"] = start + ret_tokens["value"] = tokens + ret_tokens["locn_end"] = loc + if self.resultsName: + # must return as a list, so that the name will be attached to the complete group + return loc, [ret_tokens] + else: + return loc, ret_tokens + + +class NotAny(ParseElementEnhance): + """ + Lookahead to disallow matching with the given parse expression. + ``NotAny`` does *not* advance the parsing position within the + input string, it only verifies that the specified parse expression + does *not* match at the current position. Also, ``NotAny`` does + *not* skip over leading whitespace. ``NotAny`` always returns + a null token list. May be constructed using the ``'~'`` operator. + + Example:: + + AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split()) + + # take care not to mistake keywords for identifiers + ident = ~(AND | OR | NOT) + Word(alphas) + boolean_term = Opt(NOT) + ident + + # very crude boolean expression - to support parenthesis groups and + # operation hierarchy, use infix_notation + boolean_expr = boolean_term + ((AND | OR) + boolean_term)[...] + + # integers that are followed by "." are actually floats + integer = Word(nums) + ~Char(".") + """ + + def __init__(self, expr: Union[ParserElement, str]): + super().__init__(expr) + # do NOT use self.leave_whitespace(), don't want to propagate to exprs + # self.leave_whitespace() + self.skipWhitespace = False + + self.mayReturnEmpty = True + self.errmsg = "Found unwanted token, " + str(self.expr) + + def parseImpl(self, instring, loc, doActions=True): + if self.expr.can_parse_next(instring, loc): + raise ParseException(instring, loc, self.errmsg, self) + return loc, [] + + def _generateDefaultName(self): + return "~{" + str(self.expr) + "}" + + +class _MultipleMatch(ParseElementEnhance): + def __init__( + self, + expr: ParserElement, + stop_on: typing.Optional[Union[ParserElement, str]] = None, + *, + stopOn: typing.Optional[Union[ParserElement, str]] = None, + ): + super().__init__(expr) + stopOn = stopOn or stop_on + self.saveAsList = True + ender = stopOn + if isinstance(ender, str_type): + ender = self._literalStringClass(ender) + self.stopOn(ender) + + def stopOn(self, ender) -> ParserElement: + if isinstance(ender, str_type): + ender = self._literalStringClass(ender) + self.not_ender = ~ender if ender is not None else None + return self + + def parseImpl(self, instring, loc, doActions=True): + self_expr_parse = self.expr._parse + self_skip_ignorables = self._skipIgnorables + check_ender = self.not_ender is not None + if check_ender: + try_not_ender = self.not_ender.tryParse + + # must be at least one (but first see if we are the stopOn sentinel; + # if so, fail) + if check_ender: + try_not_ender(instring, loc) + loc, tokens = self_expr_parse(instring, loc, doActions) + try: + hasIgnoreExprs = not not self.ignoreExprs + while 1: + if check_ender: + try_not_ender(instring, loc) + if hasIgnoreExprs: + preloc = self_skip_ignorables(instring, loc) + else: + preloc = loc + loc, tmptokens = self_expr_parse(instring, preloc, doActions) + if tmptokens or tmptokens.haskeys(): + tokens += tmptokens + except (ParseException, IndexError): + pass + + return loc, tokens + + def _setResultsName(self, name, listAllMatches=False): + if ( + __diag__.warn_ungrouped_named_tokens_in_collection + and Diagnostics.warn_ungrouped_named_tokens_in_collection + not in self.suppress_warnings_ + ): + for e in [self.expr] + self.expr.recurse(): + if ( + isinstance(e, ParserElement) + and e.resultsName + and Diagnostics.warn_ungrouped_named_tokens_in_collection + not in e.suppress_warnings_ + ): + warnings.warn( + "{}: setting results name {!r} on {} expression " + "collides with {!r} on contained expression".format( + "warn_ungrouped_named_tokens_in_collection", + name, + type(self).__name__, + e.resultsName, + ), + stacklevel=3, + ) + + return super()._setResultsName(name, listAllMatches) + + +class OneOrMore(_MultipleMatch): + """ + Repetition of one or more of the given expression. + + Parameters: + - expr - expression that must match one or more times + - stop_on - (default= ``None``) - expression for a terminating sentinel + (only required if the sentinel would ordinarily match the repetition + expression) + + Example:: + + data_word = Word(alphas) + label = data_word + FollowedBy(':') + attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).set_parse_action(' '.join)) + + text = "shape: SQUARE posn: upper left color: BLACK" + attr_expr[1, ...].parse_string(text).pprint() # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']] + + # use stop_on attribute for OneOrMore to avoid reading label string as part of the data + attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)) + OneOrMore(attr_expr).parse_string(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']] + + # could also be written as + (attr_expr * (1,)).parse_string(text).pprint() + """ + + def _generateDefaultName(self): + return "{" + str(self.expr) + "}..." + + +class ZeroOrMore(_MultipleMatch): + """ + Optional repetition of zero or more of the given expression. + + Parameters: + - ``expr`` - expression that must match zero or more times + - ``stop_on`` - expression for a terminating sentinel + (only required if the sentinel would ordinarily match the repetition + expression) - (default= ``None``) + + Example: similar to :class:`OneOrMore` + """ + + def __init__( + self, + expr: ParserElement, + stop_on: typing.Optional[Union[ParserElement, str]] = None, + *, + stopOn: typing.Optional[Union[ParserElement, str]] = None, + ): + super().__init__(expr, stopOn=stopOn or stop_on) + self.mayReturnEmpty = True + + def parseImpl(self, instring, loc, doActions=True): + try: + return super().parseImpl(instring, loc, doActions) + except (ParseException, IndexError): + return loc, ParseResults([], name=self.resultsName) + + def _generateDefaultName(self): + return "[" + str(self.expr) + "]..." + + +class _NullToken: + def __bool__(self): + return False + + def __str__(self): + return "" + + +class Opt(ParseElementEnhance): + """ + Optional matching of the given expression. + + Parameters: + - ``expr`` - expression that must match zero or more times + - ``default`` (optional) - value to be returned if the optional expression is not found. + + Example:: + + # US postal code can be a 5-digit zip, plus optional 4-digit qualifier + zip = Combine(Word(nums, exact=5) + Opt('-' + Word(nums, exact=4))) + zip.run_tests(''' + # traditional ZIP code + 12345 + + # ZIP+4 form + 12101-0001 + + # invalid ZIP + 98765- + ''') + + prints:: + + # traditional ZIP code + 12345 + ['12345'] + + # ZIP+4 form + 12101-0001 + ['12101-0001'] + + # invalid ZIP + 98765- + ^ + FAIL: Expected end of text (at char 5), (line:1, col:6) + """ + + __optionalNotMatched = _NullToken() + + def __init__( + self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched + ): + super().__init__(expr, savelist=False) + self.saveAsList = self.expr.saveAsList + self.defaultValue = default + self.mayReturnEmpty = True + + def parseImpl(self, instring, loc, doActions=True): + self_expr = self.expr + try: + loc, tokens = self_expr._parse(instring, loc, doActions, callPreParse=False) + except (ParseException, IndexError): + default_value = self.defaultValue + if default_value is not self.__optionalNotMatched: + if self_expr.resultsName: + tokens = ParseResults([default_value]) + tokens[self_expr.resultsName] = default_value + else: + tokens = [default_value] + else: + tokens = [] + return loc, tokens + + def _generateDefaultName(self): + inner = str(self.expr) + # strip off redundant inner {}'s + while len(inner) > 1 and inner[0 :: len(inner) - 1] == "{}": + inner = inner[1:-1] + return "[" + inner + "]" + + +Optional = Opt + + +class SkipTo(ParseElementEnhance): + """ + Token for skipping over all undefined text until the matched + expression is found. + + Parameters: + - ``expr`` - target expression marking the end of the data to be skipped + - ``include`` - if ``True``, the target expression is also parsed + (the skipped text and target expression are returned as a 2-element + list) (default= ``False``). + - ``ignore`` - (default= ``None``) used to define grammars (typically quoted strings and + comments) that might contain false matches to the target expression + - ``fail_on`` - (default= ``None``) define expressions that are not allowed to be + included in the skipped test; if found before the target expression is found, + the :class:`SkipTo` is not a match + + Example:: + + report = ''' + Outstanding Issues Report - 1 Jan 2000 + + # | Severity | Description | Days Open + -----+----------+-------------------------------------------+----------- + 101 | Critical | Intermittent system crash | 6 + 94 | Cosmetic | Spelling error on Login ('log|n') | 14 + 79 | Minor | System slow when running too many reports | 47 + ''' + integer = Word(nums) + SEP = Suppress('|') + # use SkipTo to simply match everything up until the next SEP + # - ignore quoted strings, so that a '|' character inside a quoted string does not match + # - parse action will call token.strip() for each matched token, i.e., the description body + string_data = SkipTo(SEP, ignore=quoted_string) + string_data.set_parse_action(token_map(str.strip)) + ticket_expr = (integer("issue_num") + SEP + + string_data("sev") + SEP + + string_data("desc") + SEP + + integer("days_open")) + + for tkt in ticket_expr.search_string(report): + print tkt.dump() + + prints:: + + ['101', 'Critical', 'Intermittent system crash', '6'] + - days_open: '6' + - desc: 'Intermittent system crash' + - issue_num: '101' + - sev: 'Critical' + ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14'] + - days_open: '14' + - desc: "Spelling error on Login ('log|n')" + - issue_num: '94' + - sev: 'Cosmetic' + ['79', 'Minor', 'System slow when running too many reports', '47'] + - days_open: '47' + - desc: 'System slow when running too many reports' + - issue_num: '79' + - sev: 'Minor' + """ + + def __init__( + self, + other: Union[ParserElement, str], + include: bool = False, + ignore: bool = None, + fail_on: typing.Optional[Union[ParserElement, str]] = None, + *, + failOn: Union[ParserElement, str] = None, + ): + super().__init__(other) + failOn = failOn or fail_on + self.ignoreExpr = ignore + self.mayReturnEmpty = True + self.mayIndexError = False + self.includeMatch = include + self.saveAsList = False + if isinstance(failOn, str_type): + self.failOn = self._literalStringClass(failOn) + else: + self.failOn = failOn + self.errmsg = "No match found for " + str(self.expr) + + def parseImpl(self, instring, loc, doActions=True): + startloc = loc + instrlen = len(instring) + self_expr_parse = self.expr._parse + self_failOn_canParseNext = ( + self.failOn.canParseNext if self.failOn is not None else None + ) + self_ignoreExpr_tryParse = ( + self.ignoreExpr.tryParse if self.ignoreExpr is not None else None + ) + + tmploc = loc + while tmploc <= instrlen: + if self_failOn_canParseNext is not None: + # break if failOn expression matches + if self_failOn_canParseNext(instring, tmploc): + break + + if self_ignoreExpr_tryParse is not None: + # advance past ignore expressions + while 1: + try: + tmploc = self_ignoreExpr_tryParse(instring, tmploc) + except ParseBaseException: + break + + try: + self_expr_parse(instring, tmploc, doActions=False, callPreParse=False) + except (ParseException, IndexError): + # no match, advance loc in string + tmploc += 1 + else: + # matched skipto expr, done + break + + else: + # ran off the end of the input string without matching skipto expr, fail + raise ParseException(instring, loc, self.errmsg, self) + + # build up return values + loc = tmploc + skiptext = instring[startloc:loc] + skipresult = ParseResults(skiptext) + + if self.includeMatch: + loc, mat = self_expr_parse(instring, loc, doActions, callPreParse=False) + skipresult += mat + + return loc, skipresult + + +class Forward(ParseElementEnhance): + """ + Forward declaration of an expression to be defined later - + used for recursive grammars, such as algebraic infix notation. + When the expression is known, it is assigned to the ``Forward`` + variable using the ``'<<'`` operator. + + Note: take care when assigning to ``Forward`` not to overlook + precedence of operators. + + Specifically, ``'|'`` has a lower precedence than ``'<<'``, so that:: + + fwd_expr << a | b | c + + will actually be evaluated as:: + + (fwd_expr << a) | b | c + + thereby leaving b and c out as parseable alternatives. It is recommended that you + explicitly group the values inserted into the ``Forward``:: + + fwd_expr << (a | b | c) + + Converting to use the ``'<<='`` operator instead will avoid this problem. + + See :class:`ParseResults.pprint` for an example of a recursive + parser created using ``Forward``. + """ + + def __init__(self, other: typing.Optional[Union[ParserElement, str]] = None): + self.caller_frame = traceback.extract_stack(limit=2)[0] + super().__init__(other, savelist=False) + self.lshift_line = None + + def __lshift__(self, other): + if hasattr(self, "caller_frame"): + del self.caller_frame + if isinstance(other, str_type): + other = self._literalStringClass(other) + self.expr = other + self.mayIndexError = self.expr.mayIndexError + self.mayReturnEmpty = self.expr.mayReturnEmpty + self.set_whitespace_chars( + self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars + ) + self.skipWhitespace = self.expr.skipWhitespace + self.saveAsList = self.expr.saveAsList + self.ignoreExprs.extend(self.expr.ignoreExprs) + self.lshift_line = traceback.extract_stack(limit=2)[-2] + return self + + def __ilshift__(self, other): + return self << other + + def __or__(self, other): + caller_line = traceback.extract_stack(limit=2)[-2] + if ( + __diag__.warn_on_match_first_with_lshift_operator + and caller_line == self.lshift_line + and Diagnostics.warn_on_match_first_with_lshift_operator + not in self.suppress_warnings_ + ): + warnings.warn( + "using '<<' operator with '|' is probably an error, use '<<='", + stacklevel=2, + ) + ret = super().__or__(other) + return ret + + def __del__(self): + # see if we are getting dropped because of '=' reassignment of var instead of '<<=' or '<<' + if ( + self.expr is None + and __diag__.warn_on_assignment_to_Forward + and Diagnostics.warn_on_assignment_to_Forward not in self.suppress_warnings_ + ): + warnings.warn_explicit( + "Forward defined here but no expression attached later using '<<=' or '<<'", + UserWarning, + filename=self.caller_frame.filename, + lineno=self.caller_frame.lineno, + ) + + def parseImpl(self, instring, loc, doActions=True): + if ( + self.expr is None + and __diag__.warn_on_parse_using_empty_Forward + and Diagnostics.warn_on_parse_using_empty_Forward + not in self.suppress_warnings_ + ): + # walk stack until parse_string, scan_string, search_string, or transform_string is found + parse_fns = [ + "parse_string", + "scan_string", + "search_string", + "transform_string", + ] + tb = traceback.extract_stack(limit=200) + for i, frm in enumerate(reversed(tb), start=1): + if frm.name in parse_fns: + stacklevel = i + 1 + break + else: + stacklevel = 2 + warnings.warn( + "Forward expression was never assigned a value, will not parse any input", + stacklevel=stacklevel, + ) + if not ParserElement._left_recursion_enabled: + return super().parseImpl(instring, loc, doActions) + # ## Bounded Recursion algorithm ## + # Recursion only needs to be processed at ``Forward`` elements, since they are + # the only ones that can actually refer to themselves. The general idea is + # to handle recursion stepwise: We start at no recursion, then recurse once, + # recurse twice, ..., until more recursion offers no benefit (we hit the bound). + # + # The "trick" here is that each ``Forward`` gets evaluated in two contexts + # - to *match* a specific recursion level, and + # - to *search* the bounded recursion level + # and the two run concurrently. The *search* must *match* each recursion level + # to find the best possible match. This is handled by a memo table, which + # provides the previous match to the next level match attempt. + # + # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al. + # + # There is a complication since we not only *parse* but also *transform* via + # actions: We do not want to run the actions too often while expanding. Thus, + # we expand using `doActions=False` and only run `doActions=True` if the next + # recursion level is acceptable. + with ParserElement.recursion_lock: + memo = ParserElement.recursion_memos + try: + # we are parsing at a specific recursion expansion - use it as-is + prev_loc, prev_result = memo[loc, self, doActions] + if isinstance(prev_result, Exception): + raise prev_result + return prev_loc, prev_result.copy() + except KeyError: + act_key = (loc, self, True) + peek_key = (loc, self, False) + # we are searching for the best recursion expansion - keep on improving + # both `doActions` cases must be tracked separately here! + prev_loc, prev_peek = memo[peek_key] = ( + loc - 1, + ParseException( + instring, loc, "Forward recursion without base case", self + ), + ) + if doActions: + memo[act_key] = memo[peek_key] + while True: + try: + new_loc, new_peek = super().parseImpl(instring, loc, False) + except ParseException: + # we failed before getting any match – do not hide the error + if isinstance(prev_peek, Exception): + raise + new_loc, new_peek = prev_loc, prev_peek + # the match did not get better: we are done + if new_loc <= prev_loc: + if doActions: + # replace the match for doActions=False as well, + # in case the action did backtrack + prev_loc, prev_result = memo[peek_key] = memo[act_key] + del memo[peek_key], memo[act_key] + return prev_loc, prev_result.copy() + del memo[peek_key] + return prev_loc, prev_peek.copy() + # the match did get better: see if we can improve further + else: + if doActions: + try: + memo[act_key] = super().parseImpl(instring, loc, True) + except ParseException as e: + memo[peek_key] = memo[act_key] = (new_loc, e) + raise + prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek + + def leave_whitespace(self, recursive: bool = True) -> ParserElement: + self.skipWhitespace = False + return self + + def ignore_whitespace(self, recursive: bool = True) -> ParserElement: + self.skipWhitespace = True + return self + + def streamline(self) -> ParserElement: + if not self.streamlined: + self.streamlined = True + if self.expr is not None: + self.expr.streamline() + return self + + def validate(self, validateTrace=None) -> None: + if validateTrace is None: + validateTrace = [] + + if self not in validateTrace: + tmp = validateTrace[:] + [self] + if self.expr is not None: + self.expr.validate(tmp) + self._checkRecursion([]) + + def _generateDefaultName(self): + # Avoid infinite recursion by setting a temporary _defaultName + self._defaultName = ": ..." + + # Use the string representation of main expression. + retString = "..." + try: + if self.expr is not None: + retString = str(self.expr)[:1000] + else: + retString = "None" + finally: + return self.__class__.__name__ + ": " + retString + + def copy(self) -> ParserElement: + if self.expr is not None: + return super().copy() + else: + ret = Forward() + ret <<= self + return ret + + def _setResultsName(self, name, list_all_matches=False): + if ( + __diag__.warn_name_set_on_empty_Forward + and Diagnostics.warn_name_set_on_empty_Forward + not in self.suppress_warnings_ + ): + if self.expr is None: + warnings.warn( + "{}: setting results name {!r} on {} expression " + "that has no contained expression".format( + "warn_name_set_on_empty_Forward", name, type(self).__name__ + ), + stacklevel=3, + ) + + return super()._setResultsName(name, list_all_matches) + + ignoreWhitespace = ignore_whitespace + leaveWhitespace = leave_whitespace + + +class TokenConverter(ParseElementEnhance): + """ + Abstract subclass of :class:`ParseExpression`, for converting parsed results. + """ + + def __init__(self, expr: Union[ParserElement, str], savelist=False): + super().__init__(expr) # , savelist) + self.saveAsList = False + + +class Combine(TokenConverter): + """Converter to concatenate all matching tokens to a single string. + By default, the matching patterns must also be contiguous in the + input string; this can be disabled by specifying + ``'adjacent=False'`` in the constructor. + + Example:: + + real = Word(nums) + '.' + Word(nums) + print(real.parse_string('3.1416')) # -> ['3', '.', '1416'] + # will also erroneously match the following + print(real.parse_string('3. 1416')) # -> ['3', '.', '1416'] + + real = Combine(Word(nums) + '.' + Word(nums)) + print(real.parse_string('3.1416')) # -> ['3.1416'] + # no match when there are internal spaces + print(real.parse_string('3. 1416')) # -> Exception: Expected W:(0123...) + """ + + def __init__( + self, + expr: ParserElement, + join_string: str = "", + adjacent: bool = True, + *, + joinString: typing.Optional[str] = None, + ): + super().__init__(expr) + joinString = joinString if joinString is not None else join_string + # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself + if adjacent: + self.leave_whitespace() + self.adjacent = adjacent + self.skipWhitespace = True + self.joinString = joinString + self.callPreparse = True + + def ignore(self, other) -> ParserElement: + if self.adjacent: + ParserElement.ignore(self, other) + else: + super().ignore(other) + return self + + def postParse(self, instring, loc, tokenlist): + retToks = tokenlist.copy() + del retToks[:] + retToks += ParseResults( + ["".join(tokenlist._asStringList(self.joinString))], modal=self.modalResults + ) + + if self.resultsName and retToks.haskeys(): + return [retToks] + else: + return retToks + + +class Group(TokenConverter): + """Converter to return the matched tokens as a list - useful for + returning tokens of :class:`ZeroOrMore` and :class:`OneOrMore` expressions. + + The optional ``aslist`` argument when set to True will return the + parsed tokens as a Python list instead of a pyparsing ParseResults. + + Example:: + + ident = Word(alphas) + num = Word(nums) + term = ident | num + func = ident + Opt(delimited_list(term)) + print(func.parse_string("fn a, b, 100")) + # -> ['fn', 'a', 'b', '100'] + + func = ident + Group(Opt(delimited_list(term))) + print(func.parse_string("fn a, b, 100")) + # -> ['fn', ['a', 'b', '100']] + """ + + def __init__(self, expr: ParserElement, aslist: bool = False): + super().__init__(expr) + self.saveAsList = True + self._asPythonList = aslist + + def postParse(self, instring, loc, tokenlist): + if self._asPythonList: + return ParseResults.List( + tokenlist.asList() + if isinstance(tokenlist, ParseResults) + else list(tokenlist) + ) + else: + return [tokenlist] + + +class Dict(TokenConverter): + """Converter to return a repetitive expression as a list, but also + as a dictionary. Each element can also be referenced using the first + token in the expression as its key. Useful for tabular report + scraping when the first column can be used as a item key. + + The optional ``asdict`` argument when set to True will return the + parsed tokens as a Python dict instead of a pyparsing ParseResults. + + Example:: + + data_word = Word(alphas) + label = data_word + FollowedBy(':') + + text = "shape: SQUARE posn: upper left color: light blue texture: burlap" + attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)) + + # print attributes as plain groups + print(attr_expr[1, ...].parse_string(text).dump()) + + # instead of OneOrMore(expr), parse using Dict(Group(expr)[1, ...]) - Dict will auto-assign names + result = Dict(Group(attr_expr)[1, ...]).parse_string(text) + print(result.dump()) + + # access named fields as dict entries, or output as dict + print(result['shape']) + print(result.as_dict()) + + prints:: + + ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap'] + [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']] + - color: 'light blue' + - posn: 'upper left' + - shape: 'SQUARE' + - texture: 'burlap' + SQUARE + {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'} + + See more examples at :class:`ParseResults` of accessing fields by results name. + """ + + def __init__(self, expr: ParserElement, asdict: bool = False): + super().__init__(expr) + self.saveAsList = True + self._asPythonDict = asdict + + def postParse(self, instring, loc, tokenlist): + for i, tok in enumerate(tokenlist): + if len(tok) == 0: + continue + + ikey = tok[0] + if isinstance(ikey, int): + ikey = str(ikey).strip() + + if len(tok) == 1: + tokenlist[ikey] = _ParseResultsWithOffset("", i) + + elif len(tok) == 2 and not isinstance(tok[1], ParseResults): + tokenlist[ikey] = _ParseResultsWithOffset(tok[1], i) + + else: + try: + dictvalue = tok.copy() # ParseResults(i) + except Exception: + exc = TypeError( + "could not extract dict values from parsed results" + " - Dict expression must contain Grouped expressions" + ) + raise exc from None + + del dictvalue[0] + + if len(dictvalue) != 1 or ( + isinstance(dictvalue, ParseResults) and dictvalue.haskeys() + ): + tokenlist[ikey] = _ParseResultsWithOffset(dictvalue, i) + else: + tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0], i) + + if self._asPythonDict: + return [tokenlist.as_dict()] if self.resultsName else tokenlist.as_dict() + else: + return [tokenlist] if self.resultsName else tokenlist + + +class Suppress(TokenConverter): + """Converter for ignoring the results of a parsed expression. + + Example:: + + source = "a, b, c,d" + wd = Word(alphas) + wd_list1 = wd + (',' + wd)[...] + print(wd_list1.parse_string(source)) + + # often, delimiters that are useful during parsing are just in the + # way afterward - use Suppress to keep them out of the parsed output + wd_list2 = wd + (Suppress(',') + wd)[...] + print(wd_list2.parse_string(source)) + + # Skipped text (using '...') can be suppressed as well + source = "lead in START relevant text END trailing text" + start_marker = Keyword("START") + end_marker = Keyword("END") + find_body = Suppress(...) + start_marker + ... + end_marker + print(find_body.parse_string(source) + + prints:: + + ['a', ',', 'b', ',', 'c', ',', 'd'] + ['a', 'b', 'c', 'd'] + ['START', 'relevant text ', 'END'] + + (See also :class:`delimited_list`.) + """ + + def __init__(self, expr: Union[ParserElement, str], savelist: bool = False): + if expr is ...: + expr = _PendingSkip(NoMatch()) + super().__init__(expr) + + def __add__(self, other) -> "ParserElement": + if isinstance(self.expr, _PendingSkip): + return Suppress(SkipTo(other)) + other + else: + return super().__add__(other) + + def __sub__(self, other) -> "ParserElement": + if isinstance(self.expr, _PendingSkip): + return Suppress(SkipTo(other)) - other + else: + return super().__sub__(other) + + def postParse(self, instring, loc, tokenlist): + return [] + + def suppress(self) -> ParserElement: + return self + + +def trace_parse_action(f: ParseAction) -> ParseAction: + """Decorator for debugging parse actions. + + When the parse action is called, this decorator will print + ``">> entering method-name(line:, , )"``. + When the parse action completes, the decorator will print + ``"<<"`` followed by the returned value, or any exception that the parse action raised. + + Example:: + + wd = Word(alphas) + + @trace_parse_action + def remove_duplicate_chars(tokens): + return ''.join(sorted(set(''.join(tokens)))) + + wds = wd[1, ...].set_parse_action(remove_duplicate_chars) + print(wds.parse_string("slkdjs sld sldd sdlf sdljf")) + + prints:: + + >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {})) + < 3: + thisFunc = paArgs[0].__class__.__name__ + "." + thisFunc + sys.stderr.write( + ">>entering {}(line: {!r}, {}, {!r})\n".format(thisFunc, line(l, s), l, t) + ) + try: + ret = f(*paArgs) + except Exception as exc: + sys.stderr.write("< str: + r"""Helper to easily define string ranges for use in :class:`Word` + construction. Borrows syntax from regexp ``'[]'`` string range + definitions:: + + srange("[0-9]") -> "0123456789" + srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz" + srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_" + + The input string must be enclosed in []'s, and the returned string + is the expanded character set joined into a single string. The + values enclosed in the []'s may be: + + - a single character + - an escaped character with a leading backslash (such as ``\-`` + or ``\]``) + - an escaped hex character with a leading ``'\x'`` + (``\x21``, which is a ``'!'`` character) (``\0x##`` + is also supported for backwards compatibility) + - an escaped octal character with a leading ``'\0'`` + (``\041``, which is a ``'!'`` character) + - a range of any of the above, separated by a dash (``'a-z'``, + etc.) + - any combination of the above (``'aeiouy'``, + ``'a-zA-Z0-9_$'``, etc.) + """ + _expanded = ( + lambda p: p + if not isinstance(p, ParseResults) + else "".join(chr(c) for c in range(ord(p[0]), ord(p[1]) + 1)) + ) + try: + return "".join(_expanded(part) for part in _reBracketExpr.parse_string(s).body) + except Exception: + return "" + + +def token_map(func, *args) -> ParseAction: + """Helper to define a parse action by mapping a function to all + elements of a :class:`ParseResults` list. If any additional args are passed, + they are forwarded to the given function as additional arguments + after the token, as in + ``hex_integer = Word(hexnums).set_parse_action(token_map(int, 16))``, + which will convert the parsed data to an integer using base 16. + + Example (compare the last to example in :class:`ParserElement.transform_string`:: + + hex_ints = Word(hexnums)[1, ...].set_parse_action(token_map(int, 16)) + hex_ints.run_tests(''' + 00 11 22 aa FF 0a 0d 1a + ''') + + upperword = Word(alphas).set_parse_action(token_map(str.upper)) + upperword[1, ...].run_tests(''' + my kingdom for a horse + ''') + + wd = Word(alphas).set_parse_action(token_map(str.title)) + wd[1, ...].set_parse_action(' '.join).run_tests(''' + now is the winter of our discontent made glorious summer by this sun of york + ''') + + prints:: + + 00 11 22 aa FF 0a 0d 1a + [0, 17, 34, 170, 255, 10, 13, 26] + + my kingdom for a horse + ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE'] + + now is the winter of our discontent made glorious summer by this sun of york + ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York'] + """ + + def pa(s, l, t): + return [func(tokn, *args) for tokn in t] + + func_name = getattr(func, "__name__", getattr(func, "__class__").__name__) + pa.__name__ = func_name + + return pa + + +def autoname_elements() -> None: + """ + Utility to simplify mass-naming of parser elements, for + generating railroad diagram with named subdiagrams. + """ + for name, var in sys._getframe().f_back.f_locals.items(): + if isinstance(var, ParserElement) and not var.customName: + var.set_name(name) + + +dbl_quoted_string = Combine( + Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"' +).set_name("string enclosed in double quotes") + +sgl_quoted_string = Combine( + Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'" +).set_name("string enclosed in single quotes") + +quoted_string = Combine( + Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"' + | Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'" +).set_name("quotedString using single or double quotes") + +unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal") + + +alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]") +punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]") + +# build list of built-in expressions, for future reference if a global default value +# gets updated +_builtin_exprs: List[ParserElement] = [ + v for v in vars().values() if isinstance(v, ParserElement) +] + +# backward compatibility names +tokenMap = token_map +conditionAsParseAction = condition_as_parse_action +nullDebugAction = null_debug_action +sglQuotedString = sgl_quoted_string +dblQuotedString = dbl_quoted_string +quotedString = quoted_string +unicodeString = unicode_string +lineStart = line_start +lineEnd = line_end +stringStart = string_start +stringEnd = string_end +traceParseAction = trace_parse_action diff --git a/libs/common/pkg_resources/_vendor/pyparsing/diagram/__init__.py b/libs/common/pkg_resources/_vendor/pyparsing/diagram/__init__.py new file mode 100644 index 00000000..89864475 --- /dev/null +++ b/libs/common/pkg_resources/_vendor/pyparsing/diagram/__init__.py @@ -0,0 +1,642 @@ +import railroad +import pyparsing +import typing +from typing import ( + List, + NamedTuple, + Generic, + TypeVar, + Dict, + Callable, + Set, + Iterable, +) +from jinja2 import Template +from io import StringIO +import inspect + + +jinja2_template_source = """\ + + + + {% if not head %} + + {% else %} + {{ head | safe }} + {% endif %} + + +{{ body | safe }} +{% for diagram in diagrams %} +
+

{{ diagram.title }}

+
{{ diagram.text }}
+
+ {{ diagram.svg }} +
+
+{% endfor %} + + +""" + +template = Template(jinja2_template_source) + +# Note: ideally this would be a dataclass, but we're supporting Python 3.5+ so we can't do this yet +NamedDiagram = NamedTuple( + "NamedDiagram", + [("name", str), ("diagram", typing.Optional[railroad.DiagramItem]), ("index", int)], +) +""" +A simple structure for associating a name with a railroad diagram +""" + +T = TypeVar("T") + + +class EachItem(railroad.Group): + """ + Custom railroad item to compose a: + - Group containing a + - OneOrMore containing a + - Choice of the elements in the Each + with the group label indicating that all must be matched + """ + + all_label = "[ALL]" + + def __init__(self, *items): + choice_item = railroad.Choice(len(items) - 1, *items) + one_or_more_item = railroad.OneOrMore(item=choice_item) + super().__init__(one_or_more_item, label=self.all_label) + + +class AnnotatedItem(railroad.Group): + """ + Simple subclass of Group that creates an annotation label + """ + + def __init__(self, label: str, item): + super().__init__(item=item, label="[{}]".format(label) if label else label) + + +class EditablePartial(Generic[T]): + """ + Acts like a functools.partial, but can be edited. In other words, it represents a type that hasn't yet been + constructed. + """ + + # We need this here because the railroad constructors actually transform the data, so can't be called until the + # entire tree is assembled + + def __init__(self, func: Callable[..., T], args: list, kwargs: dict): + self.func = func + self.args = args + self.kwargs = kwargs + + @classmethod + def from_call(cls, func: Callable[..., T], *args, **kwargs) -> "EditablePartial[T]": + """ + If you call this function in the same way that you would call the constructor, it will store the arguments + as you expect. For example EditablePartial.from_call(Fraction, 1, 3)() == Fraction(1, 3) + """ + return EditablePartial(func=func, args=list(args), kwargs=kwargs) + + @property + def name(self): + return self.kwargs["name"] + + def __call__(self) -> T: + """ + Evaluate the partial and return the result + """ + args = self.args.copy() + kwargs = self.kwargs.copy() + + # This is a helpful hack to allow you to specify varargs parameters (e.g. *args) as keyword args (e.g. + # args=['list', 'of', 'things']) + arg_spec = inspect.getfullargspec(self.func) + if arg_spec.varargs in self.kwargs: + args += kwargs.pop(arg_spec.varargs) + + return self.func(*args, **kwargs) + + +def railroad_to_html(diagrams: List[NamedDiagram], **kwargs) -> str: + """ + Given a list of NamedDiagram, produce a single HTML string that visualises those diagrams + :params kwargs: kwargs to be passed in to the template + """ + data = [] + for diagram in diagrams: + if diagram.diagram is None: + continue + io = StringIO() + diagram.diagram.writeSvg(io.write) + title = diagram.name + if diagram.index == 0: + title += " (root)" + data.append({"title": title, "text": "", "svg": io.getvalue()}) + + return template.render(diagrams=data, **kwargs) + + +def resolve_partial(partial: "EditablePartial[T]") -> T: + """ + Recursively resolves a collection of Partials into whatever type they are + """ + if isinstance(partial, EditablePartial): + partial.args = resolve_partial(partial.args) + partial.kwargs = resolve_partial(partial.kwargs) + return partial() + elif isinstance(partial, list): + return [resolve_partial(x) for x in partial] + elif isinstance(partial, dict): + return {key: resolve_partial(x) for key, x in partial.items()} + else: + return partial + + +def to_railroad( + element: pyparsing.ParserElement, + diagram_kwargs: typing.Optional[dict] = None, + vertical: int = 3, + show_results_names: bool = False, + show_groups: bool = False, +) -> List[NamedDiagram]: + """ + Convert a pyparsing element tree into a list of diagrams. This is the recommended entrypoint to diagram + creation if you want to access the Railroad tree before it is converted to HTML + :param element: base element of the parser being diagrammed + :param diagram_kwargs: kwargs to pass to the Diagram() constructor + :param vertical: (optional) - int - limit at which number of alternatives should be + shown vertically instead of horizontally + :param show_results_names - bool to indicate whether results name annotations should be + included in the diagram + :param show_groups - bool to indicate whether groups should be highlighted with an unlabeled + surrounding box + """ + # Convert the whole tree underneath the root + lookup = ConverterState(diagram_kwargs=diagram_kwargs or {}) + _to_diagram_element( + element, + lookup=lookup, + parent=None, + vertical=vertical, + show_results_names=show_results_names, + show_groups=show_groups, + ) + + root_id = id(element) + # Convert the root if it hasn't been already + if root_id in lookup: + if not element.customName: + lookup[root_id].name = "" + lookup[root_id].mark_for_extraction(root_id, lookup, force=True) + + # Now that we're finished, we can convert from intermediate structures into Railroad elements + diags = list(lookup.diagrams.values()) + if len(diags) > 1: + # collapse out duplicate diags with the same name + seen = set() + deduped_diags = [] + for d in diags: + # don't extract SkipTo elements, they are uninformative as subdiagrams + if d.name == "...": + continue + if d.name is not None and d.name not in seen: + seen.add(d.name) + deduped_diags.append(d) + resolved = [resolve_partial(partial) for partial in deduped_diags] + else: + # special case - if just one diagram, always display it, even if + # it has no name + resolved = [resolve_partial(partial) for partial in diags] + return sorted(resolved, key=lambda diag: diag.index) + + +def _should_vertical( + specification: int, exprs: Iterable[pyparsing.ParserElement] +) -> bool: + """ + Returns true if we should return a vertical list of elements + """ + if specification is None: + return False + else: + return len(_visible_exprs(exprs)) >= specification + + +class ElementState: + """ + State recorded for an individual pyparsing Element + """ + + # Note: this should be a dataclass, but we have to support Python 3.5 + def __init__( + self, + element: pyparsing.ParserElement, + converted: EditablePartial, + parent: EditablePartial, + number: int, + name: str = None, + parent_index: typing.Optional[int] = None, + ): + #: The pyparsing element that this represents + self.element: pyparsing.ParserElement = element + #: The name of the element + self.name: typing.Optional[str] = name + #: The output Railroad element in an unconverted state + self.converted: EditablePartial = converted + #: The parent Railroad element, which we store so that we can extract this if it's duplicated + self.parent: EditablePartial = parent + #: The order in which we found this element, used for sorting diagrams if this is extracted into a diagram + self.number: int = number + #: The index of this inside its parent + self.parent_index: typing.Optional[int] = parent_index + #: If true, we should extract this out into a subdiagram + self.extract: bool = False + #: If true, all of this element's children have been filled out + self.complete: bool = False + + def mark_for_extraction( + self, el_id: int, state: "ConverterState", name: str = None, force: bool = False + ): + """ + Called when this instance has been seen twice, and thus should eventually be extracted into a sub-diagram + :param el_id: id of the element + :param state: element/diagram state tracker + :param name: name to use for this element's text + :param force: If true, force extraction now, regardless of the state of this. Only useful for extracting the + root element when we know we're finished + """ + self.extract = True + + # Set the name + if not self.name: + if name: + # Allow forcing a custom name + self.name = name + elif self.element.customName: + self.name = self.element.customName + else: + self.name = "" + + # Just because this is marked for extraction doesn't mean we can do it yet. We may have to wait for children + # to be added + # Also, if this is just a string literal etc, don't bother extracting it + if force or (self.complete and _worth_extracting(self.element)): + state.extract_into_diagram(el_id) + + +class ConverterState: + """ + Stores some state that persists between recursions into the element tree + """ + + def __init__(self, diagram_kwargs: typing.Optional[dict] = None): + #: A dictionary mapping ParserElements to state relating to them + self._element_diagram_states: Dict[int, ElementState] = {} + #: A dictionary mapping ParserElement IDs to subdiagrams generated from them + self.diagrams: Dict[int, EditablePartial[NamedDiagram]] = {} + #: The index of the next unnamed element + self.unnamed_index: int = 1 + #: The index of the next element. This is used for sorting + self.index: int = 0 + #: Shared kwargs that are used to customize the construction of diagrams + self.diagram_kwargs: dict = diagram_kwargs or {} + self.extracted_diagram_names: Set[str] = set() + + def __setitem__(self, key: int, value: ElementState): + self._element_diagram_states[key] = value + + def __getitem__(self, key: int) -> ElementState: + return self._element_diagram_states[key] + + def __delitem__(self, key: int): + del self._element_diagram_states[key] + + def __contains__(self, key: int): + return key in self._element_diagram_states + + def generate_unnamed(self) -> int: + """ + Generate a number used in the name of an otherwise unnamed diagram + """ + self.unnamed_index += 1 + return self.unnamed_index + + def generate_index(self) -> int: + """ + Generate a number used to index a diagram + """ + self.index += 1 + return self.index + + def extract_into_diagram(self, el_id: int): + """ + Used when we encounter the same token twice in the same tree. When this + happens, we replace all instances of that token with a terminal, and + create a new subdiagram for the token + """ + position = self[el_id] + + # Replace the original definition of this element with a regular block + if position.parent: + ret = EditablePartial.from_call(railroad.NonTerminal, text=position.name) + if "item" in position.parent.kwargs: + position.parent.kwargs["item"] = ret + elif "items" in position.parent.kwargs: + position.parent.kwargs["items"][position.parent_index] = ret + + # If the element we're extracting is a group, skip to its content but keep the title + if position.converted.func == railroad.Group: + content = position.converted.kwargs["item"] + else: + content = position.converted + + self.diagrams[el_id] = EditablePartial.from_call( + NamedDiagram, + name=position.name, + diagram=EditablePartial.from_call( + railroad.Diagram, content, **self.diagram_kwargs + ), + index=position.number, + ) + + del self[el_id] + + +def _worth_extracting(element: pyparsing.ParserElement) -> bool: + """ + Returns true if this element is worth having its own sub-diagram. Simply, if any of its children + themselves have children, then its complex enough to extract + """ + children = element.recurse() + return any(child.recurse() for child in children) + + +def _apply_diagram_item_enhancements(fn): + """ + decorator to ensure enhancements to a diagram item (such as results name annotations) + get applied on return from _to_diagram_element (we do this since there are several + returns in _to_diagram_element) + """ + + def _inner( + element: pyparsing.ParserElement, + parent: typing.Optional[EditablePartial], + lookup: ConverterState = None, + vertical: int = None, + index: int = 0, + name_hint: str = None, + show_results_names: bool = False, + show_groups: bool = False, + ) -> typing.Optional[EditablePartial]: + + ret = fn( + element, + parent, + lookup, + vertical, + index, + name_hint, + show_results_names, + show_groups, + ) + + # apply annotation for results name, if present + if show_results_names and ret is not None: + element_results_name = element.resultsName + if element_results_name: + # add "*" to indicate if this is a "list all results" name + element_results_name += "" if element.modalResults else "*" + ret = EditablePartial.from_call( + railroad.Group, item=ret, label=element_results_name + ) + + return ret + + return _inner + + +def _visible_exprs(exprs: Iterable[pyparsing.ParserElement]): + non_diagramming_exprs = ( + pyparsing.ParseElementEnhance, + pyparsing.PositionToken, + pyparsing.And._ErrorStop, + ) + return [ + e + for e in exprs + if not (e.customName or e.resultsName or isinstance(e, non_diagramming_exprs)) + ] + + +@_apply_diagram_item_enhancements +def _to_diagram_element( + element: pyparsing.ParserElement, + parent: typing.Optional[EditablePartial], + lookup: ConverterState = None, + vertical: int = None, + index: int = 0, + name_hint: str = None, + show_results_names: bool = False, + show_groups: bool = False, +) -> typing.Optional[EditablePartial]: + """ + Recursively converts a PyParsing Element to a railroad Element + :param lookup: The shared converter state that keeps track of useful things + :param index: The index of this element within the parent + :param parent: The parent of this element in the output tree + :param vertical: Controls at what point we make a list of elements vertical. If this is an integer (the default), + it sets the threshold of the number of items before we go vertical. If True, always go vertical, if False, never + do so + :param name_hint: If provided, this will override the generated name + :param show_results_names: bool flag indicating whether to add annotations for results names + :returns: The converted version of the input element, but as a Partial that hasn't yet been constructed + :param show_groups: bool flag indicating whether to show groups using bounding box + """ + exprs = element.recurse() + name = name_hint or element.customName or element.__class__.__name__ + + # Python's id() is used to provide a unique identifier for elements + el_id = id(element) + + element_results_name = element.resultsName + + # Here we basically bypass processing certain wrapper elements if they contribute nothing to the diagram + if not element.customName: + if isinstance( + element, + ( + # pyparsing.TokenConverter, + # pyparsing.Forward, + pyparsing.Located, + ), + ): + # However, if this element has a useful custom name, and its child does not, we can pass it on to the child + if exprs: + if not exprs[0].customName: + propagated_name = name + else: + propagated_name = None + + return _to_diagram_element( + element.expr, + parent=parent, + lookup=lookup, + vertical=vertical, + index=index, + name_hint=propagated_name, + show_results_names=show_results_names, + show_groups=show_groups, + ) + + # If the element isn't worth extracting, we always treat it as the first time we say it + if _worth_extracting(element): + if el_id in lookup: + # If we've seen this element exactly once before, we are only just now finding out that it's a duplicate, + # so we have to extract it into a new diagram. + looked_up = lookup[el_id] + looked_up.mark_for_extraction(el_id, lookup, name=name_hint) + ret = EditablePartial.from_call(railroad.NonTerminal, text=looked_up.name) + return ret + + elif el_id in lookup.diagrams: + # If we have seen the element at least twice before, and have already extracted it into a subdiagram, we + # just put in a marker element that refers to the sub-diagram + ret = EditablePartial.from_call( + railroad.NonTerminal, text=lookup.diagrams[el_id].kwargs["name"] + ) + return ret + + # Recursively convert child elements + # Here we find the most relevant Railroad element for matching pyparsing Element + # We use ``items=[]`` here to hold the place for where the child elements will go once created + if isinstance(element, pyparsing.And): + # detect And's created with ``expr*N`` notation - for these use a OneOrMore with a repeat + # (all will have the same name, and resultsName) + if not exprs: + return None + if len(set((e.name, e.resultsName) for e in exprs)) == 1: + ret = EditablePartial.from_call( + railroad.OneOrMore, item="", repeat=str(len(exprs)) + ) + elif _should_vertical(vertical, exprs): + ret = EditablePartial.from_call(railroad.Stack, items=[]) + else: + ret = EditablePartial.from_call(railroad.Sequence, items=[]) + elif isinstance(element, (pyparsing.Or, pyparsing.MatchFirst)): + if not exprs: + return None + if _should_vertical(vertical, exprs): + ret = EditablePartial.from_call(railroad.Choice, 0, items=[]) + else: + ret = EditablePartial.from_call(railroad.HorizontalChoice, items=[]) + elif isinstance(element, pyparsing.Each): + if not exprs: + return None + ret = EditablePartial.from_call(EachItem, items=[]) + elif isinstance(element, pyparsing.NotAny): + ret = EditablePartial.from_call(AnnotatedItem, label="NOT", item="") + elif isinstance(element, pyparsing.FollowedBy): + ret = EditablePartial.from_call(AnnotatedItem, label="LOOKAHEAD", item="") + elif isinstance(element, pyparsing.PrecededBy): + ret = EditablePartial.from_call(AnnotatedItem, label="LOOKBEHIND", item="") + elif isinstance(element, pyparsing.Group): + if show_groups: + ret = EditablePartial.from_call(AnnotatedItem, label="", item="") + else: + ret = EditablePartial.from_call(railroad.Group, label="", item="") + elif isinstance(element, pyparsing.TokenConverter): + ret = EditablePartial.from_call( + AnnotatedItem, label=type(element).__name__.lower(), item="" + ) + elif isinstance(element, pyparsing.Opt): + ret = EditablePartial.from_call(railroad.Optional, item="") + elif isinstance(element, pyparsing.OneOrMore): + ret = EditablePartial.from_call(railroad.OneOrMore, item="") + elif isinstance(element, pyparsing.ZeroOrMore): + ret = EditablePartial.from_call(railroad.ZeroOrMore, item="") + elif isinstance(element, pyparsing.Group): + ret = EditablePartial.from_call( + railroad.Group, item=None, label=element_results_name + ) + elif isinstance(element, pyparsing.Empty) and not element.customName: + # Skip unnamed "Empty" elements + ret = None + elif len(exprs) > 1: + ret = EditablePartial.from_call(railroad.Sequence, items=[]) + elif len(exprs) > 0 and not element_results_name: + ret = EditablePartial.from_call(railroad.Group, item="", label=name) + else: + terminal = EditablePartial.from_call(railroad.Terminal, element.defaultName) + ret = terminal + + if ret is None: + return + + # Indicate this element's position in the tree so we can extract it if necessary + lookup[el_id] = ElementState( + element=element, + converted=ret, + parent=parent, + parent_index=index, + number=lookup.generate_index(), + ) + if element.customName: + lookup[el_id].mark_for_extraction(el_id, lookup, element.customName) + + i = 0 + for expr in exprs: + # Add a placeholder index in case we have to extract the child before we even add it to the parent + if "items" in ret.kwargs: + ret.kwargs["items"].insert(i, None) + + item = _to_diagram_element( + expr, + parent=ret, + lookup=lookup, + vertical=vertical, + index=i, + show_results_names=show_results_names, + show_groups=show_groups, + ) + + # Some elements don't need to be shown in the diagram + if item is not None: + if "item" in ret.kwargs: + ret.kwargs["item"] = item + elif "items" in ret.kwargs: + # If we've already extracted the child, don't touch this index, since it's occupied by a nonterminal + ret.kwargs["items"][i] = item + i += 1 + elif "items" in ret.kwargs: + # If we're supposed to skip this element, remove it from the parent + del ret.kwargs["items"][i] + + # If all this items children are none, skip this item + if ret and ( + ("items" in ret.kwargs and len(ret.kwargs["items"]) == 0) + or ("item" in ret.kwargs and ret.kwargs["item"] is None) + ): + ret = EditablePartial.from_call(railroad.Terminal, name) + + # Mark this element as "complete", ie it has all of its children + if el_id in lookup: + lookup[el_id].complete = True + + if el_id in lookup and lookup[el_id].extract and lookup[el_id].complete: + lookup.extract_into_diagram(el_id) + if ret is not None: + ret = EditablePartial.from_call( + railroad.NonTerminal, text=lookup.diagrams[el_id].kwargs["name"] + ) + + return ret diff --git a/libs/common/pkg_resources/_vendor/pyparsing/exceptions.py b/libs/common/pkg_resources/_vendor/pyparsing/exceptions.py new file mode 100644 index 00000000..a38447bb --- /dev/null +++ b/libs/common/pkg_resources/_vendor/pyparsing/exceptions.py @@ -0,0 +1,267 @@ +# exceptions.py + +import re +import sys +import typing + +from .util import col, line, lineno, _collapse_string_to_ranges +from .unicode import pyparsing_unicode as ppu + + +class ExceptionWordUnicode(ppu.Latin1, ppu.LatinA, ppu.LatinB, ppu.Greek, ppu.Cyrillic): + pass + + +_extract_alphanums = _collapse_string_to_ranges(ExceptionWordUnicode.alphanums) +_exception_word_extractor = re.compile("([" + _extract_alphanums + "]{1,16})|.") + + +class ParseBaseException(Exception): + """base exception class for all parsing runtime exceptions""" + + # Performance tuning: we construct a *lot* of these, so keep this + # constructor as small and fast as possible + def __init__( + self, + pstr: str, + loc: int = 0, + msg: typing.Optional[str] = None, + elem=None, + ): + self.loc = loc + if msg is None: + self.msg = pstr + self.pstr = "" + else: + self.msg = msg + self.pstr = pstr + self.parser_element = self.parserElement = elem + self.args = (pstr, loc, msg) + + @staticmethod + def explain_exception(exc, depth=16): + """ + Method to take an exception and translate the Python internal traceback into a list + of the pyparsing expressions that caused the exception to be raised. + + Parameters: + + - exc - exception raised during parsing (need not be a ParseException, in support + of Python exceptions that might be raised in a parse action) + - depth (default=16) - number of levels back in the stack trace to list expression + and function names; if None, the full stack trace names will be listed; if 0, only + the failing input line, marker, and exception string will be shown + + Returns a multi-line string listing the ParserElements and/or function names in the + exception's stack trace. + """ + import inspect + from .core import ParserElement + + if depth is None: + depth = sys.getrecursionlimit() + ret = [] + if isinstance(exc, ParseBaseException): + ret.append(exc.line) + ret.append(" " * (exc.column - 1) + "^") + ret.append("{}: {}".format(type(exc).__name__, exc)) + + if depth > 0: + callers = inspect.getinnerframes(exc.__traceback__, context=depth) + seen = set() + for i, ff in enumerate(callers[-depth:]): + frm = ff[0] + + f_self = frm.f_locals.get("self", None) + if isinstance(f_self, ParserElement): + if frm.f_code.co_name not in ("parseImpl", "_parseNoCache"): + continue + if id(f_self) in seen: + continue + seen.add(id(f_self)) + + self_type = type(f_self) + ret.append( + "{}.{} - {}".format( + self_type.__module__, self_type.__name__, f_self + ) + ) + + elif f_self is not None: + self_type = type(f_self) + ret.append("{}.{}".format(self_type.__module__, self_type.__name__)) + + else: + code = frm.f_code + if code.co_name in ("wrapper", ""): + continue + + ret.append("{}".format(code.co_name)) + + depth -= 1 + if not depth: + break + + return "\n".join(ret) + + @classmethod + def _from_exception(cls, pe): + """ + internal factory method to simplify creating one type of ParseException + from another - avoids having __init__ signature conflicts among subclasses + """ + return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement) + + @property + def line(self) -> str: + """ + Return the line of text where the exception occurred. + """ + return line(self.loc, self.pstr) + + @property + def lineno(self) -> int: + """ + Return the 1-based line number of text where the exception occurred. + """ + return lineno(self.loc, self.pstr) + + @property + def col(self) -> int: + """ + Return the 1-based column on the line of text where the exception occurred. + """ + return col(self.loc, self.pstr) + + @property + def column(self) -> int: + """ + Return the 1-based column on the line of text where the exception occurred. + """ + return col(self.loc, self.pstr) + + def __str__(self) -> str: + if self.pstr: + if self.loc >= len(self.pstr): + foundstr = ", found end of text" + else: + # pull out next word at error location + found_match = _exception_word_extractor.match(self.pstr, self.loc) + if found_match is not None: + found = found_match.group(0) + else: + found = self.pstr[self.loc : self.loc + 1] + foundstr = (", found %r" % found).replace(r"\\", "\\") + else: + foundstr = "" + return "{}{} (at char {}), (line:{}, col:{})".format( + self.msg, foundstr, self.loc, self.lineno, self.column + ) + + def __repr__(self): + return str(self) + + def mark_input_line(self, marker_string: str = None, *, markerString=">!<") -> str: + """ + Extracts the exception line from the input string, and marks + the location of the exception with a special symbol. + """ + markerString = marker_string if marker_string is not None else markerString + line_str = self.line + line_column = self.column - 1 + if markerString: + line_str = "".join( + (line_str[:line_column], markerString, line_str[line_column:]) + ) + return line_str.strip() + + def explain(self, depth=16) -> str: + """ + Method to translate the Python internal traceback into a list + of the pyparsing expressions that caused the exception to be raised. + + Parameters: + + - depth (default=16) - number of levels back in the stack trace to list expression + and function names; if None, the full stack trace names will be listed; if 0, only + the failing input line, marker, and exception string will be shown + + Returns a multi-line string listing the ParserElements and/or function names in the + exception's stack trace. + + Example:: + + expr = pp.Word(pp.nums) * 3 + try: + expr.parse_string("123 456 A789") + except pp.ParseException as pe: + print(pe.explain(depth=0)) + + prints:: + + 123 456 A789 + ^ + ParseException: Expected W:(0-9), found 'A' (at char 8), (line:1, col:9) + + Note: the diagnostic output will include string representations of the expressions + that failed to parse. These representations will be more helpful if you use `set_name` to + give identifiable names to your expressions. Otherwise they will use the default string + forms, which may be cryptic to read. + + Note: pyparsing's default truncation of exception tracebacks may also truncate the + stack of expressions that are displayed in the ``explain`` output. To get the full listing + of parser expressions, you may have to set ``ParserElement.verbose_stacktrace = True`` + """ + return self.explain_exception(self, depth) + + markInputline = mark_input_line + + +class ParseException(ParseBaseException): + """ + Exception thrown when a parse expression doesn't match the input string + + Example:: + + try: + Word(nums).set_name("integer").parse_string("ABC") + except ParseException as pe: + print(pe) + print("column: {}".format(pe.column)) + + prints:: + + Expected integer (at char 0), (line:1, col:1) + column: 1 + + """ + + +class ParseFatalException(ParseBaseException): + """ + User-throwable exception thrown when inconsistent parse content + is found; stops all parsing immediately + """ + + +class ParseSyntaxException(ParseFatalException): + """ + Just like :class:`ParseFatalException`, but thrown internally + when an :class:`ErrorStop` ('-' operator) indicates + that parsing is to stop immediately because an unbacktrackable + syntax error has been found. + """ + + +class RecursiveGrammarException(Exception): + """ + Exception thrown by :class:`ParserElement.validate` if the + grammar could be left-recursive; parser may need to enable + left recursion using :class:`ParserElement.enable_left_recursion` + """ + + def __init__(self, parseElementList): + self.parseElementTrace = parseElementList + + def __str__(self) -> str: + return "RecursiveGrammarException: {}".format(self.parseElementTrace) diff --git a/libs/common/pkg_resources/_vendor/pyparsing/helpers.py b/libs/common/pkg_resources/_vendor/pyparsing/helpers.py new file mode 100644 index 00000000..9588b3b7 --- /dev/null +++ b/libs/common/pkg_resources/_vendor/pyparsing/helpers.py @@ -0,0 +1,1088 @@ +# helpers.py +import html.entities +import re +import typing + +from . import __diag__ +from .core import * +from .util import _bslash, _flatten, _escape_regex_range_chars + + +# +# global helpers +# +def delimited_list( + expr: Union[str, ParserElement], + delim: Union[str, ParserElement] = ",", + combine: bool = False, + min: typing.Optional[int] = None, + max: typing.Optional[int] = None, + *, + allow_trailing_delim: bool = False, +) -> ParserElement: + """Helper to define a delimited list of expressions - the delimiter + defaults to ','. By default, the list elements and delimiters can + have intervening whitespace, and comments, but this can be + overridden by passing ``combine=True`` in the constructor. If + ``combine`` is set to ``True``, the matching tokens are + returned as a single token string, with the delimiters included; + otherwise, the matching tokens are returned as a list of tokens, + with the delimiters suppressed. + + If ``allow_trailing_delim`` is set to True, then the list may end with + a delimiter. + + Example:: + + delimited_list(Word(alphas)).parse_string("aa,bb,cc") # -> ['aa', 'bb', 'cc'] + delimited_list(Word(hexnums), delim=':', combine=True).parse_string("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE'] + """ + if isinstance(expr, str_type): + expr = ParserElement._literalStringClass(expr) + + dlName = "{expr} [{delim} {expr}]...{end}".format( + expr=str(expr.copy().streamline()), + delim=str(delim), + end=" [{}]".format(str(delim)) if allow_trailing_delim else "", + ) + + if not combine: + delim = Suppress(delim) + + if min is not None: + if min < 1: + raise ValueError("min must be greater than 0") + min -= 1 + if max is not None: + if min is not None and max <= min: + raise ValueError("max must be greater than, or equal to min") + max -= 1 + delimited_list_expr = expr + (delim + expr)[min, max] + + if allow_trailing_delim: + delimited_list_expr += Opt(delim) + + if combine: + return Combine(delimited_list_expr).set_name(dlName) + else: + return delimited_list_expr.set_name(dlName) + + +def counted_array( + expr: ParserElement, + int_expr: typing.Optional[ParserElement] = None, + *, + intExpr: typing.Optional[ParserElement] = None, +) -> ParserElement: + """Helper to define a counted list of expressions. + + This helper defines a pattern of the form:: + + integer expr expr expr... + + where the leading integer tells how many expr expressions follow. + The matched tokens returns the array of expr tokens as a list - the + leading count token is suppressed. + + If ``int_expr`` is specified, it should be a pyparsing expression + that produces an integer value. + + Example:: + + counted_array(Word(alphas)).parse_string('2 ab cd ef') # -> ['ab', 'cd'] + + # in this parser, the leading integer value is given in binary, + # '10' indicating that 2 values are in the array + binary_constant = Word('01').set_parse_action(lambda t: int(t[0], 2)) + counted_array(Word(alphas), int_expr=binary_constant).parse_string('10 ab cd ef') # -> ['ab', 'cd'] + + # if other fields must be parsed after the count but before the + # list items, give the fields results names and they will + # be preserved in the returned ParseResults: + count_with_metadata = integer + Word(alphas)("type") + typed_array = counted_array(Word(alphanums), int_expr=count_with_metadata)("items") + result = typed_array.parse_string("3 bool True True False") + print(result.dump()) + + # prints + # ['True', 'True', 'False'] + # - items: ['True', 'True', 'False'] + # - type: 'bool' + """ + intExpr = intExpr or int_expr + array_expr = Forward() + + def count_field_parse_action(s, l, t): + nonlocal array_expr + n = t[0] + array_expr <<= (expr * n) if n else Empty() + # clear list contents, but keep any named results + del t[:] + + if intExpr is None: + intExpr = Word(nums).set_parse_action(lambda t: int(t[0])) + else: + intExpr = intExpr.copy() + intExpr.set_name("arrayLen") + intExpr.add_parse_action(count_field_parse_action, call_during_try=True) + return (intExpr + array_expr).set_name("(len) " + str(expr) + "...") + + +def match_previous_literal(expr: ParserElement) -> ParserElement: + """Helper to define an expression that is indirectly defined from + the tokens matched in a previous expression, that is, it looks for + a 'repeat' of a previous expression. For example:: + + first = Word(nums) + second = match_previous_literal(first) + match_expr = first + ":" + second + + will match ``"1:1"``, but not ``"1:2"``. Because this + matches a previous literal, will also match the leading + ``"1:1"`` in ``"1:10"``. If this is not desired, use + :class:`match_previous_expr`. Do *not* use with packrat parsing + enabled. + """ + rep = Forward() + + def copy_token_to_repeater(s, l, t): + if t: + if len(t) == 1: + rep << t[0] + else: + # flatten t tokens + tflat = _flatten(t.as_list()) + rep << And(Literal(tt) for tt in tflat) + else: + rep << Empty() + + expr.add_parse_action(copy_token_to_repeater, callDuringTry=True) + rep.set_name("(prev) " + str(expr)) + return rep + + +def match_previous_expr(expr: ParserElement) -> ParserElement: + """Helper to define an expression that is indirectly defined from + the tokens matched in a previous expression, that is, it looks for + a 'repeat' of a previous expression. For example:: + + first = Word(nums) + second = match_previous_expr(first) + match_expr = first + ":" + second + + will match ``"1:1"``, but not ``"1:2"``. Because this + matches by expressions, will *not* match the leading ``"1:1"`` + in ``"1:10"``; the expressions are evaluated first, and then + compared, so ``"1"`` is compared with ``"10"``. Do *not* use + with packrat parsing enabled. + """ + rep = Forward() + e2 = expr.copy() + rep <<= e2 + + def copy_token_to_repeater(s, l, t): + matchTokens = _flatten(t.as_list()) + + def must_match_these_tokens(s, l, t): + theseTokens = _flatten(t.as_list()) + if theseTokens != matchTokens: + raise ParseException( + s, l, "Expected {}, found{}".format(matchTokens, theseTokens) + ) + + rep.set_parse_action(must_match_these_tokens, callDuringTry=True) + + expr.add_parse_action(copy_token_to_repeater, callDuringTry=True) + rep.set_name("(prev) " + str(expr)) + return rep + + +def one_of( + strs: Union[typing.Iterable[str], str], + caseless: bool = False, + use_regex: bool = True, + as_keyword: bool = False, + *, + useRegex: bool = True, + asKeyword: bool = False, +) -> ParserElement: + """Helper to quickly define a set of alternative :class:`Literal` s, + and makes sure to do longest-first testing when there is a conflict, + regardless of the input order, but returns + a :class:`MatchFirst` for best performance. + + Parameters: + + - ``strs`` - a string of space-delimited literals, or a collection of + string literals + - ``caseless`` - treat all literals as caseless - (default= ``False``) + - ``use_regex`` - as an optimization, will + generate a :class:`Regex` object; otherwise, will generate + a :class:`MatchFirst` object (if ``caseless=True`` or ``asKeyword=True``, or if + creating a :class:`Regex` raises an exception) - (default= ``True``) + - ``as_keyword`` - enforce :class:`Keyword`-style matching on the + generated expressions - (default= ``False``) + - ``asKeyword`` and ``useRegex`` are retained for pre-PEP8 compatibility, + but will be removed in a future release + + Example:: + + comp_oper = one_of("< = > <= >= !=") + var = Word(alphas) + number = Word(nums) + term = var | number + comparison_expr = term + comp_oper + term + print(comparison_expr.search_string("B = 12 AA=23 B<=AA AA>12")) + + prints:: + + [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']] + """ + asKeyword = asKeyword or as_keyword + useRegex = useRegex and use_regex + + if ( + isinstance(caseless, str_type) + and __diag__.warn_on_multiple_string_args_to_oneof + ): + warnings.warn( + "More than one string argument passed to one_of, pass" + " choices as a list or space-delimited string", + stacklevel=2, + ) + + if caseless: + isequal = lambda a, b: a.upper() == b.upper() + masks = lambda a, b: b.upper().startswith(a.upper()) + parseElementClass = CaselessKeyword if asKeyword else CaselessLiteral + else: + isequal = lambda a, b: a == b + masks = lambda a, b: b.startswith(a) + parseElementClass = Keyword if asKeyword else Literal + + symbols: List[str] = [] + if isinstance(strs, str_type): + symbols = strs.split() + elif isinstance(strs, Iterable): + symbols = list(strs) + else: + raise TypeError("Invalid argument to one_of, expected string or iterable") + if not symbols: + return NoMatch() + + # reorder given symbols to take care to avoid masking longer choices with shorter ones + # (but only if the given symbols are not just single characters) + if any(len(sym) > 1 for sym in symbols): + i = 0 + while i < len(symbols) - 1: + cur = symbols[i] + for j, other in enumerate(symbols[i + 1 :]): + if isequal(other, cur): + del symbols[i + j + 1] + break + elif masks(cur, other): + del symbols[i + j + 1] + symbols.insert(i, other) + break + else: + i += 1 + + if useRegex: + re_flags: int = re.IGNORECASE if caseless else 0 + + try: + if all(len(sym) == 1 for sym in symbols): + # symbols are just single characters, create range regex pattern + patt = "[{}]".format( + "".join(_escape_regex_range_chars(sym) for sym in symbols) + ) + else: + patt = "|".join(re.escape(sym) for sym in symbols) + + # wrap with \b word break markers if defining as keywords + if asKeyword: + patt = r"\b(?:{})\b".format(patt) + + ret = Regex(patt, flags=re_flags).set_name(" | ".join(symbols)) + + if caseless: + # add parse action to return symbols as specified, not in random + # casing as found in input string + symbol_map = {sym.lower(): sym for sym in symbols} + ret.add_parse_action(lambda s, l, t: symbol_map[t[0].lower()]) + + return ret + + except re.error: + warnings.warn( + "Exception creating Regex for one_of, building MatchFirst", stacklevel=2 + ) + + # last resort, just use MatchFirst + return MatchFirst(parseElementClass(sym) for sym in symbols).set_name( + " | ".join(symbols) + ) + + +def dict_of(key: ParserElement, value: ParserElement) -> ParserElement: + """Helper to easily and clearly define a dictionary by specifying + the respective patterns for the key and value. Takes care of + defining the :class:`Dict`, :class:`ZeroOrMore`, and + :class:`Group` tokens in the proper order. The key pattern + can include delimiting markers or punctuation, as long as they are + suppressed, thereby leaving the significant key text. The value + pattern can include named results, so that the :class:`Dict` results + can include named token fields. + + Example:: + + text = "shape: SQUARE posn: upper left color: light blue texture: burlap" + attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)) + print(attr_expr[1, ...].parse_string(text).dump()) + + attr_label = label + attr_value = Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join) + + # similar to Dict, but simpler call format + result = dict_of(attr_label, attr_value).parse_string(text) + print(result.dump()) + print(result['shape']) + print(result.shape) # object attribute access works too + print(result.as_dict()) + + prints:: + + [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']] + - color: 'light blue' + - posn: 'upper left' + - shape: 'SQUARE' + - texture: 'burlap' + SQUARE + SQUARE + {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'} + """ + return Dict(OneOrMore(Group(key + value))) + + +def original_text_for( + expr: ParserElement, as_string: bool = True, *, asString: bool = True +) -> ParserElement: + """Helper to return the original, untokenized text for a given + expression. Useful to restore the parsed fields of an HTML start + tag into the raw tag text itself, or to revert separate tokens with + intervening whitespace back to the original matching input text. By + default, returns astring containing the original parsed text. + + If the optional ``as_string`` argument is passed as + ``False``, then the return value is + a :class:`ParseResults` containing any results names that + were originally matched, and a single token containing the original + matched text from the input string. So if the expression passed to + :class:`original_text_for` contains expressions with defined + results names, you must set ``as_string`` to ``False`` if you + want to preserve those results name values. + + The ``asString`` pre-PEP8 argument is retained for compatibility, + but will be removed in a future release. + + Example:: + + src = "this is test bold text normal text " + for tag in ("b", "i"): + opener, closer = make_html_tags(tag) + patt = original_text_for(opener + SkipTo(closer) + closer) + print(patt.search_string(src)[0]) + + prints:: + + [' bold text '] + ['text'] + """ + asString = asString and as_string + + locMarker = Empty().set_parse_action(lambda s, loc, t: loc) + endlocMarker = locMarker.copy() + endlocMarker.callPreparse = False + matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end") + if asString: + extractText = lambda s, l, t: s[t._original_start : t._original_end] + else: + + def extractText(s, l, t): + t[:] = [s[t.pop("_original_start") : t.pop("_original_end")]] + + matchExpr.set_parse_action(extractText) + matchExpr.ignoreExprs = expr.ignoreExprs + matchExpr.suppress_warning(Diagnostics.warn_ungrouped_named_tokens_in_collection) + return matchExpr + + +def ungroup(expr: ParserElement) -> ParserElement: + """Helper to undo pyparsing's default grouping of And expressions, + even if all but one are non-empty. + """ + return TokenConverter(expr).add_parse_action(lambda t: t[0]) + + +def locatedExpr(expr: ParserElement) -> ParserElement: + """ + (DEPRECATED - future code should use the Located class) + Helper to decorate a returned token with its starting and ending + locations in the input string. + + This helper adds the following results names: + + - ``locn_start`` - location where matched expression begins + - ``locn_end`` - location where matched expression ends + - ``value`` - the actual parsed results + + Be careful if the input text contains ```` characters, you + may want to call :class:`ParserElement.parseWithTabs` + + Example:: + + wd = Word(alphas) + for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"): + print(match) + + prints:: + + [[0, 'ljsdf', 5]] + [[8, 'lksdjjf', 15]] + [[18, 'lkkjj', 23]] + """ + locator = Empty().set_parse_action(lambda ss, ll, tt: ll) + return Group( + locator("locn_start") + + expr("value") + + locator.copy().leaveWhitespace()("locn_end") + ) + + +def nested_expr( + opener: Union[str, ParserElement] = "(", + closer: Union[str, ParserElement] = ")", + content: typing.Optional[ParserElement] = None, + ignore_expr: ParserElement = quoted_string(), + *, + ignoreExpr: ParserElement = quoted_string(), +) -> ParserElement: + """Helper method for defining nested lists enclosed in opening and + closing delimiters (``"("`` and ``")"`` are the default). + + Parameters: + - ``opener`` - opening character for a nested list + (default= ``"("``); can also be a pyparsing expression + - ``closer`` - closing character for a nested list + (default= ``")"``); can also be a pyparsing expression + - ``content`` - expression for items within the nested lists + (default= ``None``) + - ``ignore_expr`` - expression for ignoring opening and closing delimiters + (default= :class:`quoted_string`) + - ``ignoreExpr`` - this pre-PEP8 argument is retained for compatibility + but will be removed in a future release + + If an expression is not provided for the content argument, the + nested expression will capture all whitespace-delimited content + between delimiters as a list of separate values. + + Use the ``ignore_expr`` argument to define expressions that may + contain opening or closing characters that should not be treated as + opening or closing characters for nesting, such as quoted_string or + a comment expression. Specify multiple expressions using an + :class:`Or` or :class:`MatchFirst`. The default is + :class:`quoted_string`, but if no expressions are to be ignored, then + pass ``None`` for this argument. + + Example:: + + data_type = one_of("void int short long char float double") + decl_data_type = Combine(data_type + Opt(Word('*'))) + ident = Word(alphas+'_', alphanums+'_') + number = pyparsing_common.number + arg = Group(decl_data_type + ident) + LPAR, RPAR = map(Suppress, "()") + + code_body = nested_expr('{', '}', ignore_expr=(quoted_string | c_style_comment)) + + c_function = (decl_data_type("type") + + ident("name") + + LPAR + Opt(delimited_list(arg), [])("args") + RPAR + + code_body("body")) + c_function.ignore(c_style_comment) + + source_code = ''' + int is_odd(int x) { + return (x%2); + } + + int dec_to_hex(char hchar) { + if (hchar >= '0' && hchar <= '9') { + return (ord(hchar)-ord('0')); + } else { + return (10+ord(hchar)-ord('A')); + } + } + ''' + for func in c_function.search_string(source_code): + print("%(name)s (%(type)s) args: %(args)s" % func) + + + prints:: + + is_odd (int) args: [['int', 'x']] + dec_to_hex (int) args: [['char', 'hchar']] + """ + if ignoreExpr != ignore_expr: + ignoreExpr = ignore_expr if ignoreExpr == quoted_string() else ignoreExpr + if opener == closer: + raise ValueError("opening and closing strings cannot be the same") + if content is None: + if isinstance(opener, str_type) and isinstance(closer, str_type): + if len(opener) == 1 and len(closer) == 1: + if ignoreExpr is not None: + content = Combine( + OneOrMore( + ~ignoreExpr + + CharsNotIn( + opener + closer + ParserElement.DEFAULT_WHITE_CHARS, + exact=1, + ) + ) + ).set_parse_action(lambda t: t[0].strip()) + else: + content = empty.copy() + CharsNotIn( + opener + closer + ParserElement.DEFAULT_WHITE_CHARS + ).set_parse_action(lambda t: t[0].strip()) + else: + if ignoreExpr is not None: + content = Combine( + OneOrMore( + ~ignoreExpr + + ~Literal(opener) + + ~Literal(closer) + + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1) + ) + ).set_parse_action(lambda t: t[0].strip()) + else: + content = Combine( + OneOrMore( + ~Literal(opener) + + ~Literal(closer) + + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1) + ) + ).set_parse_action(lambda t: t[0].strip()) + else: + raise ValueError( + "opening and closing arguments must be strings if no content expression is given" + ) + ret = Forward() + if ignoreExpr is not None: + ret <<= Group( + Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer) + ) + else: + ret <<= Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer)) + ret.set_name("nested %s%s expression" % (opener, closer)) + return ret + + +def _makeTags(tagStr, xml, suppress_LT=Suppress("<"), suppress_GT=Suppress(">")): + """Internal helper to construct opening and closing tag expressions, given a tag name""" + if isinstance(tagStr, str_type): + resname = tagStr + tagStr = Keyword(tagStr, caseless=not xml) + else: + resname = tagStr.name + + tagAttrName = Word(alphas, alphanums + "_-:") + if xml: + tagAttrValue = dbl_quoted_string.copy().set_parse_action(remove_quotes) + openTag = ( + suppress_LT + + tagStr("tag") + + Dict(ZeroOrMore(Group(tagAttrName + Suppress("=") + tagAttrValue))) + + Opt("/", default=[False])("empty").set_parse_action( + lambda s, l, t: t[0] == "/" + ) + + suppress_GT + ) + else: + tagAttrValue = quoted_string.copy().set_parse_action(remove_quotes) | Word( + printables, exclude_chars=">" + ) + openTag = ( + suppress_LT + + tagStr("tag") + + Dict( + ZeroOrMore( + Group( + tagAttrName.set_parse_action(lambda t: t[0].lower()) + + Opt(Suppress("=") + tagAttrValue) + ) + ) + ) + + Opt("/", default=[False])("empty").set_parse_action( + lambda s, l, t: t[0] == "/" + ) + + suppress_GT + ) + closeTag = Combine(Literal("", adjacent=False) + + openTag.set_name("<%s>" % resname) + # add start results name in parse action now that ungrouped names are not reported at two levels + openTag.add_parse_action( + lambda t: t.__setitem__( + "start" + "".join(resname.replace(":", " ").title().split()), t.copy() + ) + ) + closeTag = closeTag( + "end" + "".join(resname.replace(":", " ").title().split()) + ).set_name("" % resname) + openTag.tag = resname + closeTag.tag = resname + openTag.tag_body = SkipTo(closeTag()) + return openTag, closeTag + + +def make_html_tags( + tag_str: Union[str, ParserElement] +) -> Tuple[ParserElement, ParserElement]: + """Helper to construct opening and closing tag expressions for HTML, + given a tag name. Matches tags in either upper or lower case, + attributes with namespaces and with quoted or unquoted values. + + Example:: + + text = 'More info at the pyparsing wiki page' + # make_html_tags returns pyparsing expressions for the opening and + # closing tags as a 2-tuple + a, a_end = make_html_tags("A") + link_expr = a + SkipTo(a_end)("link_text") + a_end + + for link in link_expr.search_string(text): + # attributes in the tag (like "href" shown here) are + # also accessible as named results + print(link.link_text, '->', link.href) + + prints:: + + pyparsing -> https://github.com/pyparsing/pyparsing/wiki + """ + return _makeTags(tag_str, False) + + +def make_xml_tags( + tag_str: Union[str, ParserElement] +) -> Tuple[ParserElement, ParserElement]: + """Helper to construct opening and closing tag expressions for XML, + given a tag name. Matches tags only in the given upper/lower case. + + Example: similar to :class:`make_html_tags` + """ + return _makeTags(tag_str, True) + + +any_open_tag: ParserElement +any_close_tag: ParserElement +any_open_tag, any_close_tag = make_html_tags( + Word(alphas, alphanums + "_:").set_name("any tag") +) + +_htmlEntityMap = {k.rstrip(";"): v for k, v in html.entities.html5.items()} +common_html_entity = Regex("&(?P" + "|".join(_htmlEntityMap) + ");").set_name( + "common HTML entity" +) + + +def replace_html_entity(t): + """Helper parser action to replace common HTML entities with their special characters""" + return _htmlEntityMap.get(t.entity) + + +class OpAssoc(Enum): + LEFT = 1 + RIGHT = 2 + + +InfixNotationOperatorArgType = Union[ + ParserElement, str, Tuple[Union[ParserElement, str], Union[ParserElement, str]] +] +InfixNotationOperatorSpec = Union[ + Tuple[ + InfixNotationOperatorArgType, + int, + OpAssoc, + typing.Optional[ParseAction], + ], + Tuple[ + InfixNotationOperatorArgType, + int, + OpAssoc, + ], +] + + +def infix_notation( + base_expr: ParserElement, + op_list: List[InfixNotationOperatorSpec], + lpar: Union[str, ParserElement] = Suppress("("), + rpar: Union[str, ParserElement] = Suppress(")"), +) -> ParserElement: + """Helper method for constructing grammars of expressions made up of + operators working in a precedence hierarchy. Operators may be unary + or binary, left- or right-associative. Parse actions can also be + attached to operator expressions. The generated parser will also + recognize the use of parentheses to override operator precedences + (see example below). + + Note: if you define a deep operator list, you may see performance + issues when using infix_notation. See + :class:`ParserElement.enable_packrat` for a mechanism to potentially + improve your parser performance. + + Parameters: + - ``base_expr`` - expression representing the most basic operand to + be used in the expression + - ``op_list`` - list of tuples, one for each operator precedence level + in the expression grammar; each tuple is of the form ``(op_expr, + num_operands, right_left_assoc, (optional)parse_action)``, where: + + - ``op_expr`` is the pyparsing expression for the operator; may also + be a string, which will be converted to a Literal; if ``num_operands`` + is 3, ``op_expr`` is a tuple of two expressions, for the two + operators separating the 3 terms + - ``num_operands`` is the number of terms for this operator (must be 1, + 2, or 3) + - ``right_left_assoc`` is the indicator whether the operator is right + or left associative, using the pyparsing-defined constants + ``OpAssoc.RIGHT`` and ``OpAssoc.LEFT``. + - ``parse_action`` is the parse action to be associated with + expressions matching this operator expression (the parse action + tuple member may be omitted); if the parse action is passed + a tuple or list of functions, this is equivalent to calling + ``set_parse_action(*fn)`` + (:class:`ParserElement.set_parse_action`) + - ``lpar`` - expression for matching left-parentheses; if passed as a + str, then will be parsed as Suppress(lpar). If lpar is passed as + an expression (such as ``Literal('(')``), then it will be kept in + the parsed results, and grouped with them. (default= ``Suppress('(')``) + - ``rpar`` - expression for matching right-parentheses; if passed as a + str, then will be parsed as Suppress(rpar). If rpar is passed as + an expression (such as ``Literal(')')``), then it will be kept in + the parsed results, and grouped with them. (default= ``Suppress(')')``) + + Example:: + + # simple example of four-function arithmetic with ints and + # variable names + integer = pyparsing_common.signed_integer + varname = pyparsing_common.identifier + + arith_expr = infix_notation(integer | varname, + [ + ('-', 1, OpAssoc.RIGHT), + (one_of('* /'), 2, OpAssoc.LEFT), + (one_of('+ -'), 2, OpAssoc.LEFT), + ]) + + arith_expr.run_tests(''' + 5+3*6 + (5+3)*6 + -2--11 + ''', full_dump=False) + + prints:: + + 5+3*6 + [[5, '+', [3, '*', 6]]] + + (5+3)*6 + [[[5, '+', 3], '*', 6]] + + -2--11 + [[['-', 2], '-', ['-', 11]]] + """ + # captive version of FollowedBy that does not do parse actions or capture results names + class _FB(FollowedBy): + def parseImpl(self, instring, loc, doActions=True): + self.expr.try_parse(instring, loc) + return loc, [] + + _FB.__name__ = "FollowedBy>" + + ret = Forward() + if isinstance(lpar, str): + lpar = Suppress(lpar) + if isinstance(rpar, str): + rpar = Suppress(rpar) + + # if lpar and rpar are not suppressed, wrap in group + if not (isinstance(rpar, Suppress) and isinstance(rpar, Suppress)): + lastExpr = base_expr | Group(lpar + ret + rpar) + else: + lastExpr = base_expr | (lpar + ret + rpar) + + for i, operDef in enumerate(op_list): + opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4] + if isinstance(opExpr, str_type): + opExpr = ParserElement._literalStringClass(opExpr) + if arity == 3: + if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2: + raise ValueError( + "if numterms=3, opExpr must be a tuple or list of two expressions" + ) + opExpr1, opExpr2 = opExpr + term_name = "{}{} term".format(opExpr1, opExpr2) + else: + term_name = "{} term".format(opExpr) + + if not 1 <= arity <= 3: + raise ValueError("operator must be unary (1), binary (2), or ternary (3)") + + if rightLeftAssoc not in (OpAssoc.LEFT, OpAssoc.RIGHT): + raise ValueError("operator must indicate right or left associativity") + + thisExpr: Forward = Forward().set_name(term_name) + if rightLeftAssoc is OpAssoc.LEFT: + if arity == 1: + matchExpr = _FB(lastExpr + opExpr) + Group(lastExpr + opExpr[1, ...]) + elif arity == 2: + if opExpr is not None: + matchExpr = _FB(lastExpr + opExpr + lastExpr) + Group( + lastExpr + (opExpr + lastExpr)[1, ...] + ) + else: + matchExpr = _FB(lastExpr + lastExpr) + Group(lastExpr[2, ...]) + elif arity == 3: + matchExpr = _FB( + lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr + ) + Group(lastExpr + OneOrMore(opExpr1 + lastExpr + opExpr2 + lastExpr)) + elif rightLeftAssoc is OpAssoc.RIGHT: + if arity == 1: + # try to avoid LR with this extra test + if not isinstance(opExpr, Opt): + opExpr = Opt(opExpr) + matchExpr = _FB(opExpr.expr + thisExpr) + Group(opExpr + thisExpr) + elif arity == 2: + if opExpr is not None: + matchExpr = _FB(lastExpr + opExpr + thisExpr) + Group( + lastExpr + (opExpr + thisExpr)[1, ...] + ) + else: + matchExpr = _FB(lastExpr + thisExpr) + Group( + lastExpr + thisExpr[1, ...] + ) + elif arity == 3: + matchExpr = _FB( + lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr + ) + Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + if pa: + if isinstance(pa, (tuple, list)): + matchExpr.set_parse_action(*pa) + else: + matchExpr.set_parse_action(pa) + thisExpr <<= (matchExpr | lastExpr).setName(term_name) + lastExpr = thisExpr + ret <<= lastExpr + return ret + + +def indentedBlock(blockStatementExpr, indentStack, indent=True, backup_stacks=[]): + """ + (DEPRECATED - use IndentedBlock class instead) + Helper method for defining space-delimited indentation blocks, + such as those used to define block statements in Python source code. + + Parameters: + + - ``blockStatementExpr`` - expression defining syntax of statement that + is repeated within the indented block + - ``indentStack`` - list created by caller to manage indentation stack + (multiple ``statementWithIndentedBlock`` expressions within a single + grammar should share a common ``indentStack``) + - ``indent`` - boolean indicating whether block must be indented beyond + the current level; set to ``False`` for block of left-most statements + (default= ``True``) + + A valid block must contain at least one ``blockStatement``. + + (Note that indentedBlock uses internal parse actions which make it + incompatible with packrat parsing.) + + Example:: + + data = ''' + def A(z): + A1 + B = 100 + G = A2 + A2 + A3 + B + def BB(a,b,c): + BB1 + def BBA(): + bba1 + bba2 + bba3 + C + D + def spam(x,y): + def eggs(z): + pass + ''' + + + indentStack = [1] + stmt = Forward() + + identifier = Word(alphas, alphanums) + funcDecl = ("def" + identifier + Group("(" + Opt(delimitedList(identifier)) + ")") + ":") + func_body = indentedBlock(stmt, indentStack) + funcDef = Group(funcDecl + func_body) + + rvalue = Forward() + funcCall = Group(identifier + "(" + Opt(delimitedList(rvalue)) + ")") + rvalue << (funcCall | identifier | Word(nums)) + assignment = Group(identifier + "=" + rvalue) + stmt << (funcDef | assignment | identifier) + + module_body = stmt[1, ...] + + parseTree = module_body.parseString(data) + parseTree.pprint() + + prints:: + + [['def', + 'A', + ['(', 'z', ')'], + ':', + [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]], + 'B', + ['def', + 'BB', + ['(', 'a', 'b', 'c', ')'], + ':', + [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]], + 'C', + 'D', + ['def', + 'spam', + ['(', 'x', 'y', ')'], + ':', + [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]] + """ + backup_stacks.append(indentStack[:]) + + def reset_stack(): + indentStack[:] = backup_stacks[-1] + + def checkPeerIndent(s, l, t): + if l >= len(s): + return + curCol = col(l, s) + if curCol != indentStack[-1]: + if curCol > indentStack[-1]: + raise ParseException(s, l, "illegal nesting") + raise ParseException(s, l, "not a peer entry") + + def checkSubIndent(s, l, t): + curCol = col(l, s) + if curCol > indentStack[-1]: + indentStack.append(curCol) + else: + raise ParseException(s, l, "not a subentry") + + def checkUnindent(s, l, t): + if l >= len(s): + return + curCol = col(l, s) + if not (indentStack and curCol in indentStack): + raise ParseException(s, l, "not an unindent") + if curCol < indentStack[-1]: + indentStack.pop() + + NL = OneOrMore(LineEnd().set_whitespace_chars("\t ").suppress()) + INDENT = (Empty() + Empty().set_parse_action(checkSubIndent)).set_name("INDENT") + PEER = Empty().set_parse_action(checkPeerIndent).set_name("") + UNDENT = Empty().set_parse_action(checkUnindent).set_name("UNINDENT") + if indent: + smExpr = Group( + Opt(NL) + + INDENT + + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL)) + + UNDENT + ) + else: + smExpr = Group( + Opt(NL) + + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL)) + + Opt(UNDENT) + ) + + # add a parse action to remove backup_stack from list of backups + smExpr.add_parse_action( + lambda: backup_stacks.pop(-1) and None if backup_stacks else None + ) + smExpr.set_fail_action(lambda a, b, c, d: reset_stack()) + blockStatementExpr.ignore(_bslash + LineEnd()) + return smExpr.set_name("indented block") + + +# it's easy to get these comment structures wrong - they're very common, so may as well make them available +c_style_comment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/").set_name( + "C style comment" +) +"Comment of the form ``/* ... */``" + +html_comment = Regex(r"").set_name("HTML comment") +"Comment of the form ````" + +rest_of_line = Regex(r".*").leave_whitespace().set_name("rest of line") +dbl_slash_comment = Regex(r"//(?:\\\n|[^\n])*").set_name("// comment") +"Comment of the form ``// ... (to end of line)``" + +cpp_style_comment = Combine( + Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/" | dbl_slash_comment +).set_name("C++ style comment") +"Comment of either form :class:`c_style_comment` or :class:`dbl_slash_comment`" + +java_style_comment = cpp_style_comment +"Same as :class:`cpp_style_comment`" + +python_style_comment = Regex(r"#.*").set_name("Python style comment") +"Comment of the form ``# ... (to end of line)``" + + +# build list of built-in expressions, for future reference if a global default value +# gets updated +_builtin_exprs: List[ParserElement] = [ + v for v in vars().values() if isinstance(v, ParserElement) +] + + +# pre-PEP8 compatible names +delimitedList = delimited_list +countedArray = counted_array +matchPreviousLiteral = match_previous_literal +matchPreviousExpr = match_previous_expr +oneOf = one_of +dictOf = dict_of +originalTextFor = original_text_for +nestedExpr = nested_expr +makeHTMLTags = make_html_tags +makeXMLTags = make_xml_tags +anyOpenTag, anyCloseTag = any_open_tag, any_close_tag +commonHTMLEntity = common_html_entity +replaceHTMLEntity = replace_html_entity +opAssoc = OpAssoc +infixNotation = infix_notation +cStyleComment = c_style_comment +htmlComment = html_comment +restOfLine = rest_of_line +dblSlashComment = dbl_slash_comment +cppStyleComment = cpp_style_comment +javaStyleComment = java_style_comment +pythonStyleComment = python_style_comment diff --git a/libs/common/pkg_resources/_vendor/pyparsing/results.py b/libs/common/pkg_resources/_vendor/pyparsing/results.py new file mode 100644 index 00000000..00c9421d --- /dev/null +++ b/libs/common/pkg_resources/_vendor/pyparsing/results.py @@ -0,0 +1,760 @@ +# results.py +from collections.abc import MutableMapping, Mapping, MutableSequence, Iterator +import pprint +from weakref import ref as wkref +from typing import Tuple, Any + +str_type: Tuple[type, ...] = (str, bytes) +_generator_type = type((_ for _ in ())) + + +class _ParseResultsWithOffset: + __slots__ = ["tup"] + + def __init__(self, p1, p2): + self.tup = (p1, p2) + + def __getitem__(self, i): + return self.tup[i] + + def __getstate__(self): + return self.tup + + def __setstate__(self, *args): + self.tup = args[0] + + +class ParseResults: + """Structured parse results, to provide multiple means of access to + the parsed data: + + - as a list (``len(results)``) + - by list index (``results[0], results[1]``, etc.) + - by attribute (``results.`` - see :class:`ParserElement.set_results_name`) + + Example:: + + integer = Word(nums) + date_str = (integer.set_results_name("year") + '/' + + integer.set_results_name("month") + '/' + + integer.set_results_name("day")) + # equivalent form: + # date_str = (integer("year") + '/' + # + integer("month") + '/' + # + integer("day")) + + # parse_string returns a ParseResults object + result = date_str.parse_string("1999/12/31") + + def test(s, fn=repr): + print("{} -> {}".format(s, fn(eval(s)))) + test("list(result)") + test("result[0]") + test("result['month']") + test("result.day") + test("'month' in result") + test("'minutes' in result") + test("result.dump()", str) + + prints:: + + list(result) -> ['1999', '/', '12', '/', '31'] + result[0] -> '1999' + result['month'] -> '12' + result.day -> '31' + 'month' in result -> True + 'minutes' in result -> False + result.dump() -> ['1999', '/', '12', '/', '31'] + - day: '31' + - month: '12' + - year: '1999' + """ + + _null_values: Tuple[Any, ...] = (None, [], "", ()) + + __slots__ = [ + "_name", + "_parent", + "_all_names", + "_modal", + "_toklist", + "_tokdict", + "__weakref__", + ] + + class List(list): + """ + Simple wrapper class to distinguish parsed list results that should be preserved + as actual Python lists, instead of being converted to :class:`ParseResults`: + + LBRACK, RBRACK = map(pp.Suppress, "[]") + element = pp.Forward() + item = ppc.integer + element_list = LBRACK + pp.delimited_list(element) + RBRACK + + # add parse actions to convert from ParseResults to actual Python collection types + def as_python_list(t): + return pp.ParseResults.List(t.as_list()) + element_list.add_parse_action(as_python_list) + + element <<= item | element_list + + element.run_tests(''' + 100 + [2,3,4] + [[2, 1],3,4] + [(2, 1),3,4] + (2,3,4) + ''', post_parse=lambda s, r: (r[0], type(r[0]))) + + prints: + + 100 + (100, ) + + [2,3,4] + ([2, 3, 4], ) + + [[2, 1],3,4] + ([[2, 1], 3, 4], ) + + (Used internally by :class:`Group` when `aslist=True`.) + """ + + def __new__(cls, contained=None): + if contained is None: + contained = [] + + if not isinstance(contained, list): + raise TypeError( + "{} may only be constructed with a list," + " not {}".format(cls.__name__, type(contained).__name__) + ) + + return list.__new__(cls) + + def __new__(cls, toklist=None, name=None, **kwargs): + if isinstance(toklist, ParseResults): + return toklist + self = object.__new__(cls) + self._name = None + self._parent = None + self._all_names = set() + + if toklist is None: + self._toklist = [] + elif isinstance(toklist, (list, _generator_type)): + self._toklist = ( + [toklist[:]] + if isinstance(toklist, ParseResults.List) + else list(toklist) + ) + else: + self._toklist = [toklist] + self._tokdict = dict() + return self + + # Performance tuning: we construct a *lot* of these, so keep this + # constructor as small and fast as possible + def __init__( + self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance + ): + self._modal = modal + if name is not None and name != "": + if isinstance(name, int): + name = str(name) + if not modal: + self._all_names = {name} + self._name = name + if toklist not in self._null_values: + if isinstance(toklist, (str_type, type)): + toklist = [toklist] + if asList: + if isinstance(toklist, ParseResults): + self[name] = _ParseResultsWithOffset( + ParseResults(toklist._toklist), 0 + ) + else: + self[name] = _ParseResultsWithOffset( + ParseResults(toklist[0]), 0 + ) + self[name]._name = name + else: + try: + self[name] = toklist[0] + except (KeyError, TypeError, IndexError): + if toklist is not self: + self[name] = toklist + else: + self._name = name + + def __getitem__(self, i): + if isinstance(i, (int, slice)): + return self._toklist[i] + else: + if i not in self._all_names: + return self._tokdict[i][-1][0] + else: + return ParseResults([v[0] for v in self._tokdict[i]]) + + def __setitem__(self, k, v, isinstance=isinstance): + if isinstance(v, _ParseResultsWithOffset): + self._tokdict[k] = self._tokdict.get(k, list()) + [v] + sub = v[0] + elif isinstance(k, (int, slice)): + self._toklist[k] = v + sub = v + else: + self._tokdict[k] = self._tokdict.get(k, list()) + [ + _ParseResultsWithOffset(v, 0) + ] + sub = v + if isinstance(sub, ParseResults): + sub._parent = wkref(self) + + def __delitem__(self, i): + if isinstance(i, (int, slice)): + mylen = len(self._toklist) + del self._toklist[i] + + # convert int to slice + if isinstance(i, int): + if i < 0: + i += mylen + i = slice(i, i + 1) + # get removed indices + removed = list(range(*i.indices(mylen))) + removed.reverse() + # fixup indices in token dictionary + for name, occurrences in self._tokdict.items(): + for j in removed: + for k, (value, position) in enumerate(occurrences): + occurrences[k] = _ParseResultsWithOffset( + value, position - (position > j) + ) + else: + del self._tokdict[i] + + def __contains__(self, k) -> bool: + return k in self._tokdict + + def __len__(self) -> int: + return len(self._toklist) + + def __bool__(self) -> bool: + return not not (self._toklist or self._tokdict) + + def __iter__(self) -> Iterator: + return iter(self._toklist) + + def __reversed__(self) -> Iterator: + return iter(self._toklist[::-1]) + + def keys(self): + return iter(self._tokdict) + + def values(self): + return (self[k] for k in self.keys()) + + def items(self): + return ((k, self[k]) for k in self.keys()) + + def haskeys(self) -> bool: + """ + Since ``keys()`` returns an iterator, this method is helpful in bypassing + code that looks for the existence of any defined results names.""" + return bool(self._tokdict) + + def pop(self, *args, **kwargs): + """ + Removes and returns item at specified index (default= ``last``). + Supports both ``list`` and ``dict`` semantics for ``pop()``. If + passed no argument or an integer argument, it will use ``list`` + semantics and pop tokens from the list of parsed tokens. If passed + a non-integer argument (most likely a string), it will use ``dict`` + semantics and pop the corresponding value from any defined results + names. A second default return value argument is supported, just as in + ``dict.pop()``. + + Example:: + + numlist = Word(nums)[...] + print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321'] + + def remove_first(tokens): + tokens.pop(0) + numlist.add_parse_action(remove_first) + print(numlist.parse_string("0 123 321")) # -> ['123', '321'] + + label = Word(alphas) + patt = label("LABEL") + Word(nums)[1, ...] + print(patt.parse_string("AAB 123 321").dump()) + + # Use pop() in a parse action to remove named result (note that corresponding value is not + # removed from list form of results) + def remove_LABEL(tokens): + tokens.pop("LABEL") + return tokens + patt.add_parse_action(remove_LABEL) + print(patt.parse_string("AAB 123 321").dump()) + + prints:: + + ['AAB', '123', '321'] + - LABEL: 'AAB' + + ['AAB', '123', '321'] + """ + if not args: + args = [-1] + for k, v in kwargs.items(): + if k == "default": + args = (args[0], v) + else: + raise TypeError( + "pop() got an unexpected keyword argument {!r}".format(k) + ) + if isinstance(args[0], int) or len(args) == 1 or args[0] in self: + index = args[0] + ret = self[index] + del self[index] + return ret + else: + defaultvalue = args[1] + return defaultvalue + + def get(self, key, default_value=None): + """ + Returns named result matching the given key, or if there is no + such name, then returns the given ``default_value`` or ``None`` if no + ``default_value`` is specified. + + Similar to ``dict.get()``. + + Example:: + + integer = Word(nums) + date_str = integer("year") + '/' + integer("month") + '/' + integer("day") + + result = date_str.parse_string("1999/12/31") + print(result.get("year")) # -> '1999' + print(result.get("hour", "not specified")) # -> 'not specified' + print(result.get("hour")) # -> None + """ + if key in self: + return self[key] + else: + return default_value + + def insert(self, index, ins_string): + """ + Inserts new element at location index in the list of parsed tokens. + + Similar to ``list.insert()``. + + Example:: + + numlist = Word(nums)[...] + print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321'] + + # use a parse action to insert the parse location in the front of the parsed results + def insert_locn(locn, tokens): + tokens.insert(0, locn) + numlist.add_parse_action(insert_locn) + print(numlist.parse_string("0 123 321")) # -> [0, '0', '123', '321'] + """ + self._toklist.insert(index, ins_string) + # fixup indices in token dictionary + for name, occurrences in self._tokdict.items(): + for k, (value, position) in enumerate(occurrences): + occurrences[k] = _ParseResultsWithOffset( + value, position + (position > index) + ) + + def append(self, item): + """ + Add single element to end of ``ParseResults`` list of elements. + + Example:: + + numlist = Word(nums)[...] + print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321'] + + # use a parse action to compute the sum of the parsed integers, and add it to the end + def append_sum(tokens): + tokens.append(sum(map(int, tokens))) + numlist.add_parse_action(append_sum) + print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321', 444] + """ + self._toklist.append(item) + + def extend(self, itemseq): + """ + Add sequence of elements to end of ``ParseResults`` list of elements. + + Example:: + + patt = Word(alphas)[1, ...] + + # use a parse action to append the reverse of the matched strings, to make a palindrome + def make_palindrome(tokens): + tokens.extend(reversed([t[::-1] for t in tokens])) + return ''.join(tokens) + patt.add_parse_action(make_palindrome) + print(patt.parse_string("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl' + """ + if isinstance(itemseq, ParseResults): + self.__iadd__(itemseq) + else: + self._toklist.extend(itemseq) + + def clear(self): + """ + Clear all elements and results names. + """ + del self._toklist[:] + self._tokdict.clear() + + def __getattr__(self, name): + try: + return self[name] + except KeyError: + if name.startswith("__"): + raise AttributeError(name) + return "" + + def __add__(self, other) -> "ParseResults": + ret = self.copy() + ret += other + return ret + + def __iadd__(self, other) -> "ParseResults": + if other._tokdict: + offset = len(self._toklist) + addoffset = lambda a: offset if a < 0 else a + offset + otheritems = other._tokdict.items() + otherdictitems = [ + (k, _ParseResultsWithOffset(v[0], addoffset(v[1]))) + for k, vlist in otheritems + for v in vlist + ] + for k, v in otherdictitems: + self[k] = v + if isinstance(v[0], ParseResults): + v[0]._parent = wkref(self) + + self._toklist += other._toklist + self._all_names |= other._all_names + return self + + def __radd__(self, other) -> "ParseResults": + if isinstance(other, int) and other == 0: + # useful for merging many ParseResults using sum() builtin + return self.copy() + else: + # this may raise a TypeError - so be it + return other + self + + def __repr__(self) -> str: + return "{}({!r}, {})".format(type(self).__name__, self._toklist, self.as_dict()) + + def __str__(self) -> str: + return ( + "[" + + ", ".join( + [ + str(i) if isinstance(i, ParseResults) else repr(i) + for i in self._toklist + ] + ) + + "]" + ) + + def _asStringList(self, sep=""): + out = [] + for item in self._toklist: + if out and sep: + out.append(sep) + if isinstance(item, ParseResults): + out += item._asStringList() + else: + out.append(str(item)) + return out + + def as_list(self) -> list: + """ + Returns the parse results as a nested list of matching tokens, all converted to strings. + + Example:: + + patt = Word(alphas)[1, ...] + result = patt.parse_string("sldkj lsdkj sldkj") + # even though the result prints in string-like form, it is actually a pyparsing ParseResults + print(type(result), result) # -> ['sldkj', 'lsdkj', 'sldkj'] + + # Use as_list() to create an actual list + result_list = result.as_list() + print(type(result_list), result_list) # -> ['sldkj', 'lsdkj', 'sldkj'] + """ + return [ + res.as_list() if isinstance(res, ParseResults) else res + for res in self._toklist + ] + + def as_dict(self) -> dict: + """ + Returns the named parse results as a nested dictionary. + + Example:: + + integer = Word(nums) + date_str = integer("year") + '/' + integer("month") + '/' + integer("day") + + result = date_str.parse_string('12/31/1999') + print(type(result), repr(result)) # -> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]}) + + result_dict = result.as_dict() + print(type(result_dict), repr(result_dict)) # -> {'day': '1999', 'year': '12', 'month': '31'} + + # even though a ParseResults supports dict-like access, sometime you just need to have a dict + import json + print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable + print(json.dumps(result.as_dict())) # -> {"month": "31", "day": "1999", "year": "12"} + """ + + def to_item(obj): + if isinstance(obj, ParseResults): + return obj.as_dict() if obj.haskeys() else [to_item(v) for v in obj] + else: + return obj + + return dict((k, to_item(v)) for k, v in self.items()) + + def copy(self) -> "ParseResults": + """ + Returns a new copy of a :class:`ParseResults` object. + """ + ret = ParseResults(self._toklist) + ret._tokdict = self._tokdict.copy() + ret._parent = self._parent + ret._all_names |= self._all_names + ret._name = self._name + return ret + + def get_name(self): + r""" + Returns the results name for this token expression. Useful when several + different expressions might match at a particular location. + + Example:: + + integer = Word(nums) + ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d") + house_number_expr = Suppress('#') + Word(nums, alphanums) + user_data = (Group(house_number_expr)("house_number") + | Group(ssn_expr)("ssn") + | Group(integer)("age")) + user_info = user_data[1, ...] + + result = user_info.parse_string("22 111-22-3333 #221B") + for item in result: + print(item.get_name(), ':', item[0]) + + prints:: + + age : 22 + ssn : 111-22-3333 + house_number : 221B + """ + if self._name: + return self._name + elif self._parent: + par = self._parent() + + def find_in_parent(sub): + return next( + ( + k + for k, vlist in par._tokdict.items() + for v, loc in vlist + if sub is v + ), + None, + ) + + return find_in_parent(self) if par else None + elif ( + len(self) == 1 + and len(self._tokdict) == 1 + and next(iter(self._tokdict.values()))[0][1] in (0, -1) + ): + return next(iter(self._tokdict.keys())) + else: + return None + + def dump(self, indent="", full=True, include_list=True, _depth=0) -> str: + """ + Diagnostic method for listing out the contents of + a :class:`ParseResults`. Accepts an optional ``indent`` argument so + that this string can be embedded in a nested display of other data. + + Example:: + + integer = Word(nums) + date_str = integer("year") + '/' + integer("month") + '/' + integer("day") + + result = date_str.parse_string('1999/12/31') + print(result.dump()) + + prints:: + + ['1999', '/', '12', '/', '31'] + - day: '31' + - month: '12' + - year: '1999' + """ + out = [] + NL = "\n" + out.append(indent + str(self.as_list()) if include_list else "") + + if full: + if self.haskeys(): + items = sorted((str(k), v) for k, v in self.items()) + for k, v in items: + if out: + out.append(NL) + out.append("{}{}- {}: ".format(indent, (" " * _depth), k)) + if isinstance(v, ParseResults): + if v: + out.append( + v.dump( + indent=indent, + full=full, + include_list=include_list, + _depth=_depth + 1, + ) + ) + else: + out.append(str(v)) + else: + out.append(repr(v)) + if any(isinstance(vv, ParseResults) for vv in self): + v = self + for i, vv in enumerate(v): + if isinstance(vv, ParseResults): + out.append( + "\n{}{}[{}]:\n{}{}{}".format( + indent, + (" " * (_depth)), + i, + indent, + (" " * (_depth + 1)), + vv.dump( + indent=indent, + full=full, + include_list=include_list, + _depth=_depth + 1, + ), + ) + ) + else: + out.append( + "\n%s%s[%d]:\n%s%s%s" + % ( + indent, + (" " * (_depth)), + i, + indent, + (" " * (_depth + 1)), + str(vv), + ) + ) + + return "".join(out) + + def pprint(self, *args, **kwargs): + """ + Pretty-printer for parsed results as a list, using the + `pprint `_ module. + Accepts additional positional or keyword args as defined for + `pprint.pprint `_ . + + Example:: + + ident = Word(alphas, alphanums) + num = Word(nums) + func = Forward() + term = ident | num | Group('(' + func + ')') + func <<= ident + Group(Optional(delimited_list(term))) + result = func.parse_string("fna a,b,(fnb c,d,200),100") + result.pprint(width=40) + + prints:: + + ['fna', + ['a', + 'b', + ['(', 'fnb', ['c', 'd', '200'], ')'], + '100']] + """ + pprint.pprint(self.as_list(), *args, **kwargs) + + # add support for pickle protocol + def __getstate__(self): + return ( + self._toklist, + ( + self._tokdict.copy(), + self._parent is not None and self._parent() or None, + self._all_names, + self._name, + ), + ) + + def __setstate__(self, state): + self._toklist, (self._tokdict, par, inAccumNames, self._name) = state + self._all_names = set(inAccumNames) + if par is not None: + self._parent = wkref(par) + else: + self._parent = None + + def __getnewargs__(self): + return self._toklist, self._name + + def __dir__(self): + return dir(type(self)) + list(self.keys()) + + @classmethod + def from_dict(cls, other, name=None) -> "ParseResults": + """ + Helper classmethod to construct a ``ParseResults`` from a ``dict``, preserving the + name-value relations as results names. If an optional ``name`` argument is + given, a nested ``ParseResults`` will be returned. + """ + + def is_iterable(obj): + try: + iter(obj) + except Exception: + return False + else: + return not isinstance(obj, str_type) + + ret = cls([]) + for k, v in other.items(): + if isinstance(v, Mapping): + ret += cls.from_dict(v, name=k) + else: + ret += cls([v], name=k, asList=is_iterable(v)) + if name is not None: + ret = cls([ret], name=name) + return ret + + asList = as_list + asDict = as_dict + getName = get_name + + +MutableMapping.register(ParseResults) +MutableSequence.register(ParseResults) diff --git a/libs/common/pkg_resources/_vendor/pyparsing/testing.py b/libs/common/pkg_resources/_vendor/pyparsing/testing.py new file mode 100644 index 00000000..84a0ef17 --- /dev/null +++ b/libs/common/pkg_resources/_vendor/pyparsing/testing.py @@ -0,0 +1,331 @@ +# testing.py + +from contextlib import contextmanager +import typing + +from .core import ( + ParserElement, + ParseException, + Keyword, + __diag__, + __compat__, +) + + +class pyparsing_test: + """ + namespace class for classes useful in writing unit tests + """ + + class reset_pyparsing_context: + """ + Context manager to be used when writing unit tests that modify pyparsing config values: + - packrat parsing + - bounded recursion parsing + - default whitespace characters. + - default keyword characters + - literal string auto-conversion class + - __diag__ settings + + Example:: + + with reset_pyparsing_context(): + # test that literals used to construct a grammar are automatically suppressed + ParserElement.inlineLiteralsUsing(Suppress) + + term = Word(alphas) | Word(nums) + group = Group('(' + term[...] + ')') + + # assert that the '()' characters are not included in the parsed tokens + self.assertParseAndCheckList(group, "(abc 123 def)", ['abc', '123', 'def']) + + # after exiting context manager, literals are converted to Literal expressions again + """ + + def __init__(self): + self._save_context = {} + + def save(self): + self._save_context["default_whitespace"] = ParserElement.DEFAULT_WHITE_CHARS + self._save_context["default_keyword_chars"] = Keyword.DEFAULT_KEYWORD_CHARS + + self._save_context[ + "literal_string_class" + ] = ParserElement._literalStringClass + + self._save_context["verbose_stacktrace"] = ParserElement.verbose_stacktrace + + self._save_context["packrat_enabled"] = ParserElement._packratEnabled + if ParserElement._packratEnabled: + self._save_context[ + "packrat_cache_size" + ] = ParserElement.packrat_cache.size + else: + self._save_context["packrat_cache_size"] = None + self._save_context["packrat_parse"] = ParserElement._parse + self._save_context[ + "recursion_enabled" + ] = ParserElement._left_recursion_enabled + + self._save_context["__diag__"] = { + name: getattr(__diag__, name) for name in __diag__._all_names + } + + self._save_context["__compat__"] = { + "collect_all_And_tokens": __compat__.collect_all_And_tokens + } + + return self + + def restore(self): + # reset pyparsing global state + if ( + ParserElement.DEFAULT_WHITE_CHARS + != self._save_context["default_whitespace"] + ): + ParserElement.set_default_whitespace_chars( + self._save_context["default_whitespace"] + ) + + ParserElement.verbose_stacktrace = self._save_context["verbose_stacktrace"] + + Keyword.DEFAULT_KEYWORD_CHARS = self._save_context["default_keyword_chars"] + ParserElement.inlineLiteralsUsing( + self._save_context["literal_string_class"] + ) + + for name, value in self._save_context["__diag__"].items(): + (__diag__.enable if value else __diag__.disable)(name) + + ParserElement._packratEnabled = False + if self._save_context["packrat_enabled"]: + ParserElement.enable_packrat(self._save_context["packrat_cache_size"]) + else: + ParserElement._parse = self._save_context["packrat_parse"] + ParserElement._left_recursion_enabled = self._save_context[ + "recursion_enabled" + ] + + __compat__.collect_all_And_tokens = self._save_context["__compat__"] + + return self + + def copy(self): + ret = type(self)() + ret._save_context.update(self._save_context) + return ret + + def __enter__(self): + return self.save() + + def __exit__(self, *args): + self.restore() + + class TestParseResultsAsserts: + """ + A mixin class to add parse results assertion methods to normal unittest.TestCase classes. + """ + + def assertParseResultsEquals( + self, result, expected_list=None, expected_dict=None, msg=None + ): + """ + Unit test assertion to compare a :class:`ParseResults` object with an optional ``expected_list``, + and compare any defined results names with an optional ``expected_dict``. + """ + if expected_list is not None: + self.assertEqual(expected_list, result.as_list(), msg=msg) + if expected_dict is not None: + self.assertEqual(expected_dict, result.as_dict(), msg=msg) + + def assertParseAndCheckList( + self, expr, test_string, expected_list, msg=None, verbose=True + ): + """ + Convenience wrapper assert to test a parser element and input string, and assert that + the resulting ``ParseResults.asList()`` is equal to the ``expected_list``. + """ + result = expr.parse_string(test_string, parse_all=True) + if verbose: + print(result.dump()) + else: + print(result.as_list()) + self.assertParseResultsEquals(result, expected_list=expected_list, msg=msg) + + def assertParseAndCheckDict( + self, expr, test_string, expected_dict, msg=None, verbose=True + ): + """ + Convenience wrapper assert to test a parser element and input string, and assert that + the resulting ``ParseResults.asDict()`` is equal to the ``expected_dict``. + """ + result = expr.parse_string(test_string, parseAll=True) + if verbose: + print(result.dump()) + else: + print(result.as_list()) + self.assertParseResultsEquals(result, expected_dict=expected_dict, msg=msg) + + def assertRunTestResults( + self, run_tests_report, expected_parse_results=None, msg=None + ): + """ + Unit test assertion to evaluate output of ``ParserElement.runTests()``. If a list of + list-dict tuples is given as the ``expected_parse_results`` argument, then these are zipped + with the report tuples returned by ``runTests`` and evaluated using ``assertParseResultsEquals``. + Finally, asserts that the overall ``runTests()`` success value is ``True``. + + :param run_tests_report: tuple(bool, [tuple(str, ParseResults or Exception)]) returned from runTests + :param expected_parse_results (optional): [tuple(str, list, dict, Exception)] + """ + run_test_success, run_test_results = run_tests_report + + if expected_parse_results is not None: + merged = [ + (*rpt, expected) + for rpt, expected in zip(run_test_results, expected_parse_results) + ] + for test_string, result, expected in merged: + # expected should be a tuple containing a list and/or a dict or an exception, + # and optional failure message string + # an empty tuple will skip any result validation + fail_msg = next( + (exp for exp in expected if isinstance(exp, str)), None + ) + expected_exception = next( + ( + exp + for exp in expected + if isinstance(exp, type) and issubclass(exp, Exception) + ), + None, + ) + if expected_exception is not None: + with self.assertRaises( + expected_exception=expected_exception, msg=fail_msg or msg + ): + if isinstance(result, Exception): + raise result + else: + expected_list = next( + (exp for exp in expected if isinstance(exp, list)), None + ) + expected_dict = next( + (exp for exp in expected if isinstance(exp, dict)), None + ) + if (expected_list, expected_dict) != (None, None): + self.assertParseResultsEquals( + result, + expected_list=expected_list, + expected_dict=expected_dict, + msg=fail_msg or msg, + ) + else: + # warning here maybe? + print("no validation for {!r}".format(test_string)) + + # do this last, in case some specific test results can be reported instead + self.assertTrue( + run_test_success, msg=msg if msg is not None else "failed runTests" + ) + + @contextmanager + def assertRaisesParseException(self, exc_type=ParseException, msg=None): + with self.assertRaises(exc_type, msg=msg): + yield + + @staticmethod + def with_line_numbers( + s: str, + start_line: typing.Optional[int] = None, + end_line: typing.Optional[int] = None, + expand_tabs: bool = True, + eol_mark: str = "|", + mark_spaces: typing.Optional[str] = None, + mark_control: typing.Optional[str] = None, + ) -> str: + """ + Helpful method for debugging a parser - prints a string with line and column numbers. + (Line and column numbers are 1-based.) + + :param s: tuple(bool, str - string to be printed with line and column numbers + :param start_line: int - (optional) starting line number in s to print (default=1) + :param end_line: int - (optional) ending line number in s to print (default=len(s)) + :param expand_tabs: bool - (optional) expand tabs to spaces, to match the pyparsing default + :param eol_mark: str - (optional) string to mark the end of lines, helps visualize trailing spaces (default="|") + :param mark_spaces: str - (optional) special character to display in place of spaces + :param mark_control: str - (optional) convert non-printing control characters to a placeholding + character; valid values: + - "unicode" - replaces control chars with Unicode symbols, such as "␍" and "␊" + - any single character string - replace control characters with given string + - None (default) - string is displayed as-is + + :return: str - input string with leading line numbers and column number headers + """ + if expand_tabs: + s = s.expandtabs() + if mark_control is not None: + if mark_control == "unicode": + tbl = str.maketrans( + {c: u for c, u in zip(range(0, 33), range(0x2400, 0x2433))} + | {127: 0x2421} + ) + eol_mark = "" + else: + tbl = str.maketrans( + {c: mark_control for c in list(range(0, 32)) + [127]} + ) + s = s.translate(tbl) + if mark_spaces is not None and mark_spaces != " ": + if mark_spaces == "unicode": + tbl = str.maketrans({9: 0x2409, 32: 0x2423}) + s = s.translate(tbl) + else: + s = s.replace(" ", mark_spaces) + if start_line is None: + start_line = 1 + if end_line is None: + end_line = len(s) + end_line = min(end_line, len(s)) + start_line = min(max(1, start_line), end_line) + + if mark_control != "unicode": + s_lines = s.splitlines()[start_line - 1 : end_line] + else: + s_lines = [line + "␊" for line in s.split("␊")[start_line - 1 : end_line]] + if not s_lines: + return "" + + lineno_width = len(str(end_line)) + max_line_len = max(len(line) for line in s_lines) + lead = " " * (lineno_width + 1) + if max_line_len >= 99: + header0 = ( + lead + + "".join( + "{}{}".format(" " * 99, (i + 1) % 100) + for i in range(max(max_line_len // 100, 1)) + ) + + "\n" + ) + else: + header0 = "" + header1 = ( + header0 + + lead + + "".join( + " {}".format((i + 1) % 10) + for i in range(-(-max_line_len // 10)) + ) + + "\n" + ) + header2 = lead + "1234567890" * (-(-max_line_len // 10)) + "\n" + return ( + header1 + + header2 + + "\n".join( + "{:{}d}:{}{}".format(i, lineno_width, line, eol_mark) + for i, line in enumerate(s_lines, start=start_line) + ) + + "\n" + ) diff --git a/libs/common/pkg_resources/_vendor/pyparsing/unicode.py b/libs/common/pkg_resources/_vendor/pyparsing/unicode.py new file mode 100644 index 00000000..06526203 --- /dev/null +++ b/libs/common/pkg_resources/_vendor/pyparsing/unicode.py @@ -0,0 +1,352 @@ +# unicode.py + +import sys +from itertools import filterfalse +from typing import List, Tuple, Union + + +class _lazyclassproperty: + def __init__(self, fn): + self.fn = fn + self.__doc__ = fn.__doc__ + self.__name__ = fn.__name__ + + def __get__(self, obj, cls): + if cls is None: + cls = type(obj) + if not hasattr(cls, "_intern") or any( + cls._intern is getattr(superclass, "_intern", []) + for superclass in cls.__mro__[1:] + ): + cls._intern = {} + attrname = self.fn.__name__ + if attrname not in cls._intern: + cls._intern[attrname] = self.fn(cls) + return cls._intern[attrname] + + +UnicodeRangeList = List[Union[Tuple[int, int], Tuple[int]]] + + +class unicode_set: + """ + A set of Unicode characters, for language-specific strings for + ``alphas``, ``nums``, ``alphanums``, and ``printables``. + A unicode_set is defined by a list of ranges in the Unicode character + set, in a class attribute ``_ranges``. Ranges can be specified using + 2-tuples or a 1-tuple, such as:: + + _ranges = [ + (0x0020, 0x007e), + (0x00a0, 0x00ff), + (0x0100,), + ] + + Ranges are left- and right-inclusive. A 1-tuple of (x,) is treated as (x, x). + + A unicode set can also be defined using multiple inheritance of other unicode sets:: + + class CJK(Chinese, Japanese, Korean): + pass + """ + + _ranges: UnicodeRangeList = [] + + @_lazyclassproperty + def _chars_for_ranges(cls): + ret = [] + for cc in cls.__mro__: + if cc is unicode_set: + break + for rr in getattr(cc, "_ranges", ()): + ret.extend(range(rr[0], rr[-1] + 1)) + return [chr(c) for c in sorted(set(ret))] + + @_lazyclassproperty + def printables(cls): + "all non-whitespace characters in this range" + return "".join(filterfalse(str.isspace, cls._chars_for_ranges)) + + @_lazyclassproperty + def alphas(cls): + "all alphabetic characters in this range" + return "".join(filter(str.isalpha, cls._chars_for_ranges)) + + @_lazyclassproperty + def nums(cls): + "all numeric digit characters in this range" + return "".join(filter(str.isdigit, cls._chars_for_ranges)) + + @_lazyclassproperty + def alphanums(cls): + "all alphanumeric characters in this range" + return cls.alphas + cls.nums + + @_lazyclassproperty + def identchars(cls): + "all characters in this range that are valid identifier characters, plus underscore '_'" + return "".join( + sorted( + set( + "".join(filter(str.isidentifier, cls._chars_for_ranges)) + + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzªµº" + + "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ" + + "_" + ) + ) + ) + + @_lazyclassproperty + def identbodychars(cls): + """ + all characters in this range that are valid identifier body characters, + plus the digits 0-9 + """ + return "".join( + sorted( + set( + cls.identchars + + "0123456789" + + "".join( + [c for c in cls._chars_for_ranges if ("_" + c).isidentifier()] + ) + ) + ) + ) + + +class pyparsing_unicode(unicode_set): + """ + A namespace class for defining common language unicode_sets. + """ + + # fmt: off + + # define ranges in language character sets + _ranges: UnicodeRangeList = [ + (0x0020, sys.maxunicode), + ] + + class BasicMultilingualPlane(unicode_set): + "Unicode set for the Basic Multilingual Plane" + _ranges: UnicodeRangeList = [ + (0x0020, 0xFFFF), + ] + + class Latin1(unicode_set): + "Unicode set for Latin-1 Unicode Character Range" + _ranges: UnicodeRangeList = [ + (0x0020, 0x007E), + (0x00A0, 0x00FF), + ] + + class LatinA(unicode_set): + "Unicode set for Latin-A Unicode Character Range" + _ranges: UnicodeRangeList = [ + (0x0100, 0x017F), + ] + + class LatinB(unicode_set): + "Unicode set for Latin-B Unicode Character Range" + _ranges: UnicodeRangeList = [ + (0x0180, 0x024F), + ] + + class Greek(unicode_set): + "Unicode set for Greek Unicode Character Ranges" + _ranges: UnicodeRangeList = [ + (0x0342, 0x0345), + (0x0370, 0x0377), + (0x037A, 0x037F), + (0x0384, 0x038A), + (0x038C,), + (0x038E, 0x03A1), + (0x03A3, 0x03E1), + (0x03F0, 0x03FF), + (0x1D26, 0x1D2A), + (0x1D5E,), + (0x1D60,), + (0x1D66, 0x1D6A), + (0x1F00, 0x1F15), + (0x1F18, 0x1F1D), + (0x1F20, 0x1F45), + (0x1F48, 0x1F4D), + (0x1F50, 0x1F57), + (0x1F59,), + (0x1F5B,), + (0x1F5D,), + (0x1F5F, 0x1F7D), + (0x1F80, 0x1FB4), + (0x1FB6, 0x1FC4), + (0x1FC6, 0x1FD3), + (0x1FD6, 0x1FDB), + (0x1FDD, 0x1FEF), + (0x1FF2, 0x1FF4), + (0x1FF6, 0x1FFE), + (0x2129,), + (0x2719, 0x271A), + (0xAB65,), + (0x10140, 0x1018D), + (0x101A0,), + (0x1D200, 0x1D245), + (0x1F7A1, 0x1F7A7), + ] + + class Cyrillic(unicode_set): + "Unicode set for Cyrillic Unicode Character Range" + _ranges: UnicodeRangeList = [ + (0x0400, 0x052F), + (0x1C80, 0x1C88), + (0x1D2B,), + (0x1D78,), + (0x2DE0, 0x2DFF), + (0xA640, 0xA672), + (0xA674, 0xA69F), + (0xFE2E, 0xFE2F), + ] + + class Chinese(unicode_set): + "Unicode set for Chinese Unicode Character Range" + _ranges: UnicodeRangeList = [ + (0x2E80, 0x2E99), + (0x2E9B, 0x2EF3), + (0x31C0, 0x31E3), + (0x3400, 0x4DB5), + (0x4E00, 0x9FEF), + (0xA700, 0xA707), + (0xF900, 0xFA6D), + (0xFA70, 0xFAD9), + (0x16FE2, 0x16FE3), + (0x1F210, 0x1F212), + (0x1F214, 0x1F23B), + (0x1F240, 0x1F248), + (0x20000, 0x2A6D6), + (0x2A700, 0x2B734), + (0x2B740, 0x2B81D), + (0x2B820, 0x2CEA1), + (0x2CEB0, 0x2EBE0), + (0x2F800, 0x2FA1D), + ] + + class Japanese(unicode_set): + "Unicode set for Japanese Unicode Character Range, combining Kanji, Hiragana, and Katakana ranges" + _ranges: UnicodeRangeList = [] + + class Kanji(unicode_set): + "Unicode set for Kanji Unicode Character Range" + _ranges: UnicodeRangeList = [ + (0x4E00, 0x9FBF), + (0x3000, 0x303F), + ] + + class Hiragana(unicode_set): + "Unicode set for Hiragana Unicode Character Range" + _ranges: UnicodeRangeList = [ + (0x3041, 0x3096), + (0x3099, 0x30A0), + (0x30FC,), + (0xFF70,), + (0x1B001,), + (0x1B150, 0x1B152), + (0x1F200,), + ] + + class Katakana(unicode_set): + "Unicode set for Katakana Unicode Character Range" + _ranges: UnicodeRangeList = [ + (0x3099, 0x309C), + (0x30A0, 0x30FF), + (0x31F0, 0x31FF), + (0x32D0, 0x32FE), + (0xFF65, 0xFF9F), + (0x1B000,), + (0x1B164, 0x1B167), + (0x1F201, 0x1F202), + (0x1F213,), + ] + + class Hangul(unicode_set): + "Unicode set for Hangul (Korean) Unicode Character Range" + _ranges: UnicodeRangeList = [ + (0x1100, 0x11FF), + (0x302E, 0x302F), + (0x3131, 0x318E), + (0x3200, 0x321C), + (0x3260, 0x327B), + (0x327E,), + (0xA960, 0xA97C), + (0xAC00, 0xD7A3), + (0xD7B0, 0xD7C6), + (0xD7CB, 0xD7FB), + (0xFFA0, 0xFFBE), + (0xFFC2, 0xFFC7), + (0xFFCA, 0xFFCF), + (0xFFD2, 0xFFD7), + (0xFFDA, 0xFFDC), + ] + + Korean = Hangul + + class CJK(Chinese, Japanese, Hangul): + "Unicode set for combined Chinese, Japanese, and Korean (CJK) Unicode Character Range" + + class Thai(unicode_set): + "Unicode set for Thai Unicode Character Range" + _ranges: UnicodeRangeList = [ + (0x0E01, 0x0E3A), + (0x0E3F, 0x0E5B) + ] + + class Arabic(unicode_set): + "Unicode set for Arabic Unicode Character Range" + _ranges: UnicodeRangeList = [ + (0x0600, 0x061B), + (0x061E, 0x06FF), + (0x0700, 0x077F), + ] + + class Hebrew(unicode_set): + "Unicode set for Hebrew Unicode Character Range" + _ranges: UnicodeRangeList = [ + (0x0591, 0x05C7), + (0x05D0, 0x05EA), + (0x05EF, 0x05F4), + (0xFB1D, 0xFB36), + (0xFB38, 0xFB3C), + (0xFB3E,), + (0xFB40, 0xFB41), + (0xFB43, 0xFB44), + (0xFB46, 0xFB4F), + ] + + class Devanagari(unicode_set): + "Unicode set for Devanagari Unicode Character Range" + _ranges: UnicodeRangeList = [ + (0x0900, 0x097F), + (0xA8E0, 0xA8FF) + ] + + # fmt: on + + +pyparsing_unicode.Japanese._ranges = ( + pyparsing_unicode.Japanese.Kanji._ranges + + pyparsing_unicode.Japanese.Hiragana._ranges + + pyparsing_unicode.Japanese.Katakana._ranges +) + +pyparsing_unicode.BMP = pyparsing_unicode.BasicMultilingualPlane + +# add language identifiers using language Unicode +pyparsing_unicode.العربية = pyparsing_unicode.Arabic +pyparsing_unicode.中文 = pyparsing_unicode.Chinese +pyparsing_unicode.кириллица = pyparsing_unicode.Cyrillic +pyparsing_unicode.Ελληνικά = pyparsing_unicode.Greek +pyparsing_unicode.עִברִית = pyparsing_unicode.Hebrew +pyparsing_unicode.日本語 = pyparsing_unicode.Japanese +pyparsing_unicode.Japanese.漢字 = pyparsing_unicode.Japanese.Kanji +pyparsing_unicode.Japanese.カタカナ = pyparsing_unicode.Japanese.Katakana +pyparsing_unicode.Japanese.ひらがな = pyparsing_unicode.Japanese.Hiragana +pyparsing_unicode.한국어 = pyparsing_unicode.Korean +pyparsing_unicode.ไทย = pyparsing_unicode.Thai +pyparsing_unicode.देवनागरी = pyparsing_unicode.Devanagari diff --git a/libs/common/pkg_resources/_vendor/pyparsing/util.py b/libs/common/pkg_resources/_vendor/pyparsing/util.py new file mode 100644 index 00000000..34ce092c --- /dev/null +++ b/libs/common/pkg_resources/_vendor/pyparsing/util.py @@ -0,0 +1,235 @@ +# util.py +import warnings +import types +import collections +import itertools +from functools import lru_cache +from typing import List, Union, Iterable + +_bslash = chr(92) + + +class __config_flags: + """Internal class for defining compatibility and debugging flags""" + + _all_names: List[str] = [] + _fixed_names: List[str] = [] + _type_desc = "configuration" + + @classmethod + def _set(cls, dname, value): + if dname in cls._fixed_names: + warnings.warn( + "{}.{} {} is {} and cannot be overridden".format( + cls.__name__, + dname, + cls._type_desc, + str(getattr(cls, dname)).upper(), + ) + ) + return + if dname in cls._all_names: + setattr(cls, dname, value) + else: + raise ValueError("no such {} {!r}".format(cls._type_desc, dname)) + + enable = classmethod(lambda cls, name: cls._set(name, True)) + disable = classmethod(lambda cls, name: cls._set(name, False)) + + +@lru_cache(maxsize=128) +def col(loc: int, strg: str) -> int: + """ + Returns current column within a string, counting newlines as line separators. + The first column is number 1. + + Note: the default parsing behavior is to expand tabs in the input string + before starting the parsing process. See + :class:`ParserElement.parseString` for more + information on parsing strings containing ```` s, and suggested + methods to maintain a consistent view of the parsed string, the parse + location, and line and column positions within the parsed string. + """ + s = strg + return 1 if 0 < loc < len(s) and s[loc - 1] == "\n" else loc - s.rfind("\n", 0, loc) + + +@lru_cache(maxsize=128) +def lineno(loc: int, strg: str) -> int: + """Returns current line number within a string, counting newlines as line separators. + The first line is number 1. + + Note - the default parsing behavior is to expand tabs in the input string + before starting the parsing process. See :class:`ParserElement.parseString` + for more information on parsing strings containing ```` s, and + suggested methods to maintain a consistent view of the parsed string, the + parse location, and line and column positions within the parsed string. + """ + return strg.count("\n", 0, loc) + 1 + + +@lru_cache(maxsize=128) +def line(loc: int, strg: str) -> str: + """ + Returns the line of text containing loc within a string, counting newlines as line separators. + """ + last_cr = strg.rfind("\n", 0, loc) + next_cr = strg.find("\n", loc) + return strg[last_cr + 1 : next_cr] if next_cr >= 0 else strg[last_cr + 1 :] + + +class _UnboundedCache: + def __init__(self): + cache = {} + cache_get = cache.get + self.not_in_cache = not_in_cache = object() + + def get(_, key): + return cache_get(key, not_in_cache) + + def set_(_, key, value): + cache[key] = value + + def clear(_): + cache.clear() + + self.size = None + self.get = types.MethodType(get, self) + self.set = types.MethodType(set_, self) + self.clear = types.MethodType(clear, self) + + +class _FifoCache: + def __init__(self, size): + self.not_in_cache = not_in_cache = object() + cache = collections.OrderedDict() + cache_get = cache.get + + def get(_, key): + return cache_get(key, not_in_cache) + + def set_(_, key, value): + cache[key] = value + while len(cache) > size: + cache.popitem(last=False) + + def clear(_): + cache.clear() + + self.size = size + self.get = types.MethodType(get, self) + self.set = types.MethodType(set_, self) + self.clear = types.MethodType(clear, self) + + +class LRUMemo: + """ + A memoizing mapping that retains `capacity` deleted items + + The memo tracks retained items by their access order; once `capacity` items + are retained, the least recently used item is discarded. + """ + + def __init__(self, capacity): + self._capacity = capacity + self._active = {} + self._memory = collections.OrderedDict() + + def __getitem__(self, key): + try: + return self._active[key] + except KeyError: + self._memory.move_to_end(key) + return self._memory[key] + + def __setitem__(self, key, value): + self._memory.pop(key, None) + self._active[key] = value + + def __delitem__(self, key): + try: + value = self._active.pop(key) + except KeyError: + pass + else: + while len(self._memory) >= self._capacity: + self._memory.popitem(last=False) + self._memory[key] = value + + def clear(self): + self._active.clear() + self._memory.clear() + + +class UnboundedMemo(dict): + """ + A memoizing mapping that retains all deleted items + """ + + def __delitem__(self, key): + pass + + +def _escape_regex_range_chars(s: str) -> str: + # escape these chars: ^-[] + for c in r"\^-[]": + s = s.replace(c, _bslash + c) + s = s.replace("\n", r"\n") + s = s.replace("\t", r"\t") + return str(s) + + +def _collapse_string_to_ranges( + s: Union[str, Iterable[str]], re_escape: bool = True +) -> str: + def is_consecutive(c): + c_int = ord(c) + is_consecutive.prev, prev = c_int, is_consecutive.prev + if c_int - prev > 1: + is_consecutive.value = next(is_consecutive.counter) + return is_consecutive.value + + is_consecutive.prev = 0 + is_consecutive.counter = itertools.count() + is_consecutive.value = -1 + + def escape_re_range_char(c): + return "\\" + c if c in r"\^-][" else c + + def no_escape_re_range_char(c): + return c + + if not re_escape: + escape_re_range_char = no_escape_re_range_char + + ret = [] + s = "".join(sorted(set(s))) + if len(s) > 3: + for _, chars in itertools.groupby(s, key=is_consecutive): + first = last = next(chars) + last = collections.deque( + itertools.chain(iter([last]), chars), maxlen=1 + ).pop() + if first == last: + ret.append(escape_re_range_char(first)) + else: + sep = "" if ord(last) == ord(first) + 1 else "-" + ret.append( + "{}{}{}".format( + escape_re_range_char(first), sep, escape_re_range_char(last) + ) + ) + else: + ret = [escape_re_range_char(c) for c in s] + + return "".join(ret) + + +def _flatten(ll: list) -> list: + ret = [] + for i in ll: + if isinstance(i, list): + ret.extend(_flatten(i)) + else: + ret.append(i) + return ret diff --git a/libs/common/pkg_resources/_vendor/six.py b/libs/common/pkg_resources/_vendor/six.py deleted file mode 100644 index 190c0239..00000000 --- a/libs/common/pkg_resources/_vendor/six.py +++ /dev/null @@ -1,868 +0,0 @@ -"""Utilities for writing code that runs on Python 2 and 3""" - -# Copyright (c) 2010-2015 Benjamin Peterson -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -from __future__ import absolute_import - -import functools -import itertools -import operator -import sys -import types - -__author__ = "Benjamin Peterson " -__version__ = "1.10.0" - - -# Useful for very coarse version differentiation. -PY2 = sys.version_info[0] == 2 -PY3 = sys.version_info[0] == 3 -PY34 = sys.version_info[0:2] >= (3, 4) - -if PY3: - string_types = str, - integer_types = int, - class_types = type, - text_type = str - binary_type = bytes - - MAXSIZE = sys.maxsize -else: - string_types = basestring, - integer_types = (int, long) - class_types = (type, types.ClassType) - text_type = unicode - binary_type = str - - if sys.platform.startswith("java"): - # Jython always uses 32 bits. - MAXSIZE = int((1 << 31) - 1) - else: - # It's possible to have sizeof(long) != sizeof(Py_ssize_t). - class X(object): - - def __len__(self): - return 1 << 31 - try: - len(X()) - except OverflowError: - # 32-bit - MAXSIZE = int((1 << 31) - 1) - else: - # 64-bit - MAXSIZE = int((1 << 63) - 1) - del X - - -def _add_doc(func, doc): - """Add documentation to a function.""" - func.__doc__ = doc - - -def _import_module(name): - """Import module, returning the module after the last dot.""" - __import__(name) - return sys.modules[name] - - -class _LazyDescr(object): - - def __init__(self, name): - self.name = name - - def __get__(self, obj, tp): - result = self._resolve() - setattr(obj, self.name, result) # Invokes __set__. - try: - # This is a bit ugly, but it avoids running this again by - # removing this descriptor. - delattr(obj.__class__, self.name) - except AttributeError: - pass - return result - - -class MovedModule(_LazyDescr): - - def __init__(self, name, old, new=None): - super(MovedModule, self).__init__(name) - if PY3: - if new is None: - new = name - self.mod = new - else: - self.mod = old - - def _resolve(self): - return _import_module(self.mod) - - def __getattr__(self, attr): - _module = self._resolve() - value = getattr(_module, attr) - setattr(self, attr, value) - return value - - -class _LazyModule(types.ModuleType): - - def __init__(self, name): - super(_LazyModule, self).__init__(name) - self.__doc__ = self.__class__.__doc__ - - def __dir__(self): - attrs = ["__doc__", "__name__"] - attrs += [attr.name for attr in self._moved_attributes] - return attrs - - # Subclasses should override this - _moved_attributes = [] - - -class MovedAttribute(_LazyDescr): - - def __init__(self, name, old_mod, new_mod, old_attr=None, new_attr=None): - super(MovedAttribute, self).__init__(name) - if PY3: - if new_mod is None: - new_mod = name - self.mod = new_mod - if new_attr is None: - if old_attr is None: - new_attr = name - else: - new_attr = old_attr - self.attr = new_attr - else: - self.mod = old_mod - if old_attr is None: - old_attr = name - self.attr = old_attr - - def _resolve(self): - module = _import_module(self.mod) - return getattr(module, self.attr) - - -class _SixMetaPathImporter(object): - - """ - A meta path importer to import six.moves and its submodules. - - This class implements a PEP302 finder and loader. It should be compatible - with Python 2.5 and all existing versions of Python3 - """ - - def __init__(self, six_module_name): - self.name = six_module_name - self.known_modules = {} - - def _add_module(self, mod, *fullnames): - for fullname in fullnames: - self.known_modules[self.name + "." + fullname] = mod - - def _get_module(self, fullname): - return self.known_modules[self.name + "." + fullname] - - def find_module(self, fullname, path=None): - if fullname in self.known_modules: - return self - return None - - def __get_module(self, fullname): - try: - return self.known_modules[fullname] - except KeyError: - raise ImportError("This loader does not know module " + fullname) - - def load_module(self, fullname): - try: - # in case of a reload - return sys.modules[fullname] - except KeyError: - pass - mod = self.__get_module(fullname) - if isinstance(mod, MovedModule): - mod = mod._resolve() - else: - mod.__loader__ = self - sys.modules[fullname] = mod - return mod - - def is_package(self, fullname): - """ - Return true, if the named module is a package. - - We need this method to get correct spec objects with - Python 3.4 (see PEP451) - """ - return hasattr(self.__get_module(fullname), "__path__") - - def get_code(self, fullname): - """Return None - - Required, if is_package is implemented""" - self.__get_module(fullname) # eventually raises ImportError - return None - get_source = get_code # same as get_code - -_importer = _SixMetaPathImporter(__name__) - - -class _MovedItems(_LazyModule): - - """Lazy loading of moved objects""" - __path__ = [] # mark as package - - -_moved_attributes = [ - MovedAttribute("cStringIO", "cStringIO", "io", "StringIO"), - MovedAttribute("filter", "itertools", "builtins", "ifilter", "filter"), - MovedAttribute("filterfalse", "itertools", "itertools", "ifilterfalse", "filterfalse"), - MovedAttribute("input", "__builtin__", "builtins", "raw_input", "input"), - MovedAttribute("intern", "__builtin__", "sys"), - MovedAttribute("map", "itertools", "builtins", "imap", "map"), - MovedAttribute("getcwd", "os", "os", "getcwdu", "getcwd"), - MovedAttribute("getcwdb", "os", "os", "getcwd", "getcwdb"), - MovedAttribute("range", "__builtin__", "builtins", "xrange", "range"), - MovedAttribute("reload_module", "__builtin__", "importlib" if PY34 else "imp", "reload"), - MovedAttribute("reduce", "__builtin__", "functools"), - MovedAttribute("shlex_quote", "pipes", "shlex", "quote"), - MovedAttribute("StringIO", "StringIO", "io"), - MovedAttribute("UserDict", "UserDict", "collections"), - MovedAttribute("UserList", "UserList", "collections"), - MovedAttribute("UserString", "UserString", "collections"), - MovedAttribute("xrange", "__builtin__", "builtins", "xrange", "range"), - MovedAttribute("zip", "itertools", "builtins", "izip", "zip"), - MovedAttribute("zip_longest", "itertools", "itertools", "izip_longest", "zip_longest"), - MovedModule("builtins", "__builtin__"), - MovedModule("configparser", "ConfigParser"), - MovedModule("copyreg", "copy_reg"), - MovedModule("dbm_gnu", "gdbm", "dbm.gnu"), - MovedModule("_dummy_thread", "dummy_thread", "_dummy_thread"), - MovedModule("http_cookiejar", "cookielib", "http.cookiejar"), - MovedModule("http_cookies", "Cookie", "http.cookies"), - MovedModule("html_entities", "htmlentitydefs", "html.entities"), - MovedModule("html_parser", "HTMLParser", "html.parser"), - MovedModule("http_client", "httplib", "http.client"), - MovedModule("email_mime_multipart", "email.MIMEMultipart", "email.mime.multipart"), - MovedModule("email_mime_nonmultipart", "email.MIMENonMultipart", "email.mime.nonmultipart"), - MovedModule("email_mime_text", "email.MIMEText", "email.mime.text"), - MovedModule("email_mime_base", "email.MIMEBase", "email.mime.base"), - MovedModule("BaseHTTPServer", "BaseHTTPServer", "http.server"), - MovedModule("CGIHTTPServer", "CGIHTTPServer", "http.server"), - MovedModule("SimpleHTTPServer", "SimpleHTTPServer", "http.server"), - MovedModule("cPickle", "cPickle", "pickle"), - MovedModule("queue", "Queue"), - MovedModule("reprlib", "repr"), - MovedModule("socketserver", "SocketServer"), - MovedModule("_thread", "thread", "_thread"), - MovedModule("tkinter", "Tkinter"), - MovedModule("tkinter_dialog", "Dialog", "tkinter.dialog"), - MovedModule("tkinter_filedialog", "FileDialog", "tkinter.filedialog"), - MovedModule("tkinter_scrolledtext", "ScrolledText", "tkinter.scrolledtext"), - MovedModule("tkinter_simpledialog", "SimpleDialog", "tkinter.simpledialog"), - MovedModule("tkinter_tix", "Tix", "tkinter.tix"), - MovedModule("tkinter_ttk", "ttk", "tkinter.ttk"), - MovedModule("tkinter_constants", "Tkconstants", "tkinter.constants"), - MovedModule("tkinter_dnd", "Tkdnd", "tkinter.dnd"), - MovedModule("tkinter_colorchooser", "tkColorChooser", - "tkinter.colorchooser"), - MovedModule("tkinter_commondialog", "tkCommonDialog", - "tkinter.commondialog"), - MovedModule("tkinter_tkfiledialog", "tkFileDialog", "tkinter.filedialog"), - MovedModule("tkinter_font", "tkFont", "tkinter.font"), - MovedModule("tkinter_messagebox", "tkMessageBox", "tkinter.messagebox"), - MovedModule("tkinter_tksimpledialog", "tkSimpleDialog", - "tkinter.simpledialog"), - MovedModule("urllib_parse", __name__ + ".moves.urllib_parse", "urllib.parse"), - MovedModule("urllib_error", __name__ + ".moves.urllib_error", "urllib.error"), - MovedModule("urllib", __name__ + ".moves.urllib", __name__ + ".moves.urllib"), - MovedModule("urllib_robotparser", "robotparser", "urllib.robotparser"), - MovedModule("xmlrpc_client", "xmlrpclib", "xmlrpc.client"), - MovedModule("xmlrpc_server", "SimpleXMLRPCServer", "xmlrpc.server"), -] -# Add windows specific modules. -if sys.platform == "win32": - _moved_attributes += [ - MovedModule("winreg", "_winreg"), - ] - -for attr in _moved_attributes: - setattr(_MovedItems, attr.name, attr) - if isinstance(attr, MovedModule): - _importer._add_module(attr, "moves." + attr.name) -del attr - -_MovedItems._moved_attributes = _moved_attributes - -moves = _MovedItems(__name__ + ".moves") -_importer._add_module(moves, "moves") - - -class Module_six_moves_urllib_parse(_LazyModule): - - """Lazy loading of moved objects in six.moves.urllib_parse""" - - -_urllib_parse_moved_attributes = [ - MovedAttribute("ParseResult", "urlparse", "urllib.parse"), - MovedAttribute("SplitResult", "urlparse", "urllib.parse"), - MovedAttribute("parse_qs", "urlparse", "urllib.parse"), - MovedAttribute("parse_qsl", "urlparse", "urllib.parse"), - MovedAttribute("urldefrag", "urlparse", "urllib.parse"), - MovedAttribute("urljoin", "urlparse", "urllib.parse"), - MovedAttribute("urlparse", "urlparse", "urllib.parse"), - MovedAttribute("urlsplit", "urlparse", "urllib.parse"), - MovedAttribute("urlunparse", "urlparse", "urllib.parse"), - MovedAttribute("urlunsplit", "urlparse", "urllib.parse"), - MovedAttribute("quote", "urllib", "urllib.parse"), - MovedAttribute("quote_plus", "urllib", "urllib.parse"), - MovedAttribute("unquote", "urllib", "urllib.parse"), - MovedAttribute("unquote_plus", "urllib", "urllib.parse"), - MovedAttribute("urlencode", "urllib", "urllib.parse"), - MovedAttribute("splitquery", "urllib", "urllib.parse"), - MovedAttribute("splittag", "urllib", "urllib.parse"), - MovedAttribute("splituser", "urllib", "urllib.parse"), - MovedAttribute("uses_fragment", "urlparse", "urllib.parse"), - MovedAttribute("uses_netloc", "urlparse", "urllib.parse"), - MovedAttribute("uses_params", "urlparse", "urllib.parse"), - MovedAttribute("uses_query", "urlparse", "urllib.parse"), - MovedAttribute("uses_relative", "urlparse", "urllib.parse"), -] -for attr in _urllib_parse_moved_attributes: - setattr(Module_six_moves_urllib_parse, attr.name, attr) -del attr - -Module_six_moves_urllib_parse._moved_attributes = _urllib_parse_moved_attributes - -_importer._add_module(Module_six_moves_urllib_parse(__name__ + ".moves.urllib_parse"), - "moves.urllib_parse", "moves.urllib.parse") - - -class Module_six_moves_urllib_error(_LazyModule): - - """Lazy loading of moved objects in six.moves.urllib_error""" - - -_urllib_error_moved_attributes = [ - MovedAttribute("URLError", "urllib2", "urllib.error"), - MovedAttribute("HTTPError", "urllib2", "urllib.error"), - MovedAttribute("ContentTooShortError", "urllib", "urllib.error"), -] -for attr in _urllib_error_moved_attributes: - setattr(Module_six_moves_urllib_error, attr.name, attr) -del attr - -Module_six_moves_urllib_error._moved_attributes = _urllib_error_moved_attributes - -_importer._add_module(Module_six_moves_urllib_error(__name__ + ".moves.urllib.error"), - "moves.urllib_error", "moves.urllib.error") - - -class Module_six_moves_urllib_request(_LazyModule): - - """Lazy loading of moved objects in six.moves.urllib_request""" - - -_urllib_request_moved_attributes = [ - MovedAttribute("urlopen", "urllib2", "urllib.request"), - MovedAttribute("install_opener", "urllib2", "urllib.request"), - MovedAttribute("build_opener", "urllib2", "urllib.request"), - MovedAttribute("pathname2url", "urllib", "urllib.request"), - MovedAttribute("url2pathname", "urllib", "urllib.request"), - MovedAttribute("getproxies", "urllib", "urllib.request"), - MovedAttribute("Request", "urllib2", "urllib.request"), - MovedAttribute("OpenerDirector", "urllib2", "urllib.request"), - MovedAttribute("HTTPDefaultErrorHandler", "urllib2", "urllib.request"), - MovedAttribute("HTTPRedirectHandler", "urllib2", "urllib.request"), - MovedAttribute("HTTPCookieProcessor", "urllib2", "urllib.request"), - MovedAttribute("ProxyHandler", "urllib2", "urllib.request"), - MovedAttribute("BaseHandler", "urllib2", "urllib.request"), - MovedAttribute("HTTPPasswordMgr", "urllib2", "urllib.request"), - MovedAttribute("HTTPPasswordMgrWithDefaultRealm", "urllib2", "urllib.request"), - MovedAttribute("AbstractBasicAuthHandler", "urllib2", "urllib.request"), - MovedAttribute("HTTPBasicAuthHandler", "urllib2", "urllib.request"), - MovedAttribute("ProxyBasicAuthHandler", "urllib2", "urllib.request"), - MovedAttribute("AbstractDigestAuthHandler", "urllib2", "urllib.request"), - MovedAttribute("HTTPDigestAuthHandler", "urllib2", "urllib.request"), - MovedAttribute("ProxyDigestAuthHandler", "urllib2", "urllib.request"), - MovedAttribute("HTTPHandler", "urllib2", "urllib.request"), - MovedAttribute("HTTPSHandler", "urllib2", "urllib.request"), - MovedAttribute("FileHandler", "urllib2", "urllib.request"), - MovedAttribute("FTPHandler", "urllib2", "urllib.request"), - MovedAttribute("CacheFTPHandler", "urllib2", "urllib.request"), - MovedAttribute("UnknownHandler", "urllib2", "urllib.request"), - MovedAttribute("HTTPErrorProcessor", "urllib2", "urllib.request"), - MovedAttribute("urlretrieve", "urllib", "urllib.request"), - MovedAttribute("urlcleanup", "urllib", "urllib.request"), - MovedAttribute("URLopener", "urllib", "urllib.request"), - MovedAttribute("FancyURLopener", "urllib", "urllib.request"), - MovedAttribute("proxy_bypass", "urllib", "urllib.request"), -] -for attr in _urllib_request_moved_attributes: - setattr(Module_six_moves_urllib_request, attr.name, attr) -del attr - -Module_six_moves_urllib_request._moved_attributes = _urllib_request_moved_attributes - -_importer._add_module(Module_six_moves_urllib_request(__name__ + ".moves.urllib.request"), - "moves.urllib_request", "moves.urllib.request") - - -class Module_six_moves_urllib_response(_LazyModule): - - """Lazy loading of moved objects in six.moves.urllib_response""" - - -_urllib_response_moved_attributes = [ - MovedAttribute("addbase", "urllib", "urllib.response"), - MovedAttribute("addclosehook", "urllib", "urllib.response"), - MovedAttribute("addinfo", "urllib", "urllib.response"), - MovedAttribute("addinfourl", "urllib", "urllib.response"), -] -for attr in _urllib_response_moved_attributes: - setattr(Module_six_moves_urllib_response, attr.name, attr) -del attr - -Module_six_moves_urllib_response._moved_attributes = _urllib_response_moved_attributes - -_importer._add_module(Module_six_moves_urllib_response(__name__ + ".moves.urllib.response"), - "moves.urllib_response", "moves.urllib.response") - - -class Module_six_moves_urllib_robotparser(_LazyModule): - - """Lazy loading of moved objects in six.moves.urllib_robotparser""" - - -_urllib_robotparser_moved_attributes = [ - MovedAttribute("RobotFileParser", "robotparser", "urllib.robotparser"), -] -for attr in _urllib_robotparser_moved_attributes: - setattr(Module_six_moves_urllib_robotparser, attr.name, attr) -del attr - -Module_six_moves_urllib_robotparser._moved_attributes = _urllib_robotparser_moved_attributes - -_importer._add_module(Module_six_moves_urllib_robotparser(__name__ + ".moves.urllib.robotparser"), - "moves.urllib_robotparser", "moves.urllib.robotparser") - - -class Module_six_moves_urllib(types.ModuleType): - - """Create a six.moves.urllib namespace that resembles the Python 3 namespace""" - __path__ = [] # mark as package - parse = _importer._get_module("moves.urllib_parse") - error = _importer._get_module("moves.urllib_error") - request = _importer._get_module("moves.urllib_request") - response = _importer._get_module("moves.urllib_response") - robotparser = _importer._get_module("moves.urllib_robotparser") - - def __dir__(self): - return ['parse', 'error', 'request', 'response', 'robotparser'] - -_importer._add_module(Module_six_moves_urllib(__name__ + ".moves.urllib"), - "moves.urllib") - - -def add_move(move): - """Add an item to six.moves.""" - setattr(_MovedItems, move.name, move) - - -def remove_move(name): - """Remove item from six.moves.""" - try: - delattr(_MovedItems, name) - except AttributeError: - try: - del moves.__dict__[name] - except KeyError: - raise AttributeError("no such move, %r" % (name,)) - - -if PY3: - _meth_func = "__func__" - _meth_self = "__self__" - - _func_closure = "__closure__" - _func_code = "__code__" - _func_defaults = "__defaults__" - _func_globals = "__globals__" -else: - _meth_func = "im_func" - _meth_self = "im_self" - - _func_closure = "func_closure" - _func_code = "func_code" - _func_defaults = "func_defaults" - _func_globals = "func_globals" - - -try: - advance_iterator = next -except NameError: - def advance_iterator(it): - return it.next() -next = advance_iterator - - -try: - callable = callable -except NameError: - def callable(obj): - return any("__call__" in klass.__dict__ for klass in type(obj).__mro__) - - -if PY3: - def get_unbound_function(unbound): - return unbound - - create_bound_method = types.MethodType - - def create_unbound_method(func, cls): - return func - - Iterator = object -else: - def get_unbound_function(unbound): - return unbound.im_func - - def create_bound_method(func, obj): - return types.MethodType(func, obj, obj.__class__) - - def create_unbound_method(func, cls): - return types.MethodType(func, None, cls) - - class Iterator(object): - - def next(self): - return type(self).__next__(self) - - callable = callable -_add_doc(get_unbound_function, - """Get the function out of a possibly unbound function""") - - -get_method_function = operator.attrgetter(_meth_func) -get_method_self = operator.attrgetter(_meth_self) -get_function_closure = operator.attrgetter(_func_closure) -get_function_code = operator.attrgetter(_func_code) -get_function_defaults = operator.attrgetter(_func_defaults) -get_function_globals = operator.attrgetter(_func_globals) - - -if PY3: - def iterkeys(d, **kw): - return iter(d.keys(**kw)) - - def itervalues(d, **kw): - return iter(d.values(**kw)) - - def iteritems(d, **kw): - return iter(d.items(**kw)) - - def iterlists(d, **kw): - return iter(d.lists(**kw)) - - viewkeys = operator.methodcaller("keys") - - viewvalues = operator.methodcaller("values") - - viewitems = operator.methodcaller("items") -else: - def iterkeys(d, **kw): - return d.iterkeys(**kw) - - def itervalues(d, **kw): - return d.itervalues(**kw) - - def iteritems(d, **kw): - return d.iteritems(**kw) - - def iterlists(d, **kw): - return d.iterlists(**kw) - - viewkeys = operator.methodcaller("viewkeys") - - viewvalues = operator.methodcaller("viewvalues") - - viewitems = operator.methodcaller("viewitems") - -_add_doc(iterkeys, "Return an iterator over the keys of a dictionary.") -_add_doc(itervalues, "Return an iterator over the values of a dictionary.") -_add_doc(iteritems, - "Return an iterator over the (key, value) pairs of a dictionary.") -_add_doc(iterlists, - "Return an iterator over the (key, [values]) pairs of a dictionary.") - - -if PY3: - def b(s): - return s.encode("latin-1") - - def u(s): - return s - unichr = chr - import struct - int2byte = struct.Struct(">B").pack - del struct - byte2int = operator.itemgetter(0) - indexbytes = operator.getitem - iterbytes = iter - import io - StringIO = io.StringIO - BytesIO = io.BytesIO - _assertCountEqual = "assertCountEqual" - if sys.version_info[1] <= 1: - _assertRaisesRegex = "assertRaisesRegexp" - _assertRegex = "assertRegexpMatches" - else: - _assertRaisesRegex = "assertRaisesRegex" - _assertRegex = "assertRegex" -else: - def b(s): - return s - # Workaround for standalone backslash - - def u(s): - return unicode(s.replace(r'\\', r'\\\\'), "unicode_escape") - unichr = unichr - int2byte = chr - - def byte2int(bs): - return ord(bs[0]) - - def indexbytes(buf, i): - return ord(buf[i]) - iterbytes = functools.partial(itertools.imap, ord) - import StringIO - StringIO = BytesIO = StringIO.StringIO - _assertCountEqual = "assertItemsEqual" - _assertRaisesRegex = "assertRaisesRegexp" - _assertRegex = "assertRegexpMatches" -_add_doc(b, """Byte literal""") -_add_doc(u, """Text literal""") - - -def assertCountEqual(self, *args, **kwargs): - return getattr(self, _assertCountEqual)(*args, **kwargs) - - -def assertRaisesRegex(self, *args, **kwargs): - return getattr(self, _assertRaisesRegex)(*args, **kwargs) - - -def assertRegex(self, *args, **kwargs): - return getattr(self, _assertRegex)(*args, **kwargs) - - -if PY3: - exec_ = getattr(moves.builtins, "exec") - - def reraise(tp, value, tb=None): - if value is None: - value = tp() - if value.__traceback__ is not tb: - raise value.with_traceback(tb) - raise value - -else: - def exec_(_code_, _globs_=None, _locs_=None): - """Execute code in a namespace.""" - if _globs_ is None: - frame = sys._getframe(1) - _globs_ = frame.f_globals - if _locs_ is None: - _locs_ = frame.f_locals - del frame - elif _locs_ is None: - _locs_ = _globs_ - exec("""exec _code_ in _globs_, _locs_""") - - exec_("""def reraise(tp, value, tb=None): - raise tp, value, tb -""") - - -if sys.version_info[:2] == (3, 2): - exec_("""def raise_from(value, from_value): - if from_value is None: - raise value - raise value from from_value -""") -elif sys.version_info[:2] > (3, 2): - exec_("""def raise_from(value, from_value): - raise value from from_value -""") -else: - def raise_from(value, from_value): - raise value - - -print_ = getattr(moves.builtins, "print", None) -if print_ is None: - def print_(*args, **kwargs): - """The new-style print function for Python 2.4 and 2.5.""" - fp = kwargs.pop("file", sys.stdout) - if fp is None: - return - - def write(data): - if not isinstance(data, basestring): - data = str(data) - # If the file has an encoding, encode unicode with it. - if (isinstance(fp, file) and - isinstance(data, unicode) and - fp.encoding is not None): - errors = getattr(fp, "errors", None) - if errors is None: - errors = "strict" - data = data.encode(fp.encoding, errors) - fp.write(data) - want_unicode = False - sep = kwargs.pop("sep", None) - if sep is not None: - if isinstance(sep, unicode): - want_unicode = True - elif not isinstance(sep, str): - raise TypeError("sep must be None or a string") - end = kwargs.pop("end", None) - if end is not None: - if isinstance(end, unicode): - want_unicode = True - elif not isinstance(end, str): - raise TypeError("end must be None or a string") - if kwargs: - raise TypeError("invalid keyword arguments to print()") - if not want_unicode: - for arg in args: - if isinstance(arg, unicode): - want_unicode = True - break - if want_unicode: - newline = unicode("\n") - space = unicode(" ") - else: - newline = "\n" - space = " " - if sep is None: - sep = space - if end is None: - end = newline - for i, arg in enumerate(args): - if i: - write(sep) - write(arg) - write(end) -if sys.version_info[:2] < (3, 3): - _print = print_ - - def print_(*args, **kwargs): - fp = kwargs.get("file", sys.stdout) - flush = kwargs.pop("flush", False) - _print(*args, **kwargs) - if flush and fp is not None: - fp.flush() - -_add_doc(reraise, """Reraise an exception.""") - -if sys.version_info[0:2] < (3, 4): - def wraps(wrapped, assigned=functools.WRAPPER_ASSIGNMENTS, - updated=functools.WRAPPER_UPDATES): - def wrapper(f): - f = functools.wraps(wrapped, assigned, updated)(f) - f.__wrapped__ = wrapped - return f - return wrapper -else: - wraps = functools.wraps - - -def with_metaclass(meta, *bases): - """Create a base class with a metaclass.""" - # This requires a bit of explanation: the basic idea is to make a dummy - # metaclass for one level of class instantiation that replaces itself with - # the actual metaclass. - class metaclass(meta): - - def __new__(cls, name, this_bases, d): - return meta(name, bases, d) - return type.__new__(metaclass, 'temporary_class', (), {}) - - -def add_metaclass(metaclass): - """Class decorator for creating a class with a metaclass.""" - def wrapper(cls): - orig_vars = cls.__dict__.copy() - slots = orig_vars.get('__slots__') - if slots is not None: - if isinstance(slots, str): - slots = [slots] - for slots_var in slots: - orig_vars.pop(slots_var) - orig_vars.pop('__dict__', None) - orig_vars.pop('__weakref__', None) - return metaclass(cls.__name__, cls.__bases__, orig_vars) - return wrapper - - -def python_2_unicode_compatible(klass): - """ - A decorator that defines __unicode__ and __str__ methods under Python 2. - Under Python 3 it does nothing. - - To support Python 2 and 3 with a single code base, define a __str__ method - returning text and apply this decorator to the class. - """ - if PY2: - if '__str__' not in klass.__dict__: - raise ValueError("@python_2_unicode_compatible cannot be applied " - "to %s because it doesn't define __str__()." % - klass.__name__) - klass.__unicode__ = klass.__str__ - klass.__str__ = lambda self: self.__unicode__().encode('utf-8') - return klass - - -# Complete the moves implementation. -# This code is at the end of this module to speed up module loading. -# Turn this module into a package. -__path__ = [] # required for PEP 302 and PEP 451 -__package__ = __name__ # see PEP 366 @ReservedAssignment -if globals().get("__spec__") is not None: - __spec__.submodule_search_locations = [] # PEP 451 @UndefinedVariable -# Remove other six meta path importers, since they cause problems. This can -# happen if six is removed from sys.modules and then reloaded. (Setuptools does -# this for some reason.) -if sys.meta_path: - for i, importer in enumerate(sys.meta_path): - # Here's some real nastiness: Another "instance" of the six module might - # be floating around. Therefore, we can't use isinstance() to check for - # the six meta path importer, since the other six instance will have - # inserted an importer with different class. - if (type(importer).__name__ == "_SixMetaPathImporter" and - importer.name == __name__): - del sys.meta_path[i] - break - del i, importer -# Finally, add the importer to the meta path import hook. -sys.meta_path.append(_importer) diff --git a/libs/common/pkg_resources/_vendor/zipp.py b/libs/common/pkg_resources/_vendor/zipp.py new file mode 100644 index 00000000..26b723c1 --- /dev/null +++ b/libs/common/pkg_resources/_vendor/zipp.py @@ -0,0 +1,329 @@ +import io +import posixpath +import zipfile +import itertools +import contextlib +import sys +import pathlib + +if sys.version_info < (3, 7): + from collections import OrderedDict +else: + OrderedDict = dict + + +__all__ = ['Path'] + + +def _parents(path): + """ + Given a path with elements separated by + posixpath.sep, generate all parents of that path. + + >>> list(_parents('b/d')) + ['b'] + >>> list(_parents('/b/d/')) + ['/b'] + >>> list(_parents('b/d/f/')) + ['b/d', 'b'] + >>> list(_parents('b')) + [] + >>> list(_parents('')) + [] + """ + return itertools.islice(_ancestry(path), 1, None) + + +def _ancestry(path): + """ + Given a path with elements separated by + posixpath.sep, generate all elements of that path + + >>> list(_ancestry('b/d')) + ['b/d', 'b'] + >>> list(_ancestry('/b/d/')) + ['/b/d', '/b'] + >>> list(_ancestry('b/d/f/')) + ['b/d/f', 'b/d', 'b'] + >>> list(_ancestry('b')) + ['b'] + >>> list(_ancestry('')) + [] + """ + path = path.rstrip(posixpath.sep) + while path and path != posixpath.sep: + yield path + path, tail = posixpath.split(path) + + +_dedupe = OrderedDict.fromkeys +"""Deduplicate an iterable in original order""" + + +def _difference(minuend, subtrahend): + """ + Return items in minuend not in subtrahend, retaining order + with O(1) lookup. + """ + return itertools.filterfalse(set(subtrahend).__contains__, minuend) + + +class CompleteDirs(zipfile.ZipFile): + """ + A ZipFile subclass that ensures that implied directories + are always included in the namelist. + """ + + @staticmethod + def _implied_dirs(names): + parents = itertools.chain.from_iterable(map(_parents, names)) + as_dirs = (p + posixpath.sep for p in parents) + return _dedupe(_difference(as_dirs, names)) + + def namelist(self): + names = super(CompleteDirs, self).namelist() + return names + list(self._implied_dirs(names)) + + def _name_set(self): + return set(self.namelist()) + + def resolve_dir(self, name): + """ + If the name represents a directory, return that name + as a directory (with the trailing slash). + """ + names = self._name_set() + dirname = name + '/' + dir_match = name not in names and dirname in names + return dirname if dir_match else name + + @classmethod + def make(cls, source): + """ + Given a source (filename or zipfile), return an + appropriate CompleteDirs subclass. + """ + if isinstance(source, CompleteDirs): + return source + + if not isinstance(source, zipfile.ZipFile): + return cls(_pathlib_compat(source)) + + # Only allow for FastLookup when supplied zipfile is read-only + if 'r' not in source.mode: + cls = CompleteDirs + + source.__class__ = cls + return source + + +class FastLookup(CompleteDirs): + """ + ZipFile subclass to ensure implicit + dirs exist and are resolved rapidly. + """ + + def namelist(self): + with contextlib.suppress(AttributeError): + return self.__names + self.__names = super(FastLookup, self).namelist() + return self.__names + + def _name_set(self): + with contextlib.suppress(AttributeError): + return self.__lookup + self.__lookup = super(FastLookup, self)._name_set() + return self.__lookup + + +def _pathlib_compat(path): + """ + For path-like objects, convert to a filename for compatibility + on Python 3.6.1 and earlier. + """ + try: + return path.__fspath__() + except AttributeError: + return str(path) + + +class Path: + """ + A pathlib-compatible interface for zip files. + + Consider a zip file with this structure:: + + . + ├── a.txt + └── b + ├── c.txt + └── d + └── e.txt + + >>> data = io.BytesIO() + >>> zf = zipfile.ZipFile(data, 'w') + >>> zf.writestr('a.txt', 'content of a') + >>> zf.writestr('b/c.txt', 'content of c') + >>> zf.writestr('b/d/e.txt', 'content of e') + >>> zf.filename = 'mem/abcde.zip' + + Path accepts the zipfile object itself or a filename + + >>> root = Path(zf) + + From there, several path operations are available. + + Directory iteration (including the zip file itself): + + >>> a, b = root.iterdir() + >>> a + Path('mem/abcde.zip', 'a.txt') + >>> b + Path('mem/abcde.zip', 'b/') + + name property: + + >>> b.name + 'b' + + join with divide operator: + + >>> c = b / 'c.txt' + >>> c + Path('mem/abcde.zip', 'b/c.txt') + >>> c.name + 'c.txt' + + Read text: + + >>> c.read_text() + 'content of c' + + existence: + + >>> c.exists() + True + >>> (b / 'missing.txt').exists() + False + + Coercion to string: + + >>> import os + >>> str(c).replace(os.sep, posixpath.sep) + 'mem/abcde.zip/b/c.txt' + + At the root, ``name``, ``filename``, and ``parent`` + resolve to the zipfile. Note these attributes are not + valid and will raise a ``ValueError`` if the zipfile + has no filename. + + >>> root.name + 'abcde.zip' + >>> str(root.filename).replace(os.sep, posixpath.sep) + 'mem/abcde.zip' + >>> str(root.parent) + 'mem' + """ + + __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})" + + def __init__(self, root, at=""): + """ + Construct a Path from a ZipFile or filename. + + Note: When the source is an existing ZipFile object, + its type (__class__) will be mutated to a + specialized type. If the caller wishes to retain the + original type, the caller should either create a + separate ZipFile object or pass a filename. + """ + self.root = FastLookup.make(root) + self.at = at + + def open(self, mode='r', *args, pwd=None, **kwargs): + """ + Open this entry as text or binary following the semantics + of ``pathlib.Path.open()`` by passing arguments through + to io.TextIOWrapper(). + """ + if self.is_dir(): + raise IsADirectoryError(self) + zip_mode = mode[0] + if not self.exists() and zip_mode == 'r': + raise FileNotFoundError(self) + stream = self.root.open(self.at, zip_mode, pwd=pwd) + if 'b' in mode: + if args or kwargs: + raise ValueError("encoding args invalid for binary operation") + return stream + return io.TextIOWrapper(stream, *args, **kwargs) + + @property + def name(self): + return pathlib.Path(self.at).name or self.filename.name + + @property + def suffix(self): + return pathlib.Path(self.at).suffix or self.filename.suffix + + @property + def suffixes(self): + return pathlib.Path(self.at).suffixes or self.filename.suffixes + + @property + def stem(self): + return pathlib.Path(self.at).stem or self.filename.stem + + @property + def filename(self): + return pathlib.Path(self.root.filename).joinpath(self.at) + + def read_text(self, *args, **kwargs): + with self.open('r', *args, **kwargs) as strm: + return strm.read() + + def read_bytes(self): + with self.open('rb') as strm: + return strm.read() + + def _is_child(self, path): + return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/") + + def _next(self, at): + return self.__class__(self.root, at) + + def is_dir(self): + return not self.at or self.at.endswith("/") + + def is_file(self): + return self.exists() and not self.is_dir() + + def exists(self): + return self.at in self.root._name_set() + + def iterdir(self): + if not self.is_dir(): + raise ValueError("Can't listdir a file") + subs = map(self._next, self.root.namelist()) + return filter(self._is_child, subs) + + def __str__(self): + return posixpath.join(self.root.filename, self.at) + + def __repr__(self): + return self.__repr.format(self=self) + + def joinpath(self, *other): + next = posixpath.join(self.at, *map(_pathlib_compat, other)) + return self._next(self.root.resolve_dir(next)) + + __truediv__ = joinpath + + @property + def parent(self): + if not self.at: + return self.filename.parent + parent_at = posixpath.dirname(self.at.rstrip('/')) + if parent_at: + parent_at += '/' + return self._next(parent_at) diff --git a/libs/common/pkg_resources/extern/__init__.py b/libs/common/pkg_resources/extern/__init__.py index c1eb9e99..70897eea 100644 --- a/libs/common/pkg_resources/extern/__init__.py +++ b/libs/common/pkg_resources/extern/__init__.py @@ -1,3 +1,4 @@ +import importlib.util import sys @@ -20,17 +21,10 @@ class VendorImporter: yield self.vendor_pkg + '.' yield '' - def find_module(self, fullname, path=None): - """ - Return self when fullname starts with root_name and the - target module is one vendored through this importer. - """ + def _module_matches_namespace(self, fullname): + """Figure out if the target module is vendored.""" root, base, target = fullname.partition(self.root_name + '.') - if root: - return - if not any(map(target.startswith, self.vendored_names)): - return - return self + return not root and any(map(target.startswith, self.vendored_names)) def load_module(self, fullname): """ @@ -43,13 +37,6 @@ class VendorImporter: __import__(extant) mod = sys.modules[extant] sys.modules[fullname] = mod - # mysterious hack: - # Remove the reference to the extant package/module - # on later Python versions to cause relative imports - # in the vendor package to resolve the same modules - # as those going through this importer. - if prefix and sys.version_info > (3, 3): - del sys.modules[extant] return mod except ImportError: pass @@ -61,6 +48,19 @@ class VendorImporter: "distribution.".format(**locals()) ) + def create_module(self, spec): + return self.load_module(spec.name) + + def exec_module(self, module): + pass + + def find_spec(self, fullname, path=None, target=None): + """Return a module spec for vendored names.""" + return ( + importlib.util.spec_from_loader(fullname, self) + if self._module_matches_namespace(fullname) else None + ) + def install(self): """ Install this importer into sys.meta_path if not already present. @@ -69,5 +69,8 @@ class VendorImporter: sys.meta_path.append(self) -names = 'packaging', 'pyparsing', 'six', 'appdirs' +names = ( + 'packaging', 'pyparsing', 'appdirs', 'jaraco', 'importlib_resources', + 'more_itertools', +) VendorImporter(__name__, names).install() diff --git a/libs/common/pkg_resources/py31compat.py b/libs/common/pkg_resources/py31compat.py deleted file mode 100644 index a381c424..00000000 --- a/libs/common/pkg_resources/py31compat.py +++ /dev/null @@ -1,23 +0,0 @@ -import os -import errno -import sys - -from .extern import six - - -def _makedirs_31(path, exist_ok=False): - try: - os.makedirs(path) - except OSError as exc: - if not exist_ok or exc.errno != errno.EEXIST: - raise - - -# rely on compatibility behavior until mode considerations -# and exists_ok considerations are disentangled. -# See https://github.com/pypa/setuptools/pull/1083#issuecomment-315168663 -needs_makedirs = ( - six.PY2 or - (3, 4) <= sys.version_info < (3, 4, 1) -) -makedirs = _makedirs_31 if needs_makedirs else os.makedirs diff --git a/libs/common/rarfile.py b/libs/common/rarfile.py index 2ff5af49..58a59abb 100644 --- a/libs/common/rarfile.py +++ b/libs/common/rarfile.py @@ -1,6 +1,6 @@ # rarfile.py # -# Copyright (c) 2005-2019 Marko Kreen +# Copyright (c) 2005-2020 Marko Kreen # # Permission to use, copy, modify, and/or distribute this software for any # purpose with or without fee is hereby granted, provided that the above @@ -14,7 +14,7 @@ # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -r"""RAR archive reader. +"""RAR archive reader. This is Python module for Rar archive reading. The interface is made as :mod:`zipfile`-like as possible. @@ -30,10 +30,10 @@ Example:: import rarfile - rf = rarfile.RarFile('myarchive.rar') + rf = rarfile.RarFile("myarchive.rar") for f in rf.infolist(): - print f.filename, f.file_size - if f.filename == 'README': + print(f.filename, f.file_size) + if f.filename == "README": print(rf.read(f)) Archive files can also be accessed via file-like object returned @@ -41,194 +41,78 @@ by :meth:`RarFile.open`:: import rarfile - with rarfile.RarFile('archive.rar') as rf: - with rf.open('README') as f: + with rarfile.RarFile("archive.rar") as rf: + with rf.open("README") as f: for ln in f: print(ln.strip()) -There are few module-level parameters to tune behaviour, -here they are with defaults, and reason to change it:: - - import rarfile - - # Set to full path of unrar.exe if it is not in PATH - rarfile.UNRAR_TOOL = "unrar" - - # Set to '\\' to be more compatible with old rarfile - rarfile.PATH_SEP = '/' - -For more details, refer to source. - +For decompression to work, either ``unrar`` or ``unar`` tool must be in PATH. """ -from __future__ import division, print_function - -## -## Imports and compat - support both Python 2.x and 3.x -## - -import sys -import os import errno +import io +import os +import re +import shutil import struct - -from struct import pack, unpack, Struct +import sys +import warnings from binascii import crc32, hexlify +from datetime import datetime, timezone +from hashlib import blake2s, pbkdf2_hmac, sha1 +from pathlib import Path +from struct import Struct, pack, unpack +from subprocess import DEVNULL, PIPE, STDOUT, Popen from tempfile import mkstemp -from subprocess import Popen, PIPE, STDOUT -from io import RawIOBase -from hashlib import sha1, sha256 -from hmac import HMAC -from datetime import datetime, timedelta, tzinfo -# fixed offset timezone, for UTC -try: - from datetime import timezone -except ImportError: - class timezone(tzinfo): - """Compat timezone.""" - __slots__ = ('_ofs', '_name') - _DST = timedelta(0) - - def __init__(self, offset, name): - super(timezone, self).__init__() - self._ofs, self._name = offset, name - - def utcoffset(self, dt): - return self._ofs - - def tzname(self, dt): - return self._name - - def dst(self, dt): - return self._DST - -# only needed for encryped headers +# only needed for encrypted headers try: try: - from cryptography.hazmat.primitives.ciphers import algorithms, modes, Cipher from cryptography.hazmat.backends import default_backend - from cryptography.hazmat.primitives import hashes - from cryptography.hazmat.primitives.kdf import pbkdf2 - - class AES_CBC_Decrypt(object): - """Decrypt API""" - def __init__(self, key, iv): - ciph = Cipher(algorithms.AES(key), modes.CBC(iv), default_backend()) - self.decrypt = ciph.decryptor().update - - def pbkdf2_sha256(password, salt, iters): - """PBKDF2 with HMAC-SHA256""" - ctx = pbkdf2.PBKDF2HMAC(hashes.SHA256(), 32, salt, iters, default_backend()) - return ctx.derive(password) - + from cryptography.hazmat.primitives.ciphers import ( + Cipher, algorithms, modes, + ) + _have_crypto = 1 except ImportError: from Crypto.Cipher import AES - from Crypto.Protocol import KDF - - class AES_CBC_Decrypt(object): - """Decrypt API""" - def __init__(self, key, iv): - self.decrypt = AES.new(key, AES.MODE_CBC, iv).decrypt - - def pbkdf2_sha256(password, salt, iters): - """PBKDF2 with HMAC-SHA256""" - return KDF.PBKDF2(password, salt, 32, iters, hmac_sha256) - - _have_crypto = 1 + _have_crypto = 2 except ImportError: _have_crypto = 0 -try: - try: - from hashlib import blake2s - _have_blake2 = True - except ImportError: - from pyblake2 import blake2s - _have_blake2 = True -except ImportError: - _have_blake2 = False -# compat with 2.x -if sys.hexversion < 0x3000000: - def rar_crc32(data, prev=0): - """CRC32 with unsigned values. - """ - if (prev > 0) and (prev & 0x80000000): - prev -= (1 << 32) - res = crc32(data, prev) - if res < 0: - res += (1 << 32) - return res - tohex = hexlify - _byte_code = ord -else: # pragma: no cover - def tohex(data): - """Return hex string.""" - return hexlify(data).decode('ascii') - rar_crc32 = crc32 - unicode = str - _byte_code = int # noqa +class AES_CBC_Decrypt: + """Decrypt API""" + def __init__(self, key, iv): + if _have_crypto == 2: + self.decrypt = AES.new(key, AES.MODE_CBC, iv).decrypt + else: + ciph = Cipher(algorithms.AES(key), modes.CBC(iv), default_backend()) + self.decrypt = ciph.decryptor().update -# don't break 2.6 completely -if sys.hexversion < 0x2070000: - memoryview = lambda x: x # noqa -try: - from pathlib import Path - _have_pathlib = True -except ImportError: - _have_pathlib = False - -__version__ = '3.1' +__version__ = "4.0" # export only interesting items -__all__ = ['is_rarfile', 'RarInfo', 'RarFile', 'RarExtFile'] +__all__ = ["is_rarfile", "is_rarfile_sfx", "RarInfo", "RarFile", "RarExtFile"] ## ## Module configuration. Can be tuned after importing. ## +#: executable for unrar tool +UNRAR_TOOL = "unrar" + +#: executable for unar tool +UNAR_TOOL = "unar" + +#: executable for bsdtar tool +BSDTAR_TOOL = "bsdtar" + #: default fallback charset DEFAULT_CHARSET = "windows-1252" #: list of encodings to try, with fallback to DEFAULT_CHARSET if none succeed -TRY_ENCODINGS = ('utf8', 'utf-16le') - -#: 'unrar', 'rar' or full path to either one -UNRAR_TOOL = "unrar" - -#: Command line args to use for opening file for reading. -OPEN_ARGS = ('p', '-inul') - -#: Command line args to use for extracting file to disk. -EXTRACT_ARGS = ('x', '-y', '-idq') - -#: args for testrar() -TEST_ARGS = ('t', '-idq') - -# -# Allow use of tool that is not compatible with unrar. -# -# By default use 'bsdtar' which is 'tar' program that -# sits on top of libarchive. -# -# Problems with libarchive RAR backend: -# - Does not support solid archives. -# - Does not support password-protected archives. -# - -ALT_TOOL = 'bsdtar' -ALT_OPEN_ARGS = ('-x', '--to-stdout', '-f') -ALT_EXTRACT_ARGS = ('-x', '-f') -ALT_TEST_ARGS = ('-t', '-f') -ALT_CHECK_ARGS = ('--help',) - -#ALT_TOOL = 'unar' -#ALT_OPEN_ARGS = ('-o', '-') -#ALT_EXTRACT_ARGS = () -#ALT_TEST_ARGS = ('-test',) # does not work -#ALT_CHECK_ARGS = ('-v',) +TRY_ENCODINGS = ("utf8", "utf-16le") #: whether to speed up decompression by using tmp archive USE_EXTRACT_HACK = 1 @@ -236,85 +120,87 @@ USE_EXTRACT_HACK = 1 #: limit the filesize for tmp archive usage HACK_SIZE_LIMIT = 20 * 1024 * 1024 -#: Separator for path name components. RAR internally uses '\\'. -#: Use '/' to be similar with zipfile. -PATH_SEP = '/' +#: set specific directory for mkstemp() used by hack dir usage +HACK_TMP_DIR = None + +#: Separator for path name components. Always "/". +PATH_SEP = "/" ## ## rar constants ## # block types -RAR_BLOCK_MARK = 0x72 # r -RAR_BLOCK_MAIN = 0x73 # s -RAR_BLOCK_FILE = 0x74 # t -RAR_BLOCK_OLD_COMMENT = 0x75 # u -RAR_BLOCK_OLD_EXTRA = 0x76 # v -RAR_BLOCK_OLD_SUB = 0x77 # w -RAR_BLOCK_OLD_RECOVERY = 0x78 # x -RAR_BLOCK_OLD_AUTH = 0x79 # y -RAR_BLOCK_SUB = 0x7a # z -RAR_BLOCK_ENDARC = 0x7b # { +RAR_BLOCK_MARK = 0x72 # r +RAR_BLOCK_MAIN = 0x73 # s +RAR_BLOCK_FILE = 0x74 # t +RAR_BLOCK_OLD_COMMENT = 0x75 # u +RAR_BLOCK_OLD_EXTRA = 0x76 # v +RAR_BLOCK_OLD_SUB = 0x77 # w +RAR_BLOCK_OLD_RECOVERY = 0x78 # x +RAR_BLOCK_OLD_AUTH = 0x79 # y +RAR_BLOCK_SUB = 0x7a # z +RAR_BLOCK_ENDARC = 0x7b # { # flags for RAR_BLOCK_MAIN -RAR_MAIN_VOLUME = 0x0001 -RAR_MAIN_COMMENT = 0x0002 -RAR_MAIN_LOCK = 0x0004 -RAR_MAIN_SOLID = 0x0008 -RAR_MAIN_NEWNUMBERING = 0x0010 -RAR_MAIN_AUTH = 0x0020 -RAR_MAIN_RECOVERY = 0x0040 -RAR_MAIN_PASSWORD = 0x0080 -RAR_MAIN_FIRSTVOLUME = 0x0100 -RAR_MAIN_ENCRYPTVER = 0x0200 +RAR_MAIN_VOLUME = 0x0001 +RAR_MAIN_COMMENT = 0x0002 +RAR_MAIN_LOCK = 0x0004 +RAR_MAIN_SOLID = 0x0008 +RAR_MAIN_NEWNUMBERING = 0x0010 +RAR_MAIN_AUTH = 0x0020 +RAR_MAIN_RECOVERY = 0x0040 +RAR_MAIN_PASSWORD = 0x0080 +RAR_MAIN_FIRSTVOLUME = 0x0100 +RAR_MAIN_ENCRYPTVER = 0x0200 # flags for RAR_BLOCK_FILE -RAR_FILE_SPLIT_BEFORE = 0x0001 -RAR_FILE_SPLIT_AFTER = 0x0002 -RAR_FILE_PASSWORD = 0x0004 -RAR_FILE_COMMENT = 0x0008 -RAR_FILE_SOLID = 0x0010 -RAR_FILE_DICTMASK = 0x00e0 -RAR_FILE_DICT64 = 0x0000 -RAR_FILE_DICT128 = 0x0020 -RAR_FILE_DICT256 = 0x0040 -RAR_FILE_DICT512 = 0x0060 -RAR_FILE_DICT1024 = 0x0080 -RAR_FILE_DICT2048 = 0x00a0 -RAR_FILE_DICT4096 = 0x00c0 -RAR_FILE_DIRECTORY = 0x00e0 -RAR_FILE_LARGE = 0x0100 -RAR_FILE_UNICODE = 0x0200 -RAR_FILE_SALT = 0x0400 -RAR_FILE_VERSION = 0x0800 -RAR_FILE_EXTTIME = 0x1000 -RAR_FILE_EXTFLAGS = 0x2000 +RAR_FILE_SPLIT_BEFORE = 0x0001 +RAR_FILE_SPLIT_AFTER = 0x0002 +RAR_FILE_PASSWORD = 0x0004 +RAR_FILE_COMMENT = 0x0008 +RAR_FILE_SOLID = 0x0010 +RAR_FILE_DICTMASK = 0x00e0 +RAR_FILE_DICT64 = 0x0000 +RAR_FILE_DICT128 = 0x0020 +RAR_FILE_DICT256 = 0x0040 +RAR_FILE_DICT512 = 0x0060 +RAR_FILE_DICT1024 = 0x0080 +RAR_FILE_DICT2048 = 0x00a0 +RAR_FILE_DICT4096 = 0x00c0 +RAR_FILE_DIRECTORY = 0x00e0 +RAR_FILE_LARGE = 0x0100 +RAR_FILE_UNICODE = 0x0200 +RAR_FILE_SALT = 0x0400 +RAR_FILE_VERSION = 0x0800 +RAR_FILE_EXTTIME = 0x1000 +RAR_FILE_EXTFLAGS = 0x2000 # flags for RAR_BLOCK_ENDARC -RAR_ENDARC_NEXT_VOLUME = 0x0001 -RAR_ENDARC_DATACRC = 0x0002 -RAR_ENDARC_REVSPACE = 0x0004 -RAR_ENDARC_VOLNR = 0x0008 +RAR_ENDARC_NEXT_VOLUME = 0x0001 +RAR_ENDARC_DATACRC = 0x0002 +RAR_ENDARC_REVSPACE = 0x0004 +RAR_ENDARC_VOLNR = 0x0008 # flags common to all blocks -RAR_SKIP_IF_UNKNOWN = 0x4000 -RAR_LONG_BLOCK = 0x8000 +RAR_SKIP_IF_UNKNOWN = 0x4000 +RAR_LONG_BLOCK = 0x8000 # Host OS types -RAR_OS_MSDOS = 0 -RAR_OS_OS2 = 1 -RAR_OS_WIN32 = 2 -RAR_OS_UNIX = 3 -RAR_OS_MACOS = 4 -RAR_OS_BEOS = 5 +RAR_OS_MSDOS = 0 #: MSDOS (only in RAR3) +RAR_OS_OS2 = 1 #: OS2 (only in RAR3) +RAR_OS_WIN32 = 2 #: Windows +RAR_OS_UNIX = 3 #: UNIX +RAR_OS_MACOS = 4 #: MacOS (only in RAR3) +RAR_OS_BEOS = 5 #: BeOS (only in RAR3) -# Compression methods - '0'..'5' -RAR_M0 = 0x30 -RAR_M1 = 0x31 -RAR_M2 = 0x32 -RAR_M3 = 0x33 -RAR_M4 = 0x34 -RAR_M5 = 0x35 +# Compression methods - "0".."5" +RAR_M0 = 0x30 #: No compression. +RAR_M1 = 0x31 #: Compression level `-m1` - Fastest compression. +RAR_M2 = 0x32 #: Compression level `-m2`. +RAR_M3 = 0x33 #: Compression level `-m3`. +RAR_M4 = 0x34 #: Compression level `-m4`. +RAR_M5 = 0x35 #: Compression level `-m5` - Maximum compression. # # RAR5 constants @@ -363,6 +249,7 @@ RAR5_XTIME_UNIXTIME = 0x01 RAR5_XTIME_HAS_MTIME = 0x02 RAR5_XTIME_HAS_CTIME = 0x04 RAR5_XTIME_HAS_ATIME = 0x08 +RAR5_XTIME_UNIXTIME_NS = 0x10 RAR5_XENC_CIPHER_AES256 = 0 @@ -387,16 +274,30 @@ RAR5_XOWNER_GID = 0x08 RAR5_OS_WINDOWS = 0 RAR5_OS_UNIX = 1 +DOS_MODE_ARCHIVE = 0x20 +DOS_MODE_DIR = 0x10 +DOS_MODE_SYSTEM = 0x04 +DOS_MODE_HIDDEN = 0x02 +DOS_MODE_READONLY = 0x01 + ## ## internal constants ## RAR_ID = b"Rar!\x1a\x07\x00" RAR5_ID = b"Rar!\x1a\x07\x01\x00" -ZERO = b'\0' -EMPTY = b'' -UTC = timezone(timedelta(0), 'UTC') -BSIZE = 32 * 1024 + +WIN32 = sys.platform == "win32" +BSIZE = 512 * 1024 if WIN32 else 64 * 1024 + +SFX_MAX_SIZE = 2 * 1024 * 1024 +RAR_V3 = 3 +RAR_V5 = 5 + +_BAD_CHARS = r"""\x00-\x1F<>|"?*""" +RC_BAD_CHARS_UNIX = re.compile(r"[%s]" % _BAD_CHARS) +RC_BAD_CHARS_WIN32 = re.compile(r"[%s:^\\]" % _BAD_CHARS) + def _get_rar_version(xfile): """Check quickly whether file is rar archive. @@ -404,11 +305,37 @@ def _get_rar_version(xfile): with XFile(xfile) as fd: buf = fd.read(len(RAR5_ID)) if buf.startswith(RAR_ID): - return 3 + return RAR_V3 elif buf.startswith(RAR5_ID): - return 5 + return RAR_V5 return 0 + +def _find_sfx_header(xfile): + sig = RAR_ID[:-1] + buf = io.BytesIO() + steps = (64, SFX_MAX_SIZE) + + with XFile(xfile) as fd: + for step in steps: + data = fd.read(step) + if not data: + break + buf.write(data) + curdata = buf.getvalue() + findpos = 0 + while True: + pos = curdata.find(sig, findpos) + if pos < 0: + break + if curdata[pos:pos + len(RAR_ID)] == RAR_ID: + return RAR_V3, pos + if curdata[pos:pos + len(RAR5_ID)] == RAR5_ID: + return RAR_V5, pos + findpos = pos + len(sig) + return 0, 0 + + ## ## Public interface ## @@ -418,95 +345,146 @@ def is_rarfile(xfile): """ return _get_rar_version(xfile) > 0 + +def is_rarfile_sfx(xfile): + """Check whether file is rar archive with support for SFX. + + It will read 2M from file. + """ + return _find_sfx_header(xfile)[0] > 0 + + class Error(Exception): """Base class for rarfile errors.""" + class BadRarFile(Error): """Incorrect data in archive.""" + class NotRarFile(Error): """The file is not RAR archive.""" + class BadRarName(Error): """Cannot guess multipart name components.""" + class NoRarEntry(Error): """File not found in RAR""" + class PasswordRequired(Error): """File requires password""" + class NeedFirstVolume(Error): - """Need to start from first volume.""" + """Need to start from first volume. + + Attributes: + + current_volume + Volume number of current file or None if not known + """ + def __init__(self, msg, volume): + super().__init__(msg) + self.current_volume = volume + class NoCrypto(Error): """Cannot parse encrypted headers - no crypto available.""" + class RarExecError(Error): """Problem reported by unrar/rar.""" + class RarWarning(RarExecError): """Non-fatal error""" + class RarFatalError(RarExecError): """Fatal error""" + class RarCRCError(RarExecError): """CRC error during unpacking""" + class RarLockedArchiveError(RarExecError): """Must not modify locked archive""" + class RarWriteError(RarExecError): """Write error""" + class RarOpenError(RarExecError): """Open error""" + class RarUserError(RarExecError): """User error""" + class RarMemoryError(RarExecError): """Memory error""" + class RarCreateError(RarExecError): """Create error""" + class RarNoFilesError(RarExecError): """No files that match pattern were found""" + class RarUserBreak(RarExecError): """User stop""" + class RarWrongPassword(RarExecError): """Incorrect password""" + class RarUnknownError(RarExecError): """Unknown exit code""" + class RarSignalExit(RarExecError): """Unrar exited with signal""" + class RarCannotExec(RarExecError): """Executable not found.""" -class RarInfo(object): +class UnsupportedWarning(UserWarning): + """Archive uses feature that are unsupported by rarfile. + + .. versionadded:: 4.0 + """ + + +class RarInfo: r"""An entry in rar archive. - RAR3 extended timestamps are :class:`datetime.datetime` objects without timezone. - RAR5 extended timestamps are :class:`datetime.datetime` objects with UTC timezone. + Timestamps as :class:`~datetime.datetime` are without timezone in RAR3, + with UTC timezone in RAR5 archives. Attributes: filename File name with relative path. - Path separator is '/'. Always unicode string. + Path separator is "/". Always unicode string. date_time File modification timestamp. As tuple of (year, month, day, hour, minute, second). RAR5 allows archives where it is missing, it's None then. + comment + Optional file comment field. Unicode string. (RAR3-only) + file_size Uncompressed size. @@ -537,16 +515,16 @@ class RarInfo(object): mtime File modification time. Same value as :attr:`date_time` - but as :class:`datetime.datetime` object with extended precision. + but as :class:`~datetime.datetime` object with extended precision. ctime - Optional time field: creation time. As :class:`datetime.datetime` object. + Optional time field: creation time. As :class:`~datetime.datetime` object. atime - Optional time field: last access time. As :class:`datetime.datetime` object. + Optional time field: last access time. As :class:`~datetime.datetime` object. arctime - Optional time field: archival time. As :class:`datetime.datetime` object. + Optional time field: archival time. As :class:`~datetime.datetime` object. (RAR3-only) CRC @@ -557,8 +535,11 @@ class RarInfo(object): blake2sp_hash Blake2SP hash over decompressed data. (RAR5-only) - comment - Optional file comment field. Unicode string. (RAR3-only) + volume + Volume nr, starting from 0. + + volume_file + Volume file name, where file starts. file_redir If not None, file is link of some sort. Contains tuple of (type, flags, target). @@ -567,24 +548,20 @@ class RarInfo(object): Type is one of constants: :data:`RAR5_XREDIR_UNIX_SYMLINK` - unix symlink to target. + Unix symlink. :data:`RAR5_XREDIR_WINDOWS_SYMLINK` - windows symlink to target. + Windows symlink. :data:`RAR5_XREDIR_WINDOWS_JUNCTION` - windows junction. + Windows junction. :data:`RAR5_XREDIR_HARD_LINK` - hard link to target. + Hard link to target. :data:`RAR5_XREDIR_FILE_COPY` - current file is copy of another archive entry. + Current file is copy of another archive entry. - Flags may contain :data:`RAR5_XREDIR_ISDIR` bit. - - volume - Volume nr, starting from 0. - - volume_file - Volume file name, where file starts. + Flags may contain bits: + :data:`RAR5_XREDIR_ISDIR` + Symlink points to directory. """ # zipfile-compatible fields @@ -592,7 +569,6 @@ class RarInfo(object): file_size = None compress_size = None date_time = None - comment = None CRC = None volume = None orig_filename = None @@ -619,11 +595,26 @@ class RarInfo(object): flags = 0 type = None - def isdir(self): + # zipfile compat + def is_dir(self): """Returns True if entry is a directory. + + .. versionadded:: 4.0 + """ + return False + + def is_symlink(self): + """Returns True if entry is a symlink. + + .. versionadded:: 4.0 + """ + return False + + def is_file(self): + """Returns True if entry is a normal file. + + .. versionadded:: 4.0 """ - if self.type == RAR_BLOCK_FILE: - return (self.flags & RAR_FILE_DIRECTORY) == RAR_FILE_DIRECTORY return False def needs_password(self): @@ -633,24 +624,34 @@ class RarInfo(object): return (self.flags & RAR_FILE_PASSWORD) > 0 return False + def isdir(self): + """Returns True if entry is a directory. -class RarFile(object): + .. deprecated:: 4.0 + """ + return self.is_dir() + + +class RarFile: """Parse RAR structure, provide access to files in archive. """ + #: File name, if available. Unicode string or None. + filename = None + #: Archive comment. Unicode string or None. comment = None - def __init__(self, rarfile, mode="r", charset=None, info_callback=None, + def __init__(self, file, mode="r", charset=None, info_callback=None, crc_check=True, errors="stop"): """Open and parse a RAR archive. Parameters: - rarfile - archive file name + file + archive file name or file-like object. mode - only 'r' is supported. + only "r" is supported. charset fallback charset to use, if filenames are not already Unicode-enabled. info_callback @@ -661,10 +662,13 @@ class RarFile(object): Either "stop" to quietly stop parsing on errors, or "strict" to raise errors. Default is "stop". """ - if _have_pathlib and isinstance(rarfile, Path): - self._rarfile = str(rarfile) + if is_filelike(file): + self.filename = getattr(file, "name", None) else: - self._rarfile = rarfile + if isinstance(file, Path): + file = str(file) + self.filename = file + self._rarfile = file self._charset = charset or DEFAULT_CHARSET self._info_callback = info_callback @@ -677,7 +681,7 @@ class RarFile(object): elif errors == "strict": self._strict = True else: - raise ValueError("Invalid value for 'errors' parameter.") + raise ValueError("Invalid value for errors= parameter.") if mode != "r": raise NotImplementedError("RarFile supports only mode=r") @@ -689,13 +693,17 @@ class RarFile(object): return self def __exit__(self, typ, value, traceback): - """Exit context""" + """Exit context.""" self.close() - def setpassword(self, password): + def __iter__(self): + """Iterate over members.""" + return iter(self.infolist()) + + def setpassword(self, pwd): """Sets the password to use when extracting. """ - self._password = password + self._password = pwd if self._file_parser: if self._file_parser.has_header_encryption(): self._file_parser = None @@ -727,12 +735,12 @@ class RarFile(object): """ return self._file_parser.volumelist() - def getinfo(self, fname): + def getinfo(self, name): """Return RarInfo for file. """ - return self._file_parser.getinfo(fname) + return self._file_parser.getinfo(name) - def open(self, fname, mode='r', psw=None): + def open(self, name, mode="r", pwd=None): """Returns file-like object (:class:`RarExtFile`) from where the data can be read. The object implements :class:`io.RawIOBase` interface, so it can @@ -748,56 +756,59 @@ class RarFile(object): Parameters: - fname + name file name or RarInfo instance. mode - must be 'r' - psw + must be "r" + pwd password to use for extracting. """ - if mode != 'r': + if mode != "r": raise NotImplementedError("RarFile.open() supports only mode=r") # entry lookup - inf = self.getinfo(fname) - if inf.isdir(): - raise TypeError("Directory does not have any data: " + inf.filename) + inf = self.getinfo(name) + if inf.is_dir(): + raise io.UnsupportedOperation("Directory does not have any data: " + inf.filename) # check password if inf.needs_password(): - psw = psw or self._password - if psw is None: + pwd = pwd or self._password + if pwd is None: raise PasswordRequired("File %s requires password" % inf.filename) else: - psw = None + pwd = None - return self._file_parser.open(inf, psw) + return self._file_parser.open(inf, pwd) - def read(self, fname, psw=None): + def read(self, name, pwd=None): """Return uncompressed data for archive entry. - For longer files using :meth:`RarFile.open` may be better idea. + For longer files using :meth:`~RarFile.open` may be better idea. Parameters: - fname + name filename or RarInfo instance - psw + pwd password to use for extracting. """ - with self.open(fname, 'r', psw) as f: + with self.open(name, "r", pwd) as f: return f.read() def close(self): """Release open resources.""" pass - def printdir(self): - """Print archive file list to stdout.""" + def printdir(self, file=None): + """Print archive file list to stdout or given file. + """ + if file is None: + file = sys.stdout for f in self.infolist(): - print(f.filename) + print(f.filename, file=file) def extract(self, member, path=None, pwd=None): """Extract single file into current directory. @@ -811,13 +822,8 @@ class RarFile(object): pwd optional password to use """ - if isinstance(member, RarInfo): - fname = member.filename - elif _have_pathlib and isinstance(member, Path): - fname = str(member) - else: - fname = member - self._extract([fname], path, pwd) + inf = self.getinfo(member) + return self._extract_one(inf, path, pwd, True) def extractall(self, path=None, members=None, pwd=None): """Extract all files into current directory. @@ -831,26 +837,30 @@ class RarFile(object): pwd optional password to use """ - fnlist = [] - if members is not None: - for m in members: - if isinstance(m, RarInfo): - fnlist.append(m.filename) - else: - fnlist.append(m) - self._extract(fnlist, path, pwd) + if members is None: + members = self.namelist() - def testrar(self): - """Let 'unrar' test the archive. + done = set() + dirs = [] + for m in members: + inf = self.getinfo(m) + dst = self._extract_one(inf, path, pwd, not inf.is_dir()) + if inf.is_dir(): + if dst not in done: + dirs.append((dst, inf)) + done.add(dst) + if dirs: + dirs.sort(reverse=True) + for dst, inf in dirs: + self._set_attrs(inf, dst) + + def testrar(self, pwd=None): + """Read all files and test CRC. """ - cmd = [UNRAR_TOOL] + list(TEST_ARGS) - add_password_arg(cmd, self._password) - cmd.append('--') - with XTempFile(self._rarfile) as rarfile: - cmd.append(rarfile) - p = custom_popen(cmd) - output = p.communicate()[0] - check_returncode(p, output) + for member in self.infolist(): + if member.is_file(): + with self.open(member, 'r', pwd) as f: + empty_read(f, member.file_size, BSIZE) def strerror(self): """Return error string if parsing failed or None if no problems. @@ -864,56 +874,106 @@ class RarFile(object): ## def _parse(self): - ver = _get_rar_version(self._rarfile) - if ver == 3: + """Run parser for file type + """ + ver, sfx_ofs = _find_sfx_header(self._rarfile) + if ver == RAR_V3: p3 = RAR3Parser(self._rarfile, self._password, self._crc_check, - self._charset, self._strict, self._info_callback) + self._charset, self._strict, self._info_callback, + sfx_ofs) self._file_parser = p3 # noqa - elif ver == 5: + elif ver == RAR_V5: p5 = RAR5Parser(self._rarfile, self._password, self._crc_check, - self._charset, self._strict, self._info_callback) + self._charset, self._strict, self._info_callback, + sfx_ofs) self._file_parser = p5 # noqa else: - raise BadRarFile("Not a RAR file") + raise NotRarFile("Not a RAR file") self._file_parser.parse() self.comment = self._file_parser.comment - # call unrar to extract a file - def _extract(self, fnlist, path=None, psw=None): - cmd = [UNRAR_TOOL] + list(EXTRACT_ARGS) + def _extract_one(self, info, path, pwd, set_attrs): + fname = sanitize_filename( + info.filename, os.path.sep, WIN32 + ) - # pasoword - psw = psw or self._password - add_password_arg(cmd, psw) - cmd.append('--') + if path is None: + path = os.getcwd() + else: + path = os.fspath(path) + dstfn = os.path.join(path, fname) - # rar file - with XTempFile(self._rarfile) as rarfn: - cmd.append(rarfn) + dirname = os.path.dirname(dstfn) + if dirname and dirname != ".": + os.makedirs(dirname, exist_ok=True) - # file list - for fn in fnlist: - if os.sep != PATH_SEP: - fn = fn.replace(PATH_SEP, os.sep) - cmd.append(fn) + if info.is_file(): + return self._make_file(info, dstfn, pwd, set_attrs) + if info.is_dir(): + return self._make_dir(info, dstfn, pwd, set_attrs) + if info.is_symlink(): + return self._make_symlink(info, dstfn, pwd, set_attrs) + return None - # destination path - if path is not None: - if _have_pathlib and isinstance(path, Path): - path = str(path) - cmd.append(path + os.sep) + def _create_helper(self, name, flags, info): + return os.open(name, flags) + + def _make_file(self, info, dstfn, pwd, set_attrs): + def helper(name, flags): + return self._create_helper(name, flags, info) + with self.open(info, "r", pwd) as src: + with open(dstfn, "wb", opener=helper) as dst: + shutil.copyfileobj(src, dst) + if set_attrs: + self._set_attrs(info, dstfn) + return dstfn + + def _make_dir(self, info, dstfn, pwd, set_attrs): + os.makedirs(dstfn, exist_ok=True) + if set_attrs: + self._set_attrs(info, dstfn) + return dstfn + + def _make_symlink(self, info, dstfn, pwd, set_attrs): + target_is_directory = False + if info.host_os == RAR_OS_UNIX: + link_name = self.read(info, pwd) + target_is_directory = (info.flags & RAR_FILE_DIRECTORY) == RAR_FILE_DIRECTORY + elif info.file_redir: + redir_type, redir_flags, link_name = info.file_redir + if redir_type == RAR5_XREDIR_WINDOWS_JUNCTION: + warnings.warn(f"Windows junction not supported - {info.filename}", UnsupportedWarning) + return None + target_is_directory = (redir_type & RAR5_XREDIR_ISDIR) > 0 + else: + warnings.warn(f"Unsupported link type - {info.filename}", UnsupportedWarning) + return None + + os.symlink(link_name, dstfn, target_is_directory=target_is_directory) + return dstfn + + def _set_attrs(self, info, dstfn): + if info.host_os == RAR_OS_UNIX: + os.chmod(dstfn, info.mode & 0o777) + elif info.host_os in (RAR_OS_WIN32, RAR_OS_MSDOS): + # only keep R/O attr, except for dirs on win32 + if info.mode & DOS_MODE_READONLY and (info.is_file() or not WIN32): + st = os.stat(dstfn) + new_mode = st.st_mode & ~0o222 + os.chmod(dstfn, new_mode) + + if info.mtime: + mtime_ns = to_nsecs(info.mtime) + atime_ns = to_nsecs(info.atime) if info.atime else mtime_ns + os.utime(dstfn, ns=(atime_ns, mtime_ns)) - # call - p = custom_popen(cmd) - output = p.communicate()[0] - check_returncode(p, output) # # File format parsing # -class CommonParser(object): +class CommonParser: """Shared parser parts.""" _main = None _hdrenc_main = None @@ -924,7 +984,7 @@ class CommonParser(object): _password = None comment = None - def __init__(self, rarfile, password, crc_check, charset, strict, info_cb): + def __init__(self, rarfile, password, crc_check, charset, strict, info_cb, sfx_offset): self._rarfile = rarfile self._password = password self._crc_check = crc_check @@ -934,6 +994,7 @@ class CommonParser(object): self._info_list = [] self._info_map = {} self._vol_list = [] + self._sfx_offset = sfx_offset def has_header_encryption(self): """Returns True if headers are encrypted @@ -945,9 +1006,9 @@ class CommonParser(object): return True return False - def setpassword(self, psw): + def setpassword(self, pwd): """Set cached password.""" - self._password = psw + self._password = pwd def volumelist(self): """Volume files""" @@ -971,26 +1032,19 @@ class CommonParser(object): """ if isinstance(member, RarInfo): fname = member.filename - elif _have_pathlib and isinstance(member, Path): + elif isinstance(member, Path): fname = str(member) else: fname = member - # accept both ways here - if PATH_SEP == '/': - fname2 = fname.replace("\\", "/") - else: - fname2 = fname.replace("/", "\\") + if fname.endswith("/"): + fname = fname.rstrip("/") try: return self._info_map[fname] except KeyError: - try: - return self._info_map[fname2] - except KeyError: - raise NoRarEntry("No such file: %s" % fname) + raise NoRarEntry("No such file: %s" % fname) - # read rar def parse(self): """Process file.""" self._fd = None @@ -1002,12 +1056,13 @@ class CommonParser(object): self._fd = None def _parse_real(self): + """Actually read file. + """ fd = XFile(self._rarfile) self._fd = fd + fd.seek(self._sfx_offset, 0) sig = fd.read(len(self._expect_sig)) if sig != self._expect_sig: - if isinstance(self._rarfile, (str, unicode)): - raise NotRarFile("Not a Rar archive: {}".format(self._rarfile)) raise NotRarFile("Not a Rar archive") volume = 0 # first vol (.rar) is 0 @@ -1015,12 +1070,16 @@ class CommonParser(object): endarc = False volfile = self._rarfile self._vol_list = [self._rarfile] - while 1: + raise_need_first_vol = False + while True: if endarc: - h = None # don't read past ENDARC + h = None # don"t read past ENDARC else: h = self._parse_header(fd) if not h: + if raise_need_first_vol: + # did not find ENDARC with VOLNR + raise NeedFirstVolume("Need to start from first volume", None) if more_vols: volume += 1 fd.close() @@ -1038,6 +1097,7 @@ class CommonParser(object): more_vols = False endarc = False self._vol_list.append(volfile) + self._main = None continue break h.volume = volume @@ -1045,17 +1105,19 @@ class CommonParser(object): if h.type == RAR_BLOCK_MAIN and not self._main: self._main = h - if h.flags & RAR_MAIN_NEWNUMBERING: + if volume == 0 and (h.flags & RAR_MAIN_NEWNUMBERING): # RAR 2.x does not set FIRSTVOLUME, # so check it only if NEWNUMBERING is used if (h.flags & RAR_MAIN_FIRSTVOLUME) == 0: - if getattr(h, 'main_volume_number', None) is not None: + if getattr(h, "main_volume_number", None) is not None: # rar5 may have more info raise NeedFirstVolume( "Need to start from first volume (current: %r)" - % (h.main_volume_number,) + % (h.main_volume_number,), + h.main_volume_number ) - raise NeedFirstVolume("Need to start from first volume") + # delay raise until we have volnr from ENDARC + raise_need_first_vol = True if h.flags & RAR_MAIN_PASSWORD: self._needs_password = True if not self._password: @@ -1063,13 +1125,19 @@ class CommonParser(object): elif h.type == RAR_BLOCK_ENDARC: more_vols = (h.flags & RAR_ENDARC_NEXT_VOLUME) > 0 endarc = True + if raise_need_first_vol and (h.flags & RAR_ENDARC_VOLNR) > 0: + raise NeedFirstVolume( + "Need to start from first volume (current: %r)" + % (h.endarc_volnr,), + h.endarc_volnr + ) elif h.type == RAR_BLOCK_FILE: # RAR 2.x does not write RAR_BLOCK_ENDARC if h.flags & RAR_FILE_SPLIT_AFTER: more_vols = True # RAR 2.x does not set RAR_MAIN_FIRSTVOLUME if volume == 0 and h.flags & RAR_FILE_SPLIT_BEFORE: - raise NeedFirstVolume("Need to start from first volume") + raise_need_first_vol = True if h.needs_password(): self._needs_password = True @@ -1089,16 +1157,17 @@ class CommonParser(object): raise NotImplementedError() def _decrypt_header(self, fd): - raise NotImplementedError('_decrypt_header') + raise NotImplementedError("_decrypt_header") def _parse_block_header(self, fd): - raise NotImplementedError('_parse_block_header') + raise NotImplementedError("_parse_block_header") - def _open_hack(self, inf, psw): - raise NotImplementedError('_open_hack') + def _open_hack(self, inf, pwd): + raise NotImplementedError("_open_hack") - # read single header def _parse_header(self, fd): + """Read single header + """ try: # handle encrypted headers if (self._main and self._main.flags & RAR_MAIN_PASSWORD) or self._hdrenc_main: @@ -1109,11 +1178,12 @@ class CommonParser(object): # now read actual header return self._parse_block_header(fd) except struct.error: - self._set_error('Broken header in RAR file') + self._set_error("Broken header in RAR file") return None - # given current vol name, construct next one def _next_volname(self, volfile): + """Given current vol name, construct next one + """ if is_filelike(volfile): raise IOError("Working on single FD") if self._main.flags & RAR_MAIN_NEWNUMBERING: @@ -1127,18 +1197,23 @@ class CommonParser(object): if self._strict: raise BadRarFile(msg) - def open(self, inf, psw): + def open(self, inf, pwd): """Return stream object for file data.""" if inf.file_redir: + redir_type, redir_flags, redir_name = inf.file_redir # cannot leave to unrar as it expects copied file to exist - if inf.file_redir[0] in (RAR5_XREDIR_FILE_COPY, RAR5_XREDIR_HARD_LINK): - inf = self.getinfo(inf.file_redir[2]) + if redir_type in (RAR5_XREDIR_FILE_COPY, RAR5_XREDIR_HARD_LINK): + inf = self.getinfo(redir_name) if not inf: - raise BadRarFile('cannot find copied file') - + raise BadRarFile("cannot find copied file") + elif redir_type in ( + RAR5_XREDIR_UNIX_SYMLINK, RAR5_XREDIR_WINDOWS_SYMLINK, + RAR5_XREDIR_WINDOWS_JUNCTION, + ): + return io.BytesIO(redir_name.encode("utf8")) if inf.flags & RAR_FILE_SPLIT_BEFORE: - raise NeedFirstVolume("Partial file, please start from first volume: " + inf.filename) + raise NeedFirstVolume("Partial file, please start from first volume: " + inf.filename, None) # is temp write usable? use_hack = 1 @@ -1159,22 +1234,22 @@ class CommonParser(object): if inf.compress_type == RAR_M0 and (inf.flags & RAR_FILE_PASSWORD) == 0 and inf.file_redir is None: return self._open_clear(inf) elif use_hack: - return self._open_hack(inf, psw) + return self._open_hack(inf, pwd) elif is_filelike(self._rarfile): - return self._open_unrar_membuf(self._rarfile, inf, psw) + return self._open_unrar_membuf(self._rarfile, inf, pwd) else: - return self._open_unrar(self._rarfile, inf, psw) + return self._open_unrar(self._rarfile, inf, pwd) def _open_clear(self, inf): return DirectReader(self, inf) - def _open_hack_core(self, inf, psw, prefix, suffix): + def _open_hack_core(self, inf, pwd, prefix, suffix): size = inf.compress_size + inf.header_size rf = XFile(inf.volume_file, 0) rf.seek(inf.header_offset) - tmpfd, tmpname = mkstemp(suffix='.rar') + tmpfd, tmpname = mkstemp(suffix=".rar", dir=HACK_TMP_DIR) tmpf = os.fdopen(tmpfd, "wb") try: @@ -1185,42 +1260,41 @@ class CommonParser(object): else: buf = rf.read(size) if not buf: - raise BadRarFile('read failed: ' + inf.filename) + raise BadRarFile("read failed: " + inf.filename) tmpf.write(buf) size -= len(buf) tmpf.write(suffix) tmpf.close() rf.close() - except: + except BaseException: rf.close() tmpf.close() os.unlink(tmpname) raise - return self._open_unrar(tmpname, inf, psw, tmpname) + return self._open_unrar(tmpname, inf, pwd, tmpname) - # write in-memory archive to temp file - needed for solid archives - def _open_unrar_membuf(self, memfile, inf, psw): + def _open_unrar_membuf(self, memfile, inf, pwd): + """Write in-memory archive to temp file, needed for solid archives. + """ tmpname = membuf_tempfile(memfile) - return self._open_unrar(tmpname, inf, psw, tmpname, force_file=True) + return self._open_unrar(tmpname, inf, pwd, tmpname, force_file=True) - # extract using unrar - def _open_unrar(self, rarfile, inf, psw=None, tmpfile=None, force_file=False): - cmd = [UNRAR_TOOL] + list(OPEN_ARGS) - add_password_arg(cmd, psw) - cmd.append("--") - cmd.append(rarfile) + def _open_unrar(self, rarfile, inf, pwd=None, tmpfile=None, force_file=False): + """Extract using unrar + """ + setup = tool_setup() # not giving filename avoids encoding related problems + fn = None if not tmpfile or force_file: fn = inf.filename - if PATH_SEP != os.sep: - fn = fn.replace(PATH_SEP, os.sep) - cmd.append(fn) # read from unrar pipe + cmd = setup.open_cmdline(pwd, rarfile, fn) return PipeReader(self, inf, cmd, tmpfile) + # # RAR3 format # @@ -1236,11 +1310,15 @@ class Rar3Info(RarInfo): data_offset = None _md_class = None _md_expect = None + _name_size = None # make sure some rar5 fields are always present file_redir = None blake2sp_hash = None + endarc_datacrc = None + endarc_volnr = None + def _must_disable_hack(self): if self.type == RAR_BLOCK_FILE: if self.flags & RAR_FILE_PASSWORD: @@ -1252,6 +1330,27 @@ class Rar3Info(RarInfo): return True return False + def is_dir(self): + """Returns True if entry is a directory.""" + if self.type == RAR_BLOCK_FILE and not self.is_symlink(): + return (self.flags & RAR_FILE_DIRECTORY) == RAR_FILE_DIRECTORY + return False + + def is_symlink(self): + """Returns True if entry is a symlink.""" + return ( + self.type == RAR_BLOCK_FILE and + self.host_os == RAR_OS_UNIX and + self.mode & 0xF000 == 0xA000 + ) + + def is_file(self): + """Returns True if entry is a normal file.""" + return ( + self.type == RAR_BLOCK_FILE and + not (self.is_dir() or self.is_symlink()) + ) + class RAR3Parser(CommonParser): """Parse RAR3 file format. @@ -1261,7 +1360,7 @@ class RAR3Parser(CommonParser): def _decrypt_header(self, fd): if not _have_crypto: - raise NoCrypto('Cannot parse encrypted headers - no crypto') + raise NoCrypto("Cannot parse encrypted headers - no crypto") salt = fd.read(8) if self._last_aes_key[0] == salt: key, iv = self._last_aes_key[1:] @@ -1270,8 +1369,9 @@ class RAR3Parser(CommonParser): self._last_aes_key = (salt, key, iv) return HeaderDecrypt(fd, key, iv) - # common header def _parse_block_header(self, fd): + """Parse common block header + """ h = Rar3Info() h.header_offset = fd.tell() @@ -1291,7 +1391,7 @@ class RAR3Parser(CommonParser): # unexpected EOF? if len(hdata) != h.header_size: - self._set_error('Unexpected EOF when reading header') + self._set_error("Unexpected EOF when reading header") return None pos = S_BLK_HDR.size @@ -1326,6 +1426,13 @@ class RAR3Parser(CommonParser): elif h.type == RAR_BLOCK_OLD_EXTRA: pos += 7 crc_pos = pos + elif h.type == RAR_BLOCK_ENDARC: + if h.flags & RAR_ENDARC_DATACRC: + h.endarc_datacrc, pos = load_le32(hdata, pos) + if h.flags & RAR_ENDARC_VOLNR: + h.endarc_volnr = S_SHORT.unpack_from(hdata, pos)[0] + pos += 2 + crc_pos = h.header_size else: crc_pos = h.header_size @@ -1335,21 +1442,22 @@ class RAR3Parser(CommonParser): else: crcdat = hdata[2:crc_pos] - calc_crc = rar_crc32(crcdat) & 0xFFFF + calc_crc = crc32(crcdat) & 0xFFFF # return good header if h.header_crc == calc_crc: return h # header parsing failed. - self._set_error('Header CRC error (%02x): exp=%x got=%x (xlen = %d)', + self._set_error("Header CRC error (%02x): exp=%x got=%x (xlen = %d)", h.type, h.header_crc, calc_crc, len(crcdat)) # instead panicing, send eof return None - # read file-specific header def _parse_file_header(self, h, hdata, pos): + """Read file-specific header + """ fld = S_FILE_HDR.unpack_from(hdata, pos) pos += S_FILE_HDR.size @@ -1361,7 +1469,7 @@ class RAR3Parser(CommonParser): h.mtime = to_datetime(h.date_time) h.extract_version = fld[5] h.compress_type = fld[6] - name_size = fld[7] + h._name_size = name_size = fld[7] h.mode = fld[8] h._md_class = CRC32Context @@ -1375,8 +1483,9 @@ class RAR3Parser(CommonParser): h.add_size = h.compress_size name, pos = load_bytes(hdata, name_size, pos) - if h.flags & RAR_FILE_UNICODE: - nul = name.find(ZERO) + if h.flags & RAR_FILE_UNICODE and b"\0" in name: + # stored in custom encoding + nul = name.find(b"\0") h.orig_filename = name[:nul] u = UnicodeFilename(h.orig_filename, name[nul + 1:]) h.filename = u.decode() @@ -1384,13 +1493,19 @@ class RAR3Parser(CommonParser): # if parsing failed fall back to simple name if u.failed: h.filename = self._decode(h.orig_filename) + elif h.flags & RAR_FILE_UNICODE: + # stored in UTF8 + h.orig_filename = name + h.filename = name.decode("utf8", "replace") else: + # stored in random encoding h.orig_filename = name h.filename = self._decode(name) - # change separator, if requested - if PATH_SEP != '\\': - h.filename = h.filename.replace('\\', PATH_SEP) + # change separator, set dir suffix + h.filename = h.filename.replace("\\", "/").rstrip("/") + if h.is_dir(): + h.filename = h.filename + "/" if h.flags & RAR_FILE_SALT: h.salt, pos = load_bytes(hdata, 8, pos) @@ -1405,8 +1520,9 @@ class RAR3Parser(CommonParser): return pos - # find old-style comment subblock def _parse_subblocks(self, h, hdata, pos): + """Find old-style comment subblock + """ while pos < len(hdata): # ordinary block header t = S_BLK_HDR.unpack_from(hdata, pos) @@ -1422,18 +1538,16 @@ class RAR3Parser(CommonParser): if stype == RAR_BLOCK_OLD_COMMENT and pos + S_COMMENT_HDR.size <= pos_next: declen, ver, meth, crc = S_COMMENT_HDR.unpack_from(hdata, pos) pos += S_COMMENT_HDR.size - data = hdata[pos : pos_next] + data = hdata[pos: pos_next] cmt = rar3_decompress(ver, meth, data, declen, sflags, crc, self._password) - if not self._crc_check: - h.comment = self._decode_comment(cmt) - elif rar_crc32(cmt) & 0xFFFF == crc: + if not self._crc_check or (crc32(cmt) & 0xFFFF == crc): h.comment = self._decode_comment(cmt) pos = pos_next return pos - def _read_comment_v3(self, inf, psw=None): + def _read_comment_v3(self, inf, pwd=None): # read data with XFile(inf.volume_file) as rf: @@ -1442,11 +1556,11 @@ class RAR3Parser(CommonParser): # decompress cmt = rar3_decompress(inf.extract_version, inf.compress_type, data, - inf.file_size, inf.flags, inf.CRC, psw, inf.salt) + inf.file_size, inf.flags, inf.CRC, pwd, inf.salt) # check crc if self._crc_check: - crc = rar_crc32(cmt) + crc = crc32(cmt) if crc != inf.CRC: return None @@ -1458,7 +1572,7 @@ class RAR3Parser(CommonParser): return val.decode(c) except UnicodeError: pass - return val.decode(self._charset, 'replace') + return val.decode(self._charset, "replace") def _decode_comment(self, val): return self._decode(val) @@ -1466,8 +1580,10 @@ class RAR3Parser(CommonParser): def process_entry(self, fd, item): if item.type == RAR_BLOCK_FILE: # use only first part - if (item.flags & RAR_FILE_SPLIT_BEFORE) == 0: - self._info_map[item.filename] = item + if item.flags & RAR_FILE_VERSION: + pass # skip old versions + elif (item.flags & RAR_FILE_SPLIT_BEFORE) == 0: + self._info_map[item.filename.rstrip("/")] = item self._info_list.append(item) elif len(self._info_list) > 0: # final crc is in last block @@ -1477,7 +1593,7 @@ class RAR3Parser(CommonParser): old.compress_size += item.compress_size # parse new-style comment - if item.type == RAR_BLOCK_SUB and item.filename == 'CMT': + if item.type == RAR_BLOCK_SUB and item.filename == "CMT": if item.flags & (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER): pass elif item.flags & RAR_FILE_SOLID: @@ -1499,10 +1615,11 @@ class RAR3Parser(CommonParser): # put file compressed data into temporary .rar archive, and run # unrar on that, thus avoiding unrar going over whole archive - def _open_hack(self, inf, psw): + def _open_hack(self, inf, pwd): # create main header: crc, type, flags, size, res1, res2 - prefix = RAR_ID + S_BLK_HDR.pack(0x90CF, 0x73, 0, 13) + ZERO * (2 + 4) - return self._open_hack_core(inf, psw, prefix, EMPTY) + prefix = RAR_ID + S_BLK_HDR.pack(0x90CF, 0x73, 0, 13) + b"\0" * (2 + 4) + return self._open_hack_core(inf, pwd, prefix, b"") + # # RAR5 format @@ -1537,7 +1654,7 @@ class Rar5BaseFile(Rar5Info): """ type = -1 file_flags = None - file_encryption = (0, 0, 0, EMPTY, EMPTY, EMPTY) + file_encryption = (0, 0, 0, b"", b"", b"") file_compress_flags = None file_redir = None file_owner = None @@ -1561,6 +1678,29 @@ class Rar5FileInfo(Rar5BaseFile): """ type = RAR_BLOCK_FILE + def is_symlink(self): + """Returns True if entry is a symlink.""" + # pylint: disable=unsubscriptable-object + return ( + self.file_redir is not None and + self.file_redir[0] in ( + RAR5_XREDIR_UNIX_SYMLINK, + RAR5_XREDIR_WINDOWS_SYMLINK, + RAR5_XREDIR_WINDOWS_JUNCTION, + ) + ) + + def is_file(self): + """Returns True if entry is a normal file.""" + return not (self.is_dir() or self.is_symlink()) + + def is_dir(self): + """Returns True if entry is a directory.""" + if not self.file_redir: + if self.file_flags & RAR5_FILE_FLAG_ISDIR: + return True + return False + class Rar5ServiceInfo(Rar5BaseFile): """RAR5 service record. @@ -1615,24 +1755,25 @@ class RAR5Parser(CommonParser): if self._last_aes256_key[:2] == (kdf_count, salt): return self._last_aes256_key[2] if kdf_count > 24: - raise BadRarFile('Too large kdf_count') - psw = self._password - if isinstance(psw, unicode): - psw = psw.encode('utf8') - key = pbkdf2_sha256(psw, salt, 1 << kdf_count) + raise BadRarFile("Too large kdf_count") + pwd = self._password + if isinstance(pwd, str): + pwd = pwd.encode("utf8") + key = pbkdf2_hmac("sha256", pwd, salt, 1 << kdf_count) self._last_aes256_key = (kdf_count, salt, key) return key def _decrypt_header(self, fd): if not _have_crypto: - raise NoCrypto('Cannot parse encrypted headers - no crypto') + raise NoCrypto("Cannot parse encrypted headers - no crypto") h = self._hdrenc_main key = self._gen_key(h.encryption_kdf_count, h.encryption_salt) iv = fd.read(16) return HeaderDecrypt(fd, key, iv) - # common header def _parse_block_header(self, fd): + """Parse common block header + """ header_offset = fd.tell() preload = 4 + 3 @@ -1646,14 +1787,14 @@ class RAR5Parser(CommonParser): # read full header, check for EOF hdata = start_bytes + fd.read(header_size - len(start_bytes)) if len(hdata) != header_size: - self._set_error('Unexpected EOF when reading header') + self._set_error("Unexpected EOF when reading header") return None data_offset = fd.tell() - calc_crc = rar_crc32(memoryview(hdata)[4:]) + calc_crc = crc32(memoryview(hdata)[4:]) if header_crc != calc_crc: # header parsing failed. - self._set_error('Header CRC error: exp=%x got=%x (xlen = %d)', + self._set_error("Header CRC error: exp=%x got=%x (xlen = %d)", header_crc, calc_crc, len(hdata)) return None @@ -1736,7 +1877,7 @@ class RAR5Parser(CommonParser): h.file_compress_flags, pos = load_vint(hdata, pos) h.file_host_os, pos = load_vint(hdata, pos) h.orig_filename, pos = load_vstr(hdata, pos) - h.filename = h.orig_filename.decode('utf8', 'replace') + h.filename = h.orig_filename.decode("utf8", "replace").rstrip("/") # use compatible values if h.file_host_os == RAR5_OS_WINDOWS: @@ -1761,6 +1902,8 @@ class RAR5Parser(CommonParser): if h.file_compress_flags & RAR5_COMPR_SOLID: h.flags |= RAR_FILE_SOLID + if h.is_dir(): + h.filename = h.filename + "/" return h def _parse_endarc_block(self, h, hdata, pos): @@ -1777,11 +1920,10 @@ class RAR5Parser(CommonParser): if h.encryption_flags & RAR5_ENC_FLAG_HAS_CHECKVAL: h.encryption_check_value = load_bytes(hdata, 12, pos) if h.encryption_algo != RAR5_XENC_CIPHER_AES256: - raise BadRarFile('Unsupported header encryption cipher') + raise BadRarFile("Unsupported header encryption cipher") self._hdrenc_main = h return h - # file extra record def _process_file_extra(self, h, xdata): xtype, pos = load_vint(xdata, 0) if xtype == RAR5_XFILE_TIME: @@ -1804,9 +1946,11 @@ class RAR5Parser(CommonParser): # extra block for file time record def _parse_file_xtime(self, h, xdata, pos): tflags, pos = load_vint(xdata, pos) + ldr = load_windowstime if tflags & RAR5_XTIME_UNIXTIME: ldr = load_unixtime + if tflags & RAR5_XTIME_HAS_MTIME: h.mtime, pos = ldr(xdata, pos) h.date_time = h.mtime.timetuple()[:6] @@ -1815,6 +1959,17 @@ class RAR5Parser(CommonParser): if tflags & RAR5_XTIME_HAS_ATIME: h.atime, pos = ldr(xdata, pos) + if tflags & RAR5_XTIME_UNIXTIME_NS: + if tflags & RAR5_XTIME_HAS_MTIME: + nsec, pos = load_le32(xdata, pos) + h.mtime = to_nsdatetime(h.mtime, nsec) + if tflags & RAR5_XTIME_HAS_CTIME: + nsec, pos = load_le32(xdata, pos) + h.ctime = to_nsdatetime(h.ctime, nsec) + if tflags & RAR5_XTIME_HAS_ATIME: + nsec, pos = load_le32(xdata, pos) + h.atime = to_nsdatetime(h.atime, nsec) + # just remember encryption info def _parse_file_encryption(self, h, xdata, pos): algo, pos = load_vint(xdata, pos) @@ -1836,7 +1991,7 @@ class RAR5Parser(CommonParser): hash_type, pos = load_vint(xdata, pos) if hash_type == RAR5_XHASH_BLAKE2SP: h.blake2sp_hash, pos = load_bytes(xdata, 32, pos) - if _have_blake2 and (h.file_encryption[1] & RAR5_XENC_TWEAKED) == 0: + if (h.file_encryption[1] & RAR5_XENC_TWEAKED) == 0: h._md_class = Blake2SP h._md_expect = h.blake2sp_hash @@ -1849,7 +2004,7 @@ class RAR5Parser(CommonParser): redir_type, pos = load_vint(xdata, pos) redir_flags, pos = load_vint(xdata, pos) redir_name, pos = load_vstr(xdata, pos) - redir_name = redir_name.decode('utf8', 'replace') + redir_name = redir_name.decode("utf8", "replace") h.file_redir = (redir_type, redir_flags, redir_name) def _parse_file_owner(self, h, xdata, pos): @@ -1869,9 +2024,11 @@ class RAR5Parser(CommonParser): def process_entry(self, fd, item): if item.block_type == RAR5_BLOCK_FILE: - # use only first part - if (item.block_flags & RAR5_BLOCK_FLAG_SPLIT_BEFORE) == 0: - self._info_map[item.filename] = item + if item.file_version: + pass # skip old versions + elif (item.block_flags & RAR5_BLOCK_FLAG_SPLIT_BEFORE) == 0: + # use only first part + self._info_map[item.filename.rstrip("/")] = item self._info_list.append(item) elif len(self._info_list) > 0: # final crc is in last block @@ -1881,7 +2038,7 @@ class RAR5Parser(CommonParser): old.blake2sp_hash = item.blake2sp_hash old.compress_size += item.compress_size elif item.block_type == RAR5_BLOCK_SERVICE: - if item.filename == 'CMT': + if item.filename == "CMT": self._load_comment(fd, item) def _load_comment(self, fd, item): @@ -1903,23 +2060,24 @@ class RAR5Parser(CommonParser): cmt = cmtstream.read() # rar bug? - appends zero to comment - cmt = cmt.split(ZERO, 1)[0] - self.comment = cmt.decode('utf8') + cmt = cmt.split(b"\0", 1)[0] + self.comment = cmt.decode("utf8") return None - def _open_hack(self, inf, psw): + def _open_hack(self, inf, pwd): # len, type, blk_flags, flags - main_hdr = b'\x03\x01\x00\x00' - endarc_hdr = b'\x03\x05\x00\x00' - main_hdr = S_LONG.pack(rar_crc32(main_hdr)) + main_hdr - endarc_hdr = S_LONG.pack(rar_crc32(endarc_hdr)) + endarc_hdr - return self._open_hack_core(inf, psw, RAR5_ID + main_hdr, endarc_hdr) + main_hdr = b"\x03\x01\x00\x00" + endarc_hdr = b"\x03\x05\x00\x00" + main_hdr = S_LONG.pack(crc32(main_hdr)) + main_hdr + endarc_hdr = S_LONG.pack(crc32(endarc_hdr)) + endarc_hdr + return self._open_hack_core(inf, pwd, RAR5_ID + main_hdr, endarc_hdr) + ## ## Utility classes ## -class UnicodeFilename(object): +class UnicodeFilename: """Handle RAR3 unicode filename decompression. """ def __init__(self, name, encdata): @@ -1945,7 +2103,7 @@ class UnicodeFilename(object): return self.std_name[self.pos] except IndexError: self.failed = 1 - return ord('?') + return ord("?") def put(self, lo, hi): """Copy 16-bit value to result.""" @@ -1982,7 +2140,7 @@ class UnicodeFilename(object): return self.buf.decode("utf-16le", "replace") -class RarExtFile(RawIOBase): +class RarExtFile(io.RawIOBase): """Base class for file-like object that :meth:`RarFile.open` returns. Provides public methods and common crc checking. @@ -1991,30 +2149,20 @@ class RarExtFile(RawIOBase): - no short reads - .read() and .readinfo() read as much as requested. - no internal buffer, use io.BufferedReader for that. """ + name = None #: Filename of the archive entry + mode = "rb" + _parser = None + _inf = None + _fd = None + _remain = 0 + _returncode = 0 + _md_context = None - #: Filename of the archive entry - name = None - - def __init__(self, parser, inf): - """Open archive entry. - """ - super(RarExtFile, self).__init__() - - # standard io.* properties + def _open_extfile(self, parser, inf): self.name = inf.filename - self.mode = 'rb' - self._parser = parser self._inf = inf - self._fd = None - self._remain = 0 - self._returncode = 0 - self._md_context = None - - self._open() - - def _open(self): if self._fd: self._fd.close() md_class = self._inf._md_class or NoHashContext @@ -2022,24 +2170,31 @@ class RarExtFile(RawIOBase): self._fd = None self._remain = self._inf.file_size - def read(self, cnt=None): + def read(self, n=-1): """Read all or specified amount of data from archive entry.""" - # sanitize cnt - if cnt is None or cnt < 0: - cnt = self._remain - elif cnt > self._remain: - cnt = self._remain - if cnt == 0: - return EMPTY + # sanitize count + if n is None or n < 0: + n = self._remain + elif n > self._remain: + n = self._remain + if n == 0: + return b"" - # actual read - data = self._read(cnt) - if data: + buf = [] + orig = n + while n > 0: + # actual read + data = self._read(n) + if not data: + break + buf.append(data) self._md_context.update(data) self._remain -= len(data) - if len(data) != cnt: - raise BadRarFile("Failed the read enough data") + n -= len(data) + data = b"".join(buf) + if n > 0: + raise BadRarFile("Failed the read enough data: req=%d got=%d" % (orig, len(data))) # done? if not data or self._remain == 0: @@ -2056,7 +2211,7 @@ class RarExtFile(RawIOBase): if final is None: return if self._returncode: - check_returncode(self, '') + check_returncode(self._returncode, "", tool_setup().get_errmap()) if self._remain != 0: raise BadRarFile("Failed the read enough data") if final != exp: @@ -2070,7 +2225,7 @@ class RarExtFile(RawIOBase): def close(self): """Close open resources.""" - super(RarExtFile, self).close() + super().close() if self._fd: self._fd.close() @@ -2085,13 +2240,13 @@ class RarExtFile(RawIOBase): Returns bytes read. """ - raise NotImplementedError('readinto') + raise NotImplementedError("readinto") def tell(self): """Return current reading position in uncompressed data.""" return self._inf.file_size - self._remain - def seek(self, ofs, whence=0): + def seek(self, offset, whence=0): """Seek in data. On uncompressed files, the seeking works by actual @@ -2107,13 +2262,13 @@ class RarExtFile(RawIOBase): cur_ofs = self.tell() if whence == 0: # seek from beginning of file - new_ofs = ofs + new_ofs = offset elif whence == 1: # seek from current position - new_ofs = cur_ofs + ofs + new_ofs = cur_ofs + offset elif whence == 2: # seek from end of file - new_ofs = fsize + ofs + new_ofs = fsize + offset else: - raise ValueError('Invalid value for whence') + raise ValueError("Invalid value for whence") # sanity check if new_ofs < 0: @@ -2126,20 +2281,13 @@ class RarExtFile(RawIOBase): self._skip(new_ofs - cur_ofs) else: # reopen and seek - self._open() + self._open_extfile(self._parser, self._inf) self._skip(new_ofs) return self.tell() def _skip(self, cnt): """Read and discard data""" - while cnt > 0: - if cnt > 8192: - buf = self.read(8192) - else: - buf = self.read(cnt) - if not buf: - break - cnt -= len(buf) + empty_read(self, cnt, BSIZE) def readable(self): """Returns True""" @@ -2168,27 +2316,25 @@ class RarExtFile(RawIOBase): class PipeReader(RarExtFile): """Read data from pipe, handle tempfile cleanup.""" - def __init__(self, rf, inf, cmd, tempfile=None): + def __init__(self, parser, inf, cmd, tempfile=None): + super().__init__() self._cmd = cmd self._proc = None self._tempfile = tempfile - super(PipeReader, self).__init__(rf, inf) + self._open_extfile(parser, inf) def _close_proc(self): if not self._proc: return - if self._proc.stdout: - self._proc.stdout.close() - if self._proc.stdin: - self._proc.stdin.close() - if self._proc.stderr: - self._proc.stderr.close() + for f in (self._proc.stdout, self._proc.stderr, self._proc.stdin): + if f: + f.close() self._proc.wait() self._returncode = self._proc.returncode self._proc = None - def _open(self): - super(PipeReader, self)._open() + def _open_extfile(self, parser, inf): + super()._open_extfile(parser, inf) # stop old process self._close_proc() @@ -2198,10 +2344,6 @@ class PipeReader(RarExtFile): self._proc = custom_popen(self._cmd) self._fd = self._proc.stdout - # avoid situation where unrar waits on stdin - if self._proc.stdin: - self._proc.stdin.close() - def _read(self, cnt): """Read from pipe.""" @@ -2219,13 +2361,13 @@ class PipeReader(RarExtFile): break cnt -= len(data) buf.append(data) - return EMPTY.join(buf) + return b"".join(buf) def close(self): """Close open resources.""" self._close_proc() - super(PipeReader, self).close() + super().close() if self._tempfile: try: @@ -2242,10 +2384,10 @@ class PipeReader(RarExtFile): vbuf = memoryview(buf) res = got = 0 while got < cnt: - res = self._fd.readinto(vbuf[got : cnt]) + res = self._fd.readinto(vbuf[got: cnt]) if not res: break - self._md_context.update(vbuf[got : got + res]) + self._md_context.update(vbuf[got: got + res]) self._remain -= res got += res return got @@ -2258,8 +2400,12 @@ class DirectReader(RarExtFile): _cur_avail = None _volfile = None - def _open(self): - super(DirectReader, self)._open() + def __init__(self, parser, inf): + super().__init__() + self._open_extfile(parser, inf) + + def _open_extfile(self, parser, inf): + super()._open_extfile(parser, inf) self._volfile = self._inf.volume_file self._fd = XFile(self._volfile, 0) @@ -2313,7 +2459,7 @@ class DirectReader(RarExtFile): if len(buf) == 1: return buf[0] - return EMPTY.join(buf) + return b"".join(buf) def _open_next(self): """Proceed to next volume.""" @@ -2335,7 +2481,7 @@ class DirectReader(RarExtFile): raise BadRarFile("Invalid signature") # loop until first file header - while 1: + while True: cur = self._parser._parse_header(fd) if not cur: raise BadRarFile("Unexpected EOF") @@ -2365,22 +2511,22 @@ class DirectReader(RarExtFile): cnt = self._cur_avail # read into temp view - res = self._fd.readinto(vbuf[got : got + cnt]) + res = self._fd.readinto(vbuf[got: got + cnt]) if not res: break - self._md_context.update(vbuf[got : got + res]) + self._md_context.update(vbuf[got: got + res]) self._cur_avail -= res self._remain -= res got += res return got -class HeaderDecrypt(object): +class HeaderDecrypt: """File-like object that decrypts from another file""" def __init__(self, f, key, iv): self.f = f self.ciph = AES_CBC_Decrypt(key, iv) - self.buf = EMPTY + self.buf = b"" def tell(self): """Current file pos - works only on block boundaries.""" @@ -2389,7 +2535,7 @@ class HeaderDecrypt(object): def read(self, cnt=None): """Read and decrypt.""" if cnt > 8 * 1024: - raise BadRarFile('Bad count to header decrypt - wrong password?') + raise BadRarFile("Bad count to header decrypt - wrong password?") # consume old data if cnt <= len(self.buf): @@ -2397,7 +2543,7 @@ class HeaderDecrypt(object): self.buf = self.buf[cnt:] return res res = self.buf - self.buf = EMPTY + self.buf = b"" cnt -= len(res) # decrypt new data @@ -2418,11 +2564,10 @@ class HeaderDecrypt(object): return res -# handle (filename|filelike) object -class XFile(object): +class XFile: """Input may be filename or file object. """ - __slots__ = ('_fd', '_need_close') + __slots__ = ("_fd", "_need_close") def __init__(self, xfile, bufsize=1024): if is_filelike(xfile): @@ -2431,7 +2576,7 @@ class XFile(object): self._fd.seek(0) else: self._need_close = True - self._fd = open(xfile, 'rb', bufsize) + self._fd = open(xfile, "rb", bufsize) def read(self, n=None): """Read from file.""" @@ -2445,9 +2590,9 @@ class XFile(object): """Move file pos.""" return self._fd.seek(ofs, whence) - def readinto(self, dst): + def readinto(self, buf): """Read into buffer.""" - return self._fd.readinto(dst) + return self._fd.readinto(buf) def close(self): """Close file object.""" @@ -2461,7 +2606,7 @@ class XFile(object): self.close() -class NoHashContext(object): +class NoHashContext: """No-op hash function.""" def __init__(self, data=None): """Initialize""" @@ -2473,9 +2618,9 @@ class NoHashContext(object): """Hexadecimal digest.""" -class CRC32Context(object): +class CRC32Context: """Hash context that uses CRC32.""" - __slots__ = ['_crc'] + __slots__ = ["_crc"] def __init__(self, data=None): self._crc = 0 @@ -2484,7 +2629,7 @@ class CRC32Context(object): def update(self, data): """Process data.""" - self._crc = rar_crc32(data, self._crc) + self._crc = crc32(data, self._crc) def digest(self): """Final hash.""" @@ -2492,19 +2637,19 @@ class CRC32Context(object): def hexdigest(self): """Hexadecimal digest.""" - return '%08x' % self.digest() + return "%08x" % self.digest() -class Blake2SP(object): +class Blake2SP: """Blake2sp hash context. """ - __slots__ = ['_thread', '_buf', '_cur', '_digest'] + __slots__ = ["_thread", "_buf", "_cur", "_digest"] digest_size = 32 block_size = 64 parallelism = 8 def __init__(self, data=None): - self._buf = b'' + self._buf = b"" self._cur = 0 self._digest = None self._thread = [] @@ -2547,7 +2692,7 @@ class Blake2SP(object): if self._digest is None: if self._buf: self._add_block(self._buf) - self._buf = EMPTY + self._buf = b"" ctx = self._blake2s(0, 1, True) for t in self._thread: ctx.update(t.digest()) @@ -2556,21 +2701,21 @@ class Blake2SP(object): def hexdigest(self): """Hexadecimal digest.""" - return tohex(self.digest()) + return hexlify(self.digest()).decode("ascii") -class Rar3Sha1(object): - """Bug-compat for SHA1 +class Rar3Sha1: + """Emulate buggy SHA1 from RAR3. """ digest_size = 20 block_size = 64 - _BLK_BE = struct.Struct(b'>16L') - _BLK_LE = struct.Struct(b'<16L') + _BLK_BE = struct.Struct(b">16L") + _BLK_LE = struct.Struct(b"<16L") - __slots__ = ('_nbytes', '_md', '_rarbug') + __slots__ = ("_nbytes", "_md", "_rarbug") - def __init__(self, data=b'', rarbug=False): + def __init__(self, data=b"", rarbug=False): self._md = sha1() self._nbytes = 0 self._rarbug = rarbug @@ -2609,65 +2754,73 @@ class Rar3Sha1(object): ## Utility functions ## -S_LONG = Struct(' len(buf): - raise BadRarFile('cannot load byte') + raise BadRarFile("cannot load byte") return S_BYTE.unpack_from(buf, pos)[0], end + def load_le32(buf, pos): """Load little-endian 32-bit integer""" end = pos + 4 if end > len(buf): - raise BadRarFile('cannot load le32') + raise BadRarFile("cannot load le32") return S_LONG.unpack_from(buf, pos)[0], pos + 4 + def load_bytes(buf, num, pos): """Load sequence of bytes""" end = pos + num if end > len(buf): - raise BadRarFile('cannot load bytes') - return buf[pos : end], end + raise BadRarFile("cannot load bytes") + return buf[pos: end], end + def load_vstr(buf, pos): """Load bytes prefixed by vint length""" slen, pos = load_vint(buf, pos) return load_bytes(buf, slen, pos) + def load_dostime(buf, pos): """Load LE32 dos timestamp""" stamp, pos = load_le32(buf, pos) tup = parse_dos_time(stamp) return to_datetime(tup), pos + def load_unixtime(buf, pos): """Load LE32 unix timestamp""" secs, pos = load_le32(buf, pos) - dt = datetime.fromtimestamp(secs, UTC) + dt = datetime.fromtimestamp(secs, timezone.utc) return dt, pos + def load_windowstime(buf, pos): """Load LE64 windows timestamp""" # unix epoch (1970) in seconds from windows epoch (1601) @@ -2675,39 +2828,47 @@ def load_windowstime(buf, pos): val1, pos = load_le32(buf, pos) val2, pos = load_le32(buf, pos) secs, n1secs = divmod((val2 << 32) | val1, 10000000) - dt = datetime.fromtimestamp(secs - unix_epoch, UTC) - dt = dt.replace(microsecond=n1secs // 10) + dt = datetime.fromtimestamp(secs - unix_epoch, timezone.utc) + dt = to_nsdatetime(dt, n1secs * 100) return dt, pos -# new-style next volume + def _next_newvol(volfile): + """New-style next volume + """ i = len(volfile) - 1 while i >= 0: - if volfile[i] >= '0' and volfile[i] <= '9': + if volfile[i] >= "0" and volfile[i] <= "9": return _inc_volname(volfile, i) i -= 1 raise BadRarName("Cannot construct volume name: " + volfile) -# old-style next volume + def _next_oldvol(volfile): + """Old-style next volume + """ # rar -> r00 - if volfile[-4:].lower() == '.rar': - return volfile[:-2] + '00' + if volfile[-4:].lower() == ".rar": + return volfile[:-2] + "00" return _inc_volname(volfile, len(volfile) - 1) -# increase digits with carry, otherwise just increment char + def _inc_volname(volfile, i): + """increase digits with carry, otherwise just increment char + """ fn = list(volfile) while i >= 0: - if fn[i] != '9': + if fn[i] != "9": fn[i] = chr(ord(fn[i]) + 1) break - fn[i] = '0' + fn[i] = "0" i -= 1 - return ''.join(fn) + return "".join(fn) + -# rar3 extended time fields def _parse_ext_time(h, data, pos): + """Parse all RAR3 extended time fields + """ # flags and rest of data can be missing flags = 0 if pos + 2 <= len(data): @@ -2723,57 +2884,51 @@ def _parse_ext_time(h, data, pos): h.date_time = mtime.timetuple()[:6] return pos -# rar3 one extended time field + def _parse_xtime(flag, data, pos, basetime=None): + """Parse one RAR3 extended time field + """ res = None if flag & 8: if not basetime: basetime, pos = load_dostime(data, pos) - # load second fractions + # load second fractions of 100ns units rem = 0 cnt = flag & 3 for _ in range(cnt): b, pos = load_byte(data, pos) rem = (b << 16) | (rem >> 8) - # convert 100ns units to microseconds - usec = rem // 10 - if usec > 1000000: - usec = 999999 - # dostime has room for 30 seconds only, correct if needed if flag & 4 and basetime.second < 59: - res = basetime.replace(microsecond=usec, second=basetime.second + 1) - else: - res = basetime.replace(microsecond=usec) + basetime = basetime.replace(second=basetime.second + 1) + + res = to_nsdatetime(basetime, rem * 100) return res, pos + def is_filelike(obj): """Filename or file object? """ - if _have_pathlib: - filename_types = (bytes, unicode, Path) - else: - filename_types = (bytes, unicode) - - if isinstance(obj, filename_types): + if isinstance(obj, (bytes, str, Path)): return False res = True - for a in ('read', 'tell', 'seek'): + for a in ("read", "tell", "seek"): res = res and hasattr(obj, a) if not res: raise ValueError("Invalid object passed as file") return True -def rar3_s2k(psw, salt): + +def rar3_s2k(pwd, salt): """String-to-key hash for RAR3. """ - if not isinstance(psw, unicode): - psw = psw.decode('utf8') - seed = bytearray(psw.encode('utf-16le') + salt) + if not isinstance(pwd, str): + pwd = pwd.decode("utf8") + seed = bytearray(pwd.encode("utf-16le") + salt) h = Rar3Sha1(rarbug=True) - iv = EMPTY + iv = b"" for i in range(16): for j in range(0x4000): cnt = S_LONG.pack(i * 0x4000 + j) @@ -2785,7 +2940,8 @@ def rar3_s2k(psw, salt): key_le = pack("LLLL", key_be)) return key_le, iv -def rar3_decompress(vers, meth, data, declen=0, flags=0, crc=0, psw=None, salt=None): + +def rar3_decompress(vers, meth, data, declen=0, flags=0, crc=0, pwd=None, salt=None): """Decompress blob of compressed data. Used for data with non-standard header - eg. comments. @@ -2799,49 +2955,79 @@ def rar3_decompress(vers, meth, data, declen=0, flags=0, crc=0, psw=None, salt=N flags |= RAR_LONG_BLOCK # file header - fname = b'data' - date = 0 + fname = b"data" + date = ((2010 - 1980) << 25) + (12 << 21) + (31 << 16) mode = 0x20 fhdr = S_FILE_HDR.pack(len(data), declen, RAR_OS_MSDOS, crc, date, vers, meth, len(fname), mode) fhdr += fname - if flags & RAR_FILE_SALT: - if not salt: - return EMPTY + if salt: fhdr += salt # full header hlen = S_BLK_HDR.size + len(fhdr) hdr = S_BLK_HDR.pack(0, RAR_BLOCK_FILE, flags, hlen) + fhdr - hcrc = rar_crc32(hdr[2:]) & 0xFFFF + hcrc = crc32(hdr[2:]) & 0xFFFF hdr = S_BLK_HDR.pack(hcrc, RAR_BLOCK_FILE, flags, hlen) + fhdr # archive main header - mh = S_BLK_HDR.pack(0x90CF, RAR_BLOCK_MAIN, 0, 13) + ZERO * (2 + 4) + mh = S_BLK_HDR.pack(0x90CF, RAR_BLOCK_MAIN, 0, 13) + b"\0" * (2 + 4) # decompress via temp rar - tmpfd, tmpname = mkstemp(suffix='.rar') + setup = tool_setup() + tmpfd, tmpname = mkstemp(suffix=".rar", dir=HACK_TMP_DIR) tmpf = os.fdopen(tmpfd, "wb") try: tmpf.write(RAR_ID + mh + hdr + data) tmpf.close() - cmd = [UNRAR_TOOL] + list(OPEN_ARGS) - add_password_arg(cmd, psw, (flags & RAR_FILE_PASSWORD)) - cmd.append(tmpname) - + curpwd = (flags & RAR_FILE_PASSWORD) and pwd or None + cmd = setup.open_cmdline(curpwd, tmpname) p = custom_popen(cmd) return p.communicate()[0] finally: tmpf.close() os.unlink(tmpname) + +def sanitize_filename(fname, pathsep, is_win32): + """Simulate unrar sanitization. + """ + if is_win32: + if len(fname) > 1 and fname[1] == ":": + fname = fname[2:] + rc = RC_BAD_CHARS_WIN32 + else: + rc = RC_BAD_CHARS_UNIX + if rc.search(fname): + fname = rc.sub("_", fname) + + parts = [] + for seg in fname.split("/"): + if seg in ("", ".", ".."): + continue + if is_win32 and seg[-1] in (" ", "."): + seg = seg[:-1] + "_" + parts.append(seg) + return pathsep.join(parts) + + +def empty_read(src, size, blklen): + """Read and drop fixed amount of data. + """ + while size > 0: + if size > blklen: + res = src.read(blklen) + else: + res = src.read(size) + if not res: + raise BadRarFile("cannot load data") + size -= len(res) + + def to_datetime(t): """Convert 6-part time tuple into datetime object. """ - if t is None: - return None - # extract values year, mon, day, h, m, s = t @@ -2853,20 +3039,11 @@ def to_datetime(t): # sanitize invalid values mday = (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) - if mon < 1: - mon = 1 - if mon > 12: - mon = 12 - if day < 1: - day = 1 - if day > mday[mon]: - day = mday[mon] - if h > 23: - h = 23 - if m > 59: - m = 59 - if s > 59: - s = 59 + mon = max(1, min(mon, 12)) + day = max(1, min(day, mday[mon])) + h = min(h, 23) + m = min(m, 59) + s = min(s, 59) if mon == 2 and day == 29: try: return datetime(year, mon, day, h, m, s) @@ -2874,70 +3051,133 @@ def to_datetime(t): day = 28 return datetime(year, mon, day, h, m, s) + def parse_dos_time(stamp): """Parse standard 32-bit DOS timestamp. """ sec, stamp = stamp & 0x1F, stamp >> 5 - mn, stamp = stamp & 0x3F, stamp >> 6 - hr, stamp = stamp & 0x1F, stamp >> 5 + mn, stamp = stamp & 0x3F, stamp >> 6 + hr, stamp = stamp & 0x1F, stamp >> 5 day, stamp = stamp & 0x1F, stamp >> 5 mon, stamp = stamp & 0x0F, stamp >> 4 yr = (stamp & 0x7F) + 1980 return (yr, mon, day, hr, mn, sec * 2) + +# pylint: disable=arguments-differ,signature-differs +class nsdatetime(datetime): + """Datetime that carries nanoseconds. + + Arithmetic not supported, will lose nanoseconds. + + .. versionadded:: 4.0 + """ + __slots__ = ("nanosecond",) + nanosecond: int #: Number of nanoseconds, 0 <= nanosecond < 999999999 + + def __new__(cls, year, month, day, hour=0, minute=0, second=0, + microsecond=0, tzinfo=None, *, fold=0, nanosecond=0): + usec, mod = divmod(nanosecond, 1000) if nanosecond else (microsecond, 0) + if mod == 0: + return datetime(year, month, day, hour, minute, second, usec, tzinfo, fold=fold) + self = super().__new__(cls, year, month, day, hour, minute, second, usec, tzinfo, fold=fold) + self.nanosecond = nanosecond + return self + + def isoformat(self, sep="T", timespec="auto"): + """Formats with nanosecond precision by default. + """ + if timespec == "auto": + pre, post = super().isoformat(sep, "microseconds").split(".", 1) + return f"{pre}.{self.nanosecond:09d}{post[6:]}" + return super().isoformat(sep, timespec) + + def astimezone(self, tz=None): + """Convert to new timezone. + """ + tmp = super().astimezone(tz) + return self.__class__(tmp.year, tmp.month, tmp.day, tmp.hour, tmp.minute, tmp.second, + nanosecond=self.nanosecond, tzinfo=tmp.tzinfo, fold=tmp.fold) + + def replace(self, year=None, month=None, day=None, hour=None, minute=None, second=None, + microsecond=None, tzinfo=None, *, fold=None, nanosecond=None): + """Return new timestamp with specified fields replaced. + """ + return self.__class__( + self.year if year is None else year, + self.month if month is None else month, + self.day if day is None else day, + self.hour if hour is None else hour, + self.minute if minute is None else minute, + self.second if second is None else second, + nanosecond=((self.nanosecond if microsecond is None else microsecond * 1000) + if nanosecond is None else nanosecond), + tzinfo=self.tzinfo if tzinfo is None else tzinfo, + fold=self.fold if fold is None else fold) + + def __hash__(self): + return hash((super().__hash__(), self.nanosecond)) if self.nanosecond else super().__hash__() + + def __eq__(self, other): + return super().__eq__(other) and self.nanosecond == ( + other.nanosecond if isinstance(other, nsdatetime) else other.microsecond * 1000) + + def __gt__(self, other): + return super().__gt__(other) or (super().__eq__(other) and self.nanosecond > ( + other.nanosecond if isinstance(other, nsdatetime) else other.microsecond * 1000)) + + def __lt__(self, other): + return not (self > other or self == other) + + def __ge__(self, other): + return not self < other + + def __le__(self, other): + return not self > other + + def __ne__(self, other): + return not self == other + + +def to_nsdatetime(dt, nsec): + """Apply nanoseconds to datetime. + """ + if not nsec: + return dt + return nsdatetime(dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second, + tzinfo=dt.tzinfo, fold=dt.fold, nanosecond=nsec) + + +def to_nsecs(dt): + """Convert datatime instance to nanoseconds. + """ + secs = int(dt.timestamp()) + nsecs = dt.nanosecond if isinstance(dt, nsdatetime) else dt.microsecond * 1000 + return secs * 1000000000 + nsecs + + def custom_popen(cmd): """Disconnect cmd from parent fds, read only from stdout. """ - # needed for py2exe - creationflags = 0 - if sys.platform == 'win32': - creationflags = 0x08000000 # CREATE_NO_WINDOW - - # run command + creationflags = 0x08000000 if WIN32 else 0 # CREATE_NO_WINDOW try: - p = Popen(cmd, bufsize=0, stdout=PIPE, stdin=PIPE, stderr=STDOUT, + p = Popen(cmd, bufsize=0, stdout=PIPE, stderr=STDOUT, stdin=DEVNULL, creationflags=creationflags) except OSError as ex: if ex.errno == errno.ENOENT: - raise RarCannotExec("Unrar not installed? (rarfile.UNRAR_TOOL=%r)" % UNRAR_TOOL) + raise RarCannotExec("Unrar not installed?") if ex.errno == errno.EACCES or ex.errno == errno.EPERM: - raise RarCannotExec("Cannot execute unrar (rarfile.UNRAR_TOOL=%r)" % UNRAR_TOOL) + raise RarCannotExec("Cannot execute unrar") raise return p -def custom_check(cmd, ignore_retcode=False): - """Run command, collect output, raise error if needed. - """ - p = custom_popen(cmd) - out, _ = p.communicate() - if p.returncode and not ignore_retcode: - raise RarExecError("Check-run failed") - return out -def add_password_arg(cmd, psw, ___required=False): - """Append password switch to commandline. - """ - if UNRAR_TOOL == ALT_TOOL: - return - if psw is not None: - cmd.append('-p' + psw) - else: - cmd.append('-p-') - -def check_returncode(p, out): +def check_returncode(code, out, errmap): """Raise exception according to unrar exit code. """ - code = p.returncode if code == 0: return - # map return code to exception class, codes from rar.txt - errmap = [None, - RarWarning, RarFatalError, RarCRCError, RarLockedArchiveError, # 1..4 - RarWriteError, RarOpenError, RarUserError, RarMemoryError, # 5..8 - RarCreateError, RarNoFilesError, RarWrongPassword] # 9..11 - if UNRAR_TOOL == ALT_TOOL: - errmap = [None] if code > 0 and code < len(errmap): exc = errmap[code] elif code == 255: @@ -2949,92 +3189,175 @@ def check_returncode(p, out): # format message if out: - msg = "%s [%d]: %s" % (exc.__doc__, p.returncode, out) + msg = "%s [%d]: %s" % (exc.__doc__, code, out) else: - msg = "%s [%d]" % (exc.__doc__, p.returncode) + msg = "%s [%d]" % (exc.__doc__, code) raise exc(msg) -def hmac_sha256(key, data): - """HMAC-SHA256""" - return HMAC(key, data, sha256).digest() def membuf_tempfile(memfile): - """Write in-memory file object to real file.""" + """Write in-memory file object to real file. + """ memfile.seek(0, 0) - tmpfd, tmpname = mkstemp(suffix='.rar') + tmpfd, tmpname = mkstemp(suffix=".rar", dir=HACK_TMP_DIR) tmpf = os.fdopen(tmpfd, "wb") try: - while True: - buf = memfile.read(BSIZE) - if not buf: - break - tmpf.write(buf) + shutil.copyfileobj(memfile, tmpf, BSIZE) tmpf.close() - except: + except BaseException: tmpf.close() os.unlink(tmpname) raise return tmpname -class XTempFile(object): - """Real file for archive. - """ - __slots__ = ('_tmpfile', '_filename') - - def __init__(self, rarfile): - if is_filelike(rarfile): - self._tmpfile = membuf_tempfile(rarfile) - self._filename = self._tmpfile - else: - self._tmpfile = None - self._filename = rarfile - - def __enter__(self): - return self._filename - - def __exit__(self, exc_type, exc_value, tb): - if self._tmpfile: - try: - os.unlink(self._tmpfile) - except OSError: - pass - self._tmpfile = None # -# Check if unrar works +# Find working command-line tool # -ORIG_UNRAR_TOOL = UNRAR_TOOL -ORIG_OPEN_ARGS = OPEN_ARGS -ORIG_EXTRACT_ARGS = EXTRACT_ARGS -ORIG_TEST_ARGS = TEST_ARGS +class ToolSetup: + def __init__(self, setup): + self.setup = setup -def _check_unrar_tool(): - global UNRAR_TOOL, OPEN_ARGS, EXTRACT_ARGS, TEST_ARGS - try: - # does UNRAR_TOOL work? - custom_check([ORIG_UNRAR_TOOL], True) - - UNRAR_TOOL = ORIG_UNRAR_TOOL - OPEN_ARGS = ORIG_OPEN_ARGS - EXTRACT_ARGS = ORIG_EXTRACT_ARGS - TEST_ARGS = ORIG_TEST_ARGS - except RarCannotExec: + def check(self): + cmdline = self.get_cmdline("check_cmd", None) try: - # does ALT_TOOL work? - custom_check([ALT_TOOL] + list(ALT_CHECK_ARGS), True) - # replace config - UNRAR_TOOL = ALT_TOOL - OPEN_ARGS = ALT_OPEN_ARGS - EXTRACT_ARGS = ALT_EXTRACT_ARGS - TEST_ARGS = ALT_TEST_ARGS + p = custom_popen(cmdline) + out, _ = p.communicate() + return p.returncode == 0 except RarCannotExec: - # no usable tool, only uncompressed archives work return False - return True -_check_unrar_tool() + def open_cmdline(self, pwd, rarfn, filefn=None): + cmdline = self.get_cmdline("open_cmd", pwd) + cmdline.append(rarfn) + if filefn: + self.add_file_arg(cmdline, filefn) + return cmdline + + def get_errmap(self): + return self.setup["errmap"] + + def get_cmdline(self, key, pwd, nodash=False): + cmdline = list(self.setup[key]) + cmdline[0] = globals()[cmdline[0]] + self.add_password_arg(cmdline, pwd) + if not nodash: + cmdline.append("--") + return cmdline + + def add_file_arg(self, cmdline, filename): + cmdline.append(filename) + + def add_password_arg(self, cmdline, pwd): + """Append password switch to commandline. + """ + if pwd is not None: + if not isinstance(pwd, str): + pwd = pwd.decode("utf8") + args = self.setup["password"] + if isinstance(args, str): + cmdline.append(args + pwd) + else: + cmdline.extend(args) + cmdline.append(pwd) + else: + cmdline.extend(self.setup["no_password"]) + + +UNRAR_CONFIG = { + "open_cmd": ("UNRAR_TOOL", "p", "-inul"), + "check_cmd": ("UNRAR_TOOL", "-inul"), + "password": "-p", + "no_password": ("-p-",), + # map return code to exception class, codes from rar.txt + "errmap": [None, + RarWarning, RarFatalError, RarCRCError, RarLockedArchiveError, # 1..4 + RarWriteError, RarOpenError, RarUserError, RarMemoryError, # 5..8 + RarCreateError, RarNoFilesError, RarWrongPassword] # 9..11 +} + +# Problems with unar RAR backend: +# - Does not support RAR2 locked files [fails to read] +# - Does not support RAR5 Blake2sp hash [reading works] +UNAR_CONFIG = { + "open_cmd": ("UNAR_TOOL", "-q", "-o", "-"), + "check_cmd": ("UNAR_TOOL", "-version"), + "password": ("-p",), + "no_password": ("-p", ""), + "errmap": [None], +} + +# Problems with libarchive RAR backend: +# - Does not support solid archives. +# - Does not support password-protected archives. +# - Does not support RARVM-based compression filters. +BSDTAR_CONFIG = { + "open_cmd": ("BSDTAR_TOOL", "-x", "--to-stdout", "-f"), + "check_cmd": ("BSDTAR_TOOL", "--version"), + "password": None, + "no_password": (), + "errmap": [None], +} + +CURRENT_SETUP = None + + +def tool_setup(unrar=True, unar=True, bsdtar=True, force=False): + """Pick a tool, return cached ToolSetup. + """ + global CURRENT_SETUP + if force: + CURRENT_SETUP = None + if CURRENT_SETUP is not None: + return CURRENT_SETUP + lst = [] + if unrar: + lst.append(UNRAR_CONFIG) + if unar: + lst.append(UNAR_CONFIG) + if bsdtar: + lst.append(BSDTAR_CONFIG) + + for conf in lst: + setup = ToolSetup(conf) + if setup.check(): + CURRENT_SETUP = setup + break + if CURRENT_SETUP is None: + raise RarCannotExec("Cannot find working tool") + return CURRENT_SETUP + + +def main(args): + """Minimal command-line interface for rarfile module. + """ + import argparse + p = argparse.ArgumentParser(description=main.__doc__) + g = p.add_mutually_exclusive_group(required=True) + g.add_argument("-l", "--list", metavar="", + help="Show archive listing") + g.add_argument("-e", "--extract", nargs=2, + metavar=("", ""), + help="Extract archive into target dir") + g.add_argument("-t", "--test", metavar="", + help="Test if a archive is valid") + cmd = p.parse_args(args) + + if cmd.list: + with RarFile(cmd.list) as rf: + rf.printdir() + elif cmd.test: + with RarFile(cmd.test) as rf: + rf.testrar() + elif cmd.extract: + with RarFile(cmd.extract[0]) as rf: + rf.extractall(cmd.extract[1]) + + +if __name__ == "__main__": + main(sys.argv[1:]) diff --git a/libs/common/rebulk/__init__.py b/libs/common/rebulk/__init__.py index 93d5e477..360731bc 100644 --- a/libs/common/rebulk/__init__.py +++ b/libs/common/rebulk/__init__.py @@ -7,4 +7,4 @@ Define simple search patterns in bulk to perform advanced matching on any string from .rebulk import Rebulk from .rules import Rule, CustomRule, AppendMatch, RemoveMatch, RenameMatch, AppendTags, RemoveTags from .processors import ConflictSolver, PrivateRemover, POST_PROCESS, PRE_PROCESS -from .pattern import REGEX_AVAILABLE +from .pattern import REGEX_ENABLED diff --git a/libs/common/rebulk/__version__.py b/libs/common/rebulk/__version__.py index 939c554c..52316dd4 100644 --- a/libs/common/rebulk/__version__.py +++ b/libs/common/rebulk/__version__.py @@ -4,4 +4,4 @@ Version module """ # pragma: no cover -__version__ = '2.0.1' +__version__ = '3.1.0' diff --git a/libs/common/rebulk/builder.py b/libs/common/rebulk/builder.py index c91420aa..78cd2423 100644 --- a/libs/common/rebulk/builder.py +++ b/libs/common/rebulk/builder.py @@ -4,19 +4,35 @@ Base builder class for Rebulk """ from abc import ABCMeta, abstractmethod +from contextlib import contextmanager from copy import deepcopy from logging import getLogger -from six import add_metaclass - from .loose import set_defaults from .pattern import RePattern, StringPattern, FunctionalPattern log = getLogger(__name__).log -@add_metaclass(ABCMeta) -class Builder(object): +@contextmanager +def overrides(kwargs): + """ + Implements override kwarg to restore initial kwarg arguments from overrides list after set_defaults calls. + :param kwargs: + :return: + """ + override_keys = kwargs.pop('overrides', None) + backup = {} + if override_keys: + for override_key in override_keys: + backup[override_key] = kwargs[override_key] + + yield backup + + kwargs.update(backup) + + +class Builder(metaclass=ABCMeta): """ Base builder class for patterns """ @@ -102,8 +118,10 @@ class Builder(object): :return: :rtype: """ - set_defaults(self._regex_defaults, kwargs) - set_defaults(self._defaults, kwargs) + with overrides(kwargs): + set_defaults(self._regex_defaults, kwargs) + set_defaults(self._defaults, kwargs) + return RePattern(*pattern, **kwargs) def build_string(self, *pattern, **kwargs): @@ -117,8 +135,10 @@ class Builder(object): :return: :rtype: """ - set_defaults(self._string_defaults, kwargs) - set_defaults(self._defaults, kwargs) + with overrides(kwargs): + set_defaults(self._string_defaults, kwargs) + set_defaults(self._defaults, kwargs) + return StringPattern(*pattern, **kwargs) def build_functional(self, *pattern, **kwargs): @@ -132,8 +152,10 @@ class Builder(object): :return: :rtype: """ - set_defaults(self._functional_defaults, kwargs) - set_defaults(self._defaults, kwargs) + with overrides(kwargs): + set_defaults(self._functional_defaults, kwargs) + set_defaults(self._defaults, kwargs) + return FunctionalPattern(*pattern, **kwargs) def build_chain(self, **kwargs): @@ -147,15 +169,19 @@ class Builder(object): :return: :rtype: """ - from .chain import Chain - set_defaults(self._chain_defaults, kwargs) - set_defaults(self._defaults, kwargs) + from .chain import Chain # pylint:disable=import-outside-toplevel + + with overrides(kwargs): + set_defaults(self._chain_defaults, kwargs) + set_defaults(self._defaults, kwargs) + chain = Chain(self, **kwargs) chain._defaults = deepcopy(self._defaults) # pylint: disable=protected-access chain._regex_defaults = deepcopy(self._regex_defaults) # pylint: disable=protected-access chain._functional_defaults = deepcopy(self._functional_defaults) # pylint: disable=protected-access chain._string_defaults = deepcopy(self._string_defaults) # pylint: disable=protected-access chain._chain_defaults = deepcopy(self._chain_defaults) # pylint: disable=protected-access + return chain @abstractmethod diff --git a/libs/common/rebulk/chain.py b/libs/common/rebulk/chain.py index ba31ec9a..2e2a959e 100644 --- a/libs/common/rebulk/chain.py +++ b/libs/common/rebulk/chain.py @@ -125,7 +125,7 @@ class Chain(Pattern, Builder): :rtype: """ # pylint: disable=too-many-locals - ret = super(Chain, self)._process_match(match, match_index, child=child) + ret = super()._process_match(match, match_index, child=child) if ret: return True @@ -144,7 +144,7 @@ class Chain(Pattern, Builder): for last_match in last_matches: match.children.remove(last_match) match.end = match.children[-1].end if match.children else match.start - ret = super(Chain, self)._process_match(match, match_index, child=child) + ret = super()._process_match(match, match_index, child=child) if ret: return True @@ -186,7 +186,7 @@ class Chain(Pattern, Builder): @staticmethod def _group_by_match_index(matches): - grouped_matches_dict = dict() + grouped_matches_dict = {} for match_index, match in itertools.groupby(matches, lambda m: m.match_index): grouped_matches_dict[match_index] = list(match) return grouped_matches_dict @@ -202,8 +202,8 @@ class Chain(Pattern, Builder): def __repr__(self): defined = "" if self.defined_at: - defined = "@%s" % (self.defined_at,) - return "<%s%s:%s>" % (self.__class__.__name__, defined, self.parts) + defined = f"@{self.defined_at}" + return f"<{self.__class__.__name__}{defined}:{self.parts}>" class ChainPart(BasePattern): @@ -377,4 +377,4 @@ class ChainPart(BasePattern): return self def __repr__(self): - return "%s({%s,%s})" % (self.pattern, self.repeater_start, self.repeater_end) + return f"{self.pattern}({{{self.repeater_start},{self.repeater_end}}})" diff --git a/libs/common/rebulk/debug.py b/libs/common/rebulk/debug.py index 2384b26e..3229252e 100644 --- a/libs/common/rebulk/debug.py +++ b/libs/common/rebulk/debug.py @@ -29,7 +29,7 @@ class Frame(namedtuple('Frame', ['lineno', 'package', 'name', 'filename'])): __slots__ = () def __repr__(self): - return "%s#L%s" % (os.path.basename(self.filename), self.lineno) + return f"{os.path.basename(self.filename)}#L{self.lineno}" def defined_at(): diff --git a/libs/common/rebulk/introspector.py b/libs/common/rebulk/introspector.py index bfefcb75..f4a4f70a 100644 --- a/libs/common/rebulk/introspector.py +++ b/libs/common/rebulk/introspector.py @@ -6,13 +6,11 @@ Introspect rebulk object to retrieve capabilities. from abc import ABCMeta, abstractmethod from collections import defaultdict -import six from .pattern import StringPattern, RePattern, FunctionalPattern from .utils import extend_safe -@six.add_metaclass(ABCMeta) -class Description(object): +class Description(metaclass=ABCMeta): """ Abstract class for a description. """ diff --git a/libs/common/rebulk/loose.py b/libs/common/rebulk/loose.py index 423b4ea7..b66b3705 100644 --- a/libs/common/rebulk/loose.py +++ b/libs/common/rebulk/loose.py @@ -112,7 +112,7 @@ def argspec_args(argspec, constructor, *args, **kwargs): if argspec.varkw: call_kwarg = kwargs else: - call_kwarg = dict((k, kwargs[k]) for k in kwargs if k in argspec.args) # Python 2.6 dict comprehension + call_kwarg = dict((k, kwargs[k]) for k in kwargs if k in argspec.args) # pylint:disable=consider-using-dict-items if argspec.varargs: call_args = args else: @@ -139,7 +139,7 @@ if not _fullargspec_supported: if argspec.keywords: call_kwarg = kwargs else: - call_kwarg = dict((k, kwargs[k]) for k in kwargs if k in argspec.args) # Python 2.6 dict comprehension + call_kwarg = dict((k, kwargs[k]) for k in kwargs if k in argspec.args) # pylint:disable=consider-using-dict-items if argspec.varargs: call_args = args else: diff --git a/libs/common/rebulk/match.py b/libs/common/rebulk/match.py index d8e72df4..4dbf5130 100644 --- a/libs/common/rebulk/match.py +++ b/libs/common/rebulk/match.py @@ -5,21 +5,13 @@ Classes and functions related to matches """ import copy import itertools +from collections import OrderedDict from collections import defaultdict -try: - from collections.abc import MutableSequence -except ImportError: - from collections import MutableSequence - -try: - from collections import OrderedDict # pylint:disable=ungrouped-imports -except ImportError: # pragma: no cover - from ordereddict import OrderedDict # pylint:disable=import-error -import six +from collections.abc import MutableSequence +from .debug import defined_at from .loose import ensure_list, filter_index from .utils import is_iterable -from .debug import defined_at class MatchesDict(OrderedDict): @@ -28,7 +20,7 @@ class MatchesDict(OrderedDict): """ def __init__(self): - super(MatchesDict, self).__init__() + super().__init__() self.matches = defaultdict(list) self.values_list = defaultdict(list) @@ -67,7 +59,7 @@ class _BaseMatches(MutableSequence): def _start_dict(self): if self.__start_dict is None: self.__start_dict = defaultdict(_BaseMatches._base) - for start, values in itertools.groupby([m for m in self._delegate], lambda item: item.start): + for start, values in itertools.groupby(list(self._delegate), lambda item: item.start): _BaseMatches._base_extend(self.__start_dict[start], values) return self.__start_dict @@ -76,7 +68,7 @@ class _BaseMatches(MutableSequence): def _end_dict(self): if self.__end_dict is None: self.__end_dict = defaultdict(_BaseMatches._base) - for start, values in itertools.groupby([m for m in self._delegate], lambda item: item.end): + for start, values in itertools.groupby(list(self._delegate), lambda item: item.end): _BaseMatches._base_extend(self.__end_dict[start], values) return self.__end_dict @@ -534,13 +526,6 @@ class _BaseMatches(MutableSequence): ret[match.name] = value return ret - if six.PY2: # pragma: no cover - def clear(self): - """ - Python 3 backport - """ - del self[:] - def __len__(self): return len(self._delegate) @@ -583,11 +568,11 @@ class Matches(_BaseMatches): def __init__(self, matches=None, input_string=None): self.markers = Markers(input_string=input_string) - super(Matches, self).__init__(matches=matches, input_string=input_string) + super().__init__(matches=matches, input_string=input_string) def _add_match(self, match): assert not match.marker, "A marker match should not be added to object" - super(Matches, self)._add_match(match) + super()._add_match(match) class Markers(_BaseMatches): @@ -596,11 +581,11 @@ class Markers(_BaseMatches): """ def __init__(self, matches=None, input_string=None): - super(Markers, self).__init__(matches=None, input_string=input_string) + super().__init__(matches=None, input_string=input_string) def _add_match(self, match): assert match.marker, "A non-marker match should not be added to object" - super(Markers, self)._add_match(match) + super()._add_match(match) class Match(object): @@ -800,8 +785,8 @@ class Match(object): current_match = split_match ret = [] - for i in range(0, len(self.raw)): - if self.raw[i] in seps: + for i, char in enumerate(self.raw): + if char in seps: if not split_match: split_match = copy.deepcopy(current_match) current_match.end = self.start + i @@ -878,13 +863,13 @@ class Match(object): defined = "" initiator = "" if self.initiator.value != self.value: - initiator = "+initiator=" + self.initiator.value + initiator = f"+initiator={self.initiator.value}" if self.private: flags += '+private' if self.name: - name = "+name=%s" % (self.name,) + name = f"+name={self.name}" if self.tags: - tags = "+tags=%s" % (self.tags,) + tags = f"+tags={self.tags}" if self.defined_at: - defined += "@%s" % (self.defined_at,) - return "<%s:%s%s%s%s%s%s>" % (self.value, self.span, flags, name, tags, initiator, defined) + defined += f"@{self.defined_at}" + return f"<{self.value}:{self.span}{flags}{name}{tags}{initiator}{defined}>" diff --git a/libs/common/rebulk/pattern.py b/libs/common/rebulk/pattern.py index beb8b273..b7f2ea5a 100644 --- a/libs/common/rebulk/pattern.py +++ b/libs/common/rebulk/pattern.py @@ -5,21 +5,18 @@ Abstract pattern class definition along with various implementations (regexp, st """ # pylint: disable=super-init-not-called,wrong-import-position -from abc import ABCMeta, abstractmethod, abstractproperty - -import six +from abc import ABCMeta, abstractmethod from . import debug from .formatters import default_formatter from .loose import call, ensure_list, ensure_dict from .match import Match -from .remodule import re, REGEX_AVAILABLE +from .remodule import re, REGEX_ENABLED from .utils import find_all, is_iterable, get_first_defined from .validators import allways_true -@six.add_metaclass(ABCMeta) -class BasePattern(object): +class BasePattern(metaclass=ABCMeta): """ Base class for Pattern like objects """ @@ -41,8 +38,7 @@ class BasePattern(object): pass -@six.add_metaclass(ABCMeta) -class Pattern(BasePattern): +class Pattern(BasePattern, metaclass=ABCMeta): """ Definition of a particular pattern to search for. """ @@ -333,7 +329,8 @@ class Pattern(BasePattern): return self.post_processor(matches, self) return matches - @abstractproperty + @property + @abstractmethod def patterns(self): # pragma: no cover """ List of base patterns defined @@ -354,7 +351,8 @@ class Pattern(BasePattern): return self._properties return {} - @abstractproperty + @property + @abstractmethod def match_options(self): # pragma: no cover """ dict of default options for generated Match objects @@ -382,8 +380,8 @@ class Pattern(BasePattern): def __repr__(self): defined = "" if self.defined_at: - defined = "@%s" % (self.defined_at,) - return "<%s%s:%s>" % (self.__class__.__name__, defined, self.__repr__patterns__) + defined = f"@{self.defined_at}" + return f"<{self.__class__.__name__}{defined}:{self.__repr__patterns__}>" @property def __repr__patterns__(self): @@ -396,7 +394,7 @@ class StringPattern(Pattern): """ def __init__(self, *patterns, **kwargs): - super(StringPattern, self).__init__(**kwargs) + super().__init__(**kwargs) self._patterns = patterns self._kwargs = kwargs self._match_kwargs = filter_match_kwargs(kwargs) @@ -422,11 +420,11 @@ class RePattern(Pattern): """ def __init__(self, *patterns, **kwargs): - super(RePattern, self).__init__(**kwargs) - self.repeated_captures = REGEX_AVAILABLE + super().__init__(**kwargs) + self.repeated_captures = REGEX_ENABLED if 'repeated_captures' in kwargs: self.repeated_captures = kwargs.get('repeated_captures') - if self.repeated_captures and not REGEX_AVAILABLE: # pragma: no cover + if self.repeated_captures and not REGEX_ENABLED: # pragma: no cover raise NotImplementedError("repeated_capture is available only with regex module.") self.abbreviations = kwargs.get('abbreviations', []) self._kwargs = kwargs @@ -434,7 +432,7 @@ class RePattern(Pattern): self._children_match_kwargs = filter_match_kwargs(kwargs, children=True) self._patterns = [] for pattern in patterns: - if isinstance(pattern, six.string_types): + if isinstance(pattern, str): if self.abbreviations and pattern: for key, replacement in self.abbreviations: pattern = pattern.replace(key, replacement) @@ -494,7 +492,7 @@ class FunctionalPattern(Pattern): """ def __init__(self, *patterns, **kwargs): - super(FunctionalPattern, self).__init__(**kwargs) + super().__init__(**kwargs) self._patterns = patterns self._kwargs = kwargs self._match_kwargs = filter_match_kwargs(kwargs) diff --git a/libs/common/rebulk/rebulk.py b/libs/common/rebulk/rebulk.py index a6a0fd2f..972b92ab 100644 --- a/libs/common/rebulk/rebulk.py +++ b/libs/common/rebulk/rebulk.py @@ -53,7 +53,7 @@ class Rebulk(Builder): :return: :rtype: """ - super(Rebulk, self).__init__() + super().__init__() if not callable(disabled): self.disabled = lambda context: disabled else: diff --git a/libs/common/rebulk/remodule.py b/libs/common/rebulk/remodule.py index d1d68d19..7bdd42d6 100644 --- a/libs/common/rebulk/remodule.py +++ b/libs/common/rebulk/remodule.py @@ -5,13 +5,17 @@ Uniform re module """ # pylint: disable-all import os +import logging -REGEX_AVAILABLE = False -if os.environ.get('REGEX_DISABLED') in ["1", "true", "True", "Y"]: - import re -else: +log = logging.getLogger(__name__).log + +REGEX_ENABLED = False +if os.environ.get('REBULK_REGEX_ENABLED') in ["1", "true", "True", "Y"]: try: import regex as re - REGEX_AVAILABLE = True + REGEX_ENABLED = True except ImportError: + log.warning('regex module is not available. Unset REBULK_REGEX_ENABLED environment variable, or install regex module to enabled it.') import re +else: + import re diff --git a/libs/common/rebulk/rules.py b/libs/common/rebulk/rules.py index 2514904f..d853b402 100644 --- a/libs/common/rebulk/rules.py +++ b/libs/common/rebulk/rules.py @@ -8,7 +8,6 @@ import inspect from itertools import groupby from logging import getLogger -import six from .utils import is_iterable from .toposort import toposort @@ -18,8 +17,7 @@ from . import debug log = getLogger(__name__).log -@six.add_metaclass(ABCMeta) -class Consequence(object): +class Consequence(metaclass=ABCMeta): """ Definition of a consequence to apply. """ @@ -40,8 +38,7 @@ class Consequence(object): pass -@six.add_metaclass(ABCMeta) -class Condition(object): +class Condition(metaclass=ABCMeta): """ Definition of a condition to check. """ @@ -60,8 +57,7 @@ class Condition(object): pass -@six.add_metaclass(ABCMeta) -class CustomRule(Condition, Consequence): +class CustomRule(Condition, Consequence, metaclass=ABCMeta): """ Definition of a rule to apply """ @@ -93,8 +89,8 @@ class CustomRule(Condition, Consequence): def __repr__(self): defined = "" if self.defined_at: - defined = "@%s" % (self.defined_at,) - return "<%s%s>" % (self.name if self.name else self.__class__.__name__, defined) + defined = f"@{self.defined_at}" + return f"<{self.name if self.name else self.__class__.__name__}{defined}>" def __eq__(self, other): return self.__class__ == other.__class__ @@ -243,7 +239,7 @@ class Rules(list): """ def __init__(self, *rules): - super(Rules, self).__init__() + super().__init__() self.load(*rules) def load(self, *rules): @@ -355,7 +351,7 @@ def toposort_rules(rules): class_dict = {} for rule in rules: if rule.__class__ in class_dict: - raise ValueError("Duplicate class rules are not allowed: %s" % rule.__class__) + raise ValueError(f"Duplicate class rules are not allowed: {rule.__class__}") class_dict[rule.__class__] = rule for rule in rules: if not is_iterable(rule.dependency) and rule.dependency: diff --git a/libs/common/rebulk/test/test_match.py b/libs/common/rebulk/test/test_match.py index 8750733a..2359149b 100644 --- a/libs/common/rebulk/test/test_match.py +++ b/libs/common/rebulk/test/test_match.py @@ -3,7 +3,6 @@ # pylint: disable=no-self-use, pointless-statement, missing-docstring, unneeded-not, len-as-condition import pytest -import six from ..match import Match, Matches from ..pattern import StringPattern, RePattern @@ -72,23 +71,18 @@ class TestMatchClass(object): assert match2 > match1 assert match2 >= match1 - if six.PY3: - with pytest.raises(TypeError): - match1 < other + with pytest.raises(TypeError): + match1 < other - with pytest.raises(TypeError): - match1 <= other + with pytest.raises(TypeError): + match1 <= other - with pytest.raises(TypeError): - match1 > other + with pytest.raises(TypeError): + match1 > other + + with pytest.raises(TypeError): + match1 >= other - with pytest.raises(TypeError): - match1 >= other - else: - assert match1 < other - assert match1 <= other - assert not match1 > other - assert not match1 >= other def test_value(self): match1 = Match(1, 3) diff --git a/libs/common/rebulk/test/test_pattern.py b/libs/common/rebulk/test/test_pattern.py index beee1704..b80b034c 100644 --- a/libs/common/rebulk/test/test_pattern.py +++ b/libs/common/rebulk/test/test_pattern.py @@ -1,11 +1,11 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# pylint: disable=no-self-use, pointless-statement, missing-docstring, unbalanced-tuple-unpacking, len-as-condition +# pylint: disable=no-self-use, pointless-statement, missing-docstring, unbalanced-tuple-unpacking, len-as-condition, no-member import re import pytest -from ..pattern import StringPattern, RePattern, FunctionalPattern, REGEX_AVAILABLE +from ..pattern import StringPattern, RePattern, FunctionalPattern, REGEX_ENABLED from ..match import Match class TestStringPattern(object): @@ -706,7 +706,7 @@ class TestFormatter(object): assert len(matches) == 1 match = matches[0] - if REGEX_AVAILABLE: + if REGEX_ENABLED: assert len(match.children) == 5 assert [child.value for child in match.children] == ["02", "03", "04", "05", "06"] else: diff --git a/libs/common/rebulk/test/test_rebulk.py b/libs/common/rebulk/test/test_rebulk.py index bafa01d4..281461ba 100644 --- a/libs/common/rebulk/test/test_rebulk.py +++ b/libs/common/rebulk/test/test_rebulk.py @@ -120,6 +120,44 @@ def test_rebulk_defaults(): assert matches[2].tags == ["0"] +def test_rebulk_defaults_overrides(): + input_string = "The quick brown fox jumps over the lazy dog" + + def func(input_string): + i = input_string.find("fox") + if i > -1: + return i, i + len("fox") + + matches = Rebulk() \ + .string_defaults(name="string", tags=["a", "b"]) \ + .regex_defaults(name="regex", tags=["d"]) \ + .functional_defaults(name="functional") \ + .string("quick", tags=["c"], overrides=["tags"]) \ + .functional(func) \ + .regex("br.{2}n") \ + .matches(input_string) + assert matches[0].name == "string" + assert matches[0].tags == ["c"] + assert matches[1].name == "functional" + assert matches[2].name == "regex" + assert matches[2].tags == ["d"] + + matches = Rebulk() \ + .defaults(name="default", tags=["0"]) \ + .string_defaults(name="string", tags=["a", "b"]) \ + .functional_defaults(name="functional", tags=["1"]) \ + .string("quick", tags=["c"]) \ + .functional(func) \ + .regex("br.{2}n") \ + .matches(input_string) + assert matches[0].name == "string" + assert matches[0].tags == ["0", "a", "b", "c"] + assert matches[1].name == "functional" + assert matches[1].tags == ["0", "1"] + assert matches[2].name == "default" + assert matches[2].tags == ["0"] + + def test_rebulk_rebulk(): input_string = "The quick brown fox jumps over the lazy dog" @@ -289,7 +327,7 @@ class TestMarkers(object): def when(self, matches, context): word_match = matches.named("word", 0) marker = matches.markers.at_match(word_match, - lambda marker: marker.name == "mark1" or marker.name == "mark2") + lambda marker: marker.name in ["mark1", "mark2"]) if len(marker) < 2: return word_match @@ -362,26 +400,26 @@ class TestMarkers(object): class TestUnicode(object): def test_rebulk_simple(self): - input_string = u"敏捷的棕色狐狸跳過懶狗" + input_string = "敏捷的棕色狐狸跳過懶狗" rebulk = Rebulk() - rebulk.string(u"敏") - rebulk.regex(u"捷") + rebulk.string("敏") + rebulk.regex("捷") def func(input_string): - i = input_string.find(u"的") + i = input_string.find("的") if i > -1: - return i, i + len(u"的") + return i, i + len("的") rebulk.functional(func) matches = rebulk.matches(input_string) assert len(matches) == 3 - assert matches[0].value == u"敏" - assert matches[1].value == u"捷" - assert matches[2].value == u"的" + assert matches[0].value == "敏" + assert matches[1].value == "捷" + assert matches[2].value == "的" class TestImmutable(object): diff --git a/libs/common/rebulk/toposort.py b/libs/common/rebulk/toposort.py index 2bcba9ae..9f3a7310 100644 --- a/libs/common/rebulk/toposort.py +++ b/libs/common/rebulk/toposort.py @@ -22,7 +22,7 @@ from functools import reduce class CyclicDependency(ValueError): def __init__(self, cyclic): s = 'Cyclic dependencies exist among these items: {0}'.format(', '.join(repr(x) for x in cyclic.items())) - super(CyclicDependency, self).__init__(s) + super().__init__(s) self.cyclic = cyclic diff --git a/libs/common/rebulk/utils.py b/libs/common/rebulk/utils.py index 85ddd41e..29cfda19 100644 --- a/libs/common/rebulk/utils.py +++ b/libs/common/rebulk/utils.py @@ -3,10 +3,7 @@ """ Various utilities functions """ -try: - from collections.abc import MutableSet -except ImportError: - from collections import MutableSet +from collections.abc import MutableSet from types import GeneratorType @@ -153,4 +150,4 @@ class IdentitySet(MutableSet): # pragma: no cover self.add(elem) def __repr__(self): # pragma: no cover - return "%s(%s)" % (type(self).__name__, list(self)) + return f"{type(self).__name__}({list(self)})" diff --git a/libs/common/requests/__init__.py b/libs/common/requests/__init__.py index f8f94295..7ac8e297 100644 --- a/libs/common/requests/__init__.py +++ b/libs/common/requests/__init__.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - # __ # /__) _ _ _ _ _/ _ # / ( (- (/ (/ (- _) / _) @@ -40,19 +38,30 @@ is at . :license: Apache 2.0, see LICENSE for more details. """ -import urllib3 -import chardet import warnings + +import urllib3 + from .exceptions import RequestsDependencyWarning +try: + from charset_normalizer import __version__ as charset_normalizer_version +except ImportError: + charset_normalizer_version = None -def check_compatibility(urllib3_version, chardet_version): - urllib3_version = urllib3_version.split('.') - assert urllib3_version != ['dev'] # Verify urllib3 isn't installed from git. +try: + from chardet import __version__ as chardet_version +except ImportError: + chardet_version = None + + +def check_compatibility(urllib3_version, chardet_version, charset_normalizer_version): + urllib3_version = urllib3_version.split(".") + assert urllib3_version != ["dev"] # Verify urllib3 isn't installed from git. # Sometimes, urllib3 only reports its version as 16.1. if len(urllib3_version) == 2: - urllib3_version.append('0') + urllib3_version.append("0") # Check urllib3 for compatibility. major, minor, patch = urllib3_version # noqa: F811 @@ -62,31 +71,48 @@ def check_compatibility(urllib3_version, chardet_version): assert minor >= 21 assert minor <= 26 - # Check chardet for compatibility. - major, minor, patch = chardet_version.split('.')[:3] - major, minor, patch = int(major), int(minor), int(patch) - # chardet >= 3.0.2, < 5.0.0 - assert (3, 0, 2) <= (major, minor, patch) < (5, 0, 0) + # Check charset_normalizer for compatibility. + if chardet_version: + major, minor, patch = chardet_version.split(".")[:3] + major, minor, patch = int(major), int(minor), int(patch) + # chardet_version >= 3.0.2, < 6.0.0 + assert (3, 0, 2) <= (major, minor, patch) < (6, 0, 0) + elif charset_normalizer_version: + major, minor, patch = charset_normalizer_version.split(".")[:3] + major, minor, patch = int(major), int(minor), int(patch) + # charset_normalizer >= 2.0.0 < 3.0.0 + assert (2, 0, 0) <= (major, minor, patch) < (3, 0, 0) + else: + raise Exception("You need either charset_normalizer or chardet installed") def _check_cryptography(cryptography_version): # cryptography < 1.3.4 try: - cryptography_version = list(map(int, cryptography_version.split('.'))) + cryptography_version = list(map(int, cryptography_version.split("."))) except ValueError: return if cryptography_version < [1, 3, 4]: - warning = 'Old version of cryptography ({}) may cause slowdown.'.format(cryptography_version) + warning = "Old version of cryptography ({}) may cause slowdown.".format( + cryptography_version + ) warnings.warn(warning, RequestsDependencyWarning) + # Check imported dependencies for compatibility. try: - check_compatibility(urllib3.__version__, chardet.__version__) + check_compatibility( + urllib3.__version__, chardet_version, charset_normalizer_version + ) except (AssertionError, ValueError): - warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported " - "version!".format(urllib3.__version__, chardet.__version__), - RequestsDependencyWarning) + warnings.warn( + "urllib3 ({}) or chardet ({})/charset_normalizer ({}) doesn't match a supported " + "version!".format( + urllib3.__version__, chardet_version, charset_normalizer_version + ), + RequestsDependencyWarning, + ) # Attempt to enable urllib3's fallback for SNI support # if the standard library doesn't support SNI or the @@ -99,39 +125,56 @@ try: if not getattr(ssl, "HAS_SNI", False): from urllib3.contrib import pyopenssl + pyopenssl.inject_into_urllib3() # Check cryptography version from cryptography import __version__ as cryptography_version + _check_cryptography(cryptography_version) except ImportError: pass # urllib3's DependencyWarnings should be silenced. from urllib3.exceptions import DependencyWarning -warnings.simplefilter('ignore', DependencyWarning) -from .__version__ import __title__, __description__, __url__, __version__ -from .__version__ import __build__, __author__, __author_email__, __license__ -from .__version__ import __copyright__, __cake__ - -from . import utils -from . import packages -from .models import Request, Response, PreparedRequest -from .api import request, get, head, post, patch, put, delete, options -from .sessions import session, Session -from .status_codes import codes -from .exceptions import ( - RequestException, Timeout, URLRequired, - TooManyRedirects, HTTPError, ConnectionError, - FileModeWarning, ConnectTimeout, ReadTimeout -) +warnings.simplefilter("ignore", DependencyWarning) # Set default logging handler to avoid "No handler found" warnings. import logging from logging import NullHandler +from . import packages, utils +from .__version__ import ( + __author__, + __author_email__, + __build__, + __cake__, + __copyright__, + __description__, + __license__, + __title__, + __url__, + __version__, +) +from .api import delete, get, head, options, patch, post, put, request +from .exceptions import ( + ConnectionError, + ConnectTimeout, + FileModeWarning, + HTTPError, + JSONDecodeError, + ReadTimeout, + RequestException, + Timeout, + TooManyRedirects, + URLRequired, +) +from .models import PreparedRequest, Request, Response +from .sessions import Session, session +from .status_codes import codes + logging.getLogger(__name__).addHandler(NullHandler()) # FileModeWarnings go off per the default. -warnings.simplefilter('default', FileModeWarning, append=True) +warnings.simplefilter("default", FileModeWarning, append=True) diff --git a/libs/common/requests/__version__.py b/libs/common/requests/__version__.py index 1267488d..e725ada6 100644 --- a/libs/common/requests/__version__.py +++ b/libs/common/requests/__version__.py @@ -2,13 +2,13 @@ # |( |- |.| | | |- `-. | `-. # ' ' `-' `-`.`-' `-' `-' ' `-' -__title__ = 'requests' -__description__ = 'Python HTTP for Humans.' -__url__ = 'https://requests.readthedocs.io' -__version__ = '2.25.1' -__build__ = 0x022501 -__author__ = 'Kenneth Reitz' -__author_email__ = 'me@kennethreitz.org' -__license__ = 'Apache 2.0' -__copyright__ = 'Copyright 2020 Kenneth Reitz' -__cake__ = u'\u2728 \U0001f370 \u2728' +__title__ = "requests" +__description__ = "Python HTTP for Humans." +__url__ = "https://requests.readthedocs.io" +__version__ = "2.28.1" +__build__ = 0x022801 +__author__ = "Kenneth Reitz" +__author_email__ = "me@kennethreitz.org" +__license__ = "Apache 2.0" +__copyright__ = "Copyright 2022 Kenneth Reitz" +__cake__ = "\u2728 \U0001f370 \u2728" diff --git a/libs/common/requests/_internal_utils.py b/libs/common/requests/_internal_utils.py index 759d9a56..7dc9bc53 100644 --- a/libs/common/requests/_internal_utils.py +++ b/libs/common/requests/_internal_utils.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """ requests._internal_utils ~~~~~~~~~~~~~~ @@ -7,11 +5,22 @@ requests._internal_utils Provides utility functions that are consumed internally by Requests which depend on extremely few external helpers (such as compat) """ +import re -from .compat import is_py2, builtin_str, str +from .compat import builtin_str + +_VALID_HEADER_NAME_RE_BYTE = re.compile(rb"^[^:\s][^:\r\n]*$") +_VALID_HEADER_NAME_RE_STR = re.compile(r"^[^:\s][^:\r\n]*$") +_VALID_HEADER_VALUE_RE_BYTE = re.compile(rb"^\S[^\r\n]*$|^$") +_VALID_HEADER_VALUE_RE_STR = re.compile(r"^\S[^\r\n]*$|^$") + +HEADER_VALIDATORS = { + bytes: (_VALID_HEADER_NAME_RE_BYTE, _VALID_HEADER_VALUE_RE_BYTE), + str: (_VALID_HEADER_NAME_RE_STR, _VALID_HEADER_VALUE_RE_STR), +} -def to_native_string(string, encoding='ascii'): +def to_native_string(string, encoding="ascii"): """Given a string object, regardless of type, returns a representation of that string in the native string type, encoding and decoding where necessary. This assumes ASCII unless told otherwise. @@ -19,10 +28,7 @@ def to_native_string(string, encoding='ascii'): if isinstance(string, builtin_str): out = string else: - if is_py2: - out = string.encode(encoding) - else: - out = string.decode(encoding) + out = string.decode(encoding) return out @@ -36,7 +42,7 @@ def unicode_is_ascii(u_string): """ assert isinstance(u_string, str) try: - u_string.encode('ascii') + u_string.encode("ascii") return True except UnicodeEncodeError: return False diff --git a/libs/common/requests/adapters.py b/libs/common/requests/adapters.py index fa4d9b3c..d3b2d5bb 100644 --- a/libs/common/requests/adapters.py +++ b/libs/common/requests/adapters.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """ requests.adapters ~~~~~~~~~~~~~~~~~ @@ -9,57 +7,76 @@ and maintain connections. """ import os.path -import socket +import socket # noqa: F401 +from urllib3.exceptions import ClosedPoolError, ConnectTimeoutError +from urllib3.exceptions import HTTPError as _HTTPError +from urllib3.exceptions import InvalidHeader as _InvalidHeader +from urllib3.exceptions import ( + LocationValueError, + MaxRetryError, + NewConnectionError, + ProtocolError, +) +from urllib3.exceptions import ProxyError as _ProxyError +from urllib3.exceptions import ReadTimeoutError, ResponseError +from urllib3.exceptions import SSLError as _SSLError from urllib3.poolmanager import PoolManager, proxy_from_url from urllib3.response import HTTPResponse -from urllib3.util import parse_url from urllib3.util import Timeout as TimeoutSauce +from urllib3.util import parse_url from urllib3.util.retry import Retry -from urllib3.exceptions import ClosedPoolError -from urllib3.exceptions import ConnectTimeoutError -from urllib3.exceptions import HTTPError as _HTTPError -from urllib3.exceptions import MaxRetryError -from urllib3.exceptions import NewConnectionError -from urllib3.exceptions import ProxyError as _ProxyError -from urllib3.exceptions import ProtocolError -from urllib3.exceptions import ReadTimeoutError -from urllib3.exceptions import SSLError as _SSLError -from urllib3.exceptions import ResponseError -from urllib3.exceptions import LocationValueError -from .models import Response -from .compat import urlparse, basestring -from .utils import (DEFAULT_CA_BUNDLE_PATH, extract_zipped_paths, - get_encoding_from_headers, prepend_scheme_if_needed, - get_auth_from_url, urldefragauth, select_proxy) -from .structures import CaseInsensitiveDict -from .cookies import extract_cookies_to_jar -from .exceptions import (ConnectionError, ConnectTimeout, ReadTimeout, SSLError, - ProxyError, RetryError, InvalidSchema, InvalidProxyURL, - InvalidURL) from .auth import _basic_auth_str +from .compat import basestring, urlparse +from .cookies import extract_cookies_to_jar +from .exceptions import ( + ConnectionError, + ConnectTimeout, + InvalidHeader, + InvalidProxyURL, + InvalidSchema, + InvalidURL, + ProxyError, + ReadTimeout, + RetryError, + SSLError, +) +from .models import Response +from .structures import CaseInsensitiveDict +from .utils import ( + DEFAULT_CA_BUNDLE_PATH, + extract_zipped_paths, + get_auth_from_url, + get_encoding_from_headers, + prepend_scheme_if_needed, + select_proxy, + urldefragauth, +) try: from urllib3.contrib.socks import SOCKSProxyManager except ImportError: + def SOCKSProxyManager(*args, **kwargs): raise InvalidSchema("Missing dependencies for SOCKS support.") + DEFAULT_POOLBLOCK = False DEFAULT_POOLSIZE = 10 DEFAULT_RETRIES = 0 DEFAULT_POOL_TIMEOUT = None -class BaseAdapter(object): +class BaseAdapter: """The Base Transport Adapter""" def __init__(self): - super(BaseAdapter, self).__init__() + super().__init__() - def send(self, request, stream=False, timeout=None, verify=True, - cert=None, proxies=None): + def send( + self, request, stream=False, timeout=None, verify=True, cert=None, proxies=None + ): """Sends PreparedRequest object. Returns Response object. :param request: The :class:`PreparedRequest ` being sent. @@ -107,12 +124,22 @@ class HTTPAdapter(BaseAdapter): >>> a = requests.adapters.HTTPAdapter(max_retries=3) >>> s.mount('http://', a) """ - __attrs__ = ['max_retries', 'config', '_pool_connections', '_pool_maxsize', - '_pool_block'] - def __init__(self, pool_connections=DEFAULT_POOLSIZE, - pool_maxsize=DEFAULT_POOLSIZE, max_retries=DEFAULT_RETRIES, - pool_block=DEFAULT_POOLBLOCK): + __attrs__ = [ + "max_retries", + "config", + "_pool_connections", + "_pool_maxsize", + "_pool_block", + ] + + def __init__( + self, + pool_connections=DEFAULT_POOLSIZE, + pool_maxsize=DEFAULT_POOLSIZE, + max_retries=DEFAULT_RETRIES, + pool_block=DEFAULT_POOLBLOCK, + ): if max_retries == DEFAULT_RETRIES: self.max_retries = Retry(0, read=False) else: @@ -120,7 +147,7 @@ class HTTPAdapter(BaseAdapter): self.config = {} self.proxy_manager = {} - super(HTTPAdapter, self).__init__() + super().__init__() self._pool_connections = pool_connections self._pool_maxsize = pool_maxsize @@ -140,10 +167,13 @@ class HTTPAdapter(BaseAdapter): for attr, value in state.items(): setattr(self, attr, value) - self.init_poolmanager(self._pool_connections, self._pool_maxsize, - block=self._pool_block) + self.init_poolmanager( + self._pool_connections, self._pool_maxsize, block=self._pool_block + ) - def init_poolmanager(self, connections, maxsize, block=DEFAULT_POOLBLOCK, **pool_kwargs): + def init_poolmanager( + self, connections, maxsize, block=DEFAULT_POOLBLOCK, **pool_kwargs + ): """Initializes a urllib3 PoolManager. This method should not be called from user code, and is only @@ -160,8 +190,13 @@ class HTTPAdapter(BaseAdapter): self._pool_maxsize = maxsize self._pool_block = block - self.poolmanager = PoolManager(num_pools=connections, maxsize=maxsize, - block=block, strict=True, **pool_kwargs) + self.poolmanager = PoolManager( + num_pools=connections, + maxsize=maxsize, + block=block, + strict=True, + **pool_kwargs, + ) def proxy_manager_for(self, proxy, **proxy_kwargs): """Return urllib3 ProxyManager for the given proxy. @@ -177,7 +212,7 @@ class HTTPAdapter(BaseAdapter): """ if proxy in self.proxy_manager: manager = self.proxy_manager[proxy] - elif proxy.lower().startswith('socks'): + elif proxy.lower().startswith("socks"): username, password = get_auth_from_url(proxy) manager = self.proxy_manager[proxy] = SOCKSProxyManager( proxy, @@ -186,7 +221,7 @@ class HTTPAdapter(BaseAdapter): num_pools=self._pool_connections, maxsize=self._pool_maxsize, block=self._pool_block, - **proxy_kwargs + **proxy_kwargs, ) else: proxy_headers = self.proxy_headers(proxy) @@ -196,7 +231,8 @@ class HTTPAdapter(BaseAdapter): num_pools=self._pool_connections, maxsize=self._pool_maxsize, block=self._pool_block, - **proxy_kwargs) + **proxy_kwargs, + ) return manager @@ -212,7 +248,7 @@ class HTTPAdapter(BaseAdapter): to a CA bundle to use :param cert: The SSL certificate to verify. """ - if url.lower().startswith('https') and verify: + if url.lower().startswith("https") and verify: cert_loc = None @@ -224,17 +260,19 @@ class HTTPAdapter(BaseAdapter): cert_loc = extract_zipped_paths(DEFAULT_CA_BUNDLE_PATH) if not cert_loc or not os.path.exists(cert_loc): - raise IOError("Could not find a suitable TLS CA certificate bundle, " - "invalid path: {}".format(cert_loc)) + raise OSError( + f"Could not find a suitable TLS CA certificate bundle, " + f"invalid path: {cert_loc}" + ) - conn.cert_reqs = 'CERT_REQUIRED' + conn.cert_reqs = "CERT_REQUIRED" if not os.path.isdir(cert_loc): conn.ca_certs = cert_loc else: conn.ca_cert_dir = cert_loc else: - conn.cert_reqs = 'CERT_NONE' + conn.cert_reqs = "CERT_NONE" conn.ca_certs = None conn.ca_cert_dir = None @@ -246,11 +284,14 @@ class HTTPAdapter(BaseAdapter): conn.cert_file = cert conn.key_file = None if conn.cert_file and not os.path.exists(conn.cert_file): - raise IOError("Could not find the TLS certificate file, " - "invalid path: {}".format(conn.cert_file)) + raise OSError( + f"Could not find the TLS certificate file, " + f"invalid path: {conn.cert_file}" + ) if conn.key_file and not os.path.exists(conn.key_file): - raise IOError("Could not find the TLS key file, " - "invalid path: {}".format(conn.key_file)) + raise OSError( + f"Could not find the TLS key file, invalid path: {conn.key_file}" + ) def build_response(self, req, resp): """Builds a :class:`Response ` object from a urllib3 @@ -265,10 +306,10 @@ class HTTPAdapter(BaseAdapter): response = Response() # Fallback to None if there's no status_code, for whatever reason. - response.status_code = getattr(resp, 'status', None) + response.status_code = getattr(resp, "status", None) # Make headers case-insensitive. - response.headers = CaseInsensitiveDict(getattr(resp, 'headers', {})) + response.headers = CaseInsensitiveDict(getattr(resp, "headers", {})) # Set encoding. response.encoding = get_encoding_from_headers(response.headers) @@ -276,7 +317,7 @@ class HTTPAdapter(BaseAdapter): response.reason = response.raw.reason if isinstance(req.url, bytes): - response.url = req.url.decode('utf-8') + response.url = req.url.decode("utf-8") else: response.url = req.url @@ -301,11 +342,13 @@ class HTTPAdapter(BaseAdapter): proxy = select_proxy(url, proxies) if proxy: - proxy = prepend_scheme_if_needed(proxy, 'http') + proxy = prepend_scheme_if_needed(proxy, "http") proxy_url = parse_url(proxy) if not proxy_url.host: - raise InvalidProxyURL("Please check proxy URL. It is malformed" - " and could be missing the host.") + raise InvalidProxyURL( + "Please check proxy URL. It is malformed " + "and could be missing the host." + ) proxy_manager = self.proxy_manager_for(proxy) conn = proxy_manager.connection_from_url(url) else: @@ -343,11 +386,11 @@ class HTTPAdapter(BaseAdapter): proxy = select_proxy(request.url, proxies) scheme = urlparse(request.url).scheme - is_proxied_http_request = (proxy and scheme != 'https') + is_proxied_http_request = proxy and scheme != "https" using_socks_proxy = False if proxy: proxy_scheme = urlparse(proxy).scheme.lower() - using_socks_proxy = proxy_scheme.startswith('socks') + using_socks_proxy = proxy_scheme.startswith("socks") url = request.path_url if is_proxied_http_request and not using_socks_proxy: @@ -386,12 +429,13 @@ class HTTPAdapter(BaseAdapter): username, password = get_auth_from_url(proxy) if username: - headers['Proxy-Authorization'] = _basic_auth_str(username, - password) + headers["Proxy-Authorization"] = _basic_auth_str(username, password) return headers - def send(self, request, stream=False, timeout=None, verify=True, cert=None, proxies=None): + def send( + self, request, stream=False, timeout=None, verify=True, cert=None, proxies=None + ): """Sends PreparedRequest object. Returns Response object. :param request: The :class:`PreparedRequest ` being sent. @@ -415,20 +459,26 @@ class HTTPAdapter(BaseAdapter): self.cert_verify(conn, request.url, verify, cert) url = self.request_url(request, proxies) - self.add_headers(request, stream=stream, timeout=timeout, verify=verify, cert=cert, proxies=proxies) + self.add_headers( + request, + stream=stream, + timeout=timeout, + verify=verify, + cert=cert, + proxies=proxies, + ) - chunked = not (request.body is None or 'Content-Length' in request.headers) + chunked = not (request.body is None or "Content-Length" in request.headers) if isinstance(timeout, tuple): try: connect, read = timeout timeout = TimeoutSauce(connect=connect, read=read) - except ValueError as e: - # this may raise a string formatting error. - err = ("Invalid timeout {}. Pass a (connect, read) " - "timeout tuple, or a single float to set " - "both timeouts to the same value".format(timeout)) - raise ValueError(err) + except ValueError: + raise ValueError( + f"Invalid timeout {timeout}. Pass a (connect, read) timeout tuple, " + f"or a single float to set both timeouts to the same value." + ) elif isinstance(timeout, TimeoutSauce): pass else: @@ -446,20 +496,24 @@ class HTTPAdapter(BaseAdapter): preload_content=False, decode_content=False, retries=self.max_retries, - timeout=timeout + timeout=timeout, ) # Send the request. else: - if hasattr(conn, 'proxy_pool'): + if hasattr(conn, "proxy_pool"): conn = conn.proxy_pool low_conn = conn._get_conn(timeout=DEFAULT_POOL_TIMEOUT) try: - low_conn.putrequest(request.method, - url, - skip_accept_encoding=True) + skip_host = "Host" in request.headers + low_conn.putrequest( + request.method, + url, + skip_accept_encoding=True, + skip_host=skip_host, + ) for header, value in request.headers.items(): low_conn.putheader(header, value) @@ -467,34 +521,29 @@ class HTTPAdapter(BaseAdapter): low_conn.endheaders() for i in request.body: - low_conn.send(hex(len(i))[2:].encode('utf-8')) - low_conn.send(b'\r\n') + low_conn.send(hex(len(i))[2:].encode("utf-8")) + low_conn.send(b"\r\n") low_conn.send(i) - low_conn.send(b'\r\n') - low_conn.send(b'0\r\n\r\n') + low_conn.send(b"\r\n") + low_conn.send(b"0\r\n\r\n") # Receive the response from the server - try: - # For Python 2.7, use buffering of HTTP responses - r = low_conn.getresponse(buffering=True) - except TypeError: - # For compatibility with Python 3.3+ - r = low_conn.getresponse() + r = low_conn.getresponse() resp = HTTPResponse.from_httplib( r, pool=conn, connection=low_conn, preload_content=False, - decode_content=False + decode_content=False, ) - except: + except Exception: # If we hit any problems here, clean up the connection. - # Then, reraise so that we can handle the actual exception. + # Then, raise so that we can handle the actual exception. low_conn.close() raise - except (ProtocolError, socket.error) as err: + except (ProtocolError, OSError) as err: raise ConnectionError(err, request=request) except MaxRetryError as e: @@ -527,6 +576,8 @@ class HTTPAdapter(BaseAdapter): raise SSLError(e, request=request) elif isinstance(e, ReadTimeoutError): raise ReadTimeout(e, request=request) + elif isinstance(e, _InvalidHeader): + raise InvalidHeader(e, request=request) else: raise diff --git a/libs/common/requests/api.py b/libs/common/requests/api.py index e978e203..2f71aaed 100644 --- a/libs/common/requests/api.py +++ b/libs/common/requests/api.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """ requests.api ~~~~~~~~~~~~ @@ -72,8 +70,7 @@ def get(url, params=None, **kwargs): :rtype: requests.Response """ - kwargs.setdefault('allow_redirects', True) - return request('get', url, params=params, **kwargs) + return request("get", url, params=params, **kwargs) def options(url, **kwargs): @@ -85,8 +82,7 @@ def options(url, **kwargs): :rtype: requests.Response """ - kwargs.setdefault('allow_redirects', True) - return request('options', url, **kwargs) + return request("options", url, **kwargs) def head(url, **kwargs): @@ -100,8 +96,8 @@ def head(url, **kwargs): :rtype: requests.Response """ - kwargs.setdefault('allow_redirects', False) - return request('head', url, **kwargs) + kwargs.setdefault("allow_redirects", False) + return request("head", url, **kwargs) def post(url, data=None, json=None, **kwargs): @@ -116,7 +112,7 @@ def post(url, data=None, json=None, **kwargs): :rtype: requests.Response """ - return request('post', url, data=data, json=json, **kwargs) + return request("post", url, data=data, json=json, **kwargs) def put(url, data=None, **kwargs): @@ -131,7 +127,7 @@ def put(url, data=None, **kwargs): :rtype: requests.Response """ - return request('put', url, data=data, **kwargs) + return request("put", url, data=data, **kwargs) def patch(url, data=None, **kwargs): @@ -146,7 +142,7 @@ def patch(url, data=None, **kwargs): :rtype: requests.Response """ - return request('patch', url, data=data, **kwargs) + return request("patch", url, data=data, **kwargs) def delete(url, **kwargs): @@ -158,4 +154,4 @@ def delete(url, **kwargs): :rtype: requests.Response """ - return request('delete', url, **kwargs) + return request("delete", url, **kwargs) diff --git a/libs/common/requests/auth.py b/libs/common/requests/auth.py index eeface39..9733686d 100644 --- a/libs/common/requests/auth.py +++ b/libs/common/requests/auth.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """ requests.auth ~~~~~~~~~~~~~ @@ -7,22 +5,21 @@ requests.auth This module contains the authentication handlers for Requests. """ +import hashlib import os import re -import time -import hashlib import threading +import time import warnings - from base64 import b64encode -from .compat import urlparse, str, basestring -from .cookies import extract_cookies_to_jar from ._internal_utils import to_native_string +from .compat import basestring, str, urlparse +from .cookies import extract_cookies_to_jar from .utils import parse_dict_header -CONTENT_TYPE_FORM_URLENCODED = 'application/x-www-form-urlencoded' -CONTENT_TYPE_MULTI_PART = 'multipart/form-data' +CONTENT_TYPE_FORM_URLENCODED = "application/x-www-form-urlencoded" +CONTENT_TYPE_MULTI_PART = "multipart/form-data" def _basic_auth_str(username, password): @@ -57,23 +54,23 @@ def _basic_auth_str(username, password): # -- End Removal -- if isinstance(username, str): - username = username.encode('latin1') + username = username.encode("latin1") if isinstance(password, str): - password = password.encode('latin1') + password = password.encode("latin1") - authstr = 'Basic ' + to_native_string( - b64encode(b':'.join((username, password))).strip() + authstr = "Basic " + to_native_string( + b64encode(b":".join((username, password))).strip() ) return authstr -class AuthBase(object): +class AuthBase: """Base class that all auth implementations derive from""" def __call__(self, r): - raise NotImplementedError('Auth hooks must be callable.') + raise NotImplementedError("Auth hooks must be callable.") class HTTPBasicAuth(AuthBase): @@ -84,16 +81,18 @@ class HTTPBasicAuth(AuthBase): self.password = password def __eq__(self, other): - return all([ - self.username == getattr(other, 'username', None), - self.password == getattr(other, 'password', None) - ]) + return all( + [ + self.username == getattr(other, "username", None), + self.password == getattr(other, "password", None), + ] + ) def __ne__(self, other): return not self == other def __call__(self, r): - r.headers['Authorization'] = _basic_auth_str(self.username, self.password) + r.headers["Authorization"] = _basic_auth_str(self.username, self.password) return r @@ -101,7 +100,7 @@ class HTTPProxyAuth(HTTPBasicAuth): """Attaches HTTP Proxy Authentication to a given Request object.""" def __call__(self, r): - r.headers['Proxy-Authorization'] = _basic_auth_str(self.username, self.password) + r.headers["Proxy-Authorization"] = _basic_auth_str(self.username, self.password) return r @@ -116,9 +115,9 @@ class HTTPDigestAuth(AuthBase): def init_per_thread_state(self): # Ensure state is initialized just once per-thread - if not hasattr(self._thread_local, 'init'): + if not hasattr(self._thread_local, "init"): self._thread_local.init = True - self._thread_local.last_nonce = '' + self._thread_local.last_nonce = "" self._thread_local.nonce_count = 0 self._thread_local.chal = {} self._thread_local.pos = None @@ -129,44 +128,52 @@ class HTTPDigestAuth(AuthBase): :rtype: str """ - realm = self._thread_local.chal['realm'] - nonce = self._thread_local.chal['nonce'] - qop = self._thread_local.chal.get('qop') - algorithm = self._thread_local.chal.get('algorithm') - opaque = self._thread_local.chal.get('opaque') + realm = self._thread_local.chal["realm"] + nonce = self._thread_local.chal["nonce"] + qop = self._thread_local.chal.get("qop") + algorithm = self._thread_local.chal.get("algorithm") + opaque = self._thread_local.chal.get("opaque") hash_utf8 = None if algorithm is None: - _algorithm = 'MD5' + _algorithm = "MD5" else: _algorithm = algorithm.upper() # lambdas assume digest modules are imported at the top level - if _algorithm == 'MD5' or _algorithm == 'MD5-SESS': + if _algorithm == "MD5" or _algorithm == "MD5-SESS": + def md5_utf8(x): if isinstance(x, str): - x = x.encode('utf-8') + x = x.encode("utf-8") return hashlib.md5(x).hexdigest() + hash_utf8 = md5_utf8 - elif _algorithm == 'SHA': + elif _algorithm == "SHA": + def sha_utf8(x): if isinstance(x, str): - x = x.encode('utf-8') + x = x.encode("utf-8") return hashlib.sha1(x).hexdigest() + hash_utf8 = sha_utf8 - elif _algorithm == 'SHA-256': + elif _algorithm == "SHA-256": + def sha256_utf8(x): if isinstance(x, str): - x = x.encode('utf-8') + x = x.encode("utf-8") return hashlib.sha256(x).hexdigest() + hash_utf8 = sha256_utf8 - elif _algorithm == 'SHA-512': + elif _algorithm == "SHA-512": + def sha512_utf8(x): if isinstance(x, str): - x = x.encode('utf-8') + x = x.encode("utf-8") return hashlib.sha512(x).hexdigest() + hash_utf8 = sha512_utf8 - KD = lambda s, d: hash_utf8("%s:%s" % (s, d)) + KD = lambda s, d: hash_utf8(f"{s}:{d}") # noqa:E731 if hash_utf8 is None: return None @@ -177,10 +184,10 @@ class HTTPDigestAuth(AuthBase): #: path is request-uri defined in RFC 2616 which should not be empty path = p_parsed.path or "/" if p_parsed.query: - path += '?' + p_parsed.query + path += f"?{p_parsed.query}" - A1 = '%s:%s:%s' % (self.username, realm, self.password) - A2 = '%s:%s' % (method, path) + A1 = f"{self.username}:{realm}:{self.password}" + A2 = f"{method}:{path}" HA1 = hash_utf8(A1) HA2 = hash_utf8(A2) @@ -189,22 +196,20 @@ class HTTPDigestAuth(AuthBase): self._thread_local.nonce_count += 1 else: self._thread_local.nonce_count = 1 - ncvalue = '%08x' % self._thread_local.nonce_count - s = str(self._thread_local.nonce_count).encode('utf-8') - s += nonce.encode('utf-8') - s += time.ctime().encode('utf-8') + ncvalue = f"{self._thread_local.nonce_count:08x}" + s = str(self._thread_local.nonce_count).encode("utf-8") + s += nonce.encode("utf-8") + s += time.ctime().encode("utf-8") s += os.urandom(8) - cnonce = (hashlib.sha1(s).hexdigest()[:16]) - if _algorithm == 'MD5-SESS': - HA1 = hash_utf8('%s:%s:%s' % (HA1, nonce, cnonce)) + cnonce = hashlib.sha1(s).hexdigest()[:16] + if _algorithm == "MD5-SESS": + HA1 = hash_utf8(f"{HA1}:{nonce}:{cnonce}") if not qop: - respdig = KD(HA1, "%s:%s" % (nonce, HA2)) - elif qop == 'auth' or 'auth' in qop.split(','): - noncebit = "%s:%s:%s:%s:%s" % ( - nonce, ncvalue, cnonce, 'auth', HA2 - ) + respdig = KD(HA1, f"{nonce}:{HA2}") + elif qop == "auth" or "auth" in qop.split(","): + noncebit = f"{nonce}:{ncvalue}:{cnonce}:auth:{HA2}" respdig = KD(HA1, noncebit) else: # XXX handle auth-int. @@ -213,18 +218,20 @@ class HTTPDigestAuth(AuthBase): self._thread_local.last_nonce = nonce # XXX should the partial digests be encoded too? - base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \ - 'response="%s"' % (self.username, realm, nonce, path, respdig) + base = ( + f'username="{self.username}", realm="{realm}", nonce="{nonce}", ' + f'uri="{path}", response="{respdig}"' + ) if opaque: - base += ', opaque="%s"' % opaque + base += f', opaque="{opaque}"' if algorithm: - base += ', algorithm="%s"' % algorithm + base += f', algorithm="{algorithm}"' if entdig: - base += ', digest="%s"' % entdig + base += f', digest="{entdig}"' if qop: - base += ', qop="auth", nc=%s, cnonce="%s"' % (ncvalue, cnonce) + base += f', qop="auth", nc={ncvalue}, cnonce="{cnonce}"' - return 'Digest %s' % (base) + return f"Digest {base}" def handle_redirect(self, r, **kwargs): """Reset num_401_calls counter on redirects.""" @@ -248,13 +255,13 @@ class HTTPDigestAuth(AuthBase): # Rewind the file position indicator of the body to where # it was to resend the request. r.request.body.seek(self._thread_local.pos) - s_auth = r.headers.get('www-authenticate', '') + s_auth = r.headers.get("www-authenticate", "") - if 'digest' in s_auth.lower() and self._thread_local.num_401_calls < 2: + if "digest" in s_auth.lower() and self._thread_local.num_401_calls < 2: self._thread_local.num_401_calls += 1 - pat = re.compile(r'digest ', flags=re.IGNORECASE) - self._thread_local.chal = parse_dict_header(pat.sub('', s_auth, count=1)) + pat = re.compile(r"digest ", flags=re.IGNORECASE) + self._thread_local.chal = parse_dict_header(pat.sub("", s_auth, count=1)) # Consume content and release the original connection # to allow our new request to reuse the same one. @@ -264,8 +271,9 @@ class HTTPDigestAuth(AuthBase): extract_cookies_to_jar(prep._cookies, r.request, r.raw) prep.prepare_cookies(prep._cookies) - prep.headers['Authorization'] = self.build_digest_header( - prep.method, prep.url) + prep.headers["Authorization"] = self.build_digest_header( + prep.method, prep.url + ) _r = r.connection.send(prep, **kwargs) _r.history.append(r) _r.request = prep @@ -280,7 +288,7 @@ class HTTPDigestAuth(AuthBase): self.init_per_thread_state() # If we have a saved nonce, skip the 401 if self._thread_local.last_nonce: - r.headers['Authorization'] = self.build_digest_header(r.method, r.url) + r.headers["Authorization"] = self.build_digest_header(r.method, r.url) try: self._thread_local.pos = r.body.tell() except AttributeError: @@ -289,17 +297,19 @@ class HTTPDigestAuth(AuthBase): # file position of the previous body. Ensure it's set to # None. self._thread_local.pos = None - r.register_hook('response', self.handle_401) - r.register_hook('response', self.handle_redirect) + r.register_hook("response", self.handle_401) + r.register_hook("response", self.handle_redirect) self._thread_local.num_401_calls = 1 return r def __eq__(self, other): - return all([ - self.username == getattr(other, 'username', None), - self.password == getattr(other, 'password', None) - ]) + return all( + [ + self.username == getattr(other, "username", None), + self.password == getattr(other, "password", None), + ] + ) def __ne__(self, other): return not self == other diff --git a/libs/common/requests/certs.py b/libs/common/requests/certs.py index d1a378d7..be422c3e 100644 --- a/libs/common/requests/certs.py +++ b/libs/common/requests/certs.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- """ requests.certs @@ -14,5 +13,5 @@ packaged CA bundle. """ from certifi import where -if __name__ == '__main__': +if __name__ == "__main__": print(where()) diff --git a/libs/common/requests/compat.py b/libs/common/requests/compat.py index 5de0769f..6776163c 100644 --- a/libs/common/requests/compat.py +++ b/libs/common/requests/compat.py @@ -1,14 +1,16 @@ -# -*- coding: utf-8 -*- - """ requests.compat ~~~~~~~~~~~~~~~ -This module handles import compatibility issues between Python 2 and -Python 3. +This module previously handled import compatibility issues +between Python 2 and Python 3. It remains for backwards +compatibility until the next major version. """ -import chardet +try: + import chardet +except ImportError: + import charset_normalizer as chardet import sys @@ -20,53 +22,58 @@ import sys _ver = sys.version_info #: Python 2.x? -is_py2 = (_ver[0] == 2) +is_py2 = _ver[0] == 2 #: Python 3.x? -is_py3 = (_ver[0] == 3) +is_py3 = _ver[0] == 3 +# json/simplejson module import resolution +has_simplejson = False try: import simplejson as json + + has_simplejson = True except ImportError: import json -# --------- -# Specifics -# --------- +if has_simplejson: + from simplejson import JSONDecodeError +else: + from json import JSONDecodeError -if is_py2: - from urllib import ( - quote, unquote, quote_plus, unquote_plus, urlencode, getproxies, - proxy_bypass, proxy_bypass_environment, getproxies_environment) - from urlparse import urlparse, urlunparse, urljoin, urlsplit, urldefrag - from urllib2 import parse_http_list - import cookielib - from Cookie import Morsel - from StringIO import StringIO - # Keep OrderedDict for backwards compatibility. - from collections import Callable, Mapping, MutableMapping, OrderedDict +# Keep OrderedDict for backwards compatibility. +from collections import OrderedDict +from collections.abc import Callable, Mapping, MutableMapping +from http import cookiejar as cookielib +from http.cookies import Morsel +from io import StringIO +# -------------- +# Legacy Imports +# -------------- +from urllib.parse import ( + quote, + quote_plus, + unquote, + unquote_plus, + urldefrag, + urlencode, + urljoin, + urlparse, + urlsplit, + urlunparse, +) +from urllib.request import ( + getproxies, + getproxies_environment, + parse_http_list, + proxy_bypass, + proxy_bypass_environment, +) - builtin_str = str - bytes = str - str = unicode - basestring = basestring - numeric_types = (int, long, float) - integer_types = (int, long) - -elif is_py3: - from urllib.parse import urlparse, urlunparse, urljoin, urlsplit, urlencode, quote, unquote, quote_plus, unquote_plus, urldefrag - from urllib.request import parse_http_list, getproxies, proxy_bypass, proxy_bypass_environment, getproxies_environment - from http import cookiejar as cookielib - from http.cookies import Morsel - from io import StringIO - # Keep OrderedDict for backwards compatibility. - from collections import OrderedDict - from collections.abc import Callable, Mapping, MutableMapping - - builtin_str = str - str = str - bytes = bytes - basestring = (str, bytes) - numeric_types = (int, float) - integer_types = (int,) +builtin_str = str +str = str +bytes = bytes +basestring = (str, bytes) +numeric_types = (int, float) +integer_types = (int,) diff --git a/libs/common/requests/cookies.py b/libs/common/requests/cookies.py index 56fccd9c..bf54ab23 100644 --- a/libs/common/requests/cookies.py +++ b/libs/common/requests/cookies.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """ requests.cookies ~~~~~~~~~~~~~~~~ @@ -9,12 +7,12 @@ Compatibility code to be able to use `cookielib.CookieJar` with requests. requests.utils imports from here, so be careful with imports. """ +import calendar import copy import time -import calendar from ._internal_utils import to_native_string -from .compat import cookielib, urlparse, urlunparse, Morsel, MutableMapping +from .compat import Morsel, MutableMapping, cookielib, urlparse, urlunparse try: import threading @@ -22,7 +20,7 @@ except ImportError: import dummy_threading as threading -class MockRequest(object): +class MockRequest: """Wraps a `requests.Request` to mimic a `urllib2.Request`. The code in `cookielib.CookieJar` expects this interface in order to correctly @@ -51,16 +49,22 @@ class MockRequest(object): def get_full_url(self): # Only return the response's URL if the user hadn't set the Host # header - if not self._r.headers.get('Host'): + if not self._r.headers.get("Host"): return self._r.url # If they did set it, retrieve it and reconstruct the expected domain - host = to_native_string(self._r.headers['Host'], encoding='utf-8') + host = to_native_string(self._r.headers["Host"], encoding="utf-8") parsed = urlparse(self._r.url) # Reconstruct the URL as we expect it - return urlunparse([ - parsed.scheme, host, parsed.path, parsed.params, parsed.query, - parsed.fragment - ]) + return urlunparse( + [ + parsed.scheme, + host, + parsed.path, + parsed.params, + parsed.query, + parsed.fragment, + ] + ) def is_unverifiable(self): return True @@ -73,7 +77,9 @@ class MockRequest(object): def add_header(self, key, val): """cookielib has no legitimate use for this method; add it back if you find one.""" - raise NotImplementedError("Cookie headers should be added with add_unredirected_header()") + raise NotImplementedError( + "Cookie headers should be added with add_unredirected_header()" + ) def add_unredirected_header(self, name, value): self._new_headers[name] = value @@ -94,7 +100,7 @@ class MockRequest(object): return self.get_host() -class MockResponse(object): +class MockResponse: """Wraps a `httplib.HTTPMessage` to mimic a `urllib.addinfourl`. ...what? Basically, expose the parsed HTTP headers from the server response @@ -122,8 +128,7 @@ def extract_cookies_to_jar(jar, request, response): :param request: our own requests.Request object :param response: urllib3.HTTPResponse object """ - if not (hasattr(response, '_original_response') and - response._original_response): + if not (hasattr(response, "_original_response") and response._original_response): return # the _original_response field is the wrapped httplib.HTTPResponse object, req = MockRequest(request) @@ -140,7 +145,7 @@ def get_cookie_header(jar, request): """ r = MockRequest(request) jar.add_cookie_header(r) - return r.get_new_headers().get('Cookie') + return r.get_new_headers().get("Cookie") def remove_cookie_by_name(cookiejar, name, domain=None, path=None): @@ -205,7 +210,9 @@ class RequestsCookieJar(cookielib.CookieJar, MutableMapping): """ # support client code that unsets cookies by assignment of a None value: if value is None: - remove_cookie_by_name(self, name, domain=kwargs.get('domain'), path=kwargs.get('path')) + remove_cookie_by_name( + self, name, domain=kwargs.get("domain"), path=kwargs.get("path") + ) return if isinstance(value, Morsel): @@ -305,16 +312,15 @@ class RequestsCookieJar(cookielib.CookieJar, MutableMapping): """ dictionary = {} for cookie in iter(self): - if ( - (domain is None or cookie.domain == domain) and - (path is None or cookie.path == path) + if (domain is None or cookie.domain == domain) and ( + path is None or cookie.path == path ): dictionary[cookie.name] = cookie.value return dictionary def __contains__(self, name): try: - return super(RequestsCookieJar, self).__contains__(name) + return super().__contains__(name) except CookieConflictError: return True @@ -341,9 +347,13 @@ class RequestsCookieJar(cookielib.CookieJar, MutableMapping): remove_cookie_by_name(self, name) def set_cookie(self, cookie, *args, **kwargs): - if hasattr(cookie.value, 'startswith') and cookie.value.startswith('"') and cookie.value.endswith('"'): - cookie.value = cookie.value.replace('\\"', '') - return super(RequestsCookieJar, self).set_cookie(cookie, *args, **kwargs) + if ( + hasattr(cookie.value, "startswith") + and cookie.value.startswith('"') + and cookie.value.endswith('"') + ): + cookie.value = cookie.value.replace('\\"', "") + return super().set_cookie(cookie, *args, **kwargs) def update(self, other): """Updates this jar with cookies from another CookieJar or dict-like""" @@ -351,7 +361,7 @@ class RequestsCookieJar(cookielib.CookieJar, MutableMapping): for cookie in other: self.set_cookie(copy.copy(cookie)) else: - super(RequestsCookieJar, self).update(other) + super().update(other) def _find(self, name, domain=None, path=None): """Requests uses this method internally to get cookie values. @@ -371,7 +381,7 @@ class RequestsCookieJar(cookielib.CookieJar, MutableMapping): if path is None or cookie.path == path: return cookie.value - raise KeyError('name=%r, domain=%r, path=%r' % (name, domain, path)) + raise KeyError(f"name={name!r}, domain={domain!r}, path={path!r}") def _find_no_duplicates(self, name, domain=None, path=None): """Both ``__get_item__`` and ``get`` call this function: it's never @@ -390,25 +400,29 @@ class RequestsCookieJar(cookielib.CookieJar, MutableMapping): if cookie.name == name: if domain is None or cookie.domain == domain: if path is None or cookie.path == path: - if toReturn is not None: # if there are multiple cookies that meet passed in criteria - raise CookieConflictError('There are multiple cookies with name, %r' % (name)) - toReturn = cookie.value # we will eventually return this as long as no cookie conflict + if toReturn is not None: + # if there are multiple cookies that meet passed in criteria + raise CookieConflictError( + f"There are multiple cookies with name, {name!r}" + ) + # we will eventually return this as long as no cookie conflict + toReturn = cookie.value if toReturn: return toReturn - raise KeyError('name=%r, domain=%r, path=%r' % (name, domain, path)) + raise KeyError(f"name={name!r}, domain={domain!r}, path={path!r}") def __getstate__(self): """Unlike a normal CookieJar, this class is pickleable.""" state = self.__dict__.copy() # remove the unpickleable RLock object - state.pop('_cookies_lock') + state.pop("_cookies_lock") return state def __setstate__(self, state): """Unlike a normal CookieJar, this class is pickleable.""" self.__dict__.update(state) - if '_cookies_lock' not in self.__dict__: + if "_cookies_lock" not in self.__dict__: self._cookies_lock = threading.RLock() def copy(self): @@ -427,7 +441,7 @@ def _copy_cookie_jar(jar): if jar is None: return None - if hasattr(jar, 'copy'): + if hasattr(jar, "copy"): # We're dealing with an instance of RequestsCookieJar return jar.copy() # We're dealing with a generic CookieJar instance @@ -445,31 +459,32 @@ def create_cookie(name, value, **kwargs): and sent on every request (this is sometimes called a "supercookie"). """ result = { - 'version': 0, - 'name': name, - 'value': value, - 'port': None, - 'domain': '', - 'path': '/', - 'secure': False, - 'expires': None, - 'discard': True, - 'comment': None, - 'comment_url': None, - 'rest': {'HttpOnly': None}, - 'rfc2109': False, + "version": 0, + "name": name, + "value": value, + "port": None, + "domain": "", + "path": "/", + "secure": False, + "expires": None, + "discard": True, + "comment": None, + "comment_url": None, + "rest": {"HttpOnly": None}, + "rfc2109": False, } badargs = set(kwargs) - set(result) if badargs: - err = 'create_cookie() got unexpected keyword arguments: %s' - raise TypeError(err % list(badargs)) + raise TypeError( + f"create_cookie() got unexpected keyword arguments: {list(badargs)}" + ) result.update(kwargs) - result['port_specified'] = bool(result['port']) - result['domain_specified'] = bool(result['domain']) - result['domain_initial_dot'] = result['domain'].startswith('.') - result['path_specified'] = bool(result['path']) + result["port_specified"] = bool(result["port"]) + result["domain_specified"] = bool(result["domain"]) + result["domain_initial_dot"] = result["domain"].startswith(".") + result["path_specified"] = bool(result["path"]) return cookielib.Cookie(**result) @@ -478,30 +493,28 @@ def morsel_to_cookie(morsel): """Convert a Morsel object into a Cookie containing the one k/v pair.""" expires = None - if morsel['max-age']: + if morsel["max-age"]: try: - expires = int(time.time() + int(morsel['max-age'])) + expires = int(time.time() + int(morsel["max-age"])) except ValueError: - raise TypeError('max-age: %s must be integer' % morsel['max-age']) - elif morsel['expires']: - time_template = '%a, %d-%b-%Y %H:%M:%S GMT' - expires = calendar.timegm( - time.strptime(morsel['expires'], time_template) - ) + raise TypeError(f"max-age: {morsel['max-age']} must be integer") + elif morsel["expires"]: + time_template = "%a, %d-%b-%Y %H:%M:%S GMT" + expires = calendar.timegm(time.strptime(morsel["expires"], time_template)) return create_cookie( - comment=morsel['comment'], - comment_url=bool(morsel['comment']), + comment=morsel["comment"], + comment_url=bool(morsel["comment"]), discard=False, - domain=morsel['domain'], + domain=morsel["domain"], expires=expires, name=morsel.key, - path=morsel['path'], + path=morsel["path"], port=None, - rest={'HttpOnly': morsel['httponly']}, + rest={"HttpOnly": morsel["httponly"]}, rfc2109=False, - secure=bool(morsel['secure']), + secure=bool(morsel["secure"]), value=morsel.value, - version=morsel['version'] or 0, + version=morsel["version"] or 0, ) @@ -534,11 +547,10 @@ def merge_cookies(cookiejar, cookies): :rtype: CookieJar """ if not isinstance(cookiejar, cookielib.CookieJar): - raise ValueError('You can only merge into CookieJar') + raise ValueError("You can only merge into CookieJar") if isinstance(cookies, dict): - cookiejar = cookiejar_from_dict( - cookies, cookiejar=cookiejar, overwrite=False) + cookiejar = cookiejar_from_dict(cookies, cookiejar=cookiejar, overwrite=False) elif isinstance(cookies, cookielib.CookieJar): try: cookiejar.update(cookies) diff --git a/libs/common/requests/exceptions.py b/libs/common/requests/exceptions.py index 0e9c820c..e1cedf88 100644 --- a/libs/common/requests/exceptions.py +++ b/libs/common/requests/exceptions.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """ requests.exceptions ~~~~~~~~~~~~~~~~~~~ @@ -8,6 +6,8 @@ This module contains the set of Requests' exceptions. """ from urllib3.exceptions import HTTPError as BaseHTTPError +from .compat import JSONDecodeError as CompatJSONDecodeError + class RequestException(IOError): """There was an ambiguous exception that occurred while handling your @@ -16,13 +16,30 @@ class RequestException(IOError): def __init__(self, *args, **kwargs): """Initialize RequestException with `request` and `response` objects.""" - response = kwargs.pop('response', None) + response = kwargs.pop("response", None) self.response = response - self.request = kwargs.pop('request', None) - if (response is not None and not self.request and - hasattr(response, 'request')): + self.request = kwargs.pop("request", None) + if response is not None and not self.request and hasattr(response, "request"): self.request = self.response.request - super(RequestException, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) + + +class InvalidJSONError(RequestException): + """A JSON error occurred.""" + + +class JSONDecodeError(InvalidJSONError, CompatJSONDecodeError): + """Couldn't decode the text into json""" + + def __init__(self, *args, **kwargs): + """ + Construct the JSONDecodeError instance first with all + args. Then use it's args to construct the IOError so that + the json specific args aren't used as IOError specific args + and the error message from JSONDecodeError is preserved. + """ + CompatJSONDecodeError.__init__(self, *args) + InvalidJSONError.__init__(self, *self.args, **kwargs) class HTTPError(RequestException): @@ -70,11 +87,11 @@ class TooManyRedirects(RequestException): class MissingSchema(RequestException, ValueError): - """The URL schema (e.g. http or https) is missing.""" + """The URL scheme (e.g. http or https) is missing.""" class InvalidSchema(RequestException, ValueError): - """See defaults.py for valid schemas.""" + """The URL scheme provided is either invalid or unsupported.""" class InvalidURL(RequestException, ValueError): @@ -108,6 +125,7 @@ class RetryError(RequestException): class UnrewindableBodyError(RequestException): """Requests encountered an error when trying to rewind a body.""" + # Warnings diff --git a/libs/common/requests/help.py b/libs/common/requests/help.py index e53d35ef..8fbcd656 100644 --- a/libs/common/requests/help.py +++ b/libs/common/requests/help.py @@ -1,17 +1,25 @@ """Module containing bug report helper(s).""" -from __future__ import print_function import json import platform -import sys import ssl +import sys import idna import urllib3 -import chardet from . import __version__ as requests_version +try: + import charset_normalizer +except ImportError: + charset_normalizer = None + +try: + import chardet +except ImportError: + chardet = None + try: from urllib3.contrib import pyopenssl except ImportError: @@ -19,16 +27,16 @@ except ImportError: OpenSSL = None cryptography = None else: - import OpenSSL import cryptography + import OpenSSL def _implementation(): """Return a dict with the Python implementation and version. Provide both the name and the version of the Python implementation - currently running. For example, on CPython 2.7.5 it will return - {'name': 'CPython', 'version': '2.7.5'}. + currently running. For example, on CPython 3.10.3 it will return + {'name': 'CPython', 'version': '3.10.3'}. This function works best on CPython and PyPy: in particular, it probably doesn't work for Jython or IronPython. Future investigation should be done @@ -36,76 +44,83 @@ def _implementation(): """ implementation = platform.python_implementation() - if implementation == 'CPython': + if implementation == "CPython": implementation_version = platform.python_version() - elif implementation == 'PyPy': - implementation_version = '%s.%s.%s' % (sys.pypy_version_info.major, - sys.pypy_version_info.minor, - sys.pypy_version_info.micro) - if sys.pypy_version_info.releaselevel != 'final': - implementation_version = ''.join([ - implementation_version, sys.pypy_version_info.releaselevel - ]) - elif implementation == 'Jython': + elif implementation == "PyPy": + implementation_version = "{}.{}.{}".format( + sys.pypy_version_info.major, + sys.pypy_version_info.minor, + sys.pypy_version_info.micro, + ) + if sys.pypy_version_info.releaselevel != "final": + implementation_version = "".join( + [implementation_version, sys.pypy_version_info.releaselevel] + ) + elif implementation == "Jython": implementation_version = platform.python_version() # Complete Guess - elif implementation == 'IronPython': + elif implementation == "IronPython": implementation_version = platform.python_version() # Complete Guess else: - implementation_version = 'Unknown' + implementation_version = "Unknown" - return {'name': implementation, 'version': implementation_version} + return {"name": implementation, "version": implementation_version} def info(): """Generate information for a bug report.""" try: platform_info = { - 'system': platform.system(), - 'release': platform.release(), + "system": platform.system(), + "release": platform.release(), } - except IOError: + except OSError: platform_info = { - 'system': 'Unknown', - 'release': 'Unknown', + "system": "Unknown", + "release": "Unknown", } implementation_info = _implementation() - urllib3_info = {'version': urllib3.__version__} - chardet_info = {'version': chardet.__version__} + urllib3_info = {"version": urllib3.__version__} + charset_normalizer_info = {"version": None} + chardet_info = {"version": None} + if charset_normalizer: + charset_normalizer_info = {"version": charset_normalizer.__version__} + if chardet: + chardet_info = {"version": chardet.__version__} pyopenssl_info = { - 'version': None, - 'openssl_version': '', + "version": None, + "openssl_version": "", } if OpenSSL: pyopenssl_info = { - 'version': OpenSSL.__version__, - 'openssl_version': '%x' % OpenSSL.SSL.OPENSSL_VERSION_NUMBER, + "version": OpenSSL.__version__, + "openssl_version": f"{OpenSSL.SSL.OPENSSL_VERSION_NUMBER:x}", } cryptography_info = { - 'version': getattr(cryptography, '__version__', ''), + "version": getattr(cryptography, "__version__", ""), } idna_info = { - 'version': getattr(idna, '__version__', ''), + "version": getattr(idna, "__version__", ""), } system_ssl = ssl.OPENSSL_VERSION_NUMBER - system_ssl_info = { - 'version': '%x' % system_ssl if system_ssl is not None else '' - } + system_ssl_info = {"version": f"{system_ssl:x}" if system_ssl is not None else ""} return { - 'platform': platform_info, - 'implementation': implementation_info, - 'system_ssl': system_ssl_info, - 'using_pyopenssl': pyopenssl is not None, - 'pyOpenSSL': pyopenssl_info, - 'urllib3': urllib3_info, - 'chardet': chardet_info, - 'cryptography': cryptography_info, - 'idna': idna_info, - 'requests': { - 'version': requests_version, + "platform": platform_info, + "implementation": implementation_info, + "system_ssl": system_ssl_info, + "using_pyopenssl": pyopenssl is not None, + "using_charset_normalizer": chardet is None, + "pyOpenSSL": pyopenssl_info, + "urllib3": urllib3_info, + "chardet": chardet_info, + "charset_normalizer": charset_normalizer_info, + "cryptography": cryptography_info, + "idna": idna_info, + "requests": { + "version": requests_version, }, } @@ -115,5 +130,5 @@ def main(): print(json.dumps(info(), sort_keys=True, indent=2)) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/libs/common/requests/hooks.py b/libs/common/requests/hooks.py index 7a51f212..d181ba2e 100644 --- a/libs/common/requests/hooks.py +++ b/libs/common/requests/hooks.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """ requests.hooks ~~~~~~~~~~~~~~ @@ -11,12 +9,13 @@ Available hooks: ``response``: The response generated from a Request. """ -HOOKS = ['response'] +HOOKS = ["response"] def default_hooks(): return {event: [] for event in HOOKS} + # TODO: response is the only one @@ -25,7 +24,7 @@ def dispatch_hook(key, hooks, hook_data, **kwargs): hooks = hooks or {} hooks = hooks.get(key) if hooks: - if hasattr(hooks, '__call__'): + if hasattr(hooks, "__call__"): hooks = [hooks] for hook in hooks: _hook_data = hook(hook_data, **kwargs) diff --git a/libs/common/requests/models.py b/libs/common/requests/models.py index ec2edc20..3cd49f5b 100644 --- a/libs/common/requests/models.py +++ b/libs/common/requests/models.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """ requests.models ~~~~~~~~~~~~~~~ @@ -8,46 +6,72 @@ This module contains the primary objects that power Requests. """ import datetime -import sys # Import encoding now, to avoid implicit import later. # Implicit import within threads may cause LookupError when standard library is in a ZIP, # such as in Embedded Python. See https://github.com/psf/requests/issues/3578. -import encodings.idna +import encodings.idna # noqa: F401 +from io import UnsupportedOperation +from urllib3.exceptions import ( + DecodeError, + LocationParseError, + ProtocolError, + ReadTimeoutError, + SSLError, +) from urllib3.fields import RequestField from urllib3.filepost import encode_multipart_formdata from urllib3.util import parse_url -from urllib3.exceptions import ( - DecodeError, ReadTimeoutError, ProtocolError, LocationParseError) -from io import UnsupportedOperation -from .hooks import default_hooks -from .structures import CaseInsensitiveDict - -from .auth import HTTPBasicAuth -from .cookies import cookiejar_from_dict, get_cookie_header, _copy_cookie_jar -from .exceptions import ( - HTTPError, MissingSchema, InvalidURL, ChunkedEncodingError, - ContentDecodingError, ConnectionError, StreamConsumedError) from ._internal_utils import to_native_string, unicode_is_ascii -from .utils import ( - guess_filename, get_auth_from_url, requote_uri, - stream_decode_response_unicode, to_key_val_list, parse_header_links, - iter_slices, guess_json_utf, super_len, check_header_validity) +from .auth import HTTPBasicAuth from .compat import ( - Callable, Mapping, - cookielib, urlunparse, urlsplit, urlencode, str, bytes, - is_py2, chardet, builtin_str, basestring) + Callable, + JSONDecodeError, + Mapping, + basestring, + builtin_str, + chardet, + cookielib, +) from .compat import json as complexjson +from .compat import urlencode, urlsplit, urlunparse +from .cookies import _copy_cookie_jar, cookiejar_from_dict, get_cookie_header +from .exceptions import ( + ChunkedEncodingError, + ConnectionError, + ContentDecodingError, + HTTPError, + InvalidJSONError, + InvalidURL, +) +from .exceptions import JSONDecodeError as RequestsJSONDecodeError +from .exceptions import MissingSchema +from .exceptions import SSLError as RequestsSSLError +from .exceptions import StreamConsumedError +from .hooks import default_hooks from .status_codes import codes +from .structures import CaseInsensitiveDict +from .utils import ( + check_header_validity, + get_auth_from_url, + guess_filename, + guess_json_utf, + iter_slices, + parse_header_links, + requote_uri, + stream_decode_response_unicode, + super_len, + to_key_val_list, +) #: The set of HTTP status codes that indicate an automatically #: processable redirect. REDIRECT_STATI = ( - codes.moved, # 301 - codes.found, # 302 - codes.other, # 303 + codes.moved, # 301 + codes.found, # 302 + codes.other, # 303 codes.temporary_redirect, # 307 codes.permanent_redirect, # 308 ) @@ -57,7 +81,7 @@ CONTENT_CHUNK_SIZE = 10 * 1024 ITER_CHUNK_SIZE = 512 -class RequestEncodingMixin(object): +class RequestEncodingMixin: @property def path_url(self): """Build the path URL to use.""" @@ -68,16 +92,16 @@ class RequestEncodingMixin(object): path = p.path if not path: - path = '/' + path = "/" url.append(path) query = p.query if query: - url.append('?') + url.append("?") url.append(query) - return ''.join(url) + return "".join(url) @staticmethod def _encode_params(data): @@ -90,18 +114,21 @@ class RequestEncodingMixin(object): if isinstance(data, (str, bytes)): return data - elif hasattr(data, 'read'): + elif hasattr(data, "read"): return data - elif hasattr(data, '__iter__'): + elif hasattr(data, "__iter__"): result = [] for k, vs in to_key_val_list(data): - if isinstance(vs, basestring) or not hasattr(vs, '__iter__'): + if isinstance(vs, basestring) or not hasattr(vs, "__iter__"): vs = [vs] for v in vs: if v is not None: result.append( - (k.encode('utf-8') if isinstance(k, str) else k, - v.encode('utf-8') if isinstance(v, str) else v)) + ( + k.encode("utf-8") if isinstance(k, str) else k, + v.encode("utf-8") if isinstance(v, str) else v, + ) + ) return urlencode(result, doseq=True) else: return data @@ -116,7 +143,7 @@ class RequestEncodingMixin(object): The tuples may be 2-tuples (filename, fileobj), 3-tuples (filename, fileobj, contentype) or 4-tuples (filename, fileobj, contentype, custom_headers). """ - if (not files): + if not files: raise ValueError("Files must be provided.") elif isinstance(data, basestring): raise ValueError("Data must not be a string.") @@ -126,7 +153,7 @@ class RequestEncodingMixin(object): files = to_key_val_list(files or {}) for field, val in fields: - if isinstance(val, basestring) or not hasattr(val, '__iter__'): + if isinstance(val, basestring) or not hasattr(val, "__iter__"): val = [val] for v in val: if v is not None: @@ -135,8 +162,13 @@ class RequestEncodingMixin(object): v = str(v) new_fields.append( - (field.decode('utf-8') if isinstance(field, bytes) else field, - v.encode('utf-8') if isinstance(v, str) else v)) + ( + field.decode("utf-8") + if isinstance(field, bytes) + else field, + v.encode("utf-8") if isinstance(v, str) else v, + ) + ) for (k, v) in files: # support for explicit filename @@ -155,7 +187,7 @@ class RequestEncodingMixin(object): if isinstance(fp, (str, bytes, bytearray)): fdata = fp - elif hasattr(fp, 'read'): + elif hasattr(fp, "read"): fdata = fp.read() elif fp is None: continue @@ -171,16 +203,16 @@ class RequestEncodingMixin(object): return body, content_type -class RequestHooksMixin(object): +class RequestHooksMixin: def register_hook(self, event, hook): """Properly register a hook.""" if event not in self.hooks: - raise ValueError('Unsupported event specified, with event name "%s"' % (event)) + raise ValueError(f'Unsupported event specified, with event name "{event}"') if isinstance(hook, Callable): self.hooks[event].append(hook) - elif hasattr(hook, '__iter__'): + elif hasattr(hook, "__iter__"): self.hooks[event].extend(h for h in hook if isinstance(h, Callable)) def deregister_hook(self, event, hook): @@ -223,9 +255,19 @@ class Request(RequestHooksMixin): """ - def __init__(self, - method=None, url=None, headers=None, files=None, data=None, - params=None, auth=None, cookies=None, hooks=None, json=None): + def __init__( + self, + method=None, + url=None, + headers=None, + files=None, + data=None, + params=None, + auth=None, + cookies=None, + hooks=None, + json=None, + ): # Default empty dicts for dict params. data = [] if data is None else data @@ -249,7 +291,7 @@ class Request(RequestHooksMixin): self.cookies = cookies def __repr__(self): - return '' % (self.method) + return f"" def prepare(self): """Constructs a :class:`PreparedRequest ` for transmission and returns it.""" @@ -307,9 +349,19 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): #: integer denoting starting position of a readable file-like body. self._body_position = None - def prepare(self, - method=None, url=None, headers=None, files=None, data=None, - params=None, auth=None, cookies=None, hooks=None, json=None): + def prepare( + self, + method=None, + url=None, + headers=None, + files=None, + data=None, + params=None, + auth=None, + cookies=None, + hooks=None, + json=None, + ): """Prepares the entire request with the given parameters.""" self.prepare_method(method) @@ -326,7 +378,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): self.prepare_hooks(hooks) def __repr__(self): - return '' % (self.method) + return f"" def copy(self): p = PreparedRequest() @@ -350,7 +402,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): import idna try: - host = idna.encode(host, uts46=True).decode('utf-8') + host = idna.encode(host, uts46=True).decode("utf-8") except idna.IDNAError: raise UnicodeError return host @@ -363,9 +415,9 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): #: on python 3.x. #: https://github.com/psf/requests/pull/2238 if isinstance(url, bytes): - url = url.decode('utf8') + url = url.decode("utf8") else: - url = unicode(url) if is_py2 else str(url) + url = str(url) # Remove leading whitespaces from url url = url.lstrip() @@ -373,7 +425,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): # Don't do any URL preparation for non-HTTP schemes like `mailto`, # `data` etc to work around exceptions from `url_parse`, which # handles RFC 3986 only. - if ':' in url and not url.lower().startswith('http'): + if ":" in url and not url.lower().startswith("http"): self.url = url return @@ -384,13 +436,13 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): raise InvalidURL(*e.args) if not scheme: - error = ("Invalid URL {0!r}: No schema supplied. Perhaps you meant http://{0}?") - error = error.format(to_native_string(url, 'utf8')) - - raise MissingSchema(error) + raise MissingSchema( + f"Invalid URL {url!r}: No scheme supplied. " + f"Perhaps you meant http://{url}?" + ) if not host: - raise InvalidURL("Invalid URL %r: No host supplied" % url) + raise InvalidURL(f"Invalid URL {url!r}: No host supplied") # In general, we want to try IDNA encoding the hostname if the string contains # non-ASCII characters. This allows users to automatically get the correct IDNA @@ -400,33 +452,21 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): try: host = self._get_idna_encoded_host(host) except UnicodeError: - raise InvalidURL('URL has an invalid label.') - elif host.startswith(u'*'): - raise InvalidURL('URL has an invalid label.') + raise InvalidURL("URL has an invalid label.") + elif host.startswith(("*", ".")): + raise InvalidURL("URL has an invalid label.") # Carefully reconstruct the network location - netloc = auth or '' + netloc = auth or "" if netloc: - netloc += '@' + netloc += "@" netloc += host if port: - netloc += ':' + str(port) + netloc += f":{port}" # Bare domains aren't valid URLs. if not path: - path = '/' - - if is_py2: - if isinstance(scheme, str): - scheme = scheme.encode('utf-8') - if isinstance(netloc, str): - netloc = netloc.encode('utf-8') - if isinstance(path, str): - path = path.encode('utf-8') - if isinstance(query, str): - query = query.encode('utf-8') - if isinstance(fragment, str): - fragment = fragment.encode('utf-8') + path = "/" if isinstance(params, (str, bytes)): params = to_native_string(params) @@ -434,7 +474,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): enc_params = self._encode_params(params) if enc_params: if query: - query = '%s&%s' % (query, enc_params) + query = f"{query}&{enc_params}" else: query = enc_params @@ -465,15 +505,22 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): if not data and json is not None: # urllib3 requires a bytes-like body. Python 2's json.dumps # provides this natively, but Python 3 gives a Unicode string. - content_type = 'application/json' - body = complexjson.dumps(json) - if not isinstance(body, bytes): - body = body.encode('utf-8') + content_type = "application/json" - is_stream = all([ - hasattr(data, '__iter__'), - not isinstance(data, (basestring, list, tuple, Mapping)) - ]) + try: + body = complexjson.dumps(json, allow_nan=False) + except ValueError as ve: + raise InvalidJSONError(ve, request=self) + + if not isinstance(body, bytes): + body = body.encode("utf-8") + + is_stream = all( + [ + hasattr(data, "__iter__"), + not isinstance(data, (basestring, list, tuple, Mapping)), + ] + ) if is_stream: try: @@ -483,24 +530,26 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): body = data - if getattr(body, 'tell', None) is not None: + if getattr(body, "tell", None) is not None: # Record the current file position before reading. # This will allow us to rewind a file in the event # of a redirect. try: self._body_position = body.tell() - except (IOError, OSError): + except OSError: # This differentiates from None, allowing us to catch # a failed `tell()` later when trying to rewind the body self._body_position = object() if files: - raise NotImplementedError('Streamed bodies and files are mutually exclusive.') + raise NotImplementedError( + "Streamed bodies and files are mutually exclusive." + ) if length: - self.headers['Content-Length'] = builtin_str(length) + self.headers["Content-Length"] = builtin_str(length) else: - self.headers['Transfer-Encoding'] = 'chunked' + self.headers["Transfer-Encoding"] = "chunked" else: # Multi-part file uploads. if files: @@ -508,16 +557,16 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): else: if data: body = self._encode_params(data) - if isinstance(data, basestring) or hasattr(data, 'read'): + if isinstance(data, basestring) or hasattr(data, "read"): content_type = None else: - content_type = 'application/x-www-form-urlencoded' + content_type = "application/x-www-form-urlencoded" self.prepare_content_length(body) # Add content-type if it wasn't explicitly provided. - if content_type and ('content-type' not in self.headers): - self.headers['Content-Type'] = content_type + if content_type and ("content-type" not in self.headers): + self.headers["Content-Type"] = content_type self.body = body @@ -528,13 +577,16 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): if length: # If length exists, set it. Otherwise, we fallback # to Transfer-Encoding: chunked. - self.headers['Content-Length'] = builtin_str(length) - elif self.method not in ('GET', 'HEAD') and self.headers.get('Content-Length') is None: + self.headers["Content-Length"] = builtin_str(length) + elif ( + self.method not in ("GET", "HEAD") + and self.headers.get("Content-Length") is None + ): # Set Content-Length to 0 for methods that can have a body # but don't provide one. (i.e. not GET or HEAD) - self.headers['Content-Length'] = '0' + self.headers["Content-Length"] = "0" - def prepare_auth(self, auth, url=''): + def prepare_auth(self, auth, url=""): """Prepares the given HTTP auth data.""" # If no Auth is explicitly provided, extract it from the URL first. @@ -574,7 +626,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): cookie_header = get_cookie_header(self._cookies, self) if cookie_header is not None: - self.headers['Cookie'] = cookie_header + self.headers["Cookie"] = cookie_header def prepare_hooks(self, hooks): """Prepares the given hooks.""" @@ -586,14 +638,22 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): self.register_hook(event, hooks[event]) -class Response(object): +class Response: """The :class:`Response ` object, which contains a server's response to an HTTP request. """ __attrs__ = [ - '_content', 'status_code', 'headers', 'url', 'history', - 'encoding', 'reason', 'cookies', 'elapsed', 'request' + "_content", + "status_code", + "headers", + "url", + "history", + "encoding", + "reason", + "cookies", + "elapsed", + "request", ] def __init__(self): @@ -662,11 +722,11 @@ class Response(object): setattr(self, name, value) # pickled objects do not have .raw - setattr(self, '_content_consumed', True) - setattr(self, 'raw', None) + setattr(self, "_content_consumed", True) + setattr(self, "raw", None) def __repr__(self): - return '' % (self.status_code) + return f"" def __bool__(self): """Returns True if :attr:`status_code` is less than 400. @@ -712,12 +772,15 @@ class Response(object): """True if this Response is a well-formed HTTP redirect that could have been processed automatically (by :meth:`Session.resolve_redirects`). """ - return ('location' in self.headers and self.status_code in REDIRECT_STATI) + return "location" in self.headers and self.status_code in REDIRECT_STATI @property def is_permanent_redirect(self): """True if this Response one of the permanent versions of redirect.""" - return ('location' in self.headers and self.status_code in (codes.moved_permanently, codes.permanent_redirect)) + return "location" in self.headers and self.status_code in ( + codes.moved_permanently, + codes.permanent_redirect, + ) @property def next(self): @@ -726,8 +789,8 @@ class Response(object): @property def apparent_encoding(self): - """The apparent encoding, provided by the chardet library.""" - return chardet.detect(self.content)['encoding'] + """The apparent encoding, provided by the charset_normalizer or chardet libraries.""" + return chardet.detect(self.content)["encoding"] def iter_content(self, chunk_size=1, decode_unicode=False): """Iterates over the response data. When stream=True is set on the @@ -748,16 +811,17 @@ class Response(object): def generate(): # Special case for urllib3. - if hasattr(self.raw, 'stream'): + if hasattr(self.raw, "stream"): try: - for chunk in self.raw.stream(chunk_size, decode_content=True): - yield chunk + yield from self.raw.stream(chunk_size, decode_content=True) except ProtocolError as e: raise ChunkedEncodingError(e) except DecodeError as e: raise ContentDecodingError(e) except ReadTimeoutError as e: raise ConnectionError(e) + except SSLError as e: + raise RequestsSSLError(e) else: # Standard file-like object. while True: @@ -771,7 +835,9 @@ class Response(object): if self._content_consumed and isinstance(self._content, bool): raise StreamConsumedError() elif chunk_size is not None and not isinstance(chunk_size, int): - raise TypeError("chunk_size must be an int, it is instead a %s." % type(chunk_size)) + raise TypeError( + f"chunk_size must be an int, it is instead a {type(chunk_size)}." + ) # simulate reading small chunks of the content reused_chunks = iter_slices(self._content, chunk_size) @@ -784,7 +850,9 @@ class Response(object): return chunks - def iter_lines(self, chunk_size=ITER_CHUNK_SIZE, decode_unicode=False, delimiter=None): + def iter_lines( + self, chunk_size=ITER_CHUNK_SIZE, decode_unicode=False, delimiter=None + ): """Iterates over the response data, one line at a time. When stream=True is set on the request, this avoids reading the content at once into memory for large responses. @@ -794,7 +862,9 @@ class Response(object): pending = None - for chunk in self.iter_content(chunk_size=chunk_size, decode_unicode=decode_unicode): + for chunk in self.iter_content( + chunk_size=chunk_size, decode_unicode=decode_unicode + ): if pending is not None: chunk = pending + chunk @@ -809,8 +879,7 @@ class Response(object): else: pending = None - for line in lines: - yield line + yield from lines if pending is not None: yield pending @@ -822,13 +891,12 @@ class Response(object): if self._content is False: # Read the contents. if self._content_consumed: - raise RuntimeError( - 'The content for this response was already consumed') + raise RuntimeError("The content for this response was already consumed") if self.status_code == 0 or self.raw is None: self._content = None else: - self._content = b''.join(self.iter_content(CONTENT_CHUNK_SIZE)) or b'' + self._content = b"".join(self.iter_content(CONTENT_CHUNK_SIZE)) or b"" self._content_consumed = True # don't need to release the connection; that's been handled by urllib3 @@ -840,7 +908,7 @@ class Response(object): """Content of the response, in unicode. If Response.encoding is None, encoding will be guessed using - ``chardet``. + ``charset_normalizer`` or ``chardet``. The encoding of the response content is determined based solely on HTTP headers, following RFC 2616 to the letter. If you can take advantage of @@ -853,7 +921,7 @@ class Response(object): encoding = self.encoding if not self.content: - return str('') + return "" # Fallback to auto-detected encoding. if self.encoding is None: @@ -861,7 +929,7 @@ class Response(object): # Decode unicode from given encoding. try: - content = str(self.content, encoding, errors='replace') + content = str(self.content, encoding, errors="replace") except (LookupError, TypeError): # A LookupError is raised if the encoding was not found which could # indicate a misspelling or similar mistake. @@ -869,7 +937,7 @@ class Response(object): # A TypeError can be raised if encoding is None # # So we try blindly encoding. - content = str(self.content, errors='replace') + content = str(self.content, errors="replace") return content @@ -877,67 +945,77 @@ class Response(object): r"""Returns the json-encoded content of a response, if any. :param \*\*kwargs: Optional arguments that ``json.loads`` takes. - :raises ValueError: If the response body does not contain valid json. + :raises requests.exceptions.JSONDecodeError: If the response body does not + contain valid json. """ if not self.encoding and self.content and len(self.content) > 3: # No encoding set. JSON RFC 4627 section 3 states we should expect # UTF-8, -16 or -32. Detect which one to use; If the detection or - # decoding fails, fall back to `self.text` (using chardet to make + # decoding fails, fall back to `self.text` (using charset_normalizer to make # a best guess). encoding = guess_json_utf(self.content) if encoding is not None: try: - return complexjson.loads( - self.content.decode(encoding), **kwargs - ) + return complexjson.loads(self.content.decode(encoding), **kwargs) except UnicodeDecodeError: # Wrong UTF codec detected; usually because it's not UTF-8 # but some other 8-bit codec. This is an RFC violation, # and the server didn't bother to tell us what codec *was* # used. pass - return complexjson.loads(self.text, **kwargs) + except JSONDecodeError as e: + raise RequestsJSONDecodeError(e.msg, e.doc, e.pos) + + try: + return complexjson.loads(self.text, **kwargs) + except JSONDecodeError as e: + # Catch JSON-related errors and raise as requests.JSONDecodeError + # This aliases json.JSONDecodeError and simplejson.JSONDecodeError + raise RequestsJSONDecodeError(e.msg, e.doc, e.pos) @property def links(self): """Returns the parsed header links of the response, if any.""" - header = self.headers.get('link') + header = self.headers.get("link") - # l = MultiDict() - l = {} + resolved_links = {} if header: links = parse_header_links(header) for link in links: - key = link.get('rel') or link.get('url') - l[key] = link + key = link.get("rel") or link.get("url") + resolved_links[key] = link - return l + return resolved_links def raise_for_status(self): """Raises :class:`HTTPError`, if one occurred.""" - http_error_msg = '' + http_error_msg = "" if isinstance(self.reason, bytes): # We attempt to decode utf-8 first because some servers # choose to localize their reason strings. If the string # isn't utf-8, we fall back to iso-8859-1 for all other # encodings. (See PR #3538) try: - reason = self.reason.decode('utf-8') + reason = self.reason.decode("utf-8") except UnicodeDecodeError: - reason = self.reason.decode('iso-8859-1') + reason = self.reason.decode("iso-8859-1") else: reason = self.reason if 400 <= self.status_code < 500: - http_error_msg = u'%s Client Error: %s for url: %s' % (self.status_code, reason, self.url) + http_error_msg = ( + f"{self.status_code} Client Error: {reason} for url: {self.url}" + ) elif 500 <= self.status_code < 600: - http_error_msg = u'%s Server Error: %s for url: %s' % (self.status_code, reason, self.url) + http_error_msg = ( + f"{self.status_code} Server Error: {reason} for url: {self.url}" + ) if http_error_msg: raise HTTPError(http_error_msg, response=self) @@ -951,6 +1029,6 @@ class Response(object): if not self._content_consumed: self.raw.close() - release_conn = getattr(self.raw, 'release_conn', None) + release_conn = getattr(self.raw, "release_conn", None) if release_conn is not None: release_conn() diff --git a/libs/common/requests/packages.py b/libs/common/requests/packages.py index 7232fe0f..77c45c9e 100644 --- a/libs/common/requests/packages.py +++ b/libs/common/requests/packages.py @@ -1,14 +1,28 @@ import sys +try: + import chardet +except ImportError: + import warnings + + import charset_normalizer as chardet + + warnings.filterwarnings("ignore", "Trying to detect", module="charset_normalizer") + # This code exists for backwards compatibility reasons. # I don't like it either. Just look the other way. :) -for package in ('urllib3', 'idna', 'chardet'): +for package in ("urllib3", "idna"): locals()[package] = __import__(package) # This traversal is apparently necessary such that the identities are # preserved (requests.packages.urllib3.* is urllib3.*) for mod in list(sys.modules): - if mod == package or mod.startswith(package + '.'): - sys.modules['requests.packages.' + mod] = sys.modules[mod] + if mod == package or mod.startswith(f"{package}."): + sys.modules[f"requests.packages.{mod}"] = sys.modules[mod] +target = chardet.__name__ +for mod in list(sys.modules): + if mod == target or mod.startswith(f"{target}."): + target = target.replace(target, "chardet") + sys.modules[f"requests.packages.{target}"] = sys.modules[mod] # Kinda cool, though, right? diff --git a/libs/common/requests/sessions.py b/libs/common/requests/sessions.py index 45ab8a5d..6cb3b4da 100644 --- a/libs/common/requests/sessions.py +++ b/libs/common/requests/sessions.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """ requests.sessions ~~~~~~~~~~~~~~~~~ @@ -10,39 +8,52 @@ requests (cookies, auth, proxies). import os import sys import time -from datetime import timedelta from collections import OrderedDict +from datetime import timedelta -from .auth import _basic_auth_str -from .compat import cookielib, is_py3, urljoin, urlparse, Mapping -from .cookies import ( - cookiejar_from_dict, extract_cookies_to_jar, RequestsCookieJar, merge_cookies) -from .models import Request, PreparedRequest, DEFAULT_REDIRECT_LIMIT -from .hooks import default_hooks, dispatch_hook from ._internal_utils import to_native_string -from .utils import to_key_val_list, default_headers, DEFAULT_PORTS -from .exceptions import ( - TooManyRedirects, InvalidSchema, ChunkedEncodingError, ContentDecodingError) - -from .structures import CaseInsensitiveDict from .adapters import HTTPAdapter - -from .utils import ( - requote_uri, get_environ_proxies, get_netrc_auth, should_bypass_proxies, - get_auth_from_url, rewind_body +from .auth import _basic_auth_str +from .compat import Mapping, cookielib, urljoin, urlparse +from .cookies import ( + RequestsCookieJar, + cookiejar_from_dict, + extract_cookies_to_jar, + merge_cookies, ) - -from .status_codes import codes +from .exceptions import ( + ChunkedEncodingError, + ContentDecodingError, + InvalidSchema, + TooManyRedirects, +) +from .hooks import default_hooks, dispatch_hook # formerly defined here, reexposed here for backward compatibility -from .models import REDIRECT_STATI +from .models import ( # noqa: F401 + DEFAULT_REDIRECT_LIMIT, + REDIRECT_STATI, + PreparedRequest, + Request, +) +from .status_codes import codes +from .structures import CaseInsensitiveDict +from .utils import ( # noqa: F401 + DEFAULT_PORTS, + default_headers, + get_auth_from_url, + get_environ_proxies, + get_netrc_auth, + requote_uri, + resolve_proxies, + rewind_body, + should_bypass_proxies, + to_key_val_list, +) # Preferred clock, based on which one is more accurate on a given system. -if sys.platform == 'win32': - try: # Python 3.4+ - preferred_clock = time.perf_counter - except AttributeError: # Earlier than Python 3. - preferred_clock = time.clock +if sys.platform == "win32": + preferred_clock = time.perf_counter else: preferred_clock = time.time @@ -61,8 +72,7 @@ def merge_setting(request_setting, session_setting, dict_class=OrderedDict): # Bypass if not a dictionary (e.g. verify) if not ( - isinstance(session_setting, Mapping) and - isinstance(request_setting, Mapping) + isinstance(session_setting, Mapping) and isinstance(request_setting, Mapping) ): return request_setting @@ -84,17 +94,16 @@ def merge_hooks(request_hooks, session_hooks, dict_class=OrderedDict): This is necessary because when request_hooks == {'response': []}, the merge breaks Session hooks entirely. """ - if session_hooks is None or session_hooks.get('response') == []: + if session_hooks is None or session_hooks.get("response") == []: return request_hooks - if request_hooks is None or request_hooks.get('response') == []: + if request_hooks is None or request_hooks.get("response") == []: return session_hooks return merge_setting(request_hooks, session_hooks, dict_class) -class SessionRedirectMixin(object): - +class SessionRedirectMixin: def get_redirect_target(self, resp): """Receives a Response. Returns a redirect URI or ``None``""" # Due to the nature of how requests processes redirects this method will @@ -104,16 +113,15 @@ class SessionRedirectMixin(object): # to cache the redirect location onto the response object as a private # attribute. if resp.is_redirect: - location = resp.headers['location'] + location = resp.headers["location"] # Currently the underlying http module on py3 decode headers # in latin1, but empirical evidence suggests that latin1 is very # rarely used with non-ASCII characters in HTTP headers. # It is more likely to get UTF8 header rather than latin1. # This causes incorrect handling of UTF8 encoded location headers. # To solve this, we re-encode the location in latin1. - if is_py3: - location = location.encode('latin1') - return to_native_string(location, 'utf8') + location = location.encode("latin1") + return to_native_string(location, "utf8") return None def should_strip_auth(self, old_url, new_url): @@ -126,23 +134,40 @@ class SessionRedirectMixin(object): # ports. This isn't specified by RFC 7235, but is kept to avoid # breaking backwards compatibility with older versions of requests # that allowed any redirects on the same host. - if (old_parsed.scheme == 'http' and old_parsed.port in (80, None) - and new_parsed.scheme == 'https' and new_parsed.port in (443, None)): + if ( + old_parsed.scheme == "http" + and old_parsed.port in (80, None) + and new_parsed.scheme == "https" + and new_parsed.port in (443, None) + ): return False # Handle default port usage corresponding to scheme. changed_port = old_parsed.port != new_parsed.port changed_scheme = old_parsed.scheme != new_parsed.scheme default_port = (DEFAULT_PORTS.get(old_parsed.scheme, None), None) - if (not changed_scheme and old_parsed.port in default_port - and new_parsed.port in default_port): + if ( + not changed_scheme + and old_parsed.port in default_port + and new_parsed.port in default_port + ): return False # Standard case: root URI must match return changed_port or changed_scheme - def resolve_redirects(self, resp, req, stream=False, timeout=None, - verify=True, cert=None, proxies=None, yield_requests=False, **adapter_kwargs): + def resolve_redirects( + self, + resp, + req, + stream=False, + timeout=None, + verify=True, + cert=None, + proxies=None, + yield_requests=False, + **adapter_kwargs, + ): """Receives a Response. Returns a generator of Responses or Requests.""" hist = [] # keep track of history @@ -163,19 +188,21 @@ class SessionRedirectMixin(object): resp.raw.read(decode_content=False) if len(resp.history) >= self.max_redirects: - raise TooManyRedirects('Exceeded {} redirects.'.format(self.max_redirects), response=resp) + raise TooManyRedirects( + f"Exceeded {self.max_redirects} redirects.", response=resp + ) # Release the connection back into the pool. resp.close() # Handle redirection without scheme (see: RFC 1808 Section 4) - if url.startswith('//'): + if url.startswith("//"): parsed_rurl = urlparse(resp.url) - url = ':'.join([to_native_string(parsed_rurl.scheme), url]) + url = ":".join([to_native_string(parsed_rurl.scheme), url]) # Normalize url case and attach previous fragment if needed (RFC 7231 7.1.2) parsed = urlparse(url) - if parsed.fragment == '' and previous_fragment: + if parsed.fragment == "" and previous_fragment: parsed = parsed._replace(fragment=previous_fragment) elif parsed.fragment: previous_fragment = parsed.fragment @@ -194,15 +221,18 @@ class SessionRedirectMixin(object): self.rebuild_method(prepared_request, resp) # https://github.com/psf/requests/issues/1084 - if resp.status_code not in (codes.temporary_redirect, codes.permanent_redirect): + if resp.status_code not in ( + codes.temporary_redirect, + codes.permanent_redirect, + ): # https://github.com/psf/requests/issues/3490 - purged_headers = ('Content-Length', 'Content-Type', 'Transfer-Encoding') + purged_headers = ("Content-Length", "Content-Type", "Transfer-Encoding") for header in purged_headers: prepared_request.headers.pop(header, None) prepared_request.body = None headers = prepared_request.headers - headers.pop('Cookie', None) + headers.pop("Cookie", None) # Extract any cookies sent on the response to the cookiejar # in the new request. Because we've mutated our copied prepared @@ -218,9 +248,8 @@ class SessionRedirectMixin(object): # A failed tell() sets `_body_position` to `object()`. This non-None # value ensures `rewindable` will be True, allowing us to raise an # UnrewindableBodyError, instead of hanging the connection. - rewindable = ( - prepared_request._body_position is not None and - ('Content-Length' in headers or 'Transfer-Encoding' in headers) + rewindable = prepared_request._body_position is not None and ( + "Content-Length" in headers or "Transfer-Encoding" in headers ) # Attempt to rewind consumed file-like object. @@ -242,7 +271,7 @@ class SessionRedirectMixin(object): cert=cert, proxies=proxies, allow_redirects=False, - **adapter_kwargs + **adapter_kwargs, ) extract_cookies_to_jar(self.cookies, prepared_request, resp.raw) @@ -259,17 +288,18 @@ class SessionRedirectMixin(object): headers = prepared_request.headers url = prepared_request.url - if 'Authorization' in headers and self.should_strip_auth(response.request.url, url): + if "Authorization" in headers and self.should_strip_auth( + response.request.url, url + ): # If we get redirected to a new host, we should strip out any # authentication headers. - del headers['Authorization'] + del headers["Authorization"] # .netrc might have more auth for us on our new host. new_auth = get_netrc_auth(url) if self.trust_env else None if new_auth is not None: prepared_request.prepare_auth(new_auth) - def rebuild_proxies(self, prepared_request, proxies): """This method re-evaluates the proxy configuration by considering the environment variables. If we are redirected to a URL covered by @@ -282,24 +312,12 @@ class SessionRedirectMixin(object): :rtype: dict """ - proxies = proxies if proxies is not None else {} headers = prepared_request.headers - url = prepared_request.url - scheme = urlparse(url).scheme - new_proxies = proxies.copy() - no_proxy = proxies.get('no_proxy') + scheme = urlparse(prepared_request.url).scheme + new_proxies = resolve_proxies(prepared_request, proxies, self.trust_env) - bypass_proxy = should_bypass_proxies(url, no_proxy=no_proxy) - if self.trust_env and not bypass_proxy: - environ_proxies = get_environ_proxies(url, no_proxy=no_proxy) - - proxy = environ_proxies.get(scheme, environ_proxies.get('all')) - - if proxy: - new_proxies.setdefault(scheme, proxy) - - if 'Proxy-Authorization' in headers: - del headers['Proxy-Authorization'] + if "Proxy-Authorization" in headers: + del headers["Proxy-Authorization"] try: username, password = get_auth_from_url(new_proxies[scheme]) @@ -307,7 +325,7 @@ class SessionRedirectMixin(object): username, password = None, None if username and password: - headers['Proxy-Authorization'] = _basic_auth_str(username, password) + headers["Proxy-Authorization"] = _basic_auth_str(username, password) return new_proxies @@ -318,18 +336,18 @@ class SessionRedirectMixin(object): method = prepared_request.method # https://tools.ietf.org/html/rfc7231#section-6.4.4 - if response.status_code == codes.see_other and method != 'HEAD': - method = 'GET' + if response.status_code == codes.see_other and method != "HEAD": + method = "GET" # Do what the browsers do, despite standards... # First, turn 302s into GETs. - if response.status_code == codes.found and method != 'HEAD': - method = 'GET' + if response.status_code == codes.found and method != "HEAD": + method = "GET" # Second, if a POST is responded to with a 301, turn it into a GET. # This bizarre behaviour is explained in Issue 1704. - if response.status_code == codes.moved and method == 'POST': - method = 'GET' + if response.status_code == codes.moved and method == "POST": + method = "GET" prepared_request.method = method @@ -354,9 +372,18 @@ class Session(SessionRedirectMixin): """ __attrs__ = [ - 'headers', 'cookies', 'auth', 'proxies', 'hooks', 'params', 'verify', - 'cert', 'adapters', 'stream', 'trust_env', - 'max_redirects', + "headers", + "cookies", + "auth", + "proxies", + "hooks", + "params", + "verify", + "cert", + "adapters", + "stream", + "trust_env", + "max_redirects", ] def __init__(self): @@ -418,8 +445,8 @@ class Session(SessionRedirectMixin): # Default connection adapters. self.adapters = OrderedDict() - self.mount('https://', HTTPAdapter()) - self.mount('http://', HTTPAdapter()) + self.mount("https://", HTTPAdapter()) + self.mount("http://", HTTPAdapter()) def __enter__(self): return self @@ -445,7 +472,8 @@ class Session(SessionRedirectMixin): # Merge with session cookies merged_cookies = merge_cookies( - merge_cookies(RequestsCookieJar(), self.cookies), cookies) + merge_cookies(RequestsCookieJar(), self.cookies), cookies + ) # Set environment's basic authentication if not explicitly set. auth = request.auth @@ -459,7 +487,9 @@ class Session(SessionRedirectMixin): files=request.files, data=request.data, json=request.json, - headers=merge_setting(request.headers, self.headers, dict_class=CaseInsensitiveDict), + headers=merge_setting( + request.headers, self.headers, dict_class=CaseInsensitiveDict + ), params=merge_setting(request.params, self.params), auth=merge_setting(auth, self.auth), cookies=merged_cookies, @@ -467,10 +497,25 @@ class Session(SessionRedirectMixin): ) return p - def request(self, method, url, - params=None, data=None, headers=None, cookies=None, files=None, - auth=None, timeout=None, allow_redirects=True, proxies=None, - hooks=None, stream=None, verify=None, cert=None, json=None): + def request( + self, + method, + url, + params=None, + data=None, + headers=None, + cookies=None, + files=None, + auth=None, + timeout=None, + allow_redirects=True, + proxies=None, + hooks=None, + stream=None, + verify=None, + cert=None, + json=None, + ): """Constructs a :class:`Request `, prepares it and sends it. Returns :class:`Response ` object. @@ -506,7 +551,7 @@ class Session(SessionRedirectMixin): ``False``, requests will accept any TLS certificate presented by the server, and will ignore hostname mismatches and/or expired certificates, which will make your application vulnerable to - man-in-the-middle (MitM) attacks. Setting verify to ``False`` + man-in-the-middle (MitM) attacks. Setting verify to ``False`` may be useful during local development or testing. :param cert: (optional) if String, path to ssl client cert file (.pem). If Tuple, ('cert', 'key') pair. @@ -535,8 +580,8 @@ class Session(SessionRedirectMixin): # Send the request. send_kwargs = { - 'timeout': timeout, - 'allow_redirects': allow_redirects, + "timeout": timeout, + "allow_redirects": allow_redirects, } send_kwargs.update(settings) resp = self.send(prep, **send_kwargs) @@ -551,8 +596,8 @@ class Session(SessionRedirectMixin): :rtype: requests.Response """ - kwargs.setdefault('allow_redirects', True) - return self.request('GET', url, **kwargs) + kwargs.setdefault("allow_redirects", True) + return self.request("GET", url, **kwargs) def options(self, url, **kwargs): r"""Sends a OPTIONS request. Returns :class:`Response` object. @@ -562,8 +607,8 @@ class Session(SessionRedirectMixin): :rtype: requests.Response """ - kwargs.setdefault('allow_redirects', True) - return self.request('OPTIONS', url, **kwargs) + kwargs.setdefault("allow_redirects", True) + return self.request("OPTIONS", url, **kwargs) def head(self, url, **kwargs): r"""Sends a HEAD request. Returns :class:`Response` object. @@ -573,8 +618,8 @@ class Session(SessionRedirectMixin): :rtype: requests.Response """ - kwargs.setdefault('allow_redirects', False) - return self.request('HEAD', url, **kwargs) + kwargs.setdefault("allow_redirects", False) + return self.request("HEAD", url, **kwargs) def post(self, url, data=None, json=None, **kwargs): r"""Sends a POST request. Returns :class:`Response` object. @@ -587,7 +632,7 @@ class Session(SessionRedirectMixin): :rtype: requests.Response """ - return self.request('POST', url, data=data, json=json, **kwargs) + return self.request("POST", url, data=data, json=json, **kwargs) def put(self, url, data=None, **kwargs): r"""Sends a PUT request. Returns :class:`Response` object. @@ -599,7 +644,7 @@ class Session(SessionRedirectMixin): :rtype: requests.Response """ - return self.request('PUT', url, data=data, **kwargs) + return self.request("PUT", url, data=data, **kwargs) def patch(self, url, data=None, **kwargs): r"""Sends a PATCH request. Returns :class:`Response` object. @@ -611,7 +656,7 @@ class Session(SessionRedirectMixin): :rtype: requests.Response """ - return self.request('PATCH', url, data=data, **kwargs) + return self.request("PATCH", url, data=data, **kwargs) def delete(self, url, **kwargs): r"""Sends a DELETE request. Returns :class:`Response` object. @@ -621,7 +666,7 @@ class Session(SessionRedirectMixin): :rtype: requests.Response """ - return self.request('DELETE', url, **kwargs) + return self.request("DELETE", url, **kwargs) def send(self, request, **kwargs): """Send a given PreparedRequest. @@ -630,19 +675,20 @@ class Session(SessionRedirectMixin): """ # Set defaults that the hooks can utilize to ensure they always have # the correct parameters to reproduce the previous request. - kwargs.setdefault('stream', self.stream) - kwargs.setdefault('verify', self.verify) - kwargs.setdefault('cert', self.cert) - kwargs.setdefault('proxies', self.proxies) + kwargs.setdefault("stream", self.stream) + kwargs.setdefault("verify", self.verify) + kwargs.setdefault("cert", self.cert) + if "proxies" not in kwargs: + kwargs["proxies"] = resolve_proxies(request, self.proxies, self.trust_env) # It's possible that users might accidentally send a Request object. # Guard against that specific failure case. if isinstance(request, Request): - raise ValueError('You can only send PreparedRequests.') + raise ValueError("You can only send PreparedRequests.") # Set up variables needed for resolve_redirects and dispatching of hooks - allow_redirects = kwargs.pop('allow_redirects', True) - stream = kwargs.get('stream') + allow_redirects = kwargs.pop("allow_redirects", True) + stream = kwargs.get("stream") hooks = request.hooks # Get the appropriate adapter to use @@ -659,7 +705,7 @@ class Session(SessionRedirectMixin): r.elapsed = timedelta(seconds=elapsed) # Response manipulation hooks - r = dispatch_hook('response', hooks, r, **kwargs) + r = dispatch_hook("response", hooks, r, **kwargs) # Persist cookies if r.history: @@ -689,7 +735,9 @@ class Session(SessionRedirectMixin): # If redirects aren't being followed, store the response on the Request for Response.next(). if not allow_redirects: try: - r._next = next(self.resolve_redirects(r, request, yield_requests=True, **kwargs)) + r._next = next( + self.resolve_redirects(r, request, yield_requests=True, **kwargs) + ) except StopIteration: pass @@ -707,16 +755,19 @@ class Session(SessionRedirectMixin): # Gather clues from the surrounding environment. if self.trust_env: # Set environment's proxies. - no_proxy = proxies.get('no_proxy') if proxies is not None else None + no_proxy = proxies.get("no_proxy") if proxies is not None else None env_proxies = get_environ_proxies(url, no_proxy=no_proxy) for (k, v) in env_proxies.items(): proxies.setdefault(k, v) - # Look for requests environment configuration and be compatible - # with cURL. + # Look for requests environment configuration + # and be compatible with cURL. if verify is True or verify is None: - verify = (os.environ.get('REQUESTS_CA_BUNDLE') or - os.environ.get('CURL_CA_BUNDLE')) + verify = ( + os.environ.get("REQUESTS_CA_BUNDLE") + or os.environ.get("CURL_CA_BUNDLE") + or verify + ) # Merge all the kwargs. proxies = merge_setting(proxies, self.proxies) @@ -724,8 +775,7 @@ class Session(SessionRedirectMixin): verify = merge_setting(verify, self.verify) cert = merge_setting(cert, self.cert) - return {'verify': verify, 'proxies': proxies, 'stream': stream, - 'cert': cert} + return {"proxies": proxies, "stream": stream, "verify": verify, "cert": cert} def get_adapter(self, url): """ @@ -739,7 +789,7 @@ class Session(SessionRedirectMixin): return adapter # Nothing matches :-/ - raise InvalidSchema("No connection adapters were found for {!r}".format(url)) + raise InvalidSchema(f"No connection adapters were found for {url!r}") def close(self): """Closes all adapters and as such the session""" diff --git a/libs/common/requests/status_codes.py b/libs/common/requests/status_codes.py index d80a7cd4..4bd072be 100644 --- a/libs/common/requests/status_codes.py +++ b/libs/common/requests/status_codes.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - r""" The ``codes`` object defines a mapping from common names for HTTP statuses to their numerical codes, accessible either as attributes or as dictionary @@ -23,101 +21,108 @@ the names are allowed. For example, ``codes.ok``, ``codes.OK``, and from .structures import LookupDict _codes = { - # Informational. - 100: ('continue',), - 101: ('switching_protocols',), - 102: ('processing',), - 103: ('checkpoint',), - 122: ('uri_too_long', 'request_uri_too_long'), - 200: ('ok', 'okay', 'all_ok', 'all_okay', 'all_good', '\\o/', '✓'), - 201: ('created',), - 202: ('accepted',), - 203: ('non_authoritative_info', 'non_authoritative_information'), - 204: ('no_content',), - 205: ('reset_content', 'reset'), - 206: ('partial_content', 'partial'), - 207: ('multi_status', 'multiple_status', 'multi_stati', 'multiple_stati'), - 208: ('already_reported',), - 226: ('im_used',), - + 100: ("continue",), + 101: ("switching_protocols",), + 102: ("processing",), + 103: ("checkpoint",), + 122: ("uri_too_long", "request_uri_too_long"), + 200: ("ok", "okay", "all_ok", "all_okay", "all_good", "\\o/", "✓"), + 201: ("created",), + 202: ("accepted",), + 203: ("non_authoritative_info", "non_authoritative_information"), + 204: ("no_content",), + 205: ("reset_content", "reset"), + 206: ("partial_content", "partial"), + 207: ("multi_status", "multiple_status", "multi_stati", "multiple_stati"), + 208: ("already_reported",), + 226: ("im_used",), # Redirection. - 300: ('multiple_choices',), - 301: ('moved_permanently', 'moved', '\\o-'), - 302: ('found',), - 303: ('see_other', 'other'), - 304: ('not_modified',), - 305: ('use_proxy',), - 306: ('switch_proxy',), - 307: ('temporary_redirect', 'temporary_moved', 'temporary'), - 308: ('permanent_redirect', - 'resume_incomplete', 'resume',), # These 2 to be removed in 3.0 - + 300: ("multiple_choices",), + 301: ("moved_permanently", "moved", "\\o-"), + 302: ("found",), + 303: ("see_other", "other"), + 304: ("not_modified",), + 305: ("use_proxy",), + 306: ("switch_proxy",), + 307: ("temporary_redirect", "temporary_moved", "temporary"), + 308: ( + "permanent_redirect", + "resume_incomplete", + "resume", + ), # "resume" and "resume_incomplete" to be removed in 3.0 # Client Error. - 400: ('bad_request', 'bad'), - 401: ('unauthorized',), - 402: ('payment_required', 'payment'), - 403: ('forbidden',), - 404: ('not_found', '-o-'), - 405: ('method_not_allowed', 'not_allowed'), - 406: ('not_acceptable',), - 407: ('proxy_authentication_required', 'proxy_auth', 'proxy_authentication'), - 408: ('request_timeout', 'timeout'), - 409: ('conflict',), - 410: ('gone',), - 411: ('length_required',), - 412: ('precondition_failed', 'precondition'), - 413: ('request_entity_too_large',), - 414: ('request_uri_too_large',), - 415: ('unsupported_media_type', 'unsupported_media', 'media_type'), - 416: ('requested_range_not_satisfiable', 'requested_range', 'range_not_satisfiable'), - 417: ('expectation_failed',), - 418: ('im_a_teapot', 'teapot', 'i_am_a_teapot'), - 421: ('misdirected_request',), - 422: ('unprocessable_entity', 'unprocessable'), - 423: ('locked',), - 424: ('failed_dependency', 'dependency'), - 425: ('unordered_collection', 'unordered'), - 426: ('upgrade_required', 'upgrade'), - 428: ('precondition_required', 'precondition'), - 429: ('too_many_requests', 'too_many'), - 431: ('header_fields_too_large', 'fields_too_large'), - 444: ('no_response', 'none'), - 449: ('retry_with', 'retry'), - 450: ('blocked_by_windows_parental_controls', 'parental_controls'), - 451: ('unavailable_for_legal_reasons', 'legal_reasons'), - 499: ('client_closed_request',), - + 400: ("bad_request", "bad"), + 401: ("unauthorized",), + 402: ("payment_required", "payment"), + 403: ("forbidden",), + 404: ("not_found", "-o-"), + 405: ("method_not_allowed", "not_allowed"), + 406: ("not_acceptable",), + 407: ("proxy_authentication_required", "proxy_auth", "proxy_authentication"), + 408: ("request_timeout", "timeout"), + 409: ("conflict",), + 410: ("gone",), + 411: ("length_required",), + 412: ("precondition_failed", "precondition"), + 413: ("request_entity_too_large",), + 414: ("request_uri_too_large",), + 415: ("unsupported_media_type", "unsupported_media", "media_type"), + 416: ( + "requested_range_not_satisfiable", + "requested_range", + "range_not_satisfiable", + ), + 417: ("expectation_failed",), + 418: ("im_a_teapot", "teapot", "i_am_a_teapot"), + 421: ("misdirected_request",), + 422: ("unprocessable_entity", "unprocessable"), + 423: ("locked",), + 424: ("failed_dependency", "dependency"), + 425: ("unordered_collection", "unordered"), + 426: ("upgrade_required", "upgrade"), + 428: ("precondition_required", "precondition"), + 429: ("too_many_requests", "too_many"), + 431: ("header_fields_too_large", "fields_too_large"), + 444: ("no_response", "none"), + 449: ("retry_with", "retry"), + 450: ("blocked_by_windows_parental_controls", "parental_controls"), + 451: ("unavailable_for_legal_reasons", "legal_reasons"), + 499: ("client_closed_request",), # Server Error. - 500: ('internal_server_error', 'server_error', '/o\\', '✗'), - 501: ('not_implemented',), - 502: ('bad_gateway',), - 503: ('service_unavailable', 'unavailable'), - 504: ('gateway_timeout',), - 505: ('http_version_not_supported', 'http_version'), - 506: ('variant_also_negotiates',), - 507: ('insufficient_storage',), - 509: ('bandwidth_limit_exceeded', 'bandwidth'), - 510: ('not_extended',), - 511: ('network_authentication_required', 'network_auth', 'network_authentication'), + 500: ("internal_server_error", "server_error", "/o\\", "✗"), + 501: ("not_implemented",), + 502: ("bad_gateway",), + 503: ("service_unavailable", "unavailable"), + 504: ("gateway_timeout",), + 505: ("http_version_not_supported", "http_version"), + 506: ("variant_also_negotiates",), + 507: ("insufficient_storage",), + 509: ("bandwidth_limit_exceeded", "bandwidth"), + 510: ("not_extended",), + 511: ("network_authentication_required", "network_auth", "network_authentication"), } -codes = LookupDict(name='status_codes') +codes = LookupDict(name="status_codes") + def _init(): for code, titles in _codes.items(): for title in titles: setattr(codes, title, code) - if not title.startswith(('\\', '/')): + if not title.startswith(("\\", "/")): setattr(codes, title.upper(), code) def doc(code): - names = ', '.join('``%s``' % n for n in _codes[code]) - return '* %d: %s' % (code, names) + names = ", ".join(f"``{n}``" for n in _codes[code]) + return "* %d: %s" % (code, names) global __doc__ - __doc__ = (__doc__ + '\n' + - '\n'.join(doc(code) for code in sorted(_codes)) - if __doc__ is not None else None) + __doc__ = ( + __doc__ + "\n" + "\n".join(doc(code) for code in sorted(_codes)) + if __doc__ is not None + else None + ) + _init() diff --git a/libs/common/requests/structures.py b/libs/common/requests/structures.py index 8ee0ba7a..188e13e4 100644 --- a/libs/common/requests/structures.py +++ b/libs/common/requests/structures.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """ requests.structures ~~~~~~~~~~~~~~~~~~~ @@ -64,11 +62,7 @@ class CaseInsensitiveDict(MutableMapping): def lower_items(self): """Like iteritems(), but with all lowercase keys.""" - return ( - (lowerkey, keyval[1]) - for (lowerkey, keyval) - in self._store.items() - ) + return ((lowerkey, keyval[1]) for (lowerkey, keyval) in self._store.items()) def __eq__(self, other): if isinstance(other, Mapping): @@ -91,10 +85,10 @@ class LookupDict(dict): def __init__(self, name=None): self.name = name - super(LookupDict, self).__init__() + super().__init__() def __repr__(self): - return '' % (self.name) + return f"" def __getitem__(self, key): # We allow fall-through here, so values default to None diff --git a/libs/common/requests/utils.py b/libs/common/requests/utils.py index db67938e..ad535838 100644 --- a/libs/common/requests/utils.py +++ b/libs/common/requests/utils.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """ requests.utils ~~~~~~~~~~~~~~ @@ -21,49 +19,71 @@ import warnings import zipfile from collections import OrderedDict -from .__version__ import __version__ +from urllib3.util import make_headers, parse_url + from . import certs +from .__version__ import __version__ + # to_native_string is unused here, but imported here for backwards compatibility -from ._internal_utils import to_native_string +from ._internal_utils import HEADER_VALIDATORS, to_native_string # noqa: F401 +from .compat import ( + Mapping, + basestring, + bytes, + getproxies, + getproxies_environment, + integer_types, +) from .compat import parse_http_list as _parse_list_header from .compat import ( - quote, urlparse, bytes, str, unquote, getproxies, - proxy_bypass, urlunparse, basestring, integer_types, is_py3, - proxy_bypass_environment, getproxies_environment, Mapping) + proxy_bypass, + proxy_bypass_environment, + quote, + str, + unquote, + urlparse, + urlunparse, +) from .cookies import cookiejar_from_dict -from .structures import CaseInsensitiveDict from .exceptions import ( - InvalidURL, InvalidHeader, FileModeWarning, UnrewindableBodyError) + FileModeWarning, + InvalidHeader, + InvalidURL, + UnrewindableBodyError, +) +from .structures import CaseInsensitiveDict -NETRC_FILES = ('.netrc', '_netrc') +NETRC_FILES = (".netrc", "_netrc") DEFAULT_CA_BUNDLE_PATH = certs.where() -DEFAULT_PORTS = {'http': 80, 'https': 443} +DEFAULT_PORTS = {"http": 80, "https": 443} + +# Ensure that ', ' is used to preserve previous delimiter behavior. +DEFAULT_ACCEPT_ENCODING = ", ".join( + re.split(r",\s*", make_headers(accept_encoding=True)["accept-encoding"]) +) -if sys.platform == 'win32': +if sys.platform == "win32": # provide a proxy_bypass version on Windows without DNS lookups def proxy_bypass_registry(host): try: - if is_py3: - import winreg - else: - import _winreg as winreg + import winreg except ImportError: return False try: - internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER, - r'Software\Microsoft\Windows\CurrentVersion\Internet Settings') + internetSettings = winreg.OpenKey( + winreg.HKEY_CURRENT_USER, + r"Software\Microsoft\Windows\CurrentVersion\Internet Settings", + ) # ProxyEnable could be REG_SZ or REG_DWORD, normalizing it - proxyEnable = int(winreg.QueryValueEx(internetSettings, - 'ProxyEnable')[0]) + proxyEnable = int(winreg.QueryValueEx(internetSettings, "ProxyEnable")[0]) # ProxyOverride is almost always a string - proxyOverride = winreg.QueryValueEx(internetSettings, - 'ProxyOverride')[0] - except OSError: + proxyOverride = winreg.QueryValueEx(internetSettings, "ProxyOverride")[0] + except (OSError, ValueError): return False if not proxyEnable or not proxyOverride: return False @@ -71,15 +91,15 @@ if sys.platform == 'win32': # make a check value list from the registry entry: replace the # '' string by the localhost entry and the corresponding # canonical entry. - proxyOverride = proxyOverride.split(';') + proxyOverride = proxyOverride.split(";") # now check if we match one of the registry values. for test in proxyOverride: - if test == '': - if '.' not in host: + if test == "": + if "." not in host: return True - test = test.replace(".", r"\.") # mask dots - test = test.replace("*", r".*") # change glob sequence - test = test.replace("?", r".") # change glob char + test = test.replace(".", r"\.") # mask dots + test = test.replace("*", r".*") # change glob sequence + test = test.replace("?", r".") # change glob char if re.match(test, host, re.I): return True return False @@ -99,7 +119,7 @@ if sys.platform == 'win32': def dict_to_sequence(d): """Returns an internal sequence dictionary update.""" - if hasattr(d, 'items'): + if hasattr(d, "items"): d = d.items() return d @@ -109,37 +129,42 @@ def super_len(o): total_length = None current_position = 0 - if hasattr(o, '__len__'): + if hasattr(o, "__len__"): total_length = len(o) - elif hasattr(o, 'len'): + elif hasattr(o, "len"): total_length = o.len - elif hasattr(o, 'fileno'): + elif hasattr(o, "fileno"): try: fileno = o.fileno() - except io.UnsupportedOperation: + except (io.UnsupportedOperation, AttributeError): + # AttributeError is a surprising exception, seeing as how we've just checked + # that `hasattr(o, 'fileno')`. It happens for objects obtained via + # `Tarfile.extractfile()`, per issue 5229. pass else: total_length = os.fstat(fileno).st_size # Having used fstat to determine the file length, we need to # confirm that this file was opened up in binary mode. - if 'b' not in o.mode: - warnings.warn(( - "Requests has determined the content-length for this " - "request using the binary size of the file: however, the " - "file has been opened in text mode (i.e. without the 'b' " - "flag in the mode). This may lead to an incorrect " - "content-length. In Requests 3.0, support will be removed " - "for files in text mode."), - FileModeWarning + if "b" not in o.mode: + warnings.warn( + ( + "Requests has determined the content-length for this " + "request using the binary size of the file: however, the " + "file has been opened in text mode (i.e. without the 'b' " + "flag in the mode). This may lead to an incorrect " + "content-length. In Requests 3.0, support will be removed " + "for files in text mode." + ), + FileModeWarning, ) - if hasattr(o, 'tell'): + if hasattr(o, "tell"): try: current_position = o.tell() - except (OSError, IOError): + except OSError: # This can happen in some weird situations, such as when the file # is actually a special file descriptor like stdin. In this # instance, we don't know what the length is, so set it to zero and @@ -147,8 +172,8 @@ def super_len(o): if total_length is not None: current_position = total_length else: - if hasattr(o, 'seek') and total_length is None: - # StringIO and BytesIO have seek but no useable fileno + if hasattr(o, "seek") and total_length is None: + # StringIO and BytesIO have seek but no usable fileno try: # seek to end of file o.seek(0, 2) @@ -157,7 +182,7 @@ def super_len(o): # seek back to current position to support # partially read file-like objects o.seek(current_position or 0) - except (OSError, IOError): + except OSError: total_length = 0 if total_length is None: @@ -169,14 +194,14 @@ def super_len(o): def get_netrc_auth(url, raise_errors=False): """Returns the Requests tuple auth for a given url from netrc.""" - netrc_file = os.environ.get('NETRC') + netrc_file = os.environ.get("NETRC") if netrc_file is not None: netrc_locations = (netrc_file,) else: - netrc_locations = ('~/{}'.format(f) for f in NETRC_FILES) + netrc_locations = (f"~/{f}" for f in NETRC_FILES) try: - from netrc import netrc, NetrcParseError + from netrc import NetrcParseError, netrc netrc_path = None @@ -201,18 +226,18 @@ def get_netrc_auth(url, raise_errors=False): # Strip port numbers from netloc. This weird `if...encode`` dance is # used for Python 3.2, which doesn't support unicode literals. - splitstr = b':' + splitstr = b":" if isinstance(url, str): - splitstr = splitstr.decode('ascii') + splitstr = splitstr.decode("ascii") host = ri.netloc.split(splitstr)[0] try: _netrc = netrc(netrc_path).authenticators(host) if _netrc: # Return with login / password - login_i = (0 if _netrc[0] else 1) + login_i = 0 if _netrc[0] else 1 return (_netrc[login_i], _netrc[2]) - except (NetrcParseError, IOError): + except (NetrcParseError, OSError): # If there was a parsing error or a permissions issue reading the file, # we'll just skip netrc auth unless explicitly asked to raise errors. if raise_errors: @@ -225,9 +250,8 @@ def get_netrc_auth(url, raise_errors=False): def guess_filename(obj): """Tries to guess the filename of the given object.""" - name = getattr(obj, 'name', None) - if (name and isinstance(name, basestring) and name[0] != '<' and - name[-1] != '>'): + name = getattr(obj, "name", None) + if name and isinstance(name, basestring) and name[0] != "<" and name[-1] != ">": return os.path.basename(name) @@ -245,7 +269,11 @@ def extract_zipped_paths(path): archive, member = os.path.split(path) while archive and not os.path.exists(archive): archive, prefix = os.path.split(archive) - member = '/'.join([prefix, member]) + if not prefix: + # If we don't check for an empty prefix after the split (in other words, archive remains unchanged after the split), + # we _can_ end up in an infinite loop on a rare corner case affecting a small number of users + break + member = "/".join([prefix, member]) if not zipfile.is_zipfile(archive): return path @@ -256,13 +284,27 @@ def extract_zipped_paths(path): # we have a valid zip archive and a valid member of that archive tmp = tempfile.gettempdir() - extracted_path = os.path.join(tmp, *member.split('/')) + extracted_path = os.path.join(tmp, member.split("/")[-1]) if not os.path.exists(extracted_path): - extracted_path = zip_file.extract(member, path=tmp) - + # use read + write to avoid the creating nested folders, we only want the file, avoids mkdir racing condition + with atomic_open(extracted_path) as file_handler: + file_handler.write(zip_file.read(member)) return extracted_path +@contextlib.contextmanager +def atomic_open(filename): + """Write a file to the disk in an atomic fashion""" + tmp_descriptor, tmp_name = tempfile.mkstemp(dir=os.path.dirname(filename)) + try: + with os.fdopen(tmp_descriptor, "wb") as tmp_handler: + yield tmp_handler + os.replace(tmp_name, filename) + except BaseException: + os.remove(tmp_name) + raise + + def from_key_val_list(value): """Take an object and test to see if it can be represented as a dictionary. Unless it can not be represented as such, return an @@ -285,7 +327,7 @@ def from_key_val_list(value): return None if isinstance(value, (str, bytes, bool, int)): - raise ValueError('cannot encode objects that are not 2-tuples') + raise ValueError("cannot encode objects that are not 2-tuples") return OrderedDict(value) @@ -311,7 +353,7 @@ def to_key_val_list(value): return None if isinstance(value, (str, bytes, bool, int)): - raise ValueError('cannot encode objects that are not 2-tuples') + raise ValueError("cannot encode objects that are not 2-tuples") if isinstance(value, Mapping): value = value.items() @@ -376,10 +418,10 @@ def parse_dict_header(value): """ result = {} for item in _parse_list_header(value): - if '=' not in item: + if "=" not in item: result[item] = None continue - name, value = item.split('=', 1) + name, value = item.split("=", 1) if value[:1] == value[-1:] == '"': value = unquote_header_value(value[1:-1]) result[name] = value @@ -407,8 +449,8 @@ def unquote_header_value(value, is_filename=False): # replace sequence below on a UNC path has the effect of turning # the leading double slash into a single slash and then # _fix_ie_filename() doesn't work correctly. See #458. - if not is_filename or value[:2] != '\\\\': - return value.replace('\\\\', '\\').replace('\\"', '"') + if not is_filename or value[:2] != "\\\\": + return value.replace("\\\\", "\\").replace('\\"', '"') return value @@ -443,19 +485,24 @@ def get_encodings_from_content(content): :param content: bytestring to extract encodings from. """ - warnings.warn(( - 'In requests 3.0, get_encodings_from_content will be removed. For ' - 'more information, please see the discussion on issue #2266. (This' - ' warning should only appear once.)'), - DeprecationWarning) + warnings.warn( + ( + "In requests 3.0, get_encodings_from_content will be removed. For " + "more information, please see the discussion on issue #2266. (This" + " warning should only appear once.)" + ), + DeprecationWarning, + ) charset_re = re.compile(r']', flags=re.I) pragma_re = re.compile(r']', flags=re.I) xml_re = re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]') - return (charset_re.findall(content) + - pragma_re.findall(content) + - xml_re.findall(content)) + return ( + charset_re.findall(content) + + pragma_re.findall(content) + + xml_re.findall(content) + ) def _parse_content_type_header(header): @@ -466,7 +513,7 @@ def _parse_content_type_header(header): parameters """ - tokens = header.split(';') + tokens = header.split(";") content_type, params = tokens[0].strip(), tokens[1:] params_dict = {} items_to_strip = "\"' " @@ -478,7 +525,7 @@ def _parse_content_type_header(header): index_of_equals = param.find("=") if index_of_equals != -1: key = param[:index_of_equals].strip(items_to_strip) - value = param[index_of_equals + 1:].strip(items_to_strip) + value = param[index_of_equals + 1 :].strip(items_to_strip) params_dict[key.lower()] = value return content_type, params_dict @@ -490,38 +537,37 @@ def get_encoding_from_headers(headers): :rtype: str """ - content_type = headers.get('content-type') + content_type = headers.get("content-type") if not content_type: return None content_type, params = _parse_content_type_header(content_type) - if 'charset' in params: - return params['charset'].strip("'\"") + if "charset" in params: + return params["charset"].strip("'\"") - if 'text' in content_type: - return 'ISO-8859-1' + if "text" in content_type: + return "ISO-8859-1" - if 'application/json' in content_type: + if "application/json" in content_type: # Assume UTF-8 based on RFC 4627: https://www.ietf.org/rfc/rfc4627.txt since the charset was unset - return 'utf-8' + return "utf-8" def stream_decode_response_unicode(iterator, r): - """Stream decodes a iterator.""" + """Stream decodes an iterator.""" if r.encoding is None: - for item in iterator: - yield item + yield from iterator return - decoder = codecs.getincrementaldecoder(r.encoding)(errors='replace') + decoder = codecs.getincrementaldecoder(r.encoding)(errors="replace") for chunk in iterator: rv = decoder.decode(chunk) if rv: yield rv - rv = decoder.decode(b'', final=True) + rv = decoder.decode(b"", final=True) if rv: yield rv @@ -532,7 +578,7 @@ def iter_slices(string, slice_length): if slice_length is None or slice_length <= 0: slice_length = len(string) while pos < len(string): - yield string[pos:pos + slice_length] + yield string[pos : pos + slice_length] pos += slice_length @@ -548,11 +594,14 @@ def get_unicode_from_response(r): :rtype: str """ - warnings.warn(( - 'In requests 3.0, get_unicode_from_response will be removed. For ' - 'more information, please see the discussion on issue #2266. (This' - ' warning should only appear once.)'), - DeprecationWarning) + warnings.warn( + ( + "In requests 3.0, get_unicode_from_response will be removed. For " + "more information, please see the discussion on issue #2266. (This" + " warning should only appear once.)" + ), + DeprecationWarning, + ) tried_encodings = [] @@ -567,14 +616,15 @@ def get_unicode_from_response(r): # Fall back: try: - return str(r.content, encoding, errors='replace') + return str(r.content, encoding, errors="replace") except TypeError: return r.content # The unreserved URI characters (RFC 3986) UNRESERVED_SET = frozenset( - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + "0123456789-._~") + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + "0123456789-._~" +) def unquote_unreserved(uri): @@ -583,22 +633,22 @@ def unquote_unreserved(uri): :rtype: str """ - parts = uri.split('%') + parts = uri.split("%") for i in range(1, len(parts)): h = parts[i][0:2] if len(h) == 2 and h.isalnum(): try: c = chr(int(h, 16)) except ValueError: - raise InvalidURL("Invalid percent-escape sequence: '%s'" % h) + raise InvalidURL(f"Invalid percent-escape sequence: '{h}'") if c in UNRESERVED_SET: parts[i] = c + parts[i][2:] else: - parts[i] = '%' + parts[i] + parts[i] = f"%{parts[i]}" else: - parts[i] = '%' + parts[i] - return ''.join(parts) + parts[i] = f"%{parts[i]}" + return "".join(parts) def requote_uri(uri): @@ -631,10 +681,10 @@ def address_in_network(ip, net): :rtype: bool """ - ipaddr = struct.unpack('=L', socket.inet_aton(ip))[0] - netaddr, bits = net.split('/') - netmask = struct.unpack('=L', socket.inet_aton(dotted_netmask(int(bits))))[0] - network = struct.unpack('=L', socket.inet_aton(netaddr))[0] & netmask + ipaddr = struct.unpack("=L", socket.inet_aton(ip))[0] + netaddr, bits = net.split("/") + netmask = struct.unpack("=L", socket.inet_aton(dotted_netmask(int(bits))))[0] + network = struct.unpack("=L", socket.inet_aton(netaddr))[0] & netmask return (ipaddr & netmask) == (network & netmask) @@ -645,8 +695,8 @@ def dotted_netmask(mask): :rtype: str """ - bits = 0xffffffff ^ (1 << 32 - mask) - 1 - return socket.inet_ntoa(struct.pack('>I', bits)) + bits = 0xFFFFFFFF ^ (1 << 32 - mask) - 1 + return socket.inet_ntoa(struct.pack(">I", bits)) def is_ipv4_address(string_ip): @@ -655,7 +705,7 @@ def is_ipv4_address(string_ip): """ try: socket.inet_aton(string_ip) - except socket.error: + except OSError: return False return True @@ -666,9 +716,9 @@ def is_valid_cidr(string_network): :rtype: bool """ - if string_network.count('/') == 1: + if string_network.count("/") == 1: try: - mask = int(string_network.split('/')[1]) + mask = int(string_network.split("/")[1]) except ValueError: return False @@ -676,8 +726,8 @@ def is_valid_cidr(string_network): return False try: - socket.inet_aton(string_network.split('/')[0]) - except socket.error: + socket.inet_aton(string_network.split("/")[0]) + except OSError: return False else: return False @@ -714,13 +764,14 @@ def should_bypass_proxies(url, no_proxy): """ # Prioritize lowercase environment variables over uppercase # to keep a consistent behaviour with other http projects (curl, wget). - get_proxy = lambda k: os.environ.get(k) or os.environ.get(k.upper()) + def get_proxy(key): + return os.environ.get(key) or os.environ.get(key.upper()) # First check whether no_proxy is defined. If it is, check that the URL # we're getting isn't in the no_proxy list. no_proxy_arg = no_proxy if no_proxy is None: - no_proxy = get_proxy('no_proxy') + no_proxy = get_proxy("no_proxy") parsed = urlparse(url) if parsed.hostname is None: @@ -730,9 +781,7 @@ def should_bypass_proxies(url, no_proxy): if no_proxy: # We need to check whether we match here. We need to see if we match # the end of the hostname, both with and without the port. - no_proxy = ( - host for host in no_proxy.replace(' ', '').split(',') if host - ) + no_proxy = (host for host in no_proxy.replace(" ", "").split(",") if host) if is_ipv4_address(parsed.hostname): for proxy_ip in no_proxy: @@ -746,7 +795,7 @@ def should_bypass_proxies(url, no_proxy): else: host_with_port = parsed.hostname if parsed.port: - host_with_port += ':{}'.format(parsed.port) + host_with_port += f":{parsed.port}" for host in no_proxy: if parsed.hostname.endswith(host) or host_with_port.endswith(host): @@ -754,7 +803,7 @@ def should_bypass_proxies(url, no_proxy): # to apply the proxies on this URL. return True - with set_environ('no_proxy', no_proxy_arg): + with set_environ("no_proxy", no_proxy_arg): # parsed.hostname can be `None` in cases such as a file URI. try: bypass = proxy_bypass(parsed.hostname) @@ -788,13 +837,13 @@ def select_proxy(url, proxies): proxies = proxies or {} urlparts = urlparse(url) if urlparts.hostname is None: - return proxies.get(urlparts.scheme, proxies.get('all')) + return proxies.get(urlparts.scheme, proxies.get("all")) proxy_keys = [ - urlparts.scheme + '://' + urlparts.hostname, + urlparts.scheme + "://" + urlparts.hostname, urlparts.scheme, - 'all://' + urlparts.hostname, - 'all', + "all://" + urlparts.hostname, + "all", ] proxy = None for proxy_key in proxy_keys: @@ -805,25 +854,54 @@ def select_proxy(url, proxies): return proxy +def resolve_proxies(request, proxies, trust_env=True): + """This method takes proxy information from a request and configuration + input to resolve a mapping of target proxies. This will consider settings + such a NO_PROXY to strip proxy configurations. + + :param request: Request or PreparedRequest + :param proxies: A dictionary of schemes or schemes and hosts to proxy URLs + :param trust_env: Boolean declaring whether to trust environment configs + + :rtype: dict + """ + proxies = proxies if proxies is not None else {} + url = request.url + scheme = urlparse(url).scheme + no_proxy = proxies.get("no_proxy") + new_proxies = proxies.copy() + + if trust_env and not should_bypass_proxies(url, no_proxy=no_proxy): + environ_proxies = get_environ_proxies(url, no_proxy=no_proxy) + + proxy = environ_proxies.get(scheme, environ_proxies.get("all")) + + if proxy: + new_proxies.setdefault(scheme, proxy) + return new_proxies + + def default_user_agent(name="python-requests"): """ Return a string representing the default user agent. :rtype: str """ - return '%s/%s' % (name, __version__) + return f"{name}/{__version__}" def default_headers(): """ :rtype: requests.structures.CaseInsensitiveDict """ - return CaseInsensitiveDict({ - 'User-Agent': default_user_agent(), - 'Accept-Encoding': ', '.join(('gzip', 'deflate')), - 'Accept': '*/*', - 'Connection': 'keep-alive', - }) + return CaseInsensitiveDict( + { + "User-Agent": default_user_agent(), + "Accept-Encoding": DEFAULT_ACCEPT_ENCODING, + "Accept": "*/*", + "Connection": "keep-alive", + } + ) def parse_header_links(value): @@ -836,23 +914,23 @@ def parse_header_links(value): links = [] - replace_chars = ' \'"' + replace_chars = " '\"" value = value.strip(replace_chars) if not value: return links - for val in re.split(', *<', value): + for val in re.split(", *<", value): try: - url, params = val.split(';', 1) + url, params = val.split(";", 1) except ValueError: - url, params = val, '' + url, params = val, "" - link = {'url': url.strip('<> \'"')} + link = {"url": url.strip("<> '\"")} - for param in params.split(';'): + for param in params.split(";"): try: - key, value = param.split('=') + key, value = param.split("=") except ValueError: break @@ -864,7 +942,7 @@ def parse_header_links(value): # Null bytes; no need to recreate these on each call to guess_json_utf -_null = '\x00'.encode('ascii') # encoding to ASCII for Python 3 +_null = "\x00".encode("ascii") # encoding to ASCII for Python 3 _null2 = _null * 2 _null3 = _null * 3 @@ -878,25 +956,25 @@ def guess_json_utf(data): # determine the encoding. Also detect a BOM, if present. sample = data[:4] if sample in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE): - return 'utf-32' # BOM included + return "utf-32" # BOM included if sample[:3] == codecs.BOM_UTF8: - return 'utf-8-sig' # BOM included, MS style (discouraged) + return "utf-8-sig" # BOM included, MS style (discouraged) if sample[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE): - return 'utf-16' # BOM included + return "utf-16" # BOM included nullcount = sample.count(_null) if nullcount == 0: - return 'utf-8' + return "utf-8" if nullcount == 2: - if sample[::2] == _null2: # 1st and 3rd are null - return 'utf-16-be' + if sample[::2] == _null2: # 1st and 3rd are null + return "utf-16-be" if sample[1::2] == _null2: # 2nd and 4th are null - return 'utf-16-le' + return "utf-16-le" # Did not detect 2 valid UTF-16 ascii-range characters if nullcount == 3: if sample[:3] == _null3: - return 'utf-32-be' + return "utf-32-be" if sample[1:] == _null3: - return 'utf-32-le' + return "utf-32-le" # Did not detect a valid UTF-32 ascii-range character return None @@ -907,15 +985,27 @@ def prepend_scheme_if_needed(url, new_scheme): :rtype: str """ - scheme, netloc, path, params, query, fragment = urlparse(url, new_scheme) + parsed = parse_url(url) + scheme, auth, host, port, path, query, fragment = parsed - # urlparse is a finicky beast, and sometimes decides that there isn't a - # netloc present. Assume that it's being over-cautious, and switch netloc - # and path if urlparse decided there was no netloc. + # A defect in urlparse determines that there isn't a netloc present in some + # urls. We previously assumed parsing was overly cautious, and swapped the + # netloc and path. Due to a lack of tests on the original defect, this is + # maintained with parse_url for backwards compatibility. + netloc = parsed.netloc if not netloc: netloc, path = path, netloc - return urlunparse((scheme, netloc, path, params, query, fragment)) + if auth: + # parse_url doesn't provide the netloc with auth + # so we'll add it ourselves. + netloc = "@".join([auth, netloc]) + if scheme is None: + scheme = new_scheme + if path is None: + path = "" + + return urlunparse((scheme, netloc, path, "", query, fragment)) def get_auth_from_url(url): @@ -929,35 +1019,36 @@ def get_auth_from_url(url): try: auth = (unquote(parsed.username), unquote(parsed.password)) except (AttributeError, TypeError): - auth = ('', '') + auth = ("", "") return auth -# Moved outside of function to avoid recompile every call -_CLEAN_HEADER_REGEX_BYTE = re.compile(b'^\\S[^\\r\\n]*$|^$') -_CLEAN_HEADER_REGEX_STR = re.compile(r'^\S[^\r\n]*$|^$') - - def check_header_validity(header): - """Verifies that header value is a string which doesn't contain - leading whitespace or return characters. This prevents unintended - header injection. + """Verifies that header parts don't contain leading whitespace + reserved characters, or return characters. :param header: tuple, in the format (name, value). """ name, value = header - if isinstance(value, bytes): - pat = _CLEAN_HEADER_REGEX_BYTE - else: - pat = _CLEAN_HEADER_REGEX_STR - try: - if not pat.match(value): - raise InvalidHeader("Invalid return character or leading space in header: %s" % name) - except TypeError: - raise InvalidHeader("Value for header {%s: %s} must be of type str or " - "bytes, not %s" % (name, value, type(value))) + for part in header: + if type(part) not in HEADER_VALIDATORS: + raise InvalidHeader( + f"Header part ({part!r}) from {{{name!r}: {value!r}}} must be " + f"of type str or bytes, not {type(part)}" + ) + + _validate_header_part(name, "name", HEADER_VALIDATORS[type(name)][0]) + _validate_header_part(value, "value", HEADER_VALIDATORS[type(value)][1]) + + +def _validate_header_part(header_part, header_kind, validator): + if not validator.match(header_part): + raise InvalidHeader( + f"Invalid leading whitespace, reserved character(s), or return" + f"character(s) in header {header_kind}: {header_part!r}" + ) def urldefragauth(url): @@ -972,21 +1063,24 @@ def urldefragauth(url): if not netloc: netloc, path = path, netloc - netloc = netloc.rsplit('@', 1)[-1] + netloc = netloc.rsplit("@", 1)[-1] - return urlunparse((scheme, netloc, path, params, query, '')) + return urlunparse((scheme, netloc, path, params, query, "")) def rewind_body(prepared_request): """Move file pointer back to its recorded starting position so it can be read again on redirect. """ - body_seek = getattr(prepared_request.body, 'seek', None) - if body_seek is not None and isinstance(prepared_request._body_position, integer_types): + body_seek = getattr(prepared_request.body, "seek", None) + if body_seek is not None and isinstance( + prepared_request._body_position, integer_types + ): try: body_seek(prepared_request._body_position) - except (IOError, OSError): - raise UnrewindableBodyError("An error occurred when rewinding request " - "body for redirect.") + except OSError: + raise UnrewindableBodyError( + "An error occurred when rewinding request body for redirect." + ) else: raise UnrewindableBodyError("Unable to rewind request body for redirect.") diff --git a/libs/common/setuptools/__init__.py b/libs/common/setuptools/__init__.py index 9d8ae1ed..6c24cc2b 100644 --- a/libs/common/setuptools/__init__.py +++ b/libs/common/setuptools/__init__.py @@ -1,129 +1,45 @@ """Extensions to the 'distutils' for large or complex distributions""" -import os -import sys import functools -import distutils.core -import distutils.filelist +import os import re +import warnings + +import _distutils_hack.override # noqa: F401 + +import distutils.core from distutils.errors import DistutilsOptionError -from distutils.util import convert_path -from fnmatch import fnmatchcase +from distutils.util import convert_path as _convert_path from ._deprecation_warning import SetuptoolsDeprecationWarning -from setuptools.extern.six import PY3, string_types -from setuptools.extern.six.moves import filter, map - import setuptools.version from setuptools.extension import Extension -from setuptools.dist import Distribution, Feature +from setuptools.dist import Distribution from setuptools.depends import Require +from setuptools.discovery import PackageFinder, PEP420PackageFinder from . import monkey - -__metaclass__ = type +from . import logging __all__ = [ - 'setup', 'Distribution', 'Feature', 'Command', 'Extension', 'Require', + 'setup', + 'Distribution', + 'Command', + 'Extension', + 'Require', 'SetuptoolsDeprecationWarning', - 'find_packages' + 'find_packages', + 'find_namespace_packages', ] -if PY3: - __all__.append('find_namespace_packages') - __version__ = setuptools.version.__version__ bootstrap_install_from = None -# If we run 2to3 on .py files, should we also convert docstrings? -# Default: yes; assume that we can detect doctests reliably -run_2to3_on_doctests = True -# Standard package names for fixer packages -lib2to3_fixer_packages = ['lib2to3.fixes'] - - -class PackageFinder: - """ - Generate a list of all Python packages found within a directory - """ - - @classmethod - def find(cls, where='.', exclude=(), include=('*',)): - """Return a list all Python packages found within directory 'where' - - 'where' is the root directory which will be searched for packages. It - should be supplied as a "cross-platform" (i.e. URL-style) path; it will - be converted to the appropriate local path syntax. - - 'exclude' is a sequence of package names to exclude; '*' can be used - as a wildcard in the names, such that 'foo.*' will exclude all - subpackages of 'foo' (but not 'foo' itself). - - 'include' is a sequence of package names to include. If it's - specified, only the named packages will be included. If it's not - specified, all found packages will be included. 'include' can contain - shell style wildcard patterns just like 'exclude'. - """ - - return list(cls._find_packages_iter( - convert_path(where), - cls._build_filter('ez_setup', '*__pycache__', *exclude), - cls._build_filter(*include))) - - @classmethod - def _find_packages_iter(cls, where, exclude, include): - """ - All the packages found in 'where' that pass the 'include' filter, but - not the 'exclude' filter. - """ - for root, dirs, files in os.walk(where, followlinks=True): - # Copy dirs to iterate over it, then empty dirs. - all_dirs = dirs[:] - dirs[:] = [] - - for dir in all_dirs: - full_path = os.path.join(root, dir) - rel_path = os.path.relpath(full_path, where) - package = rel_path.replace(os.path.sep, '.') - - # Skip directory trees that are not valid packages - if ('.' in dir or not cls._looks_like_package(full_path)): - continue - - # Should this package be included? - if include(package) and not exclude(package): - yield package - - # Keep searching subdirectories, as there may be more packages - # down there, even if the parent was excluded. - dirs.append(dir) - - @staticmethod - def _looks_like_package(path): - """Does a directory look like a package?""" - return os.path.isfile(os.path.join(path, '__init__.py')) - - @staticmethod - def _build_filter(*patterns): - """ - Given a list of patterns, return a callable that will be true only if - the input matches at least one of the patterns. - """ - return lambda name: any(fnmatchcase(name, pat=pat) for pat in patterns) - - -class PEP420PackageFinder(PackageFinder): - @staticmethod - def _looks_like_package(path): - return True - find_packages = PackageFinder.find - -if PY3: - find_namespace_packages = PEP420PackageFinder.find +find_namespace_packages = PEP420PackageFinder.find def _install_setup_requires(attrs): @@ -134,13 +50,21 @@ def _install_setup_requires(attrs): A minimal version of a distribution for supporting the fetch_build_eggs interface. """ + def __init__(self, attrs): _incl = 'dependency_links', 'setup_requires' - filtered = { - k: attrs[k] - for k in set(_incl) & set(attrs) - } - distutils.core.Distribution.__init__(self, filtered) + filtered = {k: attrs[k] for k in set(_incl) & set(attrs)} + super().__init__(filtered) + # Prevent accidentally triggering discovery with incomplete set of attrs + self.set_defaults._disable() + + def _get_project_config_files(self, filenames=None): + """Ignore ``pyproject.toml``, they are not related to setup_requires""" + try: + cfg, toml = super()._split_standard_project_metadata(filenames) + return cfg, () + except Exception: + return filenames, () def finalize_options(self): """ @@ -158,9 +82,11 @@ def _install_setup_requires(attrs): def setup(**attrs): # Make sure we have any requirements needed to interpret 'attrs'. + logging.configure() _install_setup_requires(attrs) return distutils.core.setup(**attrs) + setup.__doc__ = distutils.core.setup.__doc__ @@ -168,7 +94,59 @@ _Command = monkey.get_unpatched(distutils.core.Command) class Command(_Command): - __doc__ = _Command.__doc__ + """ + Setuptools internal actions are organized using a *command design pattern*. + This means that each action (or group of closely related actions) executed during + the build should be implemented as a ``Command`` subclass. + + These commands are abstractions and do not necessarily correspond to a command that + can (or should) be executed via a terminal, in a CLI fashion (although historically + they would). + + When creating a new command from scratch, custom defined classes **SHOULD** inherit + from ``setuptools.Command`` and implement a few mandatory methods. + Between these mandatory methods, are listed: + + .. method:: initialize_options(self) + + Set or (reset) all options/attributes/caches used by the command + to their default values. Note that these values may be overwritten during + the build. + + .. method:: finalize_options(self) + + Set final values for all options/attributes used by the command. + Most of the time, each option/attribute/cache should only be set if it does not + have any value yet (e.g. ``if self.attr is None: self.attr = val``). + + .. method:: run(self) + + Execute the actions intended by the command. + (Side effects **SHOULD** only take place when ``run`` is executed, + for example, creating new files or writing to the terminal output). + + A useful analogy for command classes is to think of them as subroutines with local + variables called "options". The options are "declared" in ``initialize_options()`` + and "defined" (given their final values, aka "finalized") in ``finalize_options()``, + both of which must be defined by every command class. The "body" of the subroutine, + (where it does all the work) is the ``run()`` method. + Between ``initialize_options()`` and ``finalize_options()``, ``setuptools`` may set + the values for options/attributes based on user's input (or circumstance), + which means that the implementation should be careful to not overwrite values in + ``finalize_options`` unless necessary. + + Please note that other commands (or other parts of setuptools) may also overwrite + the values of the command's options/attributes multiple times during the build + process. + Therefore it is important to consistently implement ``initialize_options()`` and + ``finalize_options()``. For example, all derived attributes (or attributes that + depend on the value of other attributes) **SHOULD** be recomputed in + ``finalize_options``. + + When overwriting existing commands, custom defined classes **MUST** abide by the + same APIs implemented by the original class. They also **SHOULD** inherit from the + original class. + """ command_consumes_arguments = False @@ -177,7 +155,7 @@ class Command(_Command): Construct the command for dist, updating vars(self) with any keyword parameters. """ - _Command.__init__(self, dist) + super().__init__(dist) vars(self).update(kw) def _ensure_stringlike(self, option, what, default=None): @@ -185,9 +163,10 @@ class Command(_Command): if val is None: setattr(self, option, default) return default - elif not isinstance(val, string_types): - raise DistutilsOptionError("'%s' must be a %s (got `%s`)" - % (option, what, val)) + elif not isinstance(val, str): + raise DistutilsOptionError( + "'%s' must be a %s (got `%s`)" % (option, what, val) + ) return val def ensure_string_list(self, option): @@ -195,21 +174,27 @@ class Command(_Command): currently a string, we split it either on /,\s*/ or /\s+/, so "foo bar baz", "foo,bar,baz", and "foo, bar baz" all become ["foo", "bar", "baz"]. + + .. + TODO: This method seems to be similar to the one in ``distutils.cmd`` + Probably it is just here for backward compatibility with old Python versions? + + :meta private: """ val = getattr(self, option) if val is None: return - elif isinstance(val, string_types): + elif isinstance(val, str): setattr(self, option, re.split(r',\s*|\s+', val)) else: if isinstance(val, list): - ok = all(isinstance(v, string_types) for v in val) + ok = all(isinstance(v, str) for v in val) else: ok = False if not ok: raise DistutilsOptionError( - "'%s' must be a list of strings (got %r)" - % (option, val)) + "'%s' must be a list of strings (got %r)" % (option, val) + ) def reinitialize_command(self, command, reinit_subcommands=0, **kw): cmd = _Command.reinitialize_command(self, command, reinit_subcommands) @@ -241,5 +226,22 @@ def findall(dir=os.curdir): return list(files) +@functools.wraps(_convert_path) +def convert_path(pathname): + from inspect import cleandoc + + msg = """ + The function `convert_path` is considered internal and not part of the public API. + Its direct usage by 3rd-party packages is considered deprecated and the function + may be removed in the future. + """ + warnings.warn(cleandoc(msg), SetuptoolsDeprecationWarning) + return _convert_path(pathname) + + +class sic(str): + """Treat this string as-is (https://en.wikipedia.org/wiki/Sic)""" + + # Apply monkey patches monkey.patch_all() diff --git a/libs/common/setuptools/_distutils/__init__.py b/libs/common/setuptools/_distutils/__init__.py new file mode 100644 index 00000000..1a188c35 --- /dev/null +++ b/libs/common/setuptools/_distutils/__init__.py @@ -0,0 +1,14 @@ +import sys +import importlib + +__version__, _, _ = sys.version.partition(' ') + + +try: + # Allow Debian and pkgsrc (only) to customize system + # behavior. Ref pypa/distutils#2 and pypa/distutils#16. + # This hook is deprecated and no other environments + # should use it. + importlib.import_module('_distutils_system_mod') +except ImportError: + pass diff --git a/libs/common/setuptools/_distutils/_collections.py b/libs/common/setuptools/_distutils/_collections.py new file mode 100644 index 00000000..02556614 --- /dev/null +++ b/libs/common/setuptools/_distutils/_collections.py @@ -0,0 +1,194 @@ +import collections +import functools +import itertools +import operator + + +# from jaraco.collections 3.5.1 +class DictStack(list, collections.abc.Mapping): + """ + A stack of dictionaries that behaves as a view on those dictionaries, + giving preference to the last. + + >>> stack = DictStack([dict(a=1, c=2), dict(b=2, a=2)]) + >>> stack['a'] + 2 + >>> stack['b'] + 2 + >>> stack['c'] + 2 + >>> len(stack) + 3 + >>> stack.push(dict(a=3)) + >>> stack['a'] + 3 + >>> set(stack.keys()) == set(['a', 'b', 'c']) + True + >>> set(stack.items()) == set([('a', 3), ('b', 2), ('c', 2)]) + True + >>> dict(**stack) == dict(stack) == dict(a=3, c=2, b=2) + True + >>> d = stack.pop() + >>> stack['a'] + 2 + >>> d = stack.pop() + >>> stack['a'] + 1 + >>> stack.get('b', None) + >>> 'c' in stack + True + """ + + def __iter__(self): + dicts = list.__iter__(self) + return iter(set(itertools.chain.from_iterable(c.keys() for c in dicts))) + + def __getitem__(self, key): + for scope in reversed(tuple(list.__iter__(self))): + if key in scope: + return scope[key] + raise KeyError(key) + + push = list.append + + def __contains__(self, other): + return collections.abc.Mapping.__contains__(self, other) + + def __len__(self): + return len(list(iter(self))) + + +# from jaraco.collections 3.7 +class RangeMap(dict): + """ + A dictionary-like object that uses the keys as bounds for a range. + Inclusion of the value for that range is determined by the + key_match_comparator, which defaults to less-than-or-equal. + A value is returned for a key if it is the first key that matches in + the sorted list of keys. + + One may supply keyword parameters to be passed to the sort function used + to sort keys (i.e. key, reverse) as sort_params. + + Let's create a map that maps 1-3 -> 'a', 4-6 -> 'b' + + >>> r = RangeMap({3: 'a', 6: 'b'}) # boy, that was easy + >>> r[1], r[2], r[3], r[4], r[5], r[6] + ('a', 'a', 'a', 'b', 'b', 'b') + + Even float values should work so long as the comparison operator + supports it. + + >>> r[4.5] + 'b' + + But you'll notice that the way rangemap is defined, it must be open-ended + on one side. + + >>> r[0] + 'a' + >>> r[-1] + 'a' + + One can close the open-end of the RangeMap by using undefined_value + + >>> r = RangeMap({0: RangeMap.undefined_value, 3: 'a', 6: 'b'}) + >>> r[0] + Traceback (most recent call last): + ... + KeyError: 0 + + One can get the first or last elements in the range by using RangeMap.Item + + >>> last_item = RangeMap.Item(-1) + >>> r[last_item] + 'b' + + .last_item is a shortcut for Item(-1) + + >>> r[RangeMap.last_item] + 'b' + + Sometimes it's useful to find the bounds for a RangeMap + + >>> r.bounds() + (0, 6) + + RangeMap supports .get(key, default) + + >>> r.get(0, 'not found') + 'not found' + + >>> r.get(7, 'not found') + 'not found' + + One often wishes to define the ranges by their left-most values, + which requires use of sort params and a key_match_comparator. + + >>> r = RangeMap({1: 'a', 4: 'b'}, + ... sort_params=dict(reverse=True), + ... key_match_comparator=operator.ge) + >>> r[1], r[2], r[3], r[4], r[5], r[6] + ('a', 'a', 'a', 'b', 'b', 'b') + + That wasn't nearly as easy as before, so an alternate constructor + is provided: + + >>> r = RangeMap.left({1: 'a', 4: 'b', 7: RangeMap.undefined_value}) + >>> r[1], r[2], r[3], r[4], r[5], r[6] + ('a', 'a', 'a', 'b', 'b', 'b') + + """ + + def __init__(self, source, sort_params={}, key_match_comparator=operator.le): + dict.__init__(self, source) + self.sort_params = sort_params + self.match = key_match_comparator + + @classmethod + def left(cls, source): + return cls( + source, sort_params=dict(reverse=True), key_match_comparator=operator.ge + ) + + def __getitem__(self, item): + sorted_keys = sorted(self.keys(), **self.sort_params) + if isinstance(item, RangeMap.Item): + result = self.__getitem__(sorted_keys[item]) + else: + key = self._find_first_match_(sorted_keys, item) + result = dict.__getitem__(self, key) + if result is RangeMap.undefined_value: + raise KeyError(key) + return result + + def get(self, key, default=None): + """ + Return the value for key if key is in the dictionary, else default. + If default is not given, it defaults to None, so that this method + never raises a KeyError. + """ + try: + return self[key] + except KeyError: + return default + + def _find_first_match_(self, keys, item): + is_match = functools.partial(self.match, item) + matches = list(filter(is_match, keys)) + if matches: + return matches[0] + raise KeyError(item) + + def bounds(self): + sorted_keys = sorted(self.keys(), **self.sort_params) + return (sorted_keys[RangeMap.first_item], sorted_keys[RangeMap.last_item]) + + # some special values for the RangeMap + undefined_value = type(str('RangeValueUndefined'), (), {})() + + class Item(int): + "RangeMap Item" + + first_item = Item(0) + last_item = Item(-1) diff --git a/libs/common/setuptools/_distutils/_functools.py b/libs/common/setuptools/_distutils/_functools.py new file mode 100644 index 00000000..e7053bac --- /dev/null +++ b/libs/common/setuptools/_distutils/_functools.py @@ -0,0 +1,20 @@ +import functools + + +# from jaraco.functools 3.5 +def pass_none(func): + """ + Wrap func so it's not called if its first param is None + + >>> print_text = pass_none(print) + >>> print_text('text') + text + >>> print_text(None) + """ + + @functools.wraps(func) + def wrapper(param, *args, **kwargs): + if param is not None: + return func(param, *args, **kwargs) + + return wrapper diff --git a/libs/common/setuptools/_distutils/_log.py b/libs/common/setuptools/_distutils/_log.py new file mode 100644 index 00000000..4a2ae0ac --- /dev/null +++ b/libs/common/setuptools/_distutils/_log.py @@ -0,0 +1,4 @@ +import logging + + +log = logging.getLogger() diff --git a/libs/common/setuptools/_distutils/_macos_compat.py b/libs/common/setuptools/_distutils/_macos_compat.py new file mode 100644 index 00000000..17769e91 --- /dev/null +++ b/libs/common/setuptools/_distutils/_macos_compat.py @@ -0,0 +1,12 @@ +import sys +import importlib + + +def bypass_compiler_fixup(cmd, args): + return cmd + + +if sys.platform == 'darwin': + compiler_fixup = importlib.import_module('_osx_support').compiler_fixup +else: + compiler_fixup = bypass_compiler_fixup diff --git a/libs/common/setuptools/_distutils/_msvccompiler.py b/libs/common/setuptools/_distutils/_msvccompiler.py new file mode 100644 index 00000000..8b4023c4 --- /dev/null +++ b/libs/common/setuptools/_distutils/_msvccompiler.py @@ -0,0 +1,572 @@ +"""distutils._msvccompiler + +Contains MSVCCompiler, an implementation of the abstract CCompiler class +for Microsoft Visual Studio 2015. + +The module is compatible with VS 2015 and later. You can find legacy support +for older versions in distutils.msvc9compiler and distutils.msvccompiler. +""" + +# Written by Perry Stoll +# hacked by Robin Becker and Thomas Heller to do a better job of +# finding DevStudio (through the registry) +# ported to VS 2005 and VS 2008 by Christian Heimes +# ported to VS 2015 by Steve Dower + +import os +import subprocess +import contextlib +import warnings +import unittest.mock as mock + +with contextlib.suppress(ImportError): + import winreg + +from .errors import ( + DistutilsExecError, + DistutilsPlatformError, + CompileError, + LibError, + LinkError, +) +from .ccompiler import CCompiler, gen_lib_options +from ._log import log +from .util import get_platform + +from itertools import count + + +def _find_vc2015(): + try: + key = winreg.OpenKeyEx( + winreg.HKEY_LOCAL_MACHINE, + r"Software\Microsoft\VisualStudio\SxS\VC7", + access=winreg.KEY_READ | winreg.KEY_WOW64_32KEY, + ) + except OSError: + log.debug("Visual C++ is not registered") + return None, None + + best_version = 0 + best_dir = None + with key: + for i in count(): + try: + v, vc_dir, vt = winreg.EnumValue(key, i) + except OSError: + break + if v and vt == winreg.REG_SZ and os.path.isdir(vc_dir): + try: + version = int(float(v)) + except (ValueError, TypeError): + continue + if version >= 14 and version > best_version: + best_version, best_dir = version, vc_dir + return best_version, best_dir + + +def _find_vc2017(): + """Returns "15, path" based on the result of invoking vswhere.exe + If no install is found, returns "None, None" + + The version is returned to avoid unnecessarily changing the function + result. It may be ignored when the path is not None. + + If vswhere.exe is not available, by definition, VS 2017 is not + installed. + """ + root = os.environ.get("ProgramFiles(x86)") or os.environ.get("ProgramFiles") + if not root: + return None, None + + try: + path = subprocess.check_output( + [ + os.path.join( + root, "Microsoft Visual Studio", "Installer", "vswhere.exe" + ), + "-latest", + "-prerelease", + "-requires", + "Microsoft.VisualStudio.Component.VC.Tools.x86.x64", + "-property", + "installationPath", + "-products", + "*", + ], + encoding="mbcs", + errors="strict", + ).strip() + except (subprocess.CalledProcessError, OSError, UnicodeDecodeError): + return None, None + + path = os.path.join(path, "VC", "Auxiliary", "Build") + if os.path.isdir(path): + return 15, path + + return None, None + + +PLAT_SPEC_TO_RUNTIME = { + 'x86': 'x86', + 'x86_amd64': 'x64', + 'x86_arm': 'arm', + 'x86_arm64': 'arm64', +} + + +def _find_vcvarsall(plat_spec): + # bpo-38597: Removed vcruntime return value + _, best_dir = _find_vc2017() + + if not best_dir: + best_version, best_dir = _find_vc2015() + + if not best_dir: + log.debug("No suitable Visual C++ version found") + return None, None + + vcvarsall = os.path.join(best_dir, "vcvarsall.bat") + if not os.path.isfile(vcvarsall): + log.debug("%s cannot be found", vcvarsall) + return None, None + + return vcvarsall, None + + +def _get_vc_env(plat_spec): + if os.getenv("DISTUTILS_USE_SDK"): + return {key.lower(): value for key, value in os.environ.items()} + + vcvarsall, _ = _find_vcvarsall(plat_spec) + if not vcvarsall: + raise DistutilsPlatformError("Unable to find vcvarsall.bat") + + try: + out = subprocess.check_output( + f'cmd /u /c "{vcvarsall}" {plat_spec} && set', + stderr=subprocess.STDOUT, + ).decode('utf-16le', errors='replace') + except subprocess.CalledProcessError as exc: + log.error(exc.output) + raise DistutilsPlatformError(f"Error executing {exc.cmd}") + + env = { + key.lower(): value + for key, _, value in (line.partition('=') for line in out.splitlines()) + if key and value + } + + return env + + +def _find_exe(exe, paths=None): + """Return path to an MSVC executable program. + + Tries to find the program in several places: first, one of the + MSVC program search paths from the registry; next, the directories + in the PATH environment variable. If any of those work, return an + absolute path that is known to exist. If none of them work, just + return the original program name, 'exe'. + """ + if not paths: + paths = os.getenv('path').split(os.pathsep) + for p in paths: + fn = os.path.join(os.path.abspath(p), exe) + if os.path.isfile(fn): + return fn + return exe + + +# A map keyed by get_platform() return values to values accepted by +# 'vcvarsall.bat'. Always cross-compile from x86 to work with the +# lighter-weight MSVC installs that do not include native 64-bit tools. +PLAT_TO_VCVARS = { + 'win32': 'x86', + 'win-amd64': 'x86_amd64', + 'win-arm32': 'x86_arm', + 'win-arm64': 'x86_arm64', +} + + +class MSVCCompiler(CCompiler): + """Concrete class that implements an interface to Microsoft Visual C++, + as defined by the CCompiler abstract class.""" + + compiler_type = 'msvc' + + # Just set this so CCompiler's constructor doesn't barf. We currently + # don't use the 'set_executables()' bureaucracy provided by CCompiler, + # as it really isn't necessary for this sort of single-compiler class. + # Would be nice to have a consistent interface with UnixCCompiler, + # though, so it's worth thinking about. + executables = {} + + # Private class data (need to distinguish C from C++ source for compiler) + _c_extensions = ['.c'] + _cpp_extensions = ['.cc', '.cpp', '.cxx'] + _rc_extensions = ['.rc'] + _mc_extensions = ['.mc'] + + # Needed for the filename generation methods provided by the + # base class, CCompiler. + src_extensions = _c_extensions + _cpp_extensions + _rc_extensions + _mc_extensions + res_extension = '.res' + obj_extension = '.obj' + static_lib_extension = '.lib' + shared_lib_extension = '.dll' + static_lib_format = shared_lib_format = '%s%s' + exe_extension = '.exe' + + def __init__(self, verbose=0, dry_run=0, force=0): + super().__init__(verbose, dry_run, force) + # target platform (.plat_name is consistent with 'bdist') + self.plat_name = None + self.initialized = False + + @classmethod + def _configure(cls, vc_env): + """ + Set class-level include/lib dirs. + """ + cls.include_dirs = cls._parse_path(vc_env.get('include', '')) + cls.library_dirs = cls._parse_path(vc_env.get('lib', '')) + + @staticmethod + def _parse_path(val): + return [dir.rstrip(os.sep) for dir in val.split(os.pathsep) if dir] + + def initialize(self, plat_name=None): + # multi-init means we would need to check platform same each time... + assert not self.initialized, "don't init multiple times" + if plat_name is None: + plat_name = get_platform() + # sanity check for platforms to prevent obscure errors later. + if plat_name not in PLAT_TO_VCVARS: + raise DistutilsPlatformError( + f"--plat-name must be one of {tuple(PLAT_TO_VCVARS)}" + ) + + # Get the vcvarsall.bat spec for the requested platform. + plat_spec = PLAT_TO_VCVARS[plat_name] + + vc_env = _get_vc_env(plat_spec) + if not vc_env: + raise DistutilsPlatformError( + "Unable to find a compatible " "Visual Studio installation." + ) + self._configure(vc_env) + + self._paths = vc_env.get('path', '') + paths = self._paths.split(os.pathsep) + self.cc = _find_exe("cl.exe", paths) + self.linker = _find_exe("link.exe", paths) + self.lib = _find_exe("lib.exe", paths) + self.rc = _find_exe("rc.exe", paths) # resource compiler + self.mc = _find_exe("mc.exe", paths) # message compiler + self.mt = _find_exe("mt.exe", paths) # message compiler + + self.preprocess_options = None + # bpo-38597: Always compile with dynamic linking + # Future releases of Python 3.x will include all past + # versions of vcruntime*.dll for compatibility. + self.compile_options = ['/nologo', '/O2', '/W3', '/GL', '/DNDEBUG', '/MD'] + + self.compile_options_debug = [ + '/nologo', + '/Od', + '/MDd', + '/Zi', + '/W3', + '/D_DEBUG', + ] + + ldflags = ['/nologo', '/INCREMENTAL:NO', '/LTCG'] + + ldflags_debug = ['/nologo', '/INCREMENTAL:NO', '/LTCG', '/DEBUG:FULL'] + + self.ldflags_exe = [*ldflags, '/MANIFEST:EMBED,ID=1'] + self.ldflags_exe_debug = [*ldflags_debug, '/MANIFEST:EMBED,ID=1'] + self.ldflags_shared = [ + *ldflags, + '/DLL', + '/MANIFEST:EMBED,ID=2', + '/MANIFESTUAC:NO', + ] + self.ldflags_shared_debug = [ + *ldflags_debug, + '/DLL', + '/MANIFEST:EMBED,ID=2', + '/MANIFESTUAC:NO', + ] + self.ldflags_static = [*ldflags] + self.ldflags_static_debug = [*ldflags_debug] + + self._ldflags = { + (CCompiler.EXECUTABLE, None): self.ldflags_exe, + (CCompiler.EXECUTABLE, False): self.ldflags_exe, + (CCompiler.EXECUTABLE, True): self.ldflags_exe_debug, + (CCompiler.SHARED_OBJECT, None): self.ldflags_shared, + (CCompiler.SHARED_OBJECT, False): self.ldflags_shared, + (CCompiler.SHARED_OBJECT, True): self.ldflags_shared_debug, + (CCompiler.SHARED_LIBRARY, None): self.ldflags_static, + (CCompiler.SHARED_LIBRARY, False): self.ldflags_static, + (CCompiler.SHARED_LIBRARY, True): self.ldflags_static_debug, + } + + self.initialized = True + + # -- Worker methods ------------------------------------------------ + + @property + def out_extensions(self): + return { + **super().out_extensions, + **{ + ext: self.res_extension + for ext in self._rc_extensions + self._mc_extensions + }, + } + + def compile( # noqa: C901 + self, + sources, + output_dir=None, + macros=None, + include_dirs=None, + debug=0, + extra_preargs=None, + extra_postargs=None, + depends=None, + ): + + if not self.initialized: + self.initialize() + compile_info = self._setup_compile( + output_dir, macros, include_dirs, sources, depends, extra_postargs + ) + macros, objects, extra_postargs, pp_opts, build = compile_info + + compile_opts = extra_preargs or [] + compile_opts.append('/c') + if debug: + compile_opts.extend(self.compile_options_debug) + else: + compile_opts.extend(self.compile_options) + + add_cpp_opts = False + + for obj in objects: + try: + src, ext = build[obj] + except KeyError: + continue + if debug: + # pass the full pathname to MSVC in debug mode, + # this allows the debugger to find the source file + # without asking the user to browse for it + src = os.path.abspath(src) + + if ext in self._c_extensions: + input_opt = "/Tc" + src + elif ext in self._cpp_extensions: + input_opt = "/Tp" + src + add_cpp_opts = True + elif ext in self._rc_extensions: + # compile .RC to .RES file + input_opt = src + output_opt = "/fo" + obj + try: + self.spawn([self.rc] + pp_opts + [output_opt, input_opt]) + except DistutilsExecError as msg: + raise CompileError(msg) + continue + elif ext in self._mc_extensions: + # Compile .MC to .RC file to .RES file. + # * '-h dir' specifies the directory for the + # generated include file + # * '-r dir' specifies the target directory of the + # generated RC file and the binary message resource + # it includes + # + # For now (since there are no options to change this), + # we use the source-directory for the include file and + # the build directory for the RC file and message + # resources. This works at least for win32all. + h_dir = os.path.dirname(src) + rc_dir = os.path.dirname(obj) + try: + # first compile .MC to .RC and .H file + self.spawn([self.mc, '-h', h_dir, '-r', rc_dir, src]) + base, _ = os.path.splitext(os.path.basename(src)) + rc_file = os.path.join(rc_dir, base + '.rc') + # then compile .RC to .RES file + self.spawn([self.rc, "/fo" + obj, rc_file]) + + except DistutilsExecError as msg: + raise CompileError(msg) + continue + else: + # how to handle this file? + raise CompileError(f"Don't know how to compile {src} to {obj}") + + args = [self.cc] + compile_opts + pp_opts + if add_cpp_opts: + args.append('/EHsc') + args.append(input_opt) + args.append("/Fo" + obj) + args.extend(extra_postargs) + + try: + self.spawn(args) + except DistutilsExecError as msg: + raise CompileError(msg) + + return objects + + def create_static_lib( + self, objects, output_libname, output_dir=None, debug=0, target_lang=None + ): + + if not self.initialized: + self.initialize() + objects, output_dir = self._fix_object_args(objects, output_dir) + output_filename = self.library_filename(output_libname, output_dir=output_dir) + + if self._need_link(objects, output_filename): + lib_args = objects + ['/OUT:' + output_filename] + if debug: + pass # XXX what goes here? + try: + log.debug('Executing "%s" %s', self.lib, ' '.join(lib_args)) + self.spawn([self.lib] + lib_args) + except DistutilsExecError as msg: + raise LibError(msg) + else: + log.debug("skipping %s (up-to-date)", output_filename) + + def link( + self, + target_desc, + objects, + output_filename, + output_dir=None, + libraries=None, + library_dirs=None, + runtime_library_dirs=None, + export_symbols=None, + debug=0, + extra_preargs=None, + extra_postargs=None, + build_temp=None, + target_lang=None, + ): + + if not self.initialized: + self.initialize() + objects, output_dir = self._fix_object_args(objects, output_dir) + fixed_args = self._fix_lib_args(libraries, library_dirs, runtime_library_dirs) + libraries, library_dirs, runtime_library_dirs = fixed_args + + if runtime_library_dirs: + self.warn( + "I don't know what to do with 'runtime_library_dirs': " + + str(runtime_library_dirs) + ) + + lib_opts = gen_lib_options(self, library_dirs, runtime_library_dirs, libraries) + if output_dir is not None: + output_filename = os.path.join(output_dir, output_filename) + + if self._need_link(objects, output_filename): + ldflags = self._ldflags[target_desc, debug] + + export_opts = ["/EXPORT:" + sym for sym in (export_symbols or [])] + + ld_args = ( + ldflags + lib_opts + export_opts + objects + ['/OUT:' + output_filename] + ) + + # The MSVC linker generates .lib and .exp files, which cannot be + # suppressed by any linker switches. The .lib files may even be + # needed! Make sure they are generated in the temporary build + # directory. Since they have different names for debug and release + # builds, they can go into the same directory. + build_temp = os.path.dirname(objects[0]) + if export_symbols is not None: + (dll_name, dll_ext) = os.path.splitext( + os.path.basename(output_filename) + ) + implib_file = os.path.join(build_temp, self.library_filename(dll_name)) + ld_args.append('/IMPLIB:' + implib_file) + + if extra_preargs: + ld_args[:0] = extra_preargs + if extra_postargs: + ld_args.extend(extra_postargs) + + output_dir = os.path.dirname(os.path.abspath(output_filename)) + self.mkpath(output_dir) + try: + log.debug('Executing "%s" %s', self.linker, ' '.join(ld_args)) + self.spawn([self.linker] + ld_args) + except DistutilsExecError as msg: + raise LinkError(msg) + else: + log.debug("skipping %s (up-to-date)", output_filename) + + def spawn(self, cmd): + env = dict(os.environ, PATH=self._paths) + with self._fallback_spawn(cmd, env) as fallback: + return super().spawn(cmd, env=env) + return fallback.value + + @contextlib.contextmanager + def _fallback_spawn(self, cmd, env): + """ + Discovered in pypa/distutils#15, some tools monkeypatch the compiler, + so the 'env' kwarg causes a TypeError. Detect this condition and + restore the legacy, unsafe behavior. + """ + bag = type('Bag', (), {})() + try: + yield bag + except TypeError as exc: + if "unexpected keyword argument 'env'" not in str(exc): + raise + else: + return + warnings.warn("Fallback spawn triggered. Please update distutils monkeypatch.") + with mock.patch.dict('os.environ', env): + bag.value = super().spawn(cmd) + + # -- Miscellaneous methods ----------------------------------------- + # These are all used by the 'gen_lib_options() function, in + # ccompiler.py. + + def library_dir_option(self, dir): + return "/LIBPATH:" + dir + + def runtime_library_dir_option(self, dir): + raise DistutilsPlatformError( + "don't know how to set runtime library search path for MSVC" + ) + + def library_option(self, lib): + return self.library_filename(lib) + + def find_library_file(self, dirs, lib, debug=0): + # Prefer a debugging library if found (and requested), but deal + # with it if we don't have one. + if debug: + try_names = [lib + "_d", lib] + else: + try_names = [lib] + for dir in dirs: + for name in try_names: + libfile = os.path.join(dir, self.library_filename(name)) + if os.path.isfile(libfile): + return libfile + else: + # Oops, didn't find it in *any* of 'dirs' + return None diff --git a/libs/common/setuptools/_distutils/archive_util.py b/libs/common/setuptools/_distutils/archive_util.py new file mode 100644 index 00000000..7f9e1e00 --- /dev/null +++ b/libs/common/setuptools/_distutils/archive_util.py @@ -0,0 +1,280 @@ +"""distutils.archive_util + +Utility functions for creating archive files (tarballs, zip files, +that sort of thing).""" + +import os +from warnings import warn +import sys + +try: + import zipfile +except ImportError: + zipfile = None + + +from .errors import DistutilsExecError +from .spawn import spawn +from .dir_util import mkpath +from ._log import log + +try: + from pwd import getpwnam +except ImportError: + getpwnam = None + +try: + from grp import getgrnam +except ImportError: + getgrnam = None + + +def _get_gid(name): + """Returns a gid, given a group name.""" + if getgrnam is None or name is None: + return None + try: + result = getgrnam(name) + except KeyError: + result = None + if result is not None: + return result[2] + return None + + +def _get_uid(name): + """Returns an uid, given a user name.""" + if getpwnam is None or name is None: + return None + try: + result = getpwnam(name) + except KeyError: + result = None + if result is not None: + return result[2] + return None + + +def make_tarball( + base_name, base_dir, compress="gzip", verbose=0, dry_run=0, owner=None, group=None +): + """Create a (possibly compressed) tar file from all the files under + 'base_dir'. + + 'compress' must be "gzip" (the default), "bzip2", "xz", "compress", or + None. ("compress" will be deprecated in Python 3.2) + + 'owner' and 'group' can be used to define an owner and a group for the + archive that is being built. If not provided, the current owner and group + will be used. + + The output tar file will be named 'base_dir' + ".tar", possibly plus + the appropriate compression extension (".gz", ".bz2", ".xz" or ".Z"). + + Returns the output filename. + """ + tar_compression = { + 'gzip': 'gz', + 'bzip2': 'bz2', + 'xz': 'xz', + None: '', + 'compress': '', + } + compress_ext = {'gzip': '.gz', 'bzip2': '.bz2', 'xz': '.xz', 'compress': '.Z'} + + # flags for compression program, each element of list will be an argument + if compress is not None and compress not in compress_ext.keys(): + raise ValueError( + "bad value for 'compress': must be None, 'gzip', 'bzip2', " + "'xz' or 'compress'" + ) + + archive_name = base_name + '.tar' + if compress != 'compress': + archive_name += compress_ext.get(compress, '') + + mkpath(os.path.dirname(archive_name), dry_run=dry_run) + + # creating the tarball + import tarfile # late import so Python build itself doesn't break + + log.info('Creating tar archive') + + uid = _get_uid(owner) + gid = _get_gid(group) + + def _set_uid_gid(tarinfo): + if gid is not None: + tarinfo.gid = gid + tarinfo.gname = group + if uid is not None: + tarinfo.uid = uid + tarinfo.uname = owner + return tarinfo + + if not dry_run: + tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress]) + try: + tar.add(base_dir, filter=_set_uid_gid) + finally: + tar.close() + + # compression using `compress` + if compress == 'compress': + warn("'compress' is deprecated.", DeprecationWarning) + # the option varies depending on the platform + compressed_name = archive_name + compress_ext[compress] + if sys.platform == 'win32': + cmd = [compress, archive_name, compressed_name] + else: + cmd = [compress, '-f', archive_name] + spawn(cmd, dry_run=dry_run) + return compressed_name + + return archive_name + + +def make_zipfile(base_name, base_dir, verbose=0, dry_run=0): # noqa: C901 + """Create a zip file from all the files under 'base_dir'. + + The output zip file will be named 'base_name' + ".zip". Uses either the + "zipfile" Python module (if available) or the InfoZIP "zip" utility + (if installed and found on the default search path). If neither tool is + available, raises DistutilsExecError. Returns the name of the output zip + file. + """ + zip_filename = base_name + ".zip" + mkpath(os.path.dirname(zip_filename), dry_run=dry_run) + + # If zipfile module is not available, try spawning an external + # 'zip' command. + if zipfile is None: + if verbose: + zipoptions = "-r" + else: + zipoptions = "-rq" + + try: + spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run) + except DistutilsExecError: + # XXX really should distinguish between "couldn't find + # external 'zip' command" and "zip failed". + raise DistutilsExecError( + ( + "unable to create zip file '%s': " + "could neither import the 'zipfile' module nor " + "find a standalone zip utility" + ) + % zip_filename + ) + + else: + log.info("creating '%s' and adding '%s' to it", zip_filename, base_dir) + + if not dry_run: + try: + zip = zipfile.ZipFile( + zip_filename, "w", compression=zipfile.ZIP_DEFLATED + ) + except RuntimeError: + zip = zipfile.ZipFile(zip_filename, "w", compression=zipfile.ZIP_STORED) + + with zip: + if base_dir != os.curdir: + path = os.path.normpath(os.path.join(base_dir, '')) + zip.write(path, path) + log.info("adding '%s'", path) + for dirpath, dirnames, filenames in os.walk(base_dir): + for name in dirnames: + path = os.path.normpath(os.path.join(dirpath, name, '')) + zip.write(path, path) + log.info("adding '%s'", path) + for name in filenames: + path = os.path.normpath(os.path.join(dirpath, name)) + if os.path.isfile(path): + zip.write(path, path) + log.info("adding '%s'", path) + + return zip_filename + + +ARCHIVE_FORMATS = { + 'gztar': (make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"), + 'bztar': (make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"), + 'xztar': (make_tarball, [('compress', 'xz')], "xz'ed tar-file"), + 'ztar': (make_tarball, [('compress', 'compress')], "compressed tar file"), + 'tar': (make_tarball, [('compress', None)], "uncompressed tar file"), + 'zip': (make_zipfile, [], "ZIP file"), +} + + +def check_archive_formats(formats): + """Returns the first format from the 'format' list that is unknown. + + If all formats are known, returns None + """ + for format in formats: + if format not in ARCHIVE_FORMATS: + return format + return None + + +def make_archive( + base_name, + format, + root_dir=None, + base_dir=None, + verbose=0, + dry_run=0, + owner=None, + group=None, +): + """Create an archive file (eg. zip or tar). + + 'base_name' is the name of the file to create, minus any format-specific + extension; 'format' is the archive format: one of "zip", "tar", "gztar", + "bztar", "xztar", or "ztar". + + 'root_dir' is a directory that will be the root directory of the + archive; ie. we typically chdir into 'root_dir' before creating the + archive. 'base_dir' is the directory where we start archiving from; + ie. 'base_dir' will be the common prefix of all files and + directories in the archive. 'root_dir' and 'base_dir' both default + to the current directory. Returns the name of the archive file. + + 'owner' and 'group' are used when creating a tar archive. By default, + uses the current owner and group. + """ + save_cwd = os.getcwd() + if root_dir is not None: + log.debug("changing into '%s'", root_dir) + base_name = os.path.abspath(base_name) + if not dry_run: + os.chdir(root_dir) + + if base_dir is None: + base_dir = os.curdir + + kwargs = {'dry_run': dry_run} + + try: + format_info = ARCHIVE_FORMATS[format] + except KeyError: + raise ValueError("unknown archive format '%s'" % format) + + func = format_info[0] + for arg, val in format_info[1]: + kwargs[arg] = val + + if format != 'zip': + kwargs['owner'] = owner + kwargs['group'] = group + + try: + filename = func(base_name, base_dir, **kwargs) + finally: + if root_dir is not None: + log.debug("changing back to '%s'", save_cwd) + os.chdir(save_cwd) + + return filename diff --git a/libs/common/setuptools/_distutils/bcppcompiler.py b/libs/common/setuptools/_distutils/bcppcompiler.py new file mode 100644 index 00000000..5d6b8653 --- /dev/null +++ b/libs/common/setuptools/_distutils/bcppcompiler.py @@ -0,0 +1,408 @@ +"""distutils.bcppcompiler + +Contains BorlandCCompiler, an implementation of the abstract CCompiler class +for the Borland C++ compiler. +""" + +# This implementation by Lyle Johnson, based on the original msvccompiler.py +# module and using the directions originally published by Gordon Williams. + +# XXX looks like there's a LOT of overlap between these two classes: +# someone should sit down and factor out the common code as +# WindowsCCompiler! --GPW + + +import os +import warnings + +from .errors import ( + DistutilsExecError, + CompileError, + LibError, + LinkError, + UnknownFileError, +) +from .ccompiler import CCompiler, gen_preprocess_options +from .file_util import write_file +from .dep_util import newer +from ._log import log + + +warnings.warn( + "bcppcompiler is deprecated and slated to be removed " + "in the future. Please discontinue use or file an issue " + "with pypa/distutils describing your use case.", + DeprecationWarning, +) + + +class BCPPCompiler(CCompiler): + """Concrete class that implements an interface to the Borland C/C++ + compiler, as defined by the CCompiler abstract class. + """ + + compiler_type = 'bcpp' + + # Just set this so CCompiler's constructor doesn't barf. We currently + # don't use the 'set_executables()' bureaucracy provided by CCompiler, + # as it really isn't necessary for this sort of single-compiler class. + # Would be nice to have a consistent interface with UnixCCompiler, + # though, so it's worth thinking about. + executables = {} + + # Private class data (need to distinguish C from C++ source for compiler) + _c_extensions = ['.c'] + _cpp_extensions = ['.cc', '.cpp', '.cxx'] + + # Needed for the filename generation methods provided by the + # base class, CCompiler. + src_extensions = _c_extensions + _cpp_extensions + obj_extension = '.obj' + static_lib_extension = '.lib' + shared_lib_extension = '.dll' + static_lib_format = shared_lib_format = '%s%s' + exe_extension = '.exe' + + def __init__(self, verbose=0, dry_run=0, force=0): + + super().__init__(verbose, dry_run, force) + + # These executables are assumed to all be in the path. + # Borland doesn't seem to use any special registry settings to + # indicate their installation locations. + + self.cc = "bcc32.exe" + self.linker = "ilink32.exe" + self.lib = "tlib.exe" + + self.preprocess_options = None + self.compile_options = ['/tWM', '/O2', '/q', '/g0'] + self.compile_options_debug = ['/tWM', '/Od', '/q', '/g0'] + + self.ldflags_shared = ['/Tpd', '/Gn', '/q', '/x'] + self.ldflags_shared_debug = ['/Tpd', '/Gn', '/q', '/x'] + self.ldflags_static = [] + self.ldflags_exe = ['/Gn', '/q', '/x'] + self.ldflags_exe_debug = ['/Gn', '/q', '/x', '/r'] + + # -- Worker methods ------------------------------------------------ + + def compile( # noqa: C901 + self, + sources, + output_dir=None, + macros=None, + include_dirs=None, + debug=0, + extra_preargs=None, + extra_postargs=None, + depends=None, + ): + + macros, objects, extra_postargs, pp_opts, build = self._setup_compile( + output_dir, macros, include_dirs, sources, depends, extra_postargs + ) + compile_opts = extra_preargs or [] + compile_opts.append('-c') + if debug: + compile_opts.extend(self.compile_options_debug) + else: + compile_opts.extend(self.compile_options) + + for obj in objects: + try: + src, ext = build[obj] + except KeyError: + continue + # XXX why do the normpath here? + src = os.path.normpath(src) + obj = os.path.normpath(obj) + # XXX _setup_compile() did a mkpath() too but before the normpath. + # Is it possible to skip the normpath? + self.mkpath(os.path.dirname(obj)) + + if ext == '.res': + # This is already a binary file -- skip it. + continue # the 'for' loop + if ext == '.rc': + # This needs to be compiled to a .res file -- do it now. + try: + self.spawn(["brcc32", "-fo", obj, src]) + except DistutilsExecError as msg: + raise CompileError(msg) + continue # the 'for' loop + + # The next two are both for the real compiler. + if ext in self._c_extensions: + input_opt = "" + elif ext in self._cpp_extensions: + input_opt = "-P" + else: + # Unknown file type -- no extra options. The compiler + # will probably fail, but let it just in case this is a + # file the compiler recognizes even if we don't. + input_opt = "" + + output_opt = "-o" + obj + + # Compiler command line syntax is: "bcc32 [options] file(s)". + # Note that the source file names must appear at the end of + # the command line. + try: + self.spawn( + [self.cc] + + compile_opts + + pp_opts + + [input_opt, output_opt] + + extra_postargs + + [src] + ) + except DistutilsExecError as msg: + raise CompileError(msg) + + return objects + + # compile () + + def create_static_lib( + self, objects, output_libname, output_dir=None, debug=0, target_lang=None + ): + + (objects, output_dir) = self._fix_object_args(objects, output_dir) + output_filename = self.library_filename(output_libname, output_dir=output_dir) + + if self._need_link(objects, output_filename): + lib_args = [output_filename, '/u'] + objects + if debug: + pass # XXX what goes here? + try: + self.spawn([self.lib] + lib_args) + except DistutilsExecError as msg: + raise LibError(msg) + else: + log.debug("skipping %s (up-to-date)", output_filename) + + # create_static_lib () + + def link( # noqa: C901 + self, + target_desc, + objects, + output_filename, + output_dir=None, + libraries=None, + library_dirs=None, + runtime_library_dirs=None, + export_symbols=None, + debug=0, + extra_preargs=None, + extra_postargs=None, + build_temp=None, + target_lang=None, + ): + + # XXX this ignores 'build_temp'! should follow the lead of + # msvccompiler.py + + (objects, output_dir) = self._fix_object_args(objects, output_dir) + (libraries, library_dirs, runtime_library_dirs) = self._fix_lib_args( + libraries, library_dirs, runtime_library_dirs + ) + + if runtime_library_dirs: + log.warning( + "I don't know what to do with 'runtime_library_dirs': %s", + str(runtime_library_dirs), + ) + + if output_dir is not None: + output_filename = os.path.join(output_dir, output_filename) + + if self._need_link(objects, output_filename): + + # Figure out linker args based on type of target. + if target_desc == CCompiler.EXECUTABLE: + startup_obj = 'c0w32' + if debug: + ld_args = self.ldflags_exe_debug[:] + else: + ld_args = self.ldflags_exe[:] + else: + startup_obj = 'c0d32' + if debug: + ld_args = self.ldflags_shared_debug[:] + else: + ld_args = self.ldflags_shared[:] + + # Create a temporary exports file for use by the linker + if export_symbols is None: + def_file = '' + else: + head, tail = os.path.split(output_filename) + modname, ext = os.path.splitext(tail) + temp_dir = os.path.dirname(objects[0]) # preserve tree structure + def_file = os.path.join(temp_dir, '%s.def' % modname) + contents = ['EXPORTS'] + for sym in export_symbols or []: + contents.append(' {}=_{}'.format(sym, sym)) + self.execute(write_file, (def_file, contents), "writing %s" % def_file) + + # Borland C++ has problems with '/' in paths + objects2 = map(os.path.normpath, objects) + # split objects in .obj and .res files + # Borland C++ needs them at different positions in the command line + objects = [startup_obj] + resources = [] + for file in objects2: + (base, ext) = os.path.splitext(os.path.normcase(file)) + if ext == '.res': + resources.append(file) + else: + objects.append(file) + + for ell in library_dirs: + ld_args.append("/L%s" % os.path.normpath(ell)) + ld_args.append("/L.") # we sometimes use relative paths + + # list of object files + ld_args.extend(objects) + + # XXX the command-line syntax for Borland C++ is a bit wonky; + # certain filenames are jammed together in one big string, but + # comma-delimited. This doesn't mesh too well with the + # Unix-centric attitude (with a DOS/Windows quoting hack) of + # 'spawn()', so constructing the argument list is a bit + # awkward. Note that doing the obvious thing and jamming all + # the filenames and commas into one argument would be wrong, + # because 'spawn()' would quote any filenames with spaces in + # them. Arghghh!. Apparently it works fine as coded... + + # name of dll/exe file + ld_args.extend([',', output_filename]) + # no map file and start libraries + ld_args.append(',,') + + for lib in libraries: + # see if we find it and if there is a bcpp specific lib + # (xxx_bcpp.lib) + libfile = self.find_library_file(library_dirs, lib, debug) + if libfile is None: + ld_args.append(lib) + # probably a BCPP internal library -- don't warn + else: + # full name which prefers bcpp_xxx.lib over xxx.lib + ld_args.append(libfile) + + # some default libraries + ld_args.append('import32') + ld_args.append('cw32mt') + + # def file for export symbols + ld_args.extend([',', def_file]) + # add resource files + ld_args.append(',') + ld_args.extend(resources) + + if extra_preargs: + ld_args[:0] = extra_preargs + if extra_postargs: + ld_args.extend(extra_postargs) + + self.mkpath(os.path.dirname(output_filename)) + try: + self.spawn([self.linker] + ld_args) + except DistutilsExecError as msg: + raise LinkError(msg) + + else: + log.debug("skipping %s (up-to-date)", output_filename) + + # link () + + # -- Miscellaneous methods ----------------------------------------- + + def find_library_file(self, dirs, lib, debug=0): + # List of effective library names to try, in order of preference: + # xxx_bcpp.lib is better than xxx.lib + # and xxx_d.lib is better than xxx.lib if debug is set + # + # The "_bcpp" suffix is to handle a Python installation for people + # with multiple compilers (primarily Distutils hackers, I suspect + # ;-). The idea is they'd have one static library for each + # compiler they care about, since (almost?) every Windows compiler + # seems to have a different format for static libraries. + if debug: + dlib = lib + "_d" + try_names = (dlib + "_bcpp", lib + "_bcpp", dlib, lib) + else: + try_names = (lib + "_bcpp", lib) + + for dir in dirs: + for name in try_names: + libfile = os.path.join(dir, self.library_filename(name)) + if os.path.exists(libfile): + return libfile + else: + # Oops, didn't find it in *any* of 'dirs' + return None + + # overwrite the one from CCompiler to support rc and res-files + def object_filenames(self, source_filenames, strip_dir=0, output_dir=''): + if output_dir is None: + output_dir = '' + obj_names = [] + for src_name in source_filenames: + # use normcase to make sure '.rc' is really '.rc' and not '.RC' + (base, ext) = os.path.splitext(os.path.normcase(src_name)) + if ext not in (self.src_extensions + ['.rc', '.res']): + raise UnknownFileError( + "unknown file type '{}' (from '{}')".format(ext, src_name) + ) + if strip_dir: + base = os.path.basename(base) + if ext == '.res': + # these can go unchanged + obj_names.append(os.path.join(output_dir, base + ext)) + elif ext == '.rc': + # these need to be compiled to .res-files + obj_names.append(os.path.join(output_dir, base + '.res')) + else: + obj_names.append(os.path.join(output_dir, base + self.obj_extension)) + return obj_names + + # object_filenames () + + def preprocess( + self, + source, + output_file=None, + macros=None, + include_dirs=None, + extra_preargs=None, + extra_postargs=None, + ): + + (_, macros, include_dirs) = self._fix_compile_args(None, macros, include_dirs) + pp_opts = gen_preprocess_options(macros, include_dirs) + pp_args = ['cpp32.exe'] + pp_opts + if output_file is not None: + pp_args.append('-o' + output_file) + if extra_preargs: + pp_args[:0] = extra_preargs + if extra_postargs: + pp_args.extend(extra_postargs) + pp_args.append(source) + + # We need to preprocess: either we're being forced to, or the + # source file is newer than the target (or the target doesn't + # exist). + if self.force or output_file is None or newer(source, output_file): + if output_file: + self.mkpath(os.path.dirname(output_file)) + try: + self.spawn(pp_args) + except DistutilsExecError as msg: + print(msg) + raise CompileError(msg) + + # preprocess() diff --git a/libs/common/setuptools/_distutils/ccompiler.py b/libs/common/setuptools/_distutils/ccompiler.py new file mode 100644 index 00000000..64635311 --- /dev/null +++ b/libs/common/setuptools/_distutils/ccompiler.py @@ -0,0 +1,1220 @@ +"""distutils.ccompiler + +Contains CCompiler, an abstract base class that defines the interface +for the Distutils compiler abstraction model.""" + +import sys +import os +import re + +from .errors import ( + CompileError, + LinkError, + UnknownFileError, + DistutilsPlatformError, + DistutilsModuleError, +) +from .spawn import spawn +from .file_util import move_file +from .dir_util import mkpath +from .dep_util import newer_group +from .util import split_quoted, execute +from ._log import log + + +class CCompiler: + """Abstract base class to define the interface that must be implemented + by real compiler classes. Also has some utility methods used by + several compiler classes. + + The basic idea behind a compiler abstraction class is that each + instance can be used for all the compile/link steps in building a + single project. Thus, attributes common to all of those compile and + link steps -- include directories, macros to define, libraries to link + against, etc. -- are attributes of the compiler instance. To allow for + variability in how individual files are treated, most of those + attributes may be varied on a per-compilation or per-link basis. + """ + + # 'compiler_type' is a class attribute that identifies this class. It + # keeps code that wants to know what kind of compiler it's dealing with + # from having to import all possible compiler classes just to do an + # 'isinstance'. In concrete CCompiler subclasses, 'compiler_type' + # should really, really be one of the keys of the 'compiler_class' + # dictionary (see below -- used by the 'new_compiler()' factory + # function) -- authors of new compiler interface classes are + # responsible for updating 'compiler_class'! + compiler_type = None + + # XXX things not handled by this compiler abstraction model: + # * client can't provide additional options for a compiler, + # e.g. warning, optimization, debugging flags. Perhaps this + # should be the domain of concrete compiler abstraction classes + # (UnixCCompiler, MSVCCompiler, etc.) -- or perhaps the base + # class should have methods for the common ones. + # * can't completely override the include or library searchg + # path, ie. no "cc -I -Idir1 -Idir2" or "cc -L -Ldir1 -Ldir2". + # I'm not sure how widely supported this is even by Unix + # compilers, much less on other platforms. And I'm even less + # sure how useful it is; maybe for cross-compiling, but + # support for that is a ways off. (And anyways, cross + # compilers probably have a dedicated binary with the + # right paths compiled in. I hope.) + # * can't do really freaky things with the library list/library + # dirs, e.g. "-Ldir1 -lfoo -Ldir2 -lfoo" to link against + # different versions of libfoo.a in different locations. I + # think this is useless without the ability to null out the + # library search path anyways. + + # Subclasses that rely on the standard filename generation methods + # implemented below should override these; see the comment near + # those methods ('object_filenames()' et. al.) for details: + src_extensions = None # list of strings + obj_extension = None # string + static_lib_extension = None + shared_lib_extension = None # string + static_lib_format = None # format string + shared_lib_format = None # prob. same as static_lib_format + exe_extension = None # string + + # Default language settings. language_map is used to detect a source + # file or Extension target language, checking source filenames. + # language_order is used to detect the language precedence, when deciding + # what language to use when mixing source types. For example, if some + # extension has two files with ".c" extension, and one with ".cpp", it + # is still linked as c++. + language_map = { + ".c": "c", + ".cc": "c++", + ".cpp": "c++", + ".cxx": "c++", + ".m": "objc", + } + language_order = ["c++", "objc", "c"] + + include_dirs = [] + """ + include dirs specific to this compiler class + """ + + library_dirs = [] + """ + library dirs specific to this compiler class + """ + + def __init__(self, verbose=0, dry_run=0, force=0): + self.dry_run = dry_run + self.force = force + self.verbose = verbose + + # 'output_dir': a common output directory for object, library, + # shared object, and shared library files + self.output_dir = None + + # 'macros': a list of macro definitions (or undefinitions). A + # macro definition is a 2-tuple (name, value), where the value is + # either a string or None (no explicit value). A macro + # undefinition is a 1-tuple (name,). + self.macros = [] + + # 'include_dirs': a list of directories to search for include files + self.include_dirs = [] + + # 'libraries': a list of libraries to include in any link + # (library names, not filenames: eg. "foo" not "libfoo.a") + self.libraries = [] + + # 'library_dirs': a list of directories to search for libraries + self.library_dirs = [] + + # 'runtime_library_dirs': a list of directories to search for + # shared libraries/objects at runtime + self.runtime_library_dirs = [] + + # 'objects': a list of object files (or similar, such as explicitly + # named library files) to include on any link + self.objects = [] + + for key in self.executables.keys(): + self.set_executable(key, self.executables[key]) + + def set_executables(self, **kwargs): + """Define the executables (and options for them) that will be run + to perform the various stages of compilation. The exact set of + executables that may be specified here depends on the compiler + class (via the 'executables' class attribute), but most will have: + compiler the C/C++ compiler + linker_so linker used to create shared objects and libraries + linker_exe linker used to create binary executables + archiver static library creator + + On platforms with a command-line (Unix, DOS/Windows), each of these + is a string that will be split into executable name and (optional) + list of arguments. (Splitting the string is done similarly to how + Unix shells operate: words are delimited by spaces, but quotes and + backslashes can override this. See + 'distutils.util.split_quoted()'.) + """ + + # Note that some CCompiler implementation classes will define class + # attributes 'cpp', 'cc', etc. with hard-coded executable names; + # this is appropriate when a compiler class is for exactly one + # compiler/OS combination (eg. MSVCCompiler). Other compiler + # classes (UnixCCompiler, in particular) are driven by information + # discovered at run-time, since there are many different ways to do + # basically the same things with Unix C compilers. + + for key in kwargs: + if key not in self.executables: + raise ValueError( + "unknown executable '%s' for class %s" + % (key, self.__class__.__name__) + ) + self.set_executable(key, kwargs[key]) + + def set_executable(self, key, value): + if isinstance(value, str): + setattr(self, key, split_quoted(value)) + else: + setattr(self, key, value) + + def _find_macro(self, name): + i = 0 + for defn in self.macros: + if defn[0] == name: + return i + i += 1 + return None + + def _check_macro_definitions(self, definitions): + """Ensures that every element of 'definitions' is a valid macro + definition, ie. either (name,value) 2-tuple or a (name,) tuple. Do + nothing if all definitions are OK, raise TypeError otherwise. + """ + for defn in definitions: + if not ( + isinstance(defn, tuple) + and ( + len(defn) in (1, 2) + and (isinstance(defn[1], str) or defn[1] is None) + ) + and isinstance(defn[0], str) + ): + raise TypeError( + ("invalid macro definition '%s': " % defn) + + "must be tuple (string,), (string, string), or " + + "(string, None)" + ) + + # -- Bookkeeping methods ------------------------------------------- + + def define_macro(self, name, value=None): + """Define a preprocessor macro for all compilations driven by this + compiler object. The optional parameter 'value' should be a + string; if it is not supplied, then the macro will be defined + without an explicit value and the exact outcome depends on the + compiler used (XXX true? does ANSI say anything about this?) + """ + # Delete from the list of macro definitions/undefinitions if + # already there (so that this one will take precedence). + i = self._find_macro(name) + if i is not None: + del self.macros[i] + + self.macros.append((name, value)) + + def undefine_macro(self, name): + """Undefine a preprocessor macro for all compilations driven by + this compiler object. If the same macro is defined by + 'define_macro()' and undefined by 'undefine_macro()' the last call + takes precedence (including multiple redefinitions or + undefinitions). If the macro is redefined/undefined on a + per-compilation basis (ie. in the call to 'compile()'), then that + takes precedence. + """ + # Delete from the list of macro definitions/undefinitions if + # already there (so that this one will take precedence). + i = self._find_macro(name) + if i is not None: + del self.macros[i] + + undefn = (name,) + self.macros.append(undefn) + + def add_include_dir(self, dir): + """Add 'dir' to the list of directories that will be searched for + header files. The compiler is instructed to search directories in + the order in which they are supplied by successive calls to + 'add_include_dir()'. + """ + self.include_dirs.append(dir) + + def set_include_dirs(self, dirs): + """Set the list of directories that will be searched to 'dirs' (a + list of strings). Overrides any preceding calls to + 'add_include_dir()'; subsequence calls to 'add_include_dir()' add + to the list passed to 'set_include_dirs()'. This does not affect + any list of standard include directories that the compiler may + search by default. + """ + self.include_dirs = dirs[:] + + def add_library(self, libname): + """Add 'libname' to the list of libraries that will be included in + all links driven by this compiler object. Note that 'libname' + should *not* be the name of a file containing a library, but the + name of the library itself: the actual filename will be inferred by + the linker, the compiler, or the compiler class (depending on the + platform). + + The linker will be instructed to link against libraries in the + order they were supplied to 'add_library()' and/or + 'set_libraries()'. It is perfectly valid to duplicate library + names; the linker will be instructed to link against libraries as + many times as they are mentioned. + """ + self.libraries.append(libname) + + def set_libraries(self, libnames): + """Set the list of libraries to be included in all links driven by + this compiler object to 'libnames' (a list of strings). This does + not affect any standard system libraries that the linker may + include by default. + """ + self.libraries = libnames[:] + + def add_library_dir(self, dir): + """Add 'dir' to the list of directories that will be searched for + libraries specified to 'add_library()' and 'set_libraries()'. The + linker will be instructed to search for libraries in the order they + are supplied to 'add_library_dir()' and/or 'set_library_dirs()'. + """ + self.library_dirs.append(dir) + + def set_library_dirs(self, dirs): + """Set the list of library search directories to 'dirs' (a list of + strings). This does not affect any standard library search path + that the linker may search by default. + """ + self.library_dirs = dirs[:] + + def add_runtime_library_dir(self, dir): + """Add 'dir' to the list of directories that will be searched for + shared libraries at runtime. + """ + self.runtime_library_dirs.append(dir) + + def set_runtime_library_dirs(self, dirs): + """Set the list of directories to search for shared libraries at + runtime to 'dirs' (a list of strings). This does not affect any + standard search path that the runtime linker may search by + default. + """ + self.runtime_library_dirs = dirs[:] + + def add_link_object(self, object): + """Add 'object' to the list of object files (or analogues, such as + explicitly named library files or the output of "resource + compilers") to be included in every link driven by this compiler + object. + """ + self.objects.append(object) + + def set_link_objects(self, objects): + """Set the list of object files (or analogues) to be included in + every link to 'objects'. This does not affect any standard object + files that the linker may include by default (such as system + libraries). + """ + self.objects = objects[:] + + # -- Private utility methods -------------------------------------- + # (here for the convenience of subclasses) + + # Helper method to prep compiler in subclass compile() methods + + def _setup_compile(self, outdir, macros, incdirs, sources, depends, extra): + """Process arguments and decide which source files to compile.""" + outdir, macros, incdirs = self._fix_compile_args(outdir, macros, incdirs) + + if extra is None: + extra = [] + + # Get the list of expected output (object) files + objects = self.object_filenames(sources, strip_dir=0, output_dir=outdir) + assert len(objects) == len(sources) + + pp_opts = gen_preprocess_options(macros, incdirs) + + build = {} + for i in range(len(sources)): + src = sources[i] + obj = objects[i] + ext = os.path.splitext(src)[1] + self.mkpath(os.path.dirname(obj)) + build[obj] = (src, ext) + + return macros, objects, extra, pp_opts, build + + def _get_cc_args(self, pp_opts, debug, before): + # works for unixccompiler, cygwinccompiler + cc_args = pp_opts + ['-c'] + if debug: + cc_args[:0] = ['-g'] + if before: + cc_args[:0] = before + return cc_args + + def _fix_compile_args(self, output_dir, macros, include_dirs): + """Typecheck and fix-up some of the arguments to the 'compile()' + method, and return fixed-up values. Specifically: if 'output_dir' + is None, replaces it with 'self.output_dir'; ensures that 'macros' + is a list, and augments it with 'self.macros'; ensures that + 'include_dirs' is a list, and augments it with 'self.include_dirs'. + Guarantees that the returned values are of the correct type, + i.e. for 'output_dir' either string or None, and for 'macros' and + 'include_dirs' either list or None. + """ + if output_dir is None: + output_dir = self.output_dir + elif not isinstance(output_dir, str): + raise TypeError("'output_dir' must be a string or None") + + if macros is None: + macros = self.macros + elif isinstance(macros, list): + macros = macros + (self.macros or []) + else: + raise TypeError("'macros' (if supplied) must be a list of tuples") + + if include_dirs is None: + include_dirs = self.include_dirs + elif isinstance(include_dirs, (list, tuple)): + include_dirs = list(include_dirs) + (self.include_dirs or []) + else: + raise TypeError("'include_dirs' (if supplied) must be a list of strings") + + # add include dirs for class + include_dirs += self.__class__.include_dirs + + return output_dir, macros, include_dirs + + def _prep_compile(self, sources, output_dir, depends=None): + """Decide which source files must be recompiled. + + Determine the list of object files corresponding to 'sources', + and figure out which ones really need to be recompiled. + Return a list of all object files and a dictionary telling + which source files can be skipped. + """ + # Get the list of expected output (object) files + objects = self.object_filenames(sources, output_dir=output_dir) + assert len(objects) == len(sources) + + # Return an empty dict for the "which source files can be skipped" + # return value to preserve API compatibility. + return objects, {} + + def _fix_object_args(self, objects, output_dir): + """Typecheck and fix up some arguments supplied to various methods. + Specifically: ensure that 'objects' is a list; if output_dir is + None, replace with self.output_dir. Return fixed versions of + 'objects' and 'output_dir'. + """ + if not isinstance(objects, (list, tuple)): + raise TypeError("'objects' must be a list or tuple of strings") + objects = list(objects) + + if output_dir is None: + output_dir = self.output_dir + elif not isinstance(output_dir, str): + raise TypeError("'output_dir' must be a string or None") + + return (objects, output_dir) + + def _fix_lib_args(self, libraries, library_dirs, runtime_library_dirs): + """Typecheck and fix up some of the arguments supplied to the + 'link_*' methods. Specifically: ensure that all arguments are + lists, and augment them with their permanent versions + (eg. 'self.libraries' augments 'libraries'). Return a tuple with + fixed versions of all arguments. + """ + if libraries is None: + libraries = self.libraries + elif isinstance(libraries, (list, tuple)): + libraries = list(libraries) + (self.libraries or []) + else: + raise TypeError("'libraries' (if supplied) must be a list of strings") + + if library_dirs is None: + library_dirs = self.library_dirs + elif isinstance(library_dirs, (list, tuple)): + library_dirs = list(library_dirs) + (self.library_dirs or []) + else: + raise TypeError("'library_dirs' (if supplied) must be a list of strings") + + # add library dirs for class + library_dirs += self.__class__.library_dirs + + if runtime_library_dirs is None: + runtime_library_dirs = self.runtime_library_dirs + elif isinstance(runtime_library_dirs, (list, tuple)): + runtime_library_dirs = list(runtime_library_dirs) + ( + self.runtime_library_dirs or [] + ) + else: + raise TypeError( + "'runtime_library_dirs' (if supplied) " "must be a list of strings" + ) + + return (libraries, library_dirs, runtime_library_dirs) + + def _need_link(self, objects, output_file): + """Return true if we need to relink the files listed in 'objects' + to recreate 'output_file'. + """ + if self.force: + return True + else: + if self.dry_run: + newer = newer_group(objects, output_file, missing='newer') + else: + newer = newer_group(objects, output_file) + return newer + + def detect_language(self, sources): + """Detect the language of a given file, or list of files. Uses + language_map, and language_order to do the job. + """ + if not isinstance(sources, list): + sources = [sources] + lang = None + index = len(self.language_order) + for source in sources: + base, ext = os.path.splitext(source) + extlang = self.language_map.get(ext) + try: + extindex = self.language_order.index(extlang) + if extindex < index: + lang = extlang + index = extindex + except ValueError: + pass + return lang + + # -- Worker methods ------------------------------------------------ + # (must be implemented by subclasses) + + def preprocess( + self, + source, + output_file=None, + macros=None, + include_dirs=None, + extra_preargs=None, + extra_postargs=None, + ): + """Preprocess a single C/C++ source file, named in 'source'. + Output will be written to file named 'output_file', or stdout if + 'output_file' not supplied. 'macros' is a list of macro + definitions as for 'compile()', which will augment the macros set + with 'define_macro()' and 'undefine_macro()'. 'include_dirs' is a + list of directory names that will be added to the default list. + + Raises PreprocessError on failure. + """ + pass + + def compile( + self, + sources, + output_dir=None, + macros=None, + include_dirs=None, + debug=0, + extra_preargs=None, + extra_postargs=None, + depends=None, + ): + """Compile one or more source files. + + 'sources' must be a list of filenames, most likely C/C++ + files, but in reality anything that can be handled by a + particular compiler and compiler class (eg. MSVCCompiler can + handle resource files in 'sources'). Return a list of object + filenames, one per source filename in 'sources'. Depending on + the implementation, not all source files will necessarily be + compiled, but all corresponding object filenames will be + returned. + + If 'output_dir' is given, object files will be put under it, while + retaining their original path component. That is, "foo/bar.c" + normally compiles to "foo/bar.o" (for a Unix implementation); if + 'output_dir' is "build", then it would compile to + "build/foo/bar.o". + + 'macros', if given, must be a list of macro definitions. A macro + definition is either a (name, value) 2-tuple or a (name,) 1-tuple. + The former defines a macro; if the value is None, the macro is + defined without an explicit value. The 1-tuple case undefines a + macro. Later definitions/redefinitions/ undefinitions take + precedence. + + 'include_dirs', if given, must be a list of strings, the + directories to add to the default include file search path for this + compilation only. + + 'debug' is a boolean; if true, the compiler will be instructed to + output debug symbols in (or alongside) the object file(s). + + 'extra_preargs' and 'extra_postargs' are implementation- dependent. + On platforms that have the notion of a command-line (e.g. Unix, + DOS/Windows), they are most likely lists of strings: extra + command-line arguments to prepend/append to the compiler command + line. On other platforms, consult the implementation class + documentation. In any event, they are intended as an escape hatch + for those occasions when the abstract compiler framework doesn't + cut the mustard. + + 'depends', if given, is a list of filenames that all targets + depend on. If a source file is older than any file in + depends, then the source file will be recompiled. This + supports dependency tracking, but only at a coarse + granularity. + + Raises CompileError on failure. + """ + # A concrete compiler class can either override this method + # entirely or implement _compile(). + macros, objects, extra_postargs, pp_opts, build = self._setup_compile( + output_dir, macros, include_dirs, sources, depends, extra_postargs + ) + cc_args = self._get_cc_args(pp_opts, debug, extra_preargs) + + for obj in objects: + try: + src, ext = build[obj] + except KeyError: + continue + self._compile(obj, src, ext, cc_args, extra_postargs, pp_opts) + + # Return *all* object filenames, not just the ones we just built. + return objects + + def _compile(self, obj, src, ext, cc_args, extra_postargs, pp_opts): + """Compile 'src' to product 'obj'.""" + # A concrete compiler class that does not override compile() + # should implement _compile(). + pass + + def create_static_lib( + self, objects, output_libname, output_dir=None, debug=0, target_lang=None + ): + """Link a bunch of stuff together to create a static library file. + The "bunch of stuff" consists of the list of object files supplied + as 'objects', the extra object files supplied to + 'add_link_object()' and/or 'set_link_objects()', the libraries + supplied to 'add_library()' and/or 'set_libraries()', and the + libraries supplied as 'libraries' (if any). + + 'output_libname' should be a library name, not a filename; the + filename will be inferred from the library name. 'output_dir' is + the directory where the library file will be put. + + 'debug' is a boolean; if true, debugging information will be + included in the library (note that on most platforms, it is the + compile step where this matters: the 'debug' flag is included here + just for consistency). + + 'target_lang' is the target language for which the given objects + are being compiled. This allows specific linkage time treatment of + certain languages. + + Raises LibError on failure. + """ + pass + + # values for target_desc parameter in link() + SHARED_OBJECT = "shared_object" + SHARED_LIBRARY = "shared_library" + EXECUTABLE = "executable" + + def link( + self, + target_desc, + objects, + output_filename, + output_dir=None, + libraries=None, + library_dirs=None, + runtime_library_dirs=None, + export_symbols=None, + debug=0, + extra_preargs=None, + extra_postargs=None, + build_temp=None, + target_lang=None, + ): + """Link a bunch of stuff together to create an executable or + shared library file. + + The "bunch of stuff" consists of the list of object files supplied + as 'objects'. 'output_filename' should be a filename. If + 'output_dir' is supplied, 'output_filename' is relative to it + (i.e. 'output_filename' can provide directory components if + needed). + + 'libraries' is a list of libraries to link against. These are + library names, not filenames, since they're translated into + filenames in a platform-specific way (eg. "foo" becomes "libfoo.a" + on Unix and "foo.lib" on DOS/Windows). However, they can include a + directory component, which means the linker will look in that + specific directory rather than searching all the normal locations. + + 'library_dirs', if supplied, should be a list of directories to + search for libraries that were specified as bare library names + (ie. no directory component). These are on top of the system + default and those supplied to 'add_library_dir()' and/or + 'set_library_dirs()'. 'runtime_library_dirs' is a list of + directories that will be embedded into the shared library and used + to search for other shared libraries that *it* depends on at + run-time. (This may only be relevant on Unix.) + + 'export_symbols' is a list of symbols that the shared library will + export. (This appears to be relevant only on Windows.) + + 'debug' is as for 'compile()' and 'create_static_lib()', with the + slight distinction that it actually matters on most platforms (as + opposed to 'create_static_lib()', which includes a 'debug' flag + mostly for form's sake). + + 'extra_preargs' and 'extra_postargs' are as for 'compile()' (except + of course that they supply command-line arguments for the + particular linker being used). + + 'target_lang' is the target language for which the given objects + are being compiled. This allows specific linkage time treatment of + certain languages. + + Raises LinkError on failure. + """ + raise NotImplementedError + + # Old 'link_*()' methods, rewritten to use the new 'link()' method. + + def link_shared_lib( + self, + objects, + output_libname, + output_dir=None, + libraries=None, + library_dirs=None, + runtime_library_dirs=None, + export_symbols=None, + debug=0, + extra_preargs=None, + extra_postargs=None, + build_temp=None, + target_lang=None, + ): + self.link( + CCompiler.SHARED_LIBRARY, + objects, + self.library_filename(output_libname, lib_type='shared'), + output_dir, + libraries, + library_dirs, + runtime_library_dirs, + export_symbols, + debug, + extra_preargs, + extra_postargs, + build_temp, + target_lang, + ) + + def link_shared_object( + self, + objects, + output_filename, + output_dir=None, + libraries=None, + library_dirs=None, + runtime_library_dirs=None, + export_symbols=None, + debug=0, + extra_preargs=None, + extra_postargs=None, + build_temp=None, + target_lang=None, + ): + self.link( + CCompiler.SHARED_OBJECT, + objects, + output_filename, + output_dir, + libraries, + library_dirs, + runtime_library_dirs, + export_symbols, + debug, + extra_preargs, + extra_postargs, + build_temp, + target_lang, + ) + + def link_executable( + self, + objects, + output_progname, + output_dir=None, + libraries=None, + library_dirs=None, + runtime_library_dirs=None, + debug=0, + extra_preargs=None, + extra_postargs=None, + target_lang=None, + ): + self.link( + CCompiler.EXECUTABLE, + objects, + self.executable_filename(output_progname), + output_dir, + libraries, + library_dirs, + runtime_library_dirs, + None, + debug, + extra_preargs, + extra_postargs, + None, + target_lang, + ) + + # -- Miscellaneous methods ----------------------------------------- + # These are all used by the 'gen_lib_options() function; there is + # no appropriate default implementation so subclasses should + # implement all of these. + + def library_dir_option(self, dir): + """Return the compiler option to add 'dir' to the list of + directories searched for libraries. + """ + raise NotImplementedError + + def runtime_library_dir_option(self, dir): + """Return the compiler option to add 'dir' to the list of + directories searched for runtime libraries. + """ + raise NotImplementedError + + def library_option(self, lib): + """Return the compiler option to add 'lib' to the list of libraries + linked into the shared library or executable. + """ + raise NotImplementedError + + def has_function( # noqa: C901 + self, + funcname, + includes=None, + include_dirs=None, + libraries=None, + library_dirs=None, + ): + """Return a boolean indicating whether funcname is supported on + the current platform. The optional arguments can be used to + augment the compilation environment. + """ + # this can't be included at module scope because it tries to + # import math which might not be available at that point - maybe + # the necessary logic should just be inlined? + import tempfile + + if includes is None: + includes = [] + if include_dirs is None: + include_dirs = [] + if libraries is None: + libraries = [] + if library_dirs is None: + library_dirs = [] + fd, fname = tempfile.mkstemp(".c", funcname, text=True) + f = os.fdopen(fd, "w") + try: + for incl in includes: + f.write("""#include "%s"\n""" % incl) + f.write( + """\ +int main (int argc, char **argv) { + %s(); + return 0; +} +""" + % funcname + ) + finally: + f.close() + try: + objects = self.compile([fname], include_dirs=include_dirs) + except CompileError: + return False + finally: + os.remove(fname) + + try: + self.link_executable( + objects, "a.out", libraries=libraries, library_dirs=library_dirs + ) + except (LinkError, TypeError): + return False + else: + os.remove(os.path.join(self.output_dir or '', "a.out")) + finally: + for fn in objects: + os.remove(fn) + return True + + def find_library_file(self, dirs, lib, debug=0): + """Search the specified list of directories for a static or shared + library file 'lib' and return the full path to that file. If + 'debug' true, look for a debugging version (if that makes sense on + the current platform). Return None if 'lib' wasn't found in any of + the specified directories. + """ + raise NotImplementedError + + # -- Filename generation methods ----------------------------------- + + # The default implementation of the filename generating methods are + # prejudiced towards the Unix/DOS/Windows view of the world: + # * object files are named by replacing the source file extension + # (eg. .c/.cpp -> .o/.obj) + # * library files (shared or static) are named by plugging the + # library name and extension into a format string, eg. + # "lib%s.%s" % (lib_name, ".a") for Unix static libraries + # * executables are named by appending an extension (possibly + # empty) to the program name: eg. progname + ".exe" for + # Windows + # + # To reduce redundant code, these methods expect to find + # several attributes in the current object (presumably defined + # as class attributes): + # * src_extensions - + # list of C/C++ source file extensions, eg. ['.c', '.cpp'] + # * obj_extension - + # object file extension, eg. '.o' or '.obj' + # * static_lib_extension - + # extension for static library files, eg. '.a' or '.lib' + # * shared_lib_extension - + # extension for shared library/object files, eg. '.so', '.dll' + # * static_lib_format - + # format string for generating static library filenames, + # eg. 'lib%s.%s' or '%s.%s' + # * shared_lib_format + # format string for generating shared library filenames + # (probably same as static_lib_format, since the extension + # is one of the intended parameters to the format string) + # * exe_extension - + # extension for executable files, eg. '' or '.exe' + + def object_filenames(self, source_filenames, strip_dir=0, output_dir=''): + if output_dir is None: + output_dir = '' + return list( + self._make_out_path(output_dir, strip_dir, src_name) + for src_name in source_filenames + ) + + @property + def out_extensions(self): + return dict.fromkeys(self.src_extensions, self.obj_extension) + + def _make_out_path(self, output_dir, strip_dir, src_name): + base, ext = os.path.splitext(src_name) + base = self._make_relative(base) + try: + new_ext = self.out_extensions[ext] + except LookupError: + raise UnknownFileError( + "unknown file type '{}' (from '{}')".format(ext, src_name) + ) + if strip_dir: + base = os.path.basename(base) + return os.path.join(output_dir, base + new_ext) + + @staticmethod + def _make_relative(base): + """ + In order to ensure that a filename always honors the + indicated output_dir, make sure it's relative. + Ref python/cpython#37775. + """ + # Chop off the drive + no_drive = os.path.splitdrive(base)[1] + # If abs, chop off leading / + return no_drive[os.path.isabs(no_drive) :] + + def shared_object_filename(self, basename, strip_dir=0, output_dir=''): + assert output_dir is not None + if strip_dir: + basename = os.path.basename(basename) + return os.path.join(output_dir, basename + self.shared_lib_extension) + + def executable_filename(self, basename, strip_dir=0, output_dir=''): + assert output_dir is not None + if strip_dir: + basename = os.path.basename(basename) + return os.path.join(output_dir, basename + (self.exe_extension or '')) + + def library_filename( + self, libname, lib_type='static', strip_dir=0, output_dir='' # or 'shared' + ): + assert output_dir is not None + expected = '"static", "shared", "dylib", "xcode_stub"' + if lib_type not in eval(expected): + raise ValueError(f"'lib_type' must be {expected}") + fmt = getattr(self, lib_type + "_lib_format") + ext = getattr(self, lib_type + "_lib_extension") + + dir, base = os.path.split(libname) + filename = fmt % (base, ext) + if strip_dir: + dir = '' + + return os.path.join(output_dir, dir, filename) + + # -- Utility methods ----------------------------------------------- + + def announce(self, msg, level=1): + log.debug(msg) + + def debug_print(self, msg): + from distutils.debug import DEBUG + + if DEBUG: + print(msg) + + def warn(self, msg): + sys.stderr.write("warning: %s\n" % msg) + + def execute(self, func, args, msg=None, level=1): + execute(func, args, msg, self.dry_run) + + def spawn(self, cmd, **kwargs): + spawn(cmd, dry_run=self.dry_run, **kwargs) + + def move_file(self, src, dst): + return move_file(src, dst, dry_run=self.dry_run) + + def mkpath(self, name, mode=0o777): + mkpath(name, mode, dry_run=self.dry_run) + + +# Map a sys.platform/os.name ('posix', 'nt') to the default compiler +# type for that platform. Keys are interpreted as re match +# patterns. Order is important; platform mappings are preferred over +# OS names. +_default_compilers = ( + # Platform string mappings + # on a cygwin built python we can use gcc like an ordinary UNIXish + # compiler + ('cygwin.*', 'unix'), + # OS name mappings + ('posix', 'unix'), + ('nt', 'msvc'), +) + + +def get_default_compiler(osname=None, platform=None): + """Determine the default compiler to use for the given platform. + + osname should be one of the standard Python OS names (i.e. the + ones returned by os.name) and platform the common value + returned by sys.platform for the platform in question. + + The default values are os.name and sys.platform in case the + parameters are not given. + """ + if osname is None: + osname = os.name + if platform is None: + platform = sys.platform + for pattern, compiler in _default_compilers: + if ( + re.match(pattern, platform) is not None + or re.match(pattern, osname) is not None + ): + return compiler + # Default to Unix compiler + return 'unix' + + +# Map compiler types to (module_name, class_name) pairs -- ie. where to +# find the code that implements an interface to this compiler. (The module +# is assumed to be in the 'distutils' package.) +compiler_class = { + 'unix': ('unixccompiler', 'UnixCCompiler', "standard UNIX-style compiler"), + 'msvc': ('_msvccompiler', 'MSVCCompiler', "Microsoft Visual C++"), + 'cygwin': ( + 'cygwinccompiler', + 'CygwinCCompiler', + "Cygwin port of GNU C Compiler for Win32", + ), + 'mingw32': ( + 'cygwinccompiler', + 'Mingw32CCompiler', + "Mingw32 port of GNU C Compiler for Win32", + ), + 'bcpp': ('bcppcompiler', 'BCPPCompiler', "Borland C++ Compiler"), +} + + +def show_compilers(): + """Print list of available compilers (used by the "--help-compiler" + options to "build", "build_ext", "build_clib"). + """ + # XXX this "knows" that the compiler option it's describing is + # "--compiler", which just happens to be the case for the three + # commands that use it. + from distutils.fancy_getopt import FancyGetopt + + compilers = [] + for compiler in compiler_class.keys(): + compilers.append(("compiler=" + compiler, None, compiler_class[compiler][2])) + compilers.sort() + pretty_printer = FancyGetopt(compilers) + pretty_printer.print_help("List of available compilers:") + + +def new_compiler(plat=None, compiler=None, verbose=0, dry_run=0, force=0): + """Generate an instance of some CCompiler subclass for the supplied + platform/compiler combination. 'plat' defaults to 'os.name' + (eg. 'posix', 'nt'), and 'compiler' defaults to the default compiler + for that platform. Currently only 'posix' and 'nt' are supported, and + the default compilers are "traditional Unix interface" (UnixCCompiler + class) and Visual C++ (MSVCCompiler class). Note that it's perfectly + possible to ask for a Unix compiler object under Windows, and a + Microsoft compiler object under Unix -- if you supply a value for + 'compiler', 'plat' is ignored. + """ + if plat is None: + plat = os.name + + try: + if compiler is None: + compiler = get_default_compiler(plat) + + (module_name, class_name, long_description) = compiler_class[compiler] + except KeyError: + msg = "don't know how to compile C/C++ code on platform '%s'" % plat + if compiler is not None: + msg = msg + " with '%s' compiler" % compiler + raise DistutilsPlatformError(msg) + + try: + module_name = "distutils." + module_name + __import__(module_name) + module = sys.modules[module_name] + klass = vars(module)[class_name] + except ImportError: + raise DistutilsModuleError( + "can't compile C/C++ code: unable to load module '%s'" % module_name + ) + except KeyError: + raise DistutilsModuleError( + "can't compile C/C++ code: unable to find class '%s' " + "in module '%s'" % (class_name, module_name) + ) + + # XXX The None is necessary to preserve backwards compatibility + # with classes that expect verbose to be the first positional + # argument. + return klass(None, dry_run, force) + + +def gen_preprocess_options(macros, include_dirs): + """Generate C pre-processor options (-D, -U, -I) as used by at least + two types of compilers: the typical Unix compiler and Visual C++. + 'macros' is the usual thing, a list of 1- or 2-tuples, where (name,) + means undefine (-U) macro 'name', and (name,value) means define (-D) + macro 'name' to 'value'. 'include_dirs' is just a list of directory + names to be added to the header file search path (-I). Returns a list + of command-line options suitable for either Unix compilers or Visual + C++. + """ + # XXX it would be nice (mainly aesthetic, and so we don't generate + # stupid-looking command lines) to go over 'macros' and eliminate + # redundant definitions/undefinitions (ie. ensure that only the + # latest mention of a particular macro winds up on the command + # line). I don't think it's essential, though, since most (all?) + # Unix C compilers only pay attention to the latest -D or -U + # mention of a macro on their command line. Similar situation for + # 'include_dirs'. I'm punting on both for now. Anyways, weeding out + # redundancies like this should probably be the province of + # CCompiler, since the data structures used are inherited from it + # and therefore common to all CCompiler classes. + pp_opts = [] + for macro in macros: + if not (isinstance(macro, tuple) and 1 <= len(macro) <= 2): + raise TypeError( + "bad macro definition '%s': " + "each element of 'macros' list must be a 1- or 2-tuple" % macro + ) + + if len(macro) == 1: # undefine this macro + pp_opts.append("-U%s" % macro[0]) + elif len(macro) == 2: + if macro[1] is None: # define with no explicit value + pp_opts.append("-D%s" % macro[0]) + else: + # XXX *don't* need to be clever about quoting the + # macro value here, because we're going to avoid the + # shell at all costs when we spawn the command! + pp_opts.append("-D%s=%s" % macro) + + for dir in include_dirs: + pp_opts.append("-I%s" % dir) + return pp_opts + + +def gen_lib_options(compiler, library_dirs, runtime_library_dirs, libraries): + """Generate linker options for searching library directories and + linking with specific libraries. 'libraries' and 'library_dirs' are, + respectively, lists of library names (not filenames!) and search + directories. Returns a list of command-line options suitable for use + with some compiler (depending on the two format strings passed in). + """ + lib_opts = [] + + for dir in library_dirs: + lib_opts.append(compiler.library_dir_option(dir)) + + for dir in runtime_library_dirs: + opt = compiler.runtime_library_dir_option(dir) + if isinstance(opt, list): + lib_opts = lib_opts + opt + else: + lib_opts.append(opt) + + # XXX it's important that we *not* remove redundant library mentions! + # sometimes you really do have to say "-lfoo -lbar -lfoo" in order to + # resolve all symbols. I just hope we never have to say "-lfoo obj.o + # -lbar" to get things to work -- that's certainly a possibility, but a + # pretty nasty way to arrange your C code. + + for lib in libraries: + (lib_dir, lib_name) = os.path.split(lib) + if lib_dir: + lib_file = compiler.find_library_file([lib_dir], lib_name) + if lib_file: + lib_opts.append(lib_file) + else: + compiler.warn( + "no library file corresponding to " "'%s' found (skipping)" % lib + ) + else: + lib_opts.append(compiler.library_option(lib)) + return lib_opts diff --git a/libs/common/setuptools/_distutils/cmd.py b/libs/common/setuptools/_distutils/cmd.py new file mode 100644 index 00000000..918db853 --- /dev/null +++ b/libs/common/setuptools/_distutils/cmd.py @@ -0,0 +1,435 @@ +"""distutils.cmd + +Provides the Command class, the base class for the command classes +in the distutils.command package. +""" + +import sys +import os +import re +import logging + +from .errors import DistutilsOptionError +from . import util, dir_util, file_util, archive_util, dep_util +from ._log import log + + +class Command: + """Abstract base class for defining command classes, the "worker bees" + of the Distutils. A useful analogy for command classes is to think of + them as subroutines with local variables called "options". The options + are "declared" in 'initialize_options()' and "defined" (given their + final values, aka "finalized") in 'finalize_options()', both of which + must be defined by every command class. The distinction between the + two is necessary because option values might come from the outside + world (command line, config file, ...), and any options dependent on + other options must be computed *after* these outside influences have + been processed -- hence 'finalize_options()'. The "body" of the + subroutine, where it does all its work based on the values of its + options, is the 'run()' method, which must also be implemented by every + command class. + """ + + # 'sub_commands' formalizes the notion of a "family" of commands, + # eg. "install" as the parent with sub-commands "install_lib", + # "install_headers", etc. The parent of a family of commands + # defines 'sub_commands' as a class attribute; it's a list of + # (command_name : string, predicate : unbound_method | string | None) + # tuples, where 'predicate' is a method of the parent command that + # determines whether the corresponding command is applicable in the + # current situation. (Eg. we "install_headers" is only applicable if + # we have any C header files to install.) If 'predicate' is None, + # that command is always applicable. + # + # 'sub_commands' is usually defined at the *end* of a class, because + # predicates can be unbound methods, so they must already have been + # defined. The canonical example is the "install" command. + sub_commands = [] + + # -- Creation/initialization methods ------------------------------- + + def __init__(self, dist): + """Create and initialize a new Command object. Most importantly, + invokes the 'initialize_options()' method, which is the real + initializer and depends on the actual command being + instantiated. + """ + # late import because of mutual dependence between these classes + from distutils.dist import Distribution + + if not isinstance(dist, Distribution): + raise TypeError("dist must be a Distribution instance") + if self.__class__ is Command: + raise RuntimeError("Command is an abstract class") + + self.distribution = dist + self.initialize_options() + + # Per-command versions of the global flags, so that the user can + # customize Distutils' behaviour command-by-command and let some + # commands fall back on the Distribution's behaviour. None means + # "not defined, check self.distribution's copy", while 0 or 1 mean + # false and true (duh). Note that this means figuring out the real + # value of each flag is a touch complicated -- hence "self._dry_run" + # will be handled by __getattr__, below. + # XXX This needs to be fixed. + self._dry_run = None + + # verbose is largely ignored, but needs to be set for + # backwards compatibility (I think)? + self.verbose = dist.verbose + + # Some commands define a 'self.force' option to ignore file + # timestamps, but methods defined *here* assume that + # 'self.force' exists for all commands. So define it here + # just to be safe. + self.force = None + + # The 'help' flag is just used for command-line parsing, so + # none of that complicated bureaucracy is needed. + self.help = 0 + + # 'finalized' records whether or not 'finalize_options()' has been + # called. 'finalize_options()' itself should not pay attention to + # this flag: it is the business of 'ensure_finalized()', which + # always calls 'finalize_options()', to respect/update it. + self.finalized = 0 + + # XXX A more explicit way to customize dry_run would be better. + def __getattr__(self, attr): + if attr == 'dry_run': + myval = getattr(self, "_" + attr) + if myval is None: + return getattr(self.distribution, attr) + else: + return myval + else: + raise AttributeError(attr) + + def ensure_finalized(self): + if not self.finalized: + self.finalize_options() + self.finalized = 1 + + # Subclasses must define: + # initialize_options() + # provide default values for all options; may be customized by + # setup script, by options from config file(s), or by command-line + # options + # finalize_options() + # decide on the final values for all options; this is called + # after all possible intervention from the outside world + # (command-line, option file, etc.) has been processed + # run() + # run the command: do whatever it is we're here to do, + # controlled by the command's various option values + + def initialize_options(self): + """Set default values for all the options that this command + supports. Note that these defaults may be overridden by other + commands, by the setup script, by config files, or by the + command-line. Thus, this is not the place to code dependencies + between options; generally, 'initialize_options()' implementations + are just a bunch of "self.foo = None" assignments. + + This method must be implemented by all command classes. + """ + raise RuntimeError( + "abstract method -- subclass %s must override" % self.__class__ + ) + + def finalize_options(self): + """Set final values for all the options that this command supports. + This is always called as late as possible, ie. after any option + assignments from the command-line or from other commands have been + done. Thus, this is the place to code option dependencies: if + 'foo' depends on 'bar', then it is safe to set 'foo' from 'bar' as + long as 'foo' still has the same value it was assigned in + 'initialize_options()'. + + This method must be implemented by all command classes. + """ + raise RuntimeError( + "abstract method -- subclass %s must override" % self.__class__ + ) + + def dump_options(self, header=None, indent=""): + from distutils.fancy_getopt import longopt_xlate + + if header is None: + header = "command options for '%s':" % self.get_command_name() + self.announce(indent + header, level=logging.INFO) + indent = indent + " " + for (option, _, _) in self.user_options: + option = option.translate(longopt_xlate) + if option[-1] == "=": + option = option[:-1] + value = getattr(self, option) + self.announce(indent + "{} = {}".format(option, value), level=logging.INFO) + + def run(self): + """A command's raison d'etre: carry out the action it exists to + perform, controlled by the options initialized in + 'initialize_options()', customized by other commands, the setup + script, the command-line, and config files, and finalized in + 'finalize_options()'. All terminal output and filesystem + interaction should be done by 'run()'. + + This method must be implemented by all command classes. + """ + raise RuntimeError( + "abstract method -- subclass %s must override" % self.__class__ + ) + + def announce(self, msg, level=logging.DEBUG): + log.log(level, msg) + + def debug_print(self, msg): + """Print 'msg' to stdout if the global DEBUG (taken from the + DISTUTILS_DEBUG environment variable) flag is true. + """ + from distutils.debug import DEBUG + + if DEBUG: + print(msg) + sys.stdout.flush() + + # -- Option validation methods ------------------------------------- + # (these are very handy in writing the 'finalize_options()' method) + # + # NB. the general philosophy here is to ensure that a particular option + # value meets certain type and value constraints. If not, we try to + # force it into conformance (eg. if we expect a list but have a string, + # split the string on comma and/or whitespace). If we can't force the + # option into conformance, raise DistutilsOptionError. Thus, command + # classes need do nothing more than (eg.) + # self.ensure_string_list('foo') + # and they can be guaranteed that thereafter, self.foo will be + # a list of strings. + + def _ensure_stringlike(self, option, what, default=None): + val = getattr(self, option) + if val is None: + setattr(self, option, default) + return default + elif not isinstance(val, str): + raise DistutilsOptionError( + "'{}' must be a {} (got `{}`)".format(option, what, val) + ) + return val + + def ensure_string(self, option, default=None): + """Ensure that 'option' is a string; if not defined, set it to + 'default'. + """ + self._ensure_stringlike(option, "string", default) + + def ensure_string_list(self, option): + r"""Ensure that 'option' is a list of strings. If 'option' is + currently a string, we split it either on /,\s*/ or /\s+/, so + "foo bar baz", "foo,bar,baz", and "foo, bar baz" all become + ["foo", "bar", "baz"]. + """ + val = getattr(self, option) + if val is None: + return + elif isinstance(val, str): + setattr(self, option, re.split(r',\s*|\s+', val)) + else: + if isinstance(val, list): + ok = all(isinstance(v, str) for v in val) + else: + ok = False + if not ok: + raise DistutilsOptionError( + "'{}' must be a list of strings (got {!r})".format(option, val) + ) + + def _ensure_tested_string(self, option, tester, what, error_fmt, default=None): + val = self._ensure_stringlike(option, what, default) + if val is not None and not tester(val): + raise DistutilsOptionError( + ("error in '%s' option: " + error_fmt) % (option, val) + ) + + def ensure_filename(self, option): + """Ensure that 'option' is the name of an existing file.""" + self._ensure_tested_string( + option, os.path.isfile, "filename", "'%s' does not exist or is not a file" + ) + + def ensure_dirname(self, option): + self._ensure_tested_string( + option, + os.path.isdir, + "directory name", + "'%s' does not exist or is not a directory", + ) + + # -- Convenience methods for commands ------------------------------ + + def get_command_name(self): + if hasattr(self, 'command_name'): + return self.command_name + else: + return self.__class__.__name__ + + def set_undefined_options(self, src_cmd, *option_pairs): + """Set the values of any "undefined" options from corresponding + option values in some other command object. "Undefined" here means + "is None", which is the convention used to indicate that an option + has not been changed between 'initialize_options()' and + 'finalize_options()'. Usually called from 'finalize_options()' for + options that depend on some other command rather than another + option of the same command. 'src_cmd' is the other command from + which option values will be taken (a command object will be created + for it if necessary); the remaining arguments are + '(src_option,dst_option)' tuples which mean "take the value of + 'src_option' in the 'src_cmd' command object, and copy it to + 'dst_option' in the current command object". + """ + # Option_pairs: list of (src_option, dst_option) tuples + src_cmd_obj = self.distribution.get_command_obj(src_cmd) + src_cmd_obj.ensure_finalized() + for (src_option, dst_option) in option_pairs: + if getattr(self, dst_option) is None: + setattr(self, dst_option, getattr(src_cmd_obj, src_option)) + + def get_finalized_command(self, command, create=1): + """Wrapper around Distribution's 'get_command_obj()' method: find + (create if necessary and 'create' is true) the command object for + 'command', call its 'ensure_finalized()' method, and return the + finalized command object. + """ + cmd_obj = self.distribution.get_command_obj(command, create) + cmd_obj.ensure_finalized() + return cmd_obj + + # XXX rename to 'get_reinitialized_command()'? (should do the + # same in dist.py, if so) + def reinitialize_command(self, command, reinit_subcommands=0): + return self.distribution.reinitialize_command(command, reinit_subcommands) + + def run_command(self, command): + """Run some other command: uses the 'run_command()' method of + Distribution, which creates and finalizes the command object if + necessary and then invokes its 'run()' method. + """ + self.distribution.run_command(command) + + def get_sub_commands(self): + """Determine the sub-commands that are relevant in the current + distribution (ie., that need to be run). This is based on the + 'sub_commands' class attribute: each tuple in that list may include + a method that we call to determine if the subcommand needs to be + run for the current distribution. Return a list of command names. + """ + commands = [] + for (cmd_name, method) in self.sub_commands: + if method is None or method(self): + commands.append(cmd_name) + return commands + + # -- External world manipulation ----------------------------------- + + def warn(self, msg): + log.warning("warning: %s: %s\n", self.get_command_name(), msg) + + def execute(self, func, args, msg=None, level=1): + util.execute(func, args, msg, dry_run=self.dry_run) + + def mkpath(self, name, mode=0o777): + dir_util.mkpath(name, mode, dry_run=self.dry_run) + + def copy_file( + self, infile, outfile, preserve_mode=1, preserve_times=1, link=None, level=1 + ): + """Copy a file respecting verbose, dry-run and force flags. (The + former two default to whatever is in the Distribution object, and + the latter defaults to false for commands that don't define it.)""" + return file_util.copy_file( + infile, + outfile, + preserve_mode, + preserve_times, + not self.force, + link, + dry_run=self.dry_run, + ) + + def copy_tree( + self, + infile, + outfile, + preserve_mode=1, + preserve_times=1, + preserve_symlinks=0, + level=1, + ): + """Copy an entire directory tree respecting verbose, dry-run, + and force flags. + """ + return dir_util.copy_tree( + infile, + outfile, + preserve_mode, + preserve_times, + preserve_symlinks, + not self.force, + dry_run=self.dry_run, + ) + + def move_file(self, src, dst, level=1): + """Move a file respecting dry-run flag.""" + return file_util.move_file(src, dst, dry_run=self.dry_run) + + def spawn(self, cmd, search_path=1, level=1): + """Spawn an external command respecting dry-run flag.""" + from distutils.spawn import spawn + + spawn(cmd, search_path, dry_run=self.dry_run) + + def make_archive( + self, base_name, format, root_dir=None, base_dir=None, owner=None, group=None + ): + return archive_util.make_archive( + base_name, + format, + root_dir, + base_dir, + dry_run=self.dry_run, + owner=owner, + group=group, + ) + + def make_file( + self, infiles, outfile, func, args, exec_msg=None, skip_msg=None, level=1 + ): + """Special case of 'execute()' for operations that process one or + more input files and generate one output file. Works just like + 'execute()', except the operation is skipped and a different + message printed if 'outfile' already exists and is newer than all + files listed in 'infiles'. If the command defined 'self.force', + and it is true, then the command is unconditionally run -- does no + timestamp checks. + """ + if skip_msg is None: + skip_msg = "skipping %s (inputs unchanged)" % outfile + + # Allow 'infiles' to be a single string + if isinstance(infiles, str): + infiles = (infiles,) + elif not isinstance(infiles, (list, tuple)): + raise TypeError("'infiles' must be a string, or a list or tuple of strings") + + if exec_msg is None: + exec_msg = "generating {} from {}".format(outfile, ', '.join(infiles)) + + # If 'outfile' must be regenerated (either because it doesn't + # exist, is out-of-date, or the 'force' flag is true) then + # perform the action that presumably regenerates it + if self.force or dep_util.newer_group(infiles, outfile): + self.execute(func, args, exec_msg, level) + # Otherwise, print the "skip" message + else: + log.debug(skip_msg) diff --git a/libs/common/setuptools/_distutils/command/__init__.py b/libs/common/setuptools/_distutils/command/__init__.py new file mode 100644 index 00000000..028dcfa0 --- /dev/null +++ b/libs/common/setuptools/_distutils/command/__init__.py @@ -0,0 +1,25 @@ +"""distutils.command + +Package containing implementation of all the standard Distutils +commands.""" + +__all__ = [ # noqa: F822 + 'build', + 'build_py', + 'build_ext', + 'build_clib', + 'build_scripts', + 'clean', + 'install', + 'install_lib', + 'install_headers', + 'install_scripts', + 'install_data', + 'sdist', + 'register', + 'bdist', + 'bdist_dumb', + 'bdist_rpm', + 'check', + 'upload', +] diff --git a/libs/common/setuptools/_distutils/command/_framework_compat.py b/libs/common/setuptools/_distutils/command/_framework_compat.py new file mode 100644 index 00000000..cffa27cb --- /dev/null +++ b/libs/common/setuptools/_distutils/command/_framework_compat.py @@ -0,0 +1,55 @@ +""" +Backward compatibility for homebrew builds on macOS. +""" + + +import sys +import os +import functools +import subprocess +import sysconfig + + +@functools.lru_cache() +def enabled(): + """ + Only enabled for Python 3.9 framework homebrew builds + except ensurepip and venv. + """ + PY39 = (3, 9) < sys.version_info < (3, 10) + framework = sys.platform == 'darwin' and sys._framework + homebrew = "Cellar" in sysconfig.get_config_var('projectbase') + venv = sys.prefix != sys.base_prefix + ensurepip = os.environ.get("ENSUREPIP_OPTIONS") + return PY39 and framework and homebrew and not venv and not ensurepip + + +schemes = dict( + osx_framework_library=dict( + stdlib='{installed_base}/{platlibdir}/python{py_version_short}', + platstdlib='{platbase}/{platlibdir}/python{py_version_short}', + purelib='{homebrew_prefix}/lib/python{py_version_short}/site-packages', + platlib='{homebrew_prefix}/{platlibdir}/python{py_version_short}/site-packages', + include='{installed_base}/include/python{py_version_short}{abiflags}', + platinclude='{installed_platbase}/include/python{py_version_short}{abiflags}', + scripts='{homebrew_prefix}/bin', + data='{homebrew_prefix}', + ) +) + + +@functools.lru_cache() +def vars(): + if not enabled(): + return {} + homebrew_prefix = subprocess.check_output(['brew', '--prefix'], text=True).strip() + return locals() + + +def scheme(name): + """ + Override the selected scheme for posix_prefix. + """ + if not enabled() or not name.endswith('_prefix'): + return name + return 'osx_framework_library' diff --git a/libs/common/setuptools/_distutils/command/bdist.py b/libs/common/setuptools/_distutils/command/bdist.py new file mode 100644 index 00000000..bf0baab0 --- /dev/null +++ b/libs/common/setuptools/_distutils/command/bdist.py @@ -0,0 +1,157 @@ +"""distutils.command.bdist + +Implements the Distutils 'bdist' command (create a built [binary] +distribution).""" + +import os +import warnings + +from ..core import Command +from ..errors import DistutilsPlatformError, DistutilsOptionError +from ..util import get_platform + + +def show_formats(): + """Print list of available formats (arguments to "--format" option).""" + from ..fancy_getopt import FancyGetopt + + formats = [] + for format in bdist.format_commands: + formats.append(("formats=" + format, None, bdist.format_commands[format][1])) + pretty_printer = FancyGetopt(formats) + pretty_printer.print_help("List of available distribution formats:") + + +class ListCompat(dict): + # adapter to allow for Setuptools compatibility in format_commands + def append(self, item): + warnings.warn( + """format_commands is now a dict. append is deprecated.""", + DeprecationWarning, + stacklevel=2, + ) + + +class bdist(Command): + + description = "create a built (binary) distribution" + + user_options = [ + ('bdist-base=', 'b', "temporary directory for creating built distributions"), + ( + 'plat-name=', + 'p', + "platform name to embed in generated filenames " + "(default: %s)" % get_platform(), + ), + ('formats=', None, "formats for distribution (comma-separated list)"), + ( + 'dist-dir=', + 'd', + "directory to put final built distributions in " "[default: dist]", + ), + ('skip-build', None, "skip rebuilding everything (for testing/debugging)"), + ( + 'owner=', + 'u', + "Owner name used when creating a tar file" " [default: current user]", + ), + ( + 'group=', + 'g', + "Group name used when creating a tar file" " [default: current group]", + ), + ] + + boolean_options = ['skip-build'] + + help_options = [ + ('help-formats', None, "lists available distribution formats", show_formats), + ] + + # The following commands do not take a format option from bdist + no_format_option = ('bdist_rpm',) + + # This won't do in reality: will need to distinguish RPM-ish Linux, + # Debian-ish Linux, Solaris, FreeBSD, ..., Windows, Mac OS. + default_format = {'posix': 'gztar', 'nt': 'zip'} + + # Define commands in preferred order for the --help-formats option + format_commands = ListCompat( + { + 'rpm': ('bdist_rpm', "RPM distribution"), + 'gztar': ('bdist_dumb', "gzip'ed tar file"), + 'bztar': ('bdist_dumb', "bzip2'ed tar file"), + 'xztar': ('bdist_dumb', "xz'ed tar file"), + 'ztar': ('bdist_dumb', "compressed tar file"), + 'tar': ('bdist_dumb', "tar file"), + 'zip': ('bdist_dumb', "ZIP file"), + } + ) + + # for compatibility until consumers only reference format_commands + format_command = format_commands + + def initialize_options(self): + self.bdist_base = None + self.plat_name = None + self.formats = None + self.dist_dir = None + self.skip_build = 0 + self.group = None + self.owner = None + + def finalize_options(self): + # have to finalize 'plat_name' before 'bdist_base' + if self.plat_name is None: + if self.skip_build: + self.plat_name = get_platform() + else: + self.plat_name = self.get_finalized_command('build').plat_name + + # 'bdist_base' -- parent of per-built-distribution-format + # temporary directories (eg. we'll probably have + # "build/bdist./dumb", "build/bdist./rpm", etc.) + if self.bdist_base is None: + build_base = self.get_finalized_command('build').build_base + self.bdist_base = os.path.join(build_base, 'bdist.' + self.plat_name) + + self.ensure_string_list('formats') + if self.formats is None: + try: + self.formats = [self.default_format[os.name]] + except KeyError: + raise DistutilsPlatformError( + "don't know how to create built distributions " + "on platform %s" % os.name + ) + + if self.dist_dir is None: + self.dist_dir = "dist" + + def run(self): + # Figure out which sub-commands we need to run. + commands = [] + for format in self.formats: + try: + commands.append(self.format_commands[format][0]) + except KeyError: + raise DistutilsOptionError("invalid format '%s'" % format) + + # Reinitialize and run each command. + for i in range(len(self.formats)): + cmd_name = commands[i] + sub_cmd = self.reinitialize_command(cmd_name) + if cmd_name not in self.no_format_option: + sub_cmd.format = self.formats[i] + + # passing the owner and group names for tar archiving + if cmd_name == 'bdist_dumb': + sub_cmd.owner = self.owner + sub_cmd.group = self.group + + # If we're going to need to run this command again, tell it to + # keep its temporary files around so subsequent runs go faster. + if cmd_name in commands[i + 1 :]: + sub_cmd.keep_temp = 1 + self.run_command(cmd_name) diff --git a/libs/common/setuptools/_distutils/command/bdist_dumb.py b/libs/common/setuptools/_distutils/command/bdist_dumb.py new file mode 100644 index 00000000..071da77e --- /dev/null +++ b/libs/common/setuptools/_distutils/command/bdist_dumb.py @@ -0,0 +1,144 @@ +"""distutils.command.bdist_dumb + +Implements the Distutils 'bdist_dumb' command (create a "dumb" built +distribution -- i.e., just an archive to be unpacked under $prefix or +$exec_prefix).""" + +import os +from ..core import Command +from ..util import get_platform +from ..dir_util import remove_tree, ensure_relative +from ..errors import DistutilsPlatformError +from ..sysconfig import get_python_version +from distutils._log import log + + +class bdist_dumb(Command): + + description = "create a \"dumb\" built distribution" + + user_options = [ + ('bdist-dir=', 'd', "temporary directory for creating the distribution"), + ( + 'plat-name=', + 'p', + "platform name to embed in generated filenames " + "(default: %s)" % get_platform(), + ), + ( + 'format=', + 'f', + "archive format to create (tar, gztar, bztar, xztar, " "ztar, zip)", + ), + ( + 'keep-temp', + 'k', + "keep the pseudo-installation tree around after " + + "creating the distribution archive", + ), + ('dist-dir=', 'd', "directory to put final built distributions in"), + ('skip-build', None, "skip rebuilding everything (for testing/debugging)"), + ( + 'relative', + None, + "build the archive using relative paths " "(default: false)", + ), + ( + 'owner=', + 'u', + "Owner name used when creating a tar file" " [default: current user]", + ), + ( + 'group=', + 'g', + "Group name used when creating a tar file" " [default: current group]", + ), + ] + + boolean_options = ['keep-temp', 'skip-build', 'relative'] + + default_format = {'posix': 'gztar', 'nt': 'zip'} + + def initialize_options(self): + self.bdist_dir = None + self.plat_name = None + self.format = None + self.keep_temp = 0 + self.dist_dir = None + self.skip_build = None + self.relative = 0 + self.owner = None + self.group = None + + def finalize_options(self): + if self.bdist_dir is None: + bdist_base = self.get_finalized_command('bdist').bdist_base + self.bdist_dir = os.path.join(bdist_base, 'dumb') + + if self.format is None: + try: + self.format = self.default_format[os.name] + except KeyError: + raise DistutilsPlatformError( + "don't know how to create dumb built distributions " + "on platform %s" % os.name + ) + + self.set_undefined_options( + 'bdist', + ('dist_dir', 'dist_dir'), + ('plat_name', 'plat_name'), + ('skip_build', 'skip_build'), + ) + + def run(self): + if not self.skip_build: + self.run_command('build') + + install = self.reinitialize_command('install', reinit_subcommands=1) + install.root = self.bdist_dir + install.skip_build = self.skip_build + install.warn_dir = 0 + + log.info("installing to %s", self.bdist_dir) + self.run_command('install') + + # And make an archive relative to the root of the + # pseudo-installation tree. + archive_basename = "{}.{}".format( + self.distribution.get_fullname(), self.plat_name + ) + + pseudoinstall_root = os.path.join(self.dist_dir, archive_basename) + if not self.relative: + archive_root = self.bdist_dir + else: + if self.distribution.has_ext_modules() and ( + install.install_base != install.install_platbase + ): + raise DistutilsPlatformError( + "can't make a dumb built distribution where " + "base and platbase are different (%s, %s)" + % (repr(install.install_base), repr(install.install_platbase)) + ) + else: + archive_root = os.path.join( + self.bdist_dir, ensure_relative(install.install_base) + ) + + # Make the archive + filename = self.make_archive( + pseudoinstall_root, + self.format, + root_dir=archive_root, + owner=self.owner, + group=self.group, + ) + if self.distribution.has_ext_modules(): + pyversion = get_python_version() + else: + pyversion = 'any' + self.distribution.dist_files.append(('bdist_dumb', pyversion, filename)) + + if not self.keep_temp: + remove_tree(self.bdist_dir, dry_run=self.dry_run) diff --git a/libs/common/setuptools/_distutils/command/bdist_rpm.py b/libs/common/setuptools/_distutils/command/bdist_rpm.py new file mode 100644 index 00000000..340527b0 --- /dev/null +++ b/libs/common/setuptools/_distutils/command/bdist_rpm.py @@ -0,0 +1,615 @@ +"""distutils.command.bdist_rpm + +Implements the Distutils 'bdist_rpm' command (create RPM source and binary +distributions).""" + +import subprocess +import sys +import os + +from ..core import Command +from ..debug import DEBUG +from ..file_util import write_file +from ..errors import ( + DistutilsOptionError, + DistutilsPlatformError, + DistutilsFileError, + DistutilsExecError, +) +from ..sysconfig import get_python_version +from distutils._log import log + + +class bdist_rpm(Command): + + description = "create an RPM distribution" + + user_options = [ + ('bdist-base=', None, "base directory for creating built distributions"), + ( + 'rpm-base=', + None, + "base directory for creating RPMs (defaults to \"rpm\" under " + "--bdist-base; must be specified for RPM 2)", + ), + ( + 'dist-dir=', + 'd', + "directory to put final RPM files in " "(and .spec files if --spec-only)", + ), + ( + 'python=', + None, + "path to Python interpreter to hard-code in the .spec file " + "(default: \"python\")", + ), + ( + 'fix-python', + None, + "hard-code the exact path to the current Python interpreter in " + "the .spec file", + ), + ('spec-only', None, "only regenerate spec file"), + ('source-only', None, "only generate source RPM"), + ('binary-only', None, "only generate binary RPM"), + ('use-bzip2', None, "use bzip2 instead of gzip to create source distribution"), + # More meta-data: too RPM-specific to put in the setup script, + # but needs to go in the .spec file -- so we make these options + # to "bdist_rpm". The idea is that packagers would put this + # info in setup.cfg, although they are of course free to + # supply it on the command line. + ( + 'distribution-name=', + None, + "name of the (Linux) distribution to which this " + "RPM applies (*not* the name of the module distribution!)", + ), + ('group=', None, "package classification [default: \"Development/Libraries\"]"), + ('release=', None, "RPM release number"), + ('serial=', None, "RPM serial number"), + ( + 'vendor=', + None, + "RPM \"vendor\" (eg. \"Joe Blow \") " + "[default: maintainer or author from setup script]", + ), + ( + 'packager=', + None, + "RPM packager (eg. \"Jane Doe \") " "[default: vendor]", + ), + ('doc-files=', None, "list of documentation files (space or comma-separated)"), + ('changelog=', None, "RPM changelog"), + ('icon=', None, "name of icon file"), + ('provides=', None, "capabilities provided by this package"), + ('requires=', None, "capabilities required by this package"), + ('conflicts=', None, "capabilities which conflict with this package"), + ('build-requires=', None, "capabilities required to build this package"), + ('obsoletes=', None, "capabilities made obsolete by this package"), + ('no-autoreq', None, "do not automatically calculate dependencies"), + # Actions to take when building RPM + ('keep-temp', 'k', "don't clean up RPM build directory"), + ('no-keep-temp', None, "clean up RPM build directory [default]"), + ( + 'use-rpm-opt-flags', + None, + "compile with RPM_OPT_FLAGS when building from source RPM", + ), + ('no-rpm-opt-flags', None, "do not pass any RPM CFLAGS to compiler"), + ('rpm3-mode', None, "RPM 3 compatibility mode (default)"), + ('rpm2-mode', None, "RPM 2 compatibility mode"), + # Add the hooks necessary for specifying custom scripts + ('prep-script=', None, "Specify a script for the PREP phase of RPM building"), + ('build-script=', None, "Specify a script for the BUILD phase of RPM building"), + ( + 'pre-install=', + None, + "Specify a script for the pre-INSTALL phase of RPM building", + ), + ( + 'install-script=', + None, + "Specify a script for the INSTALL phase of RPM building", + ), + ( + 'post-install=', + None, + "Specify a script for the post-INSTALL phase of RPM building", + ), + ( + 'pre-uninstall=', + None, + "Specify a script for the pre-UNINSTALL phase of RPM building", + ), + ( + 'post-uninstall=', + None, + "Specify a script for the post-UNINSTALL phase of RPM building", + ), + ('clean-script=', None, "Specify a script for the CLEAN phase of RPM building"), + ( + 'verify-script=', + None, + "Specify a script for the VERIFY phase of the RPM build", + ), + # Allow a packager to explicitly force an architecture + ('force-arch=', None, "Force an architecture onto the RPM build process"), + ('quiet', 'q', "Run the INSTALL phase of RPM building in quiet mode"), + ] + + boolean_options = [ + 'keep-temp', + 'use-rpm-opt-flags', + 'rpm3-mode', + 'no-autoreq', + 'quiet', + ] + + negative_opt = { + 'no-keep-temp': 'keep-temp', + 'no-rpm-opt-flags': 'use-rpm-opt-flags', + 'rpm2-mode': 'rpm3-mode', + } + + def initialize_options(self): + self.bdist_base = None + self.rpm_base = None + self.dist_dir = None + self.python = None + self.fix_python = None + self.spec_only = None + self.binary_only = None + self.source_only = None + self.use_bzip2 = None + + self.distribution_name = None + self.group = None + self.release = None + self.serial = None + self.vendor = None + self.packager = None + self.doc_files = None + self.changelog = None + self.icon = None + + self.prep_script = None + self.build_script = None + self.install_script = None + self.clean_script = None + self.verify_script = None + self.pre_install = None + self.post_install = None + self.pre_uninstall = None + self.post_uninstall = None + self.prep = None + self.provides = None + self.requires = None + self.conflicts = None + self.build_requires = None + self.obsoletes = None + + self.keep_temp = 0 + self.use_rpm_opt_flags = 1 + self.rpm3_mode = 1 + self.no_autoreq = 0 + + self.force_arch = None + self.quiet = 0 + + def finalize_options(self): + self.set_undefined_options('bdist', ('bdist_base', 'bdist_base')) + if self.rpm_base is None: + if not self.rpm3_mode: + raise DistutilsOptionError("you must specify --rpm-base in RPM 2 mode") + self.rpm_base = os.path.join(self.bdist_base, "rpm") + + if self.python is None: + if self.fix_python: + self.python = sys.executable + else: + self.python = "python3" + elif self.fix_python: + raise DistutilsOptionError( + "--python and --fix-python are mutually exclusive options" + ) + + if os.name != 'posix': + raise DistutilsPlatformError( + "don't know how to create RPM " "distributions on platform %s" % os.name + ) + if self.binary_only and self.source_only: + raise DistutilsOptionError( + "cannot supply both '--source-only' and '--binary-only'" + ) + + # don't pass CFLAGS to pure python distributions + if not self.distribution.has_ext_modules(): + self.use_rpm_opt_flags = 0 + + self.set_undefined_options('bdist', ('dist_dir', 'dist_dir')) + self.finalize_package_data() + + def finalize_package_data(self): + self.ensure_string('group', "Development/Libraries") + self.ensure_string( + 'vendor', + "%s <%s>" + % (self.distribution.get_contact(), self.distribution.get_contact_email()), + ) + self.ensure_string('packager') + self.ensure_string_list('doc_files') + if isinstance(self.doc_files, list): + for readme in ('README', 'README.txt'): + if os.path.exists(readme) and readme not in self.doc_files: + self.doc_files.append(readme) + + self.ensure_string('release', "1") + self.ensure_string('serial') # should it be an int? + + self.ensure_string('distribution_name') + + self.ensure_string('changelog') + # Format changelog correctly + self.changelog = self._format_changelog(self.changelog) + + self.ensure_filename('icon') + + self.ensure_filename('prep_script') + self.ensure_filename('build_script') + self.ensure_filename('install_script') + self.ensure_filename('clean_script') + self.ensure_filename('verify_script') + self.ensure_filename('pre_install') + self.ensure_filename('post_install') + self.ensure_filename('pre_uninstall') + self.ensure_filename('post_uninstall') + + # XXX don't forget we punted on summaries and descriptions -- they + # should be handled here eventually! + + # Now *this* is some meta-data that belongs in the setup script... + self.ensure_string_list('provides') + self.ensure_string_list('requires') + self.ensure_string_list('conflicts') + self.ensure_string_list('build_requires') + self.ensure_string_list('obsoletes') + + self.ensure_string('force_arch') + + def run(self): # noqa: C901 + if DEBUG: + print("before _get_package_data():") + print("vendor =", self.vendor) + print("packager =", self.packager) + print("doc_files =", self.doc_files) + print("changelog =", self.changelog) + + # make directories + if self.spec_only: + spec_dir = self.dist_dir + self.mkpath(spec_dir) + else: + rpm_dir = {} + for d in ('SOURCES', 'SPECS', 'BUILD', 'RPMS', 'SRPMS'): + rpm_dir[d] = os.path.join(self.rpm_base, d) + self.mkpath(rpm_dir[d]) + spec_dir = rpm_dir['SPECS'] + + # Spec file goes into 'dist_dir' if '--spec-only specified', + # build/rpm. otherwise. + spec_path = os.path.join(spec_dir, "%s.spec" % self.distribution.get_name()) + self.execute( + write_file, (spec_path, self._make_spec_file()), "writing '%s'" % spec_path + ) + + if self.spec_only: # stop if requested + return + + # Make a source distribution and copy to SOURCES directory with + # optional icon. + saved_dist_files = self.distribution.dist_files[:] + sdist = self.reinitialize_command('sdist') + if self.use_bzip2: + sdist.formats = ['bztar'] + else: + sdist.formats = ['gztar'] + self.run_command('sdist') + self.distribution.dist_files = saved_dist_files + + source = sdist.get_archive_files()[0] + source_dir = rpm_dir['SOURCES'] + self.copy_file(source, source_dir) + + if self.icon: + if os.path.exists(self.icon): + self.copy_file(self.icon, source_dir) + else: + raise DistutilsFileError("icon file '%s' does not exist" % self.icon) + + # build package + log.info("building RPMs") + rpm_cmd = ['rpmbuild'] + + if self.source_only: # what kind of RPMs? + rpm_cmd.append('-bs') + elif self.binary_only: + rpm_cmd.append('-bb') + else: + rpm_cmd.append('-ba') + rpm_cmd.extend(['--define', '__python %s' % self.python]) + if self.rpm3_mode: + rpm_cmd.extend(['--define', '_topdir %s' % os.path.abspath(self.rpm_base)]) + if not self.keep_temp: + rpm_cmd.append('--clean') + + if self.quiet: + rpm_cmd.append('--quiet') + + rpm_cmd.append(spec_path) + # Determine the binary rpm names that should be built out of this spec + # file + # Note that some of these may not be really built (if the file + # list is empty) + nvr_string = "%{name}-%{version}-%{release}" + src_rpm = nvr_string + ".src.rpm" + non_src_rpm = "%{arch}/" + nvr_string + ".%{arch}.rpm" + q_cmd = r"rpm -q --qf '{} {}\n' --specfile '{}'".format( + src_rpm, + non_src_rpm, + spec_path, + ) + + out = os.popen(q_cmd) + try: + binary_rpms = [] + source_rpm = None + while True: + line = out.readline() + if not line: + break + ell = line.strip().split() + assert len(ell) == 2 + binary_rpms.append(ell[1]) + # The source rpm is named after the first entry in the spec file + if source_rpm is None: + source_rpm = ell[0] + + status = out.close() + if status: + raise DistutilsExecError("Failed to execute: %s" % repr(q_cmd)) + + finally: + out.close() + + self.spawn(rpm_cmd) + + if not self.dry_run: + if self.distribution.has_ext_modules(): + pyversion = get_python_version() + else: + pyversion = 'any' + + if not self.binary_only: + srpm = os.path.join(rpm_dir['SRPMS'], source_rpm) + assert os.path.exists(srpm) + self.move_file(srpm, self.dist_dir) + filename = os.path.join(self.dist_dir, source_rpm) + self.distribution.dist_files.append(('bdist_rpm', pyversion, filename)) + + if not self.source_only: + for rpm in binary_rpms: + rpm = os.path.join(rpm_dir['RPMS'], rpm) + if os.path.exists(rpm): + self.move_file(rpm, self.dist_dir) + filename = os.path.join(self.dist_dir, os.path.basename(rpm)) + self.distribution.dist_files.append( + ('bdist_rpm', pyversion, filename) + ) + + def _dist_path(self, path): + return os.path.join(self.dist_dir, os.path.basename(path)) + + def _make_spec_file(self): # noqa: C901 + """Generate the text of an RPM spec file and return it as a + list of strings (one per line). + """ + # definitions and headers + spec_file = [ + '%define name ' + self.distribution.get_name(), + '%define version ' + self.distribution.get_version().replace('-', '_'), + '%define unmangled_version ' + self.distribution.get_version(), + '%define release ' + self.release.replace('-', '_'), + '', + 'Summary: ' + (self.distribution.get_description() or "UNKNOWN"), + ] + + # Workaround for #14443 which affects some RPM based systems such as + # RHEL6 (and probably derivatives) + vendor_hook = subprocess.getoutput('rpm --eval %{__os_install_post}') + # Generate a potential replacement value for __os_install_post (whilst + # normalizing the whitespace to simplify the test for whether the + # invocation of brp-python-bytecompile passes in __python): + vendor_hook = '\n'.join( + [' %s \\' % line.strip() for line in vendor_hook.splitlines()] + ) + problem = "brp-python-bytecompile \\\n" + fixed = "brp-python-bytecompile %{__python} \\\n" + fixed_hook = vendor_hook.replace(problem, fixed) + if fixed_hook != vendor_hook: + spec_file.append('# Workaround for http://bugs.python.org/issue14443') + spec_file.append('%define __os_install_post ' + fixed_hook + '\n') + + # put locale summaries into spec file + # XXX not supported for now (hard to put a dictionary + # in a config file -- arg!) + # for locale in self.summaries.keys(): + # spec_file.append('Summary(%s): %s' % (locale, + # self.summaries[locale])) + + spec_file.extend( + [ + 'Name: %{name}', + 'Version: %{version}', + 'Release: %{release}', + ] + ) + + # XXX yuck! this filename is available from the "sdist" command, + # but only after it has run: and we create the spec file before + # running "sdist", in case of --spec-only. + if self.use_bzip2: + spec_file.append('Source0: %{name}-%{unmangled_version}.tar.bz2') + else: + spec_file.append('Source0: %{name}-%{unmangled_version}.tar.gz') + + spec_file.extend( + [ + 'License: ' + (self.distribution.get_license() or "UNKNOWN"), + 'Group: ' + self.group, + 'BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-buildroot', + 'Prefix: %{_prefix}', + ] + ) + + if not self.force_arch: + # noarch if no extension modules + if not self.distribution.has_ext_modules(): + spec_file.append('BuildArch: noarch') + else: + spec_file.append('BuildArch: %s' % self.force_arch) + + for field in ( + 'Vendor', + 'Packager', + 'Provides', + 'Requires', + 'Conflicts', + 'Obsoletes', + ): + val = getattr(self, field.lower()) + if isinstance(val, list): + spec_file.append('{}: {}'.format(field, ' '.join(val))) + elif val is not None: + spec_file.append('{}: {}'.format(field, val)) + + if self.distribution.get_url(): + spec_file.append('Url: ' + self.distribution.get_url()) + + if self.distribution_name: + spec_file.append('Distribution: ' + self.distribution_name) + + if self.build_requires: + spec_file.append('BuildRequires: ' + ' '.join(self.build_requires)) + + if self.icon: + spec_file.append('Icon: ' + os.path.basename(self.icon)) + + if self.no_autoreq: + spec_file.append('AutoReq: 0') + + spec_file.extend( + [ + '', + '%description', + self.distribution.get_long_description() or "", + ] + ) + + # put locale descriptions into spec file + # XXX again, suppressed because config file syntax doesn't + # easily support this ;-( + # for locale in self.descriptions.keys(): + # spec_file.extend([ + # '', + # '%description -l ' + locale, + # self.descriptions[locale], + # ]) + + # rpm scripts + # figure out default build script + def_setup_call = "{} {}".format(self.python, os.path.basename(sys.argv[0])) + def_build = "%s build" % def_setup_call + if self.use_rpm_opt_flags: + def_build = 'env CFLAGS="$RPM_OPT_FLAGS" ' + def_build + + # insert contents of files + + # XXX this is kind of misleading: user-supplied options are files + # that we open and interpolate into the spec file, but the defaults + # are just text that we drop in as-is. Hmmm. + + install_cmd = ( + '%s install -O1 --root=$RPM_BUILD_ROOT ' '--record=INSTALLED_FILES' + ) % def_setup_call + + script_options = [ + ('prep', 'prep_script', "%setup -n %{name}-%{unmangled_version}"), + ('build', 'build_script', def_build), + ('install', 'install_script', install_cmd), + ('clean', 'clean_script', "rm -rf $RPM_BUILD_ROOT"), + ('verifyscript', 'verify_script', None), + ('pre', 'pre_install', None), + ('post', 'post_install', None), + ('preun', 'pre_uninstall', None), + ('postun', 'post_uninstall', None), + ] + + for (rpm_opt, attr, default) in script_options: + # Insert contents of file referred to, if no file is referred to + # use 'default' as contents of script + val = getattr(self, attr) + if val or default: + spec_file.extend( + [ + '', + '%' + rpm_opt, + ] + ) + if val: + with open(val) as f: + spec_file.extend(f.read().split('\n')) + else: + spec_file.append(default) + + # files section + spec_file.extend( + [ + '', + '%files -f INSTALLED_FILES', + '%defattr(-,root,root)', + ] + ) + + if self.doc_files: + spec_file.append('%doc ' + ' '.join(self.doc_files)) + + if self.changelog: + spec_file.extend( + [ + '', + '%changelog', + ] + ) + spec_file.extend(self.changelog) + + return spec_file + + def _format_changelog(self, changelog): + """Format the changelog correctly and convert it to a list of strings""" + if not changelog: + return changelog + new_changelog = [] + for line in changelog.strip().split('\n'): + line = line.strip() + if line[0] == '*': + new_changelog.extend(['', line]) + elif line[0] == '-': + new_changelog.append(line) + else: + new_changelog.append(' ' + line) + + # strip trailing newline inserted by first changelog entry + if not new_changelog[0]: + del new_changelog[0] + + return new_changelog diff --git a/libs/common/setuptools/_distutils/command/build.py b/libs/common/setuptools/_distutils/command/build.py new file mode 100644 index 00000000..c3ab410f --- /dev/null +++ b/libs/common/setuptools/_distutils/command/build.py @@ -0,0 +1,153 @@ +"""distutils.command.build + +Implements the Distutils 'build' command.""" + +import sys +import os +from ..core import Command +from ..errors import DistutilsOptionError +from ..util import get_platform + + +def show_compilers(): + from ..ccompiler import show_compilers + + show_compilers() + + +class build(Command): + + description = "build everything needed to install" + + user_options = [ + ('build-base=', 'b', "base directory for build library"), + ('build-purelib=', None, "build directory for platform-neutral distributions"), + ('build-platlib=', None, "build directory for platform-specific distributions"), + ( + 'build-lib=', + None, + "build directory for all distribution (defaults to either " + + "build-purelib or build-platlib", + ), + ('build-scripts=', None, "build directory for scripts"), + ('build-temp=', 't', "temporary build directory"), + ( + 'plat-name=', + 'p', + "platform name to build for, if supported " + "(default: %s)" % get_platform(), + ), + ('compiler=', 'c', "specify the compiler type"), + ('parallel=', 'j', "number of parallel build jobs"), + ('debug', 'g', "compile extensions and libraries with debugging information"), + ('force', 'f', "forcibly build everything (ignore file timestamps)"), + ('executable=', 'e', "specify final destination interpreter path (build.py)"), + ] + + boolean_options = ['debug', 'force'] + + help_options = [ + ('help-compiler', None, "list available compilers", show_compilers), + ] + + def initialize_options(self): + self.build_base = 'build' + # these are decided only after 'build_base' has its final value + # (unless overridden by the user or client) + self.build_purelib = None + self.build_platlib = None + self.build_lib = None + self.build_temp = None + self.build_scripts = None + self.compiler = None + self.plat_name = None + self.debug = None + self.force = 0 + self.executable = None + self.parallel = None + + def finalize_options(self): # noqa: C901 + if self.plat_name is None: + self.plat_name = get_platform() + else: + # plat-name only supported for windows (other platforms are + # supported via ./configure flags, if at all). Avoid misleading + # other platforms. + if os.name != 'nt': + raise DistutilsOptionError( + "--plat-name only supported on Windows (try " + "using './configure --help' on your platform)" + ) + + plat_specifier = ".{}-{}".format(self.plat_name, sys.implementation.cache_tag) + + # Make it so Python 2.x and Python 2.x with --with-pydebug don't + # share the same build directories. Doing so confuses the build + # process for C modules + if hasattr(sys, 'gettotalrefcount'): + plat_specifier += '-pydebug' + + # 'build_purelib' and 'build_platlib' just default to 'lib' and + # 'lib.' under the base build directory. We only use one of + # them for a given distribution, though -- + if self.build_purelib is None: + self.build_purelib = os.path.join(self.build_base, 'lib') + if self.build_platlib is None: + self.build_platlib = os.path.join(self.build_base, 'lib' + plat_specifier) + + # 'build_lib' is the actual directory that we will use for this + # particular module distribution -- if user didn't supply it, pick + # one of 'build_purelib' or 'build_platlib'. + if self.build_lib is None: + if self.distribution.has_ext_modules(): + self.build_lib = self.build_platlib + else: + self.build_lib = self.build_purelib + + # 'build_temp' -- temporary directory for compiler turds, + # "build/temp." + if self.build_temp is None: + self.build_temp = os.path.join(self.build_base, 'temp' + plat_specifier) + if self.build_scripts is None: + self.build_scripts = os.path.join( + self.build_base, 'scripts-%d.%d' % sys.version_info[:2] + ) + + if self.executable is None and sys.executable: + self.executable = os.path.normpath(sys.executable) + + if isinstance(self.parallel, str): + try: + self.parallel = int(self.parallel) + except ValueError: + raise DistutilsOptionError("parallel should be an integer") + + def run(self): + # Run all relevant sub-commands. This will be some subset of: + # - build_py - pure Python modules + # - build_clib - standalone C libraries + # - build_ext - Python extensions + # - build_scripts - (Python) scripts + for cmd_name in self.get_sub_commands(): + self.run_command(cmd_name) + + # -- Predicates for the sub-command list --------------------------- + + def has_pure_modules(self): + return self.distribution.has_pure_modules() + + def has_c_libraries(self): + return self.distribution.has_c_libraries() + + def has_ext_modules(self): + return self.distribution.has_ext_modules() + + def has_scripts(self): + return self.distribution.has_scripts() + + sub_commands = [ + ('build_py', has_pure_modules), + ('build_clib', has_c_libraries), + ('build_ext', has_ext_modules), + ('build_scripts', has_scripts), + ] diff --git a/libs/common/setuptools/_distutils/command/build_clib.py b/libs/common/setuptools/_distutils/command/build_clib.py new file mode 100644 index 00000000..f90c5664 --- /dev/null +++ b/libs/common/setuptools/_distutils/command/build_clib.py @@ -0,0 +1,208 @@ +"""distutils.command.build_clib + +Implements the Distutils 'build_clib' command, to build a C/C++ library +that is included in the module distribution and needed by an extension +module.""" + + +# XXX this module has *lots* of code ripped-off quite transparently from +# build_ext.py -- not surprisingly really, as the work required to build +# a static library from a collection of C source files is not really all +# that different from what's required to build a shared object file from +# a collection of C source files. Nevertheless, I haven't done the +# necessary refactoring to account for the overlap in code between the +# two modules, mainly because a number of subtle details changed in the +# cut 'n paste. Sigh. + +import os +from ..core import Command +from ..errors import DistutilsSetupError +from ..sysconfig import customize_compiler +from distutils._log import log + + +def show_compilers(): + from ..ccompiler import show_compilers + + show_compilers() + + +class build_clib(Command): + + description = "build C/C++ libraries used by Python extensions" + + user_options = [ + ('build-clib=', 'b', "directory to build C/C++ libraries to"), + ('build-temp=', 't', "directory to put temporary build by-products"), + ('debug', 'g', "compile with debugging information"), + ('force', 'f', "forcibly build everything (ignore file timestamps)"), + ('compiler=', 'c', "specify the compiler type"), + ] + + boolean_options = ['debug', 'force'] + + help_options = [ + ('help-compiler', None, "list available compilers", show_compilers), + ] + + def initialize_options(self): + self.build_clib = None + self.build_temp = None + + # List of libraries to build + self.libraries = None + + # Compilation options for all libraries + self.include_dirs = None + self.define = None + self.undef = None + self.debug = None + self.force = 0 + self.compiler = None + + def finalize_options(self): + # This might be confusing: both build-clib and build-temp default + # to build-temp as defined by the "build" command. This is because + # I think that C libraries are really just temporary build + # by-products, at least from the point of view of building Python + # extensions -- but I want to keep my options open. + self.set_undefined_options( + 'build', + ('build_temp', 'build_clib'), + ('build_temp', 'build_temp'), + ('compiler', 'compiler'), + ('debug', 'debug'), + ('force', 'force'), + ) + + self.libraries = self.distribution.libraries + if self.libraries: + self.check_library_list(self.libraries) + + if self.include_dirs is None: + self.include_dirs = self.distribution.include_dirs or [] + if isinstance(self.include_dirs, str): + self.include_dirs = self.include_dirs.split(os.pathsep) + + # XXX same as for build_ext -- what about 'self.define' and + # 'self.undef' ? + + def run(self): + if not self.libraries: + return + + # Yech -- this is cut 'n pasted from build_ext.py! + from ..ccompiler import new_compiler + + self.compiler = new_compiler( + compiler=self.compiler, dry_run=self.dry_run, force=self.force + ) + customize_compiler(self.compiler) + + if self.include_dirs is not None: + self.compiler.set_include_dirs(self.include_dirs) + if self.define is not None: + # 'define' option is a list of (name,value) tuples + for (name, value) in self.define: + self.compiler.define_macro(name, value) + if self.undef is not None: + for macro in self.undef: + self.compiler.undefine_macro(macro) + + self.build_libraries(self.libraries) + + def check_library_list(self, libraries): + """Ensure that the list of libraries is valid. + + `library` is presumably provided as a command option 'libraries'. + This method checks that it is a list of 2-tuples, where the tuples + are (library_name, build_info_dict). + + Raise DistutilsSetupError if the structure is invalid anywhere; + just returns otherwise. + """ + if not isinstance(libraries, list): + raise DistutilsSetupError("'libraries' option must be a list of tuples") + + for lib in libraries: + if not isinstance(lib, tuple) and len(lib) != 2: + raise DistutilsSetupError("each element of 'libraries' must a 2-tuple") + + name, build_info = lib + + if not isinstance(name, str): + raise DistutilsSetupError( + "first element of each tuple in 'libraries' " + "must be a string (the library name)" + ) + + if '/' in name or (os.sep != '/' and os.sep in name): + raise DistutilsSetupError( + "bad library name '%s': " + "may not contain directory separators" % lib[0] + ) + + if not isinstance(build_info, dict): + raise DistutilsSetupError( + "second element of each tuple in 'libraries' " + "must be a dictionary (build info)" + ) + + def get_library_names(self): + # Assume the library list is valid -- 'check_library_list()' is + # called from 'finalize_options()', so it should be! + if not self.libraries: + return None + + lib_names = [] + for (lib_name, build_info) in self.libraries: + lib_names.append(lib_name) + return lib_names + + def get_source_files(self): + self.check_library_list(self.libraries) + filenames = [] + for (lib_name, build_info) in self.libraries: + sources = build_info.get('sources') + if sources is None or not isinstance(sources, (list, tuple)): + raise DistutilsSetupError( + "in 'libraries' option (library '%s'), " + "'sources' must be present and must be " + "a list of source filenames" % lib_name + ) + + filenames.extend(sources) + return filenames + + def build_libraries(self, libraries): + for (lib_name, build_info) in libraries: + sources = build_info.get('sources') + if sources is None or not isinstance(sources, (list, tuple)): + raise DistutilsSetupError( + "in 'libraries' option (library '%s'), " + "'sources' must be present and must be " + "a list of source filenames" % lib_name + ) + sources = list(sources) + + log.info("building '%s' library", lib_name) + + # First, compile the source code to object files in the library + # directory. (This should probably change to putting object + # files in a temporary build directory.) + macros = build_info.get('macros') + include_dirs = build_info.get('include_dirs') + objects = self.compiler.compile( + sources, + output_dir=self.build_temp, + macros=macros, + include_dirs=include_dirs, + debug=self.debug, + ) + + # Now "link" the object files together into a static library. + # (On Unix at least, this isn't really linking -- it just + # builds an archive. Whatever.) + self.compiler.create_static_lib( + objects, lib_name, output_dir=self.build_clib, debug=self.debug + ) diff --git a/libs/common/setuptools/_distutils/command/build_ext.py b/libs/common/setuptools/_distutils/command/build_ext.py new file mode 100644 index 00000000..f4c0eccd --- /dev/null +++ b/libs/common/setuptools/_distutils/command/build_ext.py @@ -0,0 +1,789 @@ +"""distutils.command.build_ext + +Implements the Distutils 'build_ext' command, for building extension +modules (currently limited to C extensions, should accommodate C++ +extensions ASAP).""" + +import contextlib +import os +import re +import sys +from ..core import Command +from ..errors import ( + DistutilsOptionError, + DistutilsSetupError, + CCompilerError, + DistutilsError, + CompileError, + DistutilsPlatformError, +) +from ..sysconfig import customize_compiler, get_python_version +from ..sysconfig import get_config_h_filename +from ..dep_util import newer_group +from ..extension import Extension +from ..util import get_platform +from distutils._log import log +from . import py37compat + +from site import USER_BASE + +# An extension name is just a dot-separated list of Python NAMEs (ie. +# the same as a fully-qualified module name). +extension_name_re = re.compile(r'^[a-zA-Z_][a-zA-Z_0-9]*(\.[a-zA-Z_][a-zA-Z_0-9]*)*$') + + +def show_compilers(): + from ..ccompiler import show_compilers + + show_compilers() + + +class build_ext(Command): + + description = "build C/C++ extensions (compile/link to build directory)" + + # XXX thoughts on how to deal with complex command-line options like + # these, i.e. how to make it so fancy_getopt can suck them off the + # command line and make it look like setup.py defined the appropriate + # lists of tuples of what-have-you. + # - each command needs a callback to process its command-line options + # - Command.__init__() needs access to its share of the whole + # command line (must ultimately come from + # Distribution.parse_command_line()) + # - it then calls the current command class' option-parsing + # callback to deal with weird options like -D, which have to + # parse the option text and churn out some custom data + # structure + # - that data structure (in this case, a list of 2-tuples) + # will then be present in the command object by the time + # we get to finalize_options() (i.e. the constructor + # takes care of both command-line and client options + # in between initialize_options() and finalize_options()) + + sep_by = " (separated by '%s')" % os.pathsep + user_options = [ + ('build-lib=', 'b', "directory for compiled extension modules"), + ('build-temp=', 't', "directory for temporary files (build by-products)"), + ( + 'plat-name=', + 'p', + "platform name to cross-compile for, if supported " + "(default: %s)" % get_platform(), + ), + ( + 'inplace', + 'i', + "ignore build-lib and put compiled extensions into the source " + + "directory alongside your pure Python modules", + ), + ( + 'include-dirs=', + 'I', + "list of directories to search for header files" + sep_by, + ), + ('define=', 'D', "C preprocessor macros to define"), + ('undef=', 'U', "C preprocessor macros to undefine"), + ('libraries=', 'l', "external C libraries to link with"), + ( + 'library-dirs=', + 'L', + "directories to search for external C libraries" + sep_by, + ), + ('rpath=', 'R', "directories to search for shared C libraries at runtime"), + ('link-objects=', 'O', "extra explicit link objects to include in the link"), + ('debug', 'g', "compile/link with debugging information"), + ('force', 'f', "forcibly build everything (ignore file timestamps)"), + ('compiler=', 'c', "specify the compiler type"), + ('parallel=', 'j', "number of parallel build jobs"), + ('swig-cpp', None, "make SWIG create C++ files (default is C)"), + ('swig-opts=', None, "list of SWIG command line options"), + ('swig=', None, "path to the SWIG executable"), + ('user', None, "add user include, library and rpath"), + ] + + boolean_options = ['inplace', 'debug', 'force', 'swig-cpp', 'user'] + + help_options = [ + ('help-compiler', None, "list available compilers", show_compilers), + ] + + def initialize_options(self): + self.extensions = None + self.build_lib = None + self.plat_name = None + self.build_temp = None + self.inplace = 0 + self.package = None + + self.include_dirs = None + self.define = None + self.undef = None + self.libraries = None + self.library_dirs = None + self.rpath = None + self.link_objects = None + self.debug = None + self.force = None + self.compiler = None + self.swig = None + self.swig_cpp = None + self.swig_opts = None + self.user = None + self.parallel = None + + def finalize_options(self): # noqa: C901 + from distutils import sysconfig + + self.set_undefined_options( + 'build', + ('build_lib', 'build_lib'), + ('build_temp', 'build_temp'), + ('compiler', 'compiler'), + ('debug', 'debug'), + ('force', 'force'), + ('parallel', 'parallel'), + ('plat_name', 'plat_name'), + ) + + if self.package is None: + self.package = self.distribution.ext_package + + self.extensions = self.distribution.ext_modules + + # Make sure Python's include directories (for Python.h, pyconfig.h, + # etc.) are in the include search path. + py_include = sysconfig.get_python_inc() + plat_py_include = sysconfig.get_python_inc(plat_specific=1) + if self.include_dirs is None: + self.include_dirs = self.distribution.include_dirs or [] + if isinstance(self.include_dirs, str): + self.include_dirs = self.include_dirs.split(os.pathsep) + + # If in a virtualenv, add its include directory + # Issue 16116 + if sys.exec_prefix != sys.base_exec_prefix: + self.include_dirs.append(os.path.join(sys.exec_prefix, 'include')) + + # Put the Python "system" include dir at the end, so that + # any local include dirs take precedence. + self.include_dirs.extend(py_include.split(os.path.pathsep)) + if plat_py_include != py_include: + self.include_dirs.extend(plat_py_include.split(os.path.pathsep)) + + self.ensure_string_list('libraries') + self.ensure_string_list('link_objects') + + # Life is easier if we're not forever checking for None, so + # simplify these options to empty lists if unset + if self.libraries is None: + self.libraries = [] + if self.library_dirs is None: + self.library_dirs = [] + elif isinstance(self.library_dirs, str): + self.library_dirs = self.library_dirs.split(os.pathsep) + + if self.rpath is None: + self.rpath = [] + elif isinstance(self.rpath, str): + self.rpath = self.rpath.split(os.pathsep) + + # for extensions under windows use different directories + # for Release and Debug builds. + # also Python's library directory must be appended to library_dirs + if os.name == 'nt': + # the 'libs' directory is for binary installs - we assume that + # must be the *native* platform. But we don't really support + # cross-compiling via a binary install anyway, so we let it go. + self.library_dirs.append(os.path.join(sys.exec_prefix, 'libs')) + if sys.base_exec_prefix != sys.prefix: # Issue 16116 + self.library_dirs.append(os.path.join(sys.base_exec_prefix, 'libs')) + if self.debug: + self.build_temp = os.path.join(self.build_temp, "Debug") + else: + self.build_temp = os.path.join(self.build_temp, "Release") + + # Append the source distribution include and library directories, + # this allows distutils on windows to work in the source tree + self.include_dirs.append(os.path.dirname(get_config_h_filename())) + self.library_dirs.append(sys.base_exec_prefix) + + # Use the .lib files for the correct architecture + if self.plat_name == 'win32': + suffix = 'win32' + else: + # win-amd64 + suffix = self.plat_name[4:] + new_lib = os.path.join(sys.exec_prefix, 'PCbuild') + if suffix: + new_lib = os.path.join(new_lib, suffix) + self.library_dirs.append(new_lib) + + # For extensions under Cygwin, Python's library directory must be + # appended to library_dirs + if sys.platform[:6] == 'cygwin': + if not sysconfig.python_build: + # building third party extensions + self.library_dirs.append( + os.path.join( + sys.prefix, "lib", "python" + get_python_version(), "config" + ) + ) + else: + # building python standard extensions + self.library_dirs.append('.') + + # For building extensions with a shared Python library, + # Python's library directory must be appended to library_dirs + # See Issues: #1600860, #4366 + if sysconfig.get_config_var('Py_ENABLE_SHARED'): + if not sysconfig.python_build: + # building third party extensions + self.library_dirs.append(sysconfig.get_config_var('LIBDIR')) + else: + # building python standard extensions + self.library_dirs.append('.') + + # The argument parsing will result in self.define being a string, but + # it has to be a list of 2-tuples. All the preprocessor symbols + # specified by the 'define' option will be set to '1'. Multiple + # symbols can be separated with commas. + + if self.define: + defines = self.define.split(',') + self.define = [(symbol, '1') for symbol in defines] + + # The option for macros to undefine is also a string from the + # option parsing, but has to be a list. Multiple symbols can also + # be separated with commas here. + if self.undef: + self.undef = self.undef.split(',') + + if self.swig_opts is None: + self.swig_opts = [] + else: + self.swig_opts = self.swig_opts.split(' ') + + # Finally add the user include and library directories if requested + if self.user: + user_include = os.path.join(USER_BASE, "include") + user_lib = os.path.join(USER_BASE, "lib") + if os.path.isdir(user_include): + self.include_dirs.append(user_include) + if os.path.isdir(user_lib): + self.library_dirs.append(user_lib) + self.rpath.append(user_lib) + + if isinstance(self.parallel, str): + try: + self.parallel = int(self.parallel) + except ValueError: + raise DistutilsOptionError("parallel should be an integer") + + def run(self): # noqa: C901 + from ..ccompiler import new_compiler + + # 'self.extensions', as supplied by setup.py, is a list of + # Extension instances. See the documentation for Extension (in + # distutils.extension) for details. + # + # For backwards compatibility with Distutils 0.8.2 and earlier, we + # also allow the 'extensions' list to be a list of tuples: + # (ext_name, build_info) + # where build_info is a dictionary containing everything that + # Extension instances do except the name, with a few things being + # differently named. We convert these 2-tuples to Extension + # instances as needed. + + if not self.extensions: + return + + # If we were asked to build any C/C++ libraries, make sure that the + # directory where we put them is in the library search path for + # linking extensions. + if self.distribution.has_c_libraries(): + build_clib = self.get_finalized_command('build_clib') + self.libraries.extend(build_clib.get_library_names() or []) + self.library_dirs.append(build_clib.build_clib) + + # Setup the CCompiler object that we'll use to do all the + # compiling and linking + self.compiler = new_compiler( + compiler=self.compiler, + verbose=self.verbose, + dry_run=self.dry_run, + force=self.force, + ) + customize_compiler(self.compiler) + # If we are cross-compiling, init the compiler now (if we are not + # cross-compiling, init would not hurt, but people may rely on + # late initialization of compiler even if they shouldn't...) + if os.name == 'nt' and self.plat_name != get_platform(): + self.compiler.initialize(self.plat_name) + + # And make sure that any compile/link-related options (which might + # come from the command-line or from the setup script) are set in + # that CCompiler object -- that way, they automatically apply to + # all compiling and linking done here. + if self.include_dirs is not None: + self.compiler.set_include_dirs(self.include_dirs) + if self.define is not None: + # 'define' option is a list of (name,value) tuples + for (name, value) in self.define: + self.compiler.define_macro(name, value) + if self.undef is not None: + for macro in self.undef: + self.compiler.undefine_macro(macro) + if self.libraries is not None: + self.compiler.set_libraries(self.libraries) + if self.library_dirs is not None: + self.compiler.set_library_dirs(self.library_dirs) + if self.rpath is not None: + self.compiler.set_runtime_library_dirs(self.rpath) + if self.link_objects is not None: + self.compiler.set_link_objects(self.link_objects) + + # Now actually compile and link everything. + self.build_extensions() + + def check_extensions_list(self, extensions): # noqa: C901 + """Ensure that the list of extensions (presumably provided as a + command option 'extensions') is valid, i.e. it is a list of + Extension objects. We also support the old-style list of 2-tuples, + where the tuples are (ext_name, build_info), which are converted to + Extension instances here. + + Raise DistutilsSetupError if the structure is invalid anywhere; + just returns otherwise. + """ + if not isinstance(extensions, list): + raise DistutilsSetupError( + "'ext_modules' option must be a list of Extension instances" + ) + + for i, ext in enumerate(extensions): + if isinstance(ext, Extension): + continue # OK! (assume type-checking done + # by Extension constructor) + + if not isinstance(ext, tuple) or len(ext) != 2: + raise DistutilsSetupError( + "each element of 'ext_modules' option must be an " + "Extension instance or 2-tuple" + ) + + ext_name, build_info = ext + + log.warning( + "old-style (ext_name, build_info) tuple found in " + "ext_modules for extension '%s' " + "-- please convert to Extension instance", + ext_name, + ) + + if not (isinstance(ext_name, str) and extension_name_re.match(ext_name)): + raise DistutilsSetupError( + "first element of each tuple in 'ext_modules' " + "must be the extension name (a string)" + ) + + if not isinstance(build_info, dict): + raise DistutilsSetupError( + "second element of each tuple in 'ext_modules' " + "must be a dictionary (build info)" + ) + + # OK, the (ext_name, build_info) dict is type-safe: convert it + # to an Extension instance. + ext = Extension(ext_name, build_info['sources']) + + # Easy stuff: one-to-one mapping from dict elements to + # instance attributes. + for key in ( + 'include_dirs', + 'library_dirs', + 'libraries', + 'extra_objects', + 'extra_compile_args', + 'extra_link_args', + ): + val = build_info.get(key) + if val is not None: + setattr(ext, key, val) + + # Medium-easy stuff: same syntax/semantics, different names. + ext.runtime_library_dirs = build_info.get('rpath') + if 'def_file' in build_info: + log.warning( + "'def_file' element of build info dict " "no longer supported" + ) + + # Non-trivial stuff: 'macros' split into 'define_macros' + # and 'undef_macros'. + macros = build_info.get('macros') + if macros: + ext.define_macros = [] + ext.undef_macros = [] + for macro in macros: + if not (isinstance(macro, tuple) and len(macro) in (1, 2)): + raise DistutilsSetupError( + "'macros' element of build info dict " + "must be 1- or 2-tuple" + ) + if len(macro) == 1: + ext.undef_macros.append(macro[0]) + elif len(macro) == 2: + ext.define_macros.append(macro) + + extensions[i] = ext + + def get_source_files(self): + self.check_extensions_list(self.extensions) + filenames = [] + + # Wouldn't it be neat if we knew the names of header files too... + for ext in self.extensions: + filenames.extend(ext.sources) + return filenames + + def get_outputs(self): + # Sanity check the 'extensions' list -- can't assume this is being + # done in the same run as a 'build_extensions()' call (in fact, we + # can probably assume that it *isn't*!). + self.check_extensions_list(self.extensions) + + # And build the list of output (built) filenames. Note that this + # ignores the 'inplace' flag, and assumes everything goes in the + # "build" tree. + outputs = [] + for ext in self.extensions: + outputs.append(self.get_ext_fullpath(ext.name)) + return outputs + + def build_extensions(self): + # First, sanity-check the 'extensions' list + self.check_extensions_list(self.extensions) + if self.parallel: + self._build_extensions_parallel() + else: + self._build_extensions_serial() + + def _build_extensions_parallel(self): + workers = self.parallel + if self.parallel is True: + workers = os.cpu_count() # may return None + try: + from concurrent.futures import ThreadPoolExecutor + except ImportError: + workers = None + + if workers is None: + self._build_extensions_serial() + return + + with ThreadPoolExecutor(max_workers=workers) as executor: + futures = [ + executor.submit(self.build_extension, ext) for ext in self.extensions + ] + for ext, fut in zip(self.extensions, futures): + with self._filter_build_errors(ext): + fut.result() + + def _build_extensions_serial(self): + for ext in self.extensions: + with self._filter_build_errors(ext): + self.build_extension(ext) + + @contextlib.contextmanager + def _filter_build_errors(self, ext): + try: + yield + except (CCompilerError, DistutilsError, CompileError) as e: + if not ext.optional: + raise + self.warn('building extension "{}" failed: {}'.format(ext.name, e)) + + def build_extension(self, ext): + sources = ext.sources + if sources is None or not isinstance(sources, (list, tuple)): + raise DistutilsSetupError( + "in 'ext_modules' option (extension '%s'), " + "'sources' must be present and must be " + "a list of source filenames" % ext.name + ) + # sort to make the resulting .so file build reproducible + sources = sorted(sources) + + ext_path = self.get_ext_fullpath(ext.name) + depends = sources + ext.depends + if not (self.force or newer_group(depends, ext_path, 'newer')): + log.debug("skipping '%s' extension (up-to-date)", ext.name) + return + else: + log.info("building '%s' extension", ext.name) + + # First, scan the sources for SWIG definition files (.i), run + # SWIG on 'em to create .c files, and modify the sources list + # accordingly. + sources = self.swig_sources(sources, ext) + + # Next, compile the source code to object files. + + # XXX not honouring 'define_macros' or 'undef_macros' -- the + # CCompiler API needs to change to accommodate this, and I + # want to do one thing at a time! + + # Two possible sources for extra compiler arguments: + # - 'extra_compile_args' in Extension object + # - CFLAGS environment variable (not particularly + # elegant, but people seem to expect it and I + # guess it's useful) + # The environment variable should take precedence, and + # any sensible compiler will give precedence to later + # command line args. Hence we combine them in order: + extra_args = ext.extra_compile_args or [] + + macros = ext.define_macros[:] + for undef in ext.undef_macros: + macros.append((undef,)) + + objects = self.compiler.compile( + sources, + output_dir=self.build_temp, + macros=macros, + include_dirs=ext.include_dirs, + debug=self.debug, + extra_postargs=extra_args, + depends=ext.depends, + ) + + # XXX outdated variable, kept here in case third-part code + # needs it. + self._built_objects = objects[:] + + # Now link the object files together into a "shared object" -- + # of course, first we have to figure out all the other things + # that go into the mix. + if ext.extra_objects: + objects.extend(ext.extra_objects) + extra_args = ext.extra_link_args or [] + + # Detect target language, if not provided + language = ext.language or self.compiler.detect_language(sources) + + self.compiler.link_shared_object( + objects, + ext_path, + libraries=self.get_libraries(ext), + library_dirs=ext.library_dirs, + runtime_library_dirs=ext.runtime_library_dirs, + extra_postargs=extra_args, + export_symbols=self.get_export_symbols(ext), + debug=self.debug, + build_temp=self.build_temp, + target_lang=language, + ) + + def swig_sources(self, sources, extension): + """Walk the list of source files in 'sources', looking for SWIG + interface (.i) files. Run SWIG on all that are found, and + return a modified 'sources' list with SWIG source files replaced + by the generated C (or C++) files. + """ + new_sources = [] + swig_sources = [] + swig_targets = {} + + # XXX this drops generated C/C++ files into the source tree, which + # is fine for developers who want to distribute the generated + # source -- but there should be an option to put SWIG output in + # the temp dir. + + if self.swig_cpp: + log.warning("--swig-cpp is deprecated - use --swig-opts=-c++") + + if ( + self.swig_cpp + or ('-c++' in self.swig_opts) + or ('-c++' in extension.swig_opts) + ): + target_ext = '.cpp' + else: + target_ext = '.c' + + for source in sources: + (base, ext) = os.path.splitext(source) + if ext == ".i": # SWIG interface file + new_sources.append(base + '_wrap' + target_ext) + swig_sources.append(source) + swig_targets[source] = new_sources[-1] + else: + new_sources.append(source) + + if not swig_sources: + return new_sources + + swig = self.swig or self.find_swig() + swig_cmd = [swig, "-python"] + swig_cmd.extend(self.swig_opts) + if self.swig_cpp: + swig_cmd.append("-c++") + + # Do not override commandline arguments + if not self.swig_opts: + for o in extension.swig_opts: + swig_cmd.append(o) + + for source in swig_sources: + target = swig_targets[source] + log.info("swigging %s to %s", source, target) + self.spawn(swig_cmd + ["-o", target, source]) + + return new_sources + + def find_swig(self): + """Return the name of the SWIG executable. On Unix, this is + just "swig" -- it should be in the PATH. Tries a bit harder on + Windows. + """ + if os.name == "posix": + return "swig" + elif os.name == "nt": + # Look for SWIG in its standard installation directory on + # Windows (or so I presume!). If we find it there, great; + # if not, act like Unix and assume it's in the PATH. + for vers in ("1.3", "1.2", "1.1"): + fn = os.path.join("c:\\swig%s" % vers, "swig.exe") + if os.path.isfile(fn): + return fn + else: + return "swig.exe" + else: + raise DistutilsPlatformError( + "I don't know how to find (much less run) SWIG " + "on platform '%s'" % os.name + ) + + # -- Name generators ----------------------------------------------- + # (extension names, filenames, whatever) + def get_ext_fullpath(self, ext_name): + """Returns the path of the filename for a given extension. + + The file is located in `build_lib` or directly in the package + (inplace option). + """ + fullname = self.get_ext_fullname(ext_name) + modpath = fullname.split('.') + filename = self.get_ext_filename(modpath[-1]) + + if not self.inplace: + # no further work needed + # returning : + # build_dir/package/path/filename + filename = os.path.join(*modpath[:-1] + [filename]) + return os.path.join(self.build_lib, filename) + + # the inplace option requires to find the package directory + # using the build_py command for that + package = '.'.join(modpath[0:-1]) + build_py = self.get_finalized_command('build_py') + package_dir = os.path.abspath(build_py.get_package_dir(package)) + + # returning + # package_dir/filename + return os.path.join(package_dir, filename) + + def get_ext_fullname(self, ext_name): + """Returns the fullname of a given extension name. + + Adds the `package.` prefix""" + if self.package is None: + return ext_name + else: + return self.package + '.' + ext_name + + def get_ext_filename(self, ext_name): + r"""Convert the name of an extension (eg. "foo.bar") into the name + of the file from which it will be loaded (eg. "foo/bar.so", or + "foo\bar.pyd"). + """ + from ..sysconfig import get_config_var + + ext_path = ext_name.split('.') + ext_suffix = get_config_var('EXT_SUFFIX') + return os.path.join(*ext_path) + ext_suffix + + def get_export_symbols(self, ext): + """Return the list of symbols that a shared extension has to + export. This either uses 'ext.export_symbols' or, if it's not + provided, "PyInit_" + module_name. Only relevant on Windows, where + the .pyd file (DLL) must export the module "PyInit_" function. + """ + name = ext.name.split('.')[-1] + try: + # Unicode module name support as defined in PEP-489 + # https://www.python.org/dev/peps/pep-0489/#export-hook-name + name.encode('ascii') + except UnicodeEncodeError: + suffix = 'U_' + name.encode('punycode').replace(b'-', b'_').decode('ascii') + else: + suffix = "_" + name + + initfunc_name = "PyInit" + suffix + if initfunc_name not in ext.export_symbols: + ext.export_symbols.append(initfunc_name) + return ext.export_symbols + + def get_libraries(self, ext): # noqa: C901 + """Return the list of libraries to link against when building a + shared extension. On most platforms, this is just 'ext.libraries'; + on Windows, we add the Python library (eg. python20.dll). + """ + # The python library is always needed on Windows. For MSVC, this + # is redundant, since the library is mentioned in a pragma in + # pyconfig.h that MSVC groks. The other Windows compilers all seem + # to need it mentioned explicitly, though, so that's what we do. + # Append '_d' to the python import library on debug builds. + if sys.platform == "win32": + from .._msvccompiler import MSVCCompiler + + if not isinstance(self.compiler, MSVCCompiler): + template = "python%d%d" + if self.debug: + template = template + '_d' + pythonlib = template % ( + sys.hexversion >> 24, + (sys.hexversion >> 16) & 0xFF, + ) + # don't extend ext.libraries, it may be shared with other + # extensions, it is a reference to the original list + return ext.libraries + [pythonlib] + else: + # On Android only the main executable and LD_PRELOADs are considered + # to be RTLD_GLOBAL, all the dependencies of the main executable + # remain RTLD_LOCAL and so the shared libraries must be linked with + # libpython when python is built with a shared python library (issue + # bpo-21536). + # On Cygwin (and if required, other POSIX-like platforms based on + # Windows like MinGW) it is simply necessary that all symbols in + # shared libraries are resolved at link time. + from ..sysconfig import get_config_var + + link_libpython = False + if get_config_var('Py_ENABLE_SHARED'): + # A native build on an Android device or on Cygwin + if hasattr(sys, 'getandroidapilevel'): + link_libpython = True + elif sys.platform == 'cygwin': + link_libpython = True + elif '_PYTHON_HOST_PLATFORM' in os.environ: + # We are cross-compiling for one of the relevant platforms + if get_config_var('ANDROID_API_LEVEL') != 0: + link_libpython = True + elif get_config_var('MACHDEP') == 'cygwin': + link_libpython = True + + if link_libpython: + ldversion = get_config_var('LDVERSION') + return ext.libraries + ['python' + ldversion] + + return ext.libraries + py37compat.pythonlib() diff --git a/libs/common/setuptools/_distutils/command/build_py.py b/libs/common/setuptools/_distutils/command/build_py.py new file mode 100644 index 00000000..9f783244 --- /dev/null +++ b/libs/common/setuptools/_distutils/command/build_py.py @@ -0,0 +1,407 @@ +"""distutils.command.build_py + +Implements the Distutils 'build_py' command.""" + +import os +import importlib.util +import sys +import glob + +from ..core import Command +from ..errors import DistutilsOptionError, DistutilsFileError +from ..util import convert_path +from distutils._log import log + + +class build_py(Command): + + description = "\"build\" pure Python modules (copy to build directory)" + + user_options = [ + ('build-lib=', 'd', "directory to \"build\" (copy) to"), + ('compile', 'c', "compile .py to .pyc"), + ('no-compile', None, "don't compile .py files [default]"), + ( + 'optimize=', + 'O', + "also compile with optimization: -O1 for \"python -O\", " + "-O2 for \"python -OO\", and -O0 to disable [default: -O0]", + ), + ('force', 'f', "forcibly build everything (ignore file timestamps)"), + ] + + boolean_options = ['compile', 'force'] + negative_opt = {'no-compile': 'compile'} + + def initialize_options(self): + self.build_lib = None + self.py_modules = None + self.package = None + self.package_data = None + self.package_dir = None + self.compile = 0 + self.optimize = 0 + self.force = None + + def finalize_options(self): + self.set_undefined_options( + 'build', ('build_lib', 'build_lib'), ('force', 'force') + ) + + # Get the distribution options that are aliases for build_py + # options -- list of packages and list of modules. + self.packages = self.distribution.packages + self.py_modules = self.distribution.py_modules + self.package_data = self.distribution.package_data + self.package_dir = {} + if self.distribution.package_dir: + for name, path in self.distribution.package_dir.items(): + self.package_dir[name] = convert_path(path) + self.data_files = self.get_data_files() + + # Ick, copied straight from install_lib.py (fancy_getopt needs a + # type system! Hell, *everything* needs a type system!!!) + if not isinstance(self.optimize, int): + try: + self.optimize = int(self.optimize) + assert 0 <= self.optimize <= 2 + except (ValueError, AssertionError): + raise DistutilsOptionError("optimize must be 0, 1, or 2") + + def run(self): + # XXX copy_file by default preserves atime and mtime. IMHO this is + # the right thing to do, but perhaps it should be an option -- in + # particular, a site administrator might want installed files to + # reflect the time of installation rather than the last + # modification time before the installed release. + + # XXX copy_file by default preserves mode, which appears to be the + # wrong thing to do: if a file is read-only in the working + # directory, we want it to be installed read/write so that the next + # installation of the same module distribution can overwrite it + # without problems. (This might be a Unix-specific issue.) Thus + # we turn off 'preserve_mode' when copying to the build directory, + # since the build directory is supposed to be exactly what the + # installation will look like (ie. we preserve mode when + # installing). + + # Two options control which modules will be installed: 'packages' + # and 'py_modules'. The former lets us work with whole packages, not + # specifying individual modules at all; the latter is for + # specifying modules one-at-a-time. + + if self.py_modules: + self.build_modules() + if self.packages: + self.build_packages() + self.build_package_data() + + self.byte_compile(self.get_outputs(include_bytecode=0)) + + def get_data_files(self): + """Generate list of '(package,src_dir,build_dir,filenames)' tuples""" + data = [] + if not self.packages: + return data + for package in self.packages: + # Locate package source directory + src_dir = self.get_package_dir(package) + + # Compute package build directory + build_dir = os.path.join(*([self.build_lib] + package.split('.'))) + + # Length of path to strip from found files + plen = 0 + if src_dir: + plen = len(src_dir) + 1 + + # Strip directory from globbed filenames + filenames = [file[plen:] for file in self.find_data_files(package, src_dir)] + data.append((package, src_dir, build_dir, filenames)) + return data + + def find_data_files(self, package, src_dir): + """Return filenames for package's data files in 'src_dir'""" + globs = self.package_data.get('', []) + self.package_data.get(package, []) + files = [] + for pattern in globs: + # Each pattern has to be converted to a platform-specific path + filelist = glob.glob( + os.path.join(glob.escape(src_dir), convert_path(pattern)) + ) + # Files that match more than one pattern are only added once + files.extend( + [fn for fn in filelist if fn not in files and os.path.isfile(fn)] + ) + return files + + def build_package_data(self): + """Copy data files into build directory""" + for package, src_dir, build_dir, filenames in self.data_files: + for filename in filenames: + target = os.path.join(build_dir, filename) + self.mkpath(os.path.dirname(target)) + self.copy_file( + os.path.join(src_dir, filename), target, preserve_mode=False + ) + + def get_package_dir(self, package): + """Return the directory, relative to the top of the source + distribution, where package 'package' should be found + (at least according to the 'package_dir' option, if any).""" + path = package.split('.') + + if not self.package_dir: + if path: + return os.path.join(*path) + else: + return '' + else: + tail = [] + while path: + try: + pdir = self.package_dir['.'.join(path)] + except KeyError: + tail.insert(0, path[-1]) + del path[-1] + else: + tail.insert(0, pdir) + return os.path.join(*tail) + else: + # Oops, got all the way through 'path' without finding a + # match in package_dir. If package_dir defines a directory + # for the root (nameless) package, then fallback on it; + # otherwise, we might as well have not consulted + # package_dir at all, as we just use the directory implied + # by 'tail' (which should be the same as the original value + # of 'path' at this point). + pdir = self.package_dir.get('') + if pdir is not None: + tail.insert(0, pdir) + + if tail: + return os.path.join(*tail) + else: + return '' + + def check_package(self, package, package_dir): + # Empty dir name means current directory, which we can probably + # assume exists. Also, os.path.exists and isdir don't know about + # my "empty string means current dir" convention, so we have to + # circumvent them. + if package_dir != "": + if not os.path.exists(package_dir): + raise DistutilsFileError( + "package directory '%s' does not exist" % package_dir + ) + if not os.path.isdir(package_dir): + raise DistutilsFileError( + "supposed package directory '%s' exists, " + "but is not a directory" % package_dir + ) + + # Directories without __init__.py are namespace packages (PEP 420). + if package: + init_py = os.path.join(package_dir, "__init__.py") + if os.path.isfile(init_py): + return init_py + + # Either not in a package at all (__init__.py not expected), or + # __init__.py doesn't exist -- so don't return the filename. + return None + + def check_module(self, module, module_file): + if not os.path.isfile(module_file): + log.warning("file %s (for module %s) not found", module_file, module) + return False + else: + return True + + def find_package_modules(self, package, package_dir): + self.check_package(package, package_dir) + module_files = glob.glob(os.path.join(glob.escape(package_dir), "*.py")) + modules = [] + setup_script = os.path.abspath(self.distribution.script_name) + + for f in module_files: + abs_f = os.path.abspath(f) + if abs_f != setup_script: + module = os.path.splitext(os.path.basename(f))[0] + modules.append((package, module, f)) + else: + self.debug_print("excluding %s" % setup_script) + return modules + + def find_modules(self): + """Finds individually-specified Python modules, ie. those listed by + module name in 'self.py_modules'. Returns a list of tuples (package, + module_base, filename): 'package' is a tuple of the path through + package-space to the module; 'module_base' is the bare (no + packages, no dots) module name, and 'filename' is the path to the + ".py" file (relative to the distribution root) that implements the + module. + """ + # Map package names to tuples of useful info about the package: + # (package_dir, checked) + # package_dir - the directory where we'll find source files for + # this package + # checked - true if we have checked that the package directory + # is valid (exists, contains __init__.py, ... ?) + packages = {} + + # List of (package, module, filename) tuples to return + modules = [] + + # We treat modules-in-packages almost the same as toplevel modules, + # just the "package" for a toplevel is empty (either an empty + # string or empty list, depending on context). Differences: + # - don't check for __init__.py in directory for empty package + for module in self.py_modules: + path = module.split('.') + package = '.'.join(path[0:-1]) + module_base = path[-1] + + try: + (package_dir, checked) = packages[package] + except KeyError: + package_dir = self.get_package_dir(package) + checked = 0 + + if not checked: + init_py = self.check_package(package, package_dir) + packages[package] = (package_dir, 1) + if init_py: + modules.append((package, "__init__", init_py)) + + # XXX perhaps we should also check for just .pyc files + # (so greedy closed-source bastards can distribute Python + # modules too) + module_file = os.path.join(package_dir, module_base + ".py") + if not self.check_module(module, module_file): + continue + + modules.append((package, module_base, module_file)) + + return modules + + def find_all_modules(self): + """Compute the list of all modules that will be built, whether + they are specified one-module-at-a-time ('self.py_modules') or + by whole packages ('self.packages'). Return a list of tuples + (package, module, module_file), just like 'find_modules()' and + 'find_package_modules()' do.""" + modules = [] + if self.py_modules: + modules.extend(self.find_modules()) + if self.packages: + for package in self.packages: + package_dir = self.get_package_dir(package) + m = self.find_package_modules(package, package_dir) + modules.extend(m) + return modules + + def get_source_files(self): + return [module[-1] for module in self.find_all_modules()] + + def get_module_outfile(self, build_dir, package, module): + outfile_path = [build_dir] + list(package) + [module + ".py"] + return os.path.join(*outfile_path) + + def get_outputs(self, include_bytecode=1): + modules = self.find_all_modules() + outputs = [] + for (package, module, module_file) in modules: + package = package.split('.') + filename = self.get_module_outfile(self.build_lib, package, module) + outputs.append(filename) + if include_bytecode: + if self.compile: + outputs.append( + importlib.util.cache_from_source(filename, optimization='') + ) + if self.optimize > 0: + outputs.append( + importlib.util.cache_from_source( + filename, optimization=self.optimize + ) + ) + + outputs += [ + os.path.join(build_dir, filename) + for package, src_dir, build_dir, filenames in self.data_files + for filename in filenames + ] + + return outputs + + def build_module(self, module, module_file, package): + if isinstance(package, str): + package = package.split('.') + elif not isinstance(package, (list, tuple)): + raise TypeError( + "'package' must be a string (dot-separated), list, or tuple" + ) + + # Now put the module source file into the "build" area -- this is + # easy, we just copy it somewhere under self.build_lib (the build + # directory for Python source). + outfile = self.get_module_outfile(self.build_lib, package, module) + dir = os.path.dirname(outfile) + self.mkpath(dir) + return self.copy_file(module_file, outfile, preserve_mode=0) + + def build_modules(self): + modules = self.find_modules() + for (package, module, module_file) in modules: + # Now "build" the module -- ie. copy the source file to + # self.build_lib (the build directory for Python source). + # (Actually, it gets copied to the directory for this package + # under self.build_lib.) + self.build_module(module, module_file, package) + + def build_packages(self): + for package in self.packages: + # Get list of (package, module, module_file) tuples based on + # scanning the package directory. 'package' is only included + # in the tuple so that 'find_modules()' and + # 'find_package_tuples()' have a consistent interface; it's + # ignored here (apart from a sanity check). Also, 'module' is + # the *unqualified* module name (ie. no dots, no package -- we + # already know its package!), and 'module_file' is the path to + # the .py file, relative to the current directory + # (ie. including 'package_dir'). + package_dir = self.get_package_dir(package) + modules = self.find_package_modules(package, package_dir) + + # Now loop over the modules we found, "building" each one (just + # copy it to self.build_lib). + for (package_, module, module_file) in modules: + assert package == package_ + self.build_module(module, module_file, package) + + def byte_compile(self, files): + if sys.dont_write_bytecode: + self.warn('byte-compiling is disabled, skipping.') + return + + from ..util import byte_compile + + prefix = self.build_lib + if prefix[-1] != os.sep: + prefix = prefix + os.sep + + # XXX this code is essentially the same as the 'byte_compile() + # method of the "install_lib" command, except for the determination + # of the 'prefix' string. Hmmm. + if self.compile: + byte_compile( + files, optimize=0, force=self.force, prefix=prefix, dry_run=self.dry_run + ) + if self.optimize > 0: + byte_compile( + files, + optimize=self.optimize, + force=self.force, + prefix=prefix, + dry_run=self.dry_run, + ) diff --git a/libs/common/setuptools/_distutils/command/build_scripts.py b/libs/common/setuptools/_distutils/command/build_scripts.py new file mode 100644 index 00000000..87174f6b --- /dev/null +++ b/libs/common/setuptools/_distutils/command/build_scripts.py @@ -0,0 +1,173 @@ +"""distutils.command.build_scripts + +Implements the Distutils 'build_scripts' command.""" + +import os +import re +from stat import ST_MODE +from distutils import sysconfig +from ..core import Command +from ..dep_util import newer +from ..util import convert_path +from distutils._log import log +import tokenize + +shebang_pattern = re.compile('^#!.*python[0-9.]*([ \t].*)?$') +""" +Pattern matching a Python interpreter indicated in first line of a script. +""" + +# for Setuptools compatibility +first_line_re = shebang_pattern + + +class build_scripts(Command): + + description = "\"build\" scripts (copy and fixup #! line)" + + user_options = [ + ('build-dir=', 'd', "directory to \"build\" (copy) to"), + ('force', 'f', "forcibly build everything (ignore file timestamps"), + ('executable=', 'e', "specify final destination interpreter path"), + ] + + boolean_options = ['force'] + + def initialize_options(self): + self.build_dir = None + self.scripts = None + self.force = None + self.executable = None + + def finalize_options(self): + self.set_undefined_options( + 'build', + ('build_scripts', 'build_dir'), + ('force', 'force'), + ('executable', 'executable'), + ) + self.scripts = self.distribution.scripts + + def get_source_files(self): + return self.scripts + + def run(self): + if not self.scripts: + return + self.copy_scripts() + + def copy_scripts(self): + """ + Copy each script listed in ``self.scripts``. + + If a script is marked as a Python script (first line matches + 'shebang_pattern', i.e. starts with ``#!`` and contains + "python"), then adjust in the copy the first line to refer to + the current Python interpreter. + """ + self.mkpath(self.build_dir) + outfiles = [] + updated_files = [] + for script in self.scripts: + self._copy_script(script, outfiles, updated_files) + + self._change_modes(outfiles) + + return outfiles, updated_files + + def _copy_script(self, script, outfiles, updated_files): # noqa: C901 + shebang_match = None + script = convert_path(script) + outfile = os.path.join(self.build_dir, os.path.basename(script)) + outfiles.append(outfile) + + if not self.force and not newer(script, outfile): + log.debug("not copying %s (up-to-date)", script) + return + + # Always open the file, but ignore failures in dry-run mode + # in order to attempt to copy directly. + try: + f = tokenize.open(script) + except OSError: + if not self.dry_run: + raise + f = None + else: + first_line = f.readline() + if not first_line: + self.warn("%s is an empty file (skipping)" % script) + return + + shebang_match = shebang_pattern.match(first_line) + + updated_files.append(outfile) + if shebang_match: + log.info("copying and adjusting %s -> %s", script, self.build_dir) + if not self.dry_run: + if not sysconfig.python_build: + executable = self.executable + else: + executable = os.path.join( + sysconfig.get_config_var("BINDIR"), + "python%s%s" + % ( + sysconfig.get_config_var("VERSION"), + sysconfig.get_config_var("EXE"), + ), + ) + post_interp = shebang_match.group(1) or '' + shebang = "#!" + executable + post_interp + "\n" + self._validate_shebang(shebang, f.encoding) + with open(outfile, "w", encoding=f.encoding) as outf: + outf.write(shebang) + outf.writelines(f.readlines()) + if f: + f.close() + else: + if f: + f.close() + self.copy_file(script, outfile) + + def _change_modes(self, outfiles): + if os.name != 'posix': + return + + for file in outfiles: + self._change_mode(file) + + def _change_mode(self, file): + if self.dry_run: + log.info("changing mode of %s", file) + return + + oldmode = os.stat(file)[ST_MODE] & 0o7777 + newmode = (oldmode | 0o555) & 0o7777 + if newmode != oldmode: + log.info("changing mode of %s from %o to %o", file, oldmode, newmode) + os.chmod(file, newmode) + + @staticmethod + def _validate_shebang(shebang, encoding): + # Python parser starts to read a script using UTF-8 until + # it gets a #coding:xxx cookie. The shebang has to be the + # first line of a file, the #coding:xxx cookie cannot be + # written before. So the shebang has to be encodable to + # UTF-8. + try: + shebang.encode('utf-8') + except UnicodeEncodeError: + raise ValueError( + "The shebang ({!r}) is not encodable " "to utf-8".format(shebang) + ) + + # If the script is encoded to a custom encoding (use a + # #coding:xxx cookie), the shebang has to be encodable to + # the script encoding too. + try: + shebang.encode(encoding) + except UnicodeEncodeError: + raise ValueError( + "The shebang ({!r}) is not encodable " + "to the script encoding ({})".format(shebang, encoding) + ) diff --git a/libs/common/setuptools/_distutils/command/check.py b/libs/common/setuptools/_distutils/command/check.py new file mode 100644 index 00000000..575e49fb --- /dev/null +++ b/libs/common/setuptools/_distutils/command/check.py @@ -0,0 +1,151 @@ +"""distutils.command.check + +Implements the Distutils 'check' command. +""" +import contextlib + +from ..core import Command +from ..errors import DistutilsSetupError + +with contextlib.suppress(ImportError): + import docutils.utils + import docutils.parsers.rst + import docutils.frontend + import docutils.nodes + + class SilentReporter(docutils.utils.Reporter): + def __init__( + self, + source, + report_level, + halt_level, + stream=None, + debug=0, + encoding='ascii', + error_handler='replace', + ): + self.messages = [] + super().__init__( + source, report_level, halt_level, stream, debug, encoding, error_handler + ) + + def system_message(self, level, message, *children, **kwargs): + self.messages.append((level, message, children, kwargs)) + return docutils.nodes.system_message( + message, level=level, type=self.levels[level], *children, **kwargs + ) + + +class check(Command): + """This command checks the meta-data of the package.""" + + description = "perform some checks on the package" + user_options = [ + ('metadata', 'm', 'Verify meta-data'), + ( + 'restructuredtext', + 'r', + ( + 'Checks if long string meta-data syntax ' + 'are reStructuredText-compliant' + ), + ), + ('strict', 's', 'Will exit with an error if a check fails'), + ] + + boolean_options = ['metadata', 'restructuredtext', 'strict'] + + def initialize_options(self): + """Sets default values for options.""" + self.restructuredtext = 0 + self.metadata = 1 + self.strict = 0 + self._warnings = 0 + + def finalize_options(self): + pass + + def warn(self, msg): + """Counts the number of warnings that occurs.""" + self._warnings += 1 + return Command.warn(self, msg) + + def run(self): + """Runs the command.""" + # perform the various tests + if self.metadata: + self.check_metadata() + if self.restructuredtext: + if 'docutils' in globals(): + try: + self.check_restructuredtext() + except TypeError as exc: + raise DistutilsSetupError(str(exc)) + elif self.strict: + raise DistutilsSetupError('The docutils package is needed.') + + # let's raise an error in strict mode, if we have at least + # one warning + if self.strict and self._warnings > 0: + raise DistutilsSetupError('Please correct your package.') + + def check_metadata(self): + """Ensures that all required elements of meta-data are supplied. + + Required fields: + name, version + + Warns if any are missing. + """ + metadata = self.distribution.metadata + + missing = [] + for attr in 'name', 'version': + if not getattr(metadata, attr, None): + missing.append(attr) + + if missing: + self.warn("missing required meta-data: %s" % ', '.join(missing)) + + def check_restructuredtext(self): + """Checks if the long string fields are reST-compliant.""" + data = self.distribution.get_long_description() + for warning in self._check_rst_data(data): + line = warning[-1].get('line') + if line is None: + warning = warning[1] + else: + warning = '{} (line {})'.format(warning[1], line) + self.warn(warning) + + def _check_rst_data(self, data): + """Returns warnings when the provided data doesn't compile.""" + # the include and csv_table directives need this to be a path + source_path = self.distribution.script_name or 'setup.py' + parser = docutils.parsers.rst.Parser() + settings = docutils.frontend.OptionParser( + components=(docutils.parsers.rst.Parser,) + ).get_default_values() + settings.tab_width = 4 + settings.pep_references = None + settings.rfc_references = None + reporter = SilentReporter( + source_path, + settings.report_level, + settings.halt_level, + stream=settings.warning_stream, + debug=settings.debug, + encoding=settings.error_encoding, + error_handler=settings.error_encoding_error_handler, + ) + + document = docutils.nodes.document(settings, reporter, source=source_path) + document.note_source(source_path, -1) + try: + parser.parse(data, document) + except AttributeError as e: + reporter.messages.append( + (-1, 'Could not finish the parsing: %s.' % e, '', {}) + ) + + return reporter.messages diff --git a/libs/common/setuptools/_distutils/command/clean.py b/libs/common/setuptools/_distutils/command/clean.py new file mode 100644 index 00000000..d6eb3eba --- /dev/null +++ b/libs/common/setuptools/_distutils/command/clean.py @@ -0,0 +1,76 @@ +"""distutils.command.clean + +Implements the Distutils 'clean' command.""" + +# contributed by Bastian Kleineidam , added 2000-03-18 + +import os +from ..core import Command +from ..dir_util import remove_tree +from distutils._log import log + + +class clean(Command): + + description = "clean up temporary files from 'build' command" + user_options = [ + ('build-base=', 'b', "base build directory (default: 'build.build-base')"), + ( + 'build-lib=', + None, + "build directory for all modules (default: 'build.build-lib')", + ), + ('build-temp=', 't', "temporary build directory (default: 'build.build-temp')"), + ( + 'build-scripts=', + None, + "build directory for scripts (default: 'build.build-scripts')", + ), + ('bdist-base=', None, "temporary directory for built distributions"), + ('all', 'a', "remove all build output, not just temporary by-products"), + ] + + boolean_options = ['all'] + + def initialize_options(self): + self.build_base = None + self.build_lib = None + self.build_temp = None + self.build_scripts = None + self.bdist_base = None + self.all = None + + def finalize_options(self): + self.set_undefined_options( + 'build', + ('build_base', 'build_base'), + ('build_lib', 'build_lib'), + ('build_scripts', 'build_scripts'), + ('build_temp', 'build_temp'), + ) + self.set_undefined_options('bdist', ('bdist_base', 'bdist_base')) + + def run(self): + # remove the build/temp. directory (unless it's already + # gone) + if os.path.exists(self.build_temp): + remove_tree(self.build_temp, dry_run=self.dry_run) + else: + log.debug("'%s' does not exist -- can't clean it", self.build_temp) + + if self.all: + # remove build directories + for directory in (self.build_lib, self.bdist_base, self.build_scripts): + if os.path.exists(directory): + remove_tree(directory, dry_run=self.dry_run) + else: + log.warning("'%s' does not exist -- can't clean it", directory) + + # just for the heck of it, try to remove the base build directory: + # we might have emptied it right now, but if not we don't care + if not self.dry_run: + try: + os.rmdir(self.build_base) + log.info("removing '%s'", self.build_base) + except OSError: + pass diff --git a/libs/common/setuptools/_distutils/command/config.py b/libs/common/setuptools/_distutils/command/config.py new file mode 100644 index 00000000..8bf0e489 --- /dev/null +++ b/libs/common/setuptools/_distutils/command/config.py @@ -0,0 +1,377 @@ +"""distutils.command.config + +Implements the Distutils 'config' command, a (mostly) empty command class +that exists mainly to be sub-classed by specific module distributions and +applications. The idea is that while every "config" command is different, +at least they're all named the same, and users always see "config" in the +list of standard commands. Also, this is a good place to put common +configure-like tasks: "try to compile this C code", or "figure out where +this header file lives". +""" + +import os +import re + +from ..core import Command +from ..errors import DistutilsExecError +from ..sysconfig import customize_compiler +from distutils._log import log + +LANG_EXT = {"c": ".c", "c++": ".cxx"} + + +class config(Command): + + description = "prepare to build" + + user_options = [ + ('compiler=', None, "specify the compiler type"), + ('cc=', None, "specify the compiler executable"), + ('include-dirs=', 'I', "list of directories to search for header files"), + ('define=', 'D', "C preprocessor macros to define"), + ('undef=', 'U', "C preprocessor macros to undefine"), + ('libraries=', 'l', "external C libraries to link with"), + ('library-dirs=', 'L', "directories to search for external C libraries"), + ('noisy', None, "show every action (compile, link, run, ...) taken"), + ( + 'dump-source', + None, + "dump generated source files before attempting to compile them", + ), + ] + + # The three standard command methods: since the "config" command + # does nothing by default, these are empty. + + def initialize_options(self): + self.compiler = None + self.cc = None + self.include_dirs = None + self.libraries = None + self.library_dirs = None + + # maximal output for now + self.noisy = 1 + self.dump_source = 1 + + # list of temporary files generated along-the-way that we have + # to clean at some point + self.temp_files = [] + + def finalize_options(self): + if self.include_dirs is None: + self.include_dirs = self.distribution.include_dirs or [] + elif isinstance(self.include_dirs, str): + self.include_dirs = self.include_dirs.split(os.pathsep) + + if self.libraries is None: + self.libraries = [] + elif isinstance(self.libraries, str): + self.libraries = [self.libraries] + + if self.library_dirs is None: + self.library_dirs = [] + elif isinstance(self.library_dirs, str): + self.library_dirs = self.library_dirs.split(os.pathsep) + + def run(self): + pass + + # Utility methods for actual "config" commands. The interfaces are + # loosely based on Autoconf macros of similar names. Sub-classes + # may use these freely. + + def _check_compiler(self): + """Check that 'self.compiler' really is a CCompiler object; + if not, make it one. + """ + # We do this late, and only on-demand, because this is an expensive + # import. + from ..ccompiler import CCompiler, new_compiler + + if not isinstance(self.compiler, CCompiler): + self.compiler = new_compiler( + compiler=self.compiler, dry_run=self.dry_run, force=1 + ) + customize_compiler(self.compiler) + if self.include_dirs: + self.compiler.set_include_dirs(self.include_dirs) + if self.libraries: + self.compiler.set_libraries(self.libraries) + if self.library_dirs: + self.compiler.set_library_dirs(self.library_dirs) + + def _gen_temp_sourcefile(self, body, headers, lang): + filename = "_configtest" + LANG_EXT[lang] + with open(filename, "w") as file: + if headers: + for header in headers: + file.write("#include <%s>\n" % header) + file.write("\n") + file.write(body) + if body[-1] != "\n": + file.write("\n") + return filename + + def _preprocess(self, body, headers, include_dirs, lang): + src = self._gen_temp_sourcefile(body, headers, lang) + out = "_configtest.i" + self.temp_files.extend([src, out]) + self.compiler.preprocess(src, out, include_dirs=include_dirs) + return (src, out) + + def _compile(self, body, headers, include_dirs, lang): + src = self._gen_temp_sourcefile(body, headers, lang) + if self.dump_source: + dump_file(src, "compiling '%s':" % src) + (obj,) = self.compiler.object_filenames([src]) + self.temp_files.extend([src, obj]) + self.compiler.compile([src], include_dirs=include_dirs) + return (src, obj) + + def _link(self, body, headers, include_dirs, libraries, library_dirs, lang): + (src, obj) = self._compile(body, headers, include_dirs, lang) + prog = os.path.splitext(os.path.basename(src))[0] + self.compiler.link_executable( + [obj], + prog, + libraries=libraries, + library_dirs=library_dirs, + target_lang=lang, + ) + + if self.compiler.exe_extension is not None: + prog = prog + self.compiler.exe_extension + self.temp_files.append(prog) + + return (src, obj, prog) + + def _clean(self, *filenames): + if not filenames: + filenames = self.temp_files + self.temp_files = [] + log.info("removing: %s", ' '.join(filenames)) + for filename in filenames: + try: + os.remove(filename) + except OSError: + pass + + # XXX these ignore the dry-run flag: what to do, what to do? even if + # you want a dry-run build, you still need some sort of configuration + # info. My inclination is to make it up to the real config command to + # consult 'dry_run', and assume a default (minimal) configuration if + # true. The problem with trying to do it here is that you'd have to + # return either true or false from all the 'try' methods, neither of + # which is correct. + + # XXX need access to the header search path and maybe default macros. + + def try_cpp(self, body=None, headers=None, include_dirs=None, lang="c"): + """Construct a source file from 'body' (a string containing lines + of C/C++ code) and 'headers' (a list of header files to include) + and run it through the preprocessor. Return true if the + preprocessor succeeded, false if there were any errors. + ('body' probably isn't of much use, but what the heck.) + """ + from ..ccompiler import CompileError + + self._check_compiler() + ok = True + try: + self._preprocess(body, headers, include_dirs, lang) + except CompileError: + ok = False + + self._clean() + return ok + + def search_cpp(self, pattern, body=None, headers=None, include_dirs=None, lang="c"): + """Construct a source file (just like 'try_cpp()'), run it through + the preprocessor, and return true if any line of the output matches + 'pattern'. 'pattern' should either be a compiled regex object or a + string containing a regex. If both 'body' and 'headers' are None, + preprocesses an empty file -- which can be useful to determine the + symbols the preprocessor and compiler set by default. + """ + self._check_compiler() + src, out = self._preprocess(body, headers, include_dirs, lang) + + if isinstance(pattern, str): + pattern = re.compile(pattern) + + with open(out) as file: + match = False + while True: + line = file.readline() + if line == '': + break + if pattern.search(line): + match = True + break + + self._clean() + return match + + def try_compile(self, body, headers=None, include_dirs=None, lang="c"): + """Try to compile a source file built from 'body' and 'headers'. + Return true on success, false otherwise. + """ + from ..ccompiler import CompileError + + self._check_compiler() + try: + self._compile(body, headers, include_dirs, lang) + ok = True + except CompileError: + ok = False + + log.info(ok and "success!" or "failure.") + self._clean() + return ok + + def try_link( + self, + body, + headers=None, + include_dirs=None, + libraries=None, + library_dirs=None, + lang="c", + ): + """Try to compile and link a source file, built from 'body' and + 'headers', to executable form. Return true on success, false + otherwise. + """ + from ..ccompiler import CompileError, LinkError + + self._check_compiler() + try: + self._link(body, headers, include_dirs, libraries, library_dirs, lang) + ok = True + except (CompileError, LinkError): + ok = False + + log.info(ok and "success!" or "failure.") + self._clean() + return ok + + def try_run( + self, + body, + headers=None, + include_dirs=None, + libraries=None, + library_dirs=None, + lang="c", + ): + """Try to compile, link to an executable, and run a program + built from 'body' and 'headers'. Return true on success, false + otherwise. + """ + from ..ccompiler import CompileError, LinkError + + self._check_compiler() + try: + src, obj, exe = self._link( + body, headers, include_dirs, libraries, library_dirs, lang + ) + self.spawn([exe]) + ok = True + except (CompileError, LinkError, DistutilsExecError): + ok = False + + log.info(ok and "success!" or "failure.") + self._clean() + return ok + + # -- High-level methods -------------------------------------------- + # (these are the ones that are actually likely to be useful + # when implementing a real-world config command!) + + def check_func( + self, + func, + headers=None, + include_dirs=None, + libraries=None, + library_dirs=None, + decl=0, + call=0, + ): + """Determine if function 'func' is available by constructing a + source file that refers to 'func', and compiles and links it. + If everything succeeds, returns true; otherwise returns false. + + The constructed source file starts out by including the header + files listed in 'headers'. If 'decl' is true, it then declares + 'func' (as "int func()"); you probably shouldn't supply 'headers' + and set 'decl' true in the same call, or you might get errors about + a conflicting declarations for 'func'. Finally, the constructed + 'main()' function either references 'func' or (if 'call' is true) + calls it. 'libraries' and 'library_dirs' are used when + linking. + """ + self._check_compiler() + body = [] + if decl: + body.append("int %s ();" % func) + body.append("int main () {") + if call: + body.append(" %s();" % func) + else: + body.append(" %s;" % func) + body.append("}") + body = "\n".join(body) + "\n" + + return self.try_link(body, headers, include_dirs, libraries, library_dirs) + + def check_lib( + self, + library, + library_dirs=None, + headers=None, + include_dirs=None, + other_libraries=[], + ): + """Determine if 'library' is available to be linked against, + without actually checking that any particular symbols are provided + by it. 'headers' will be used in constructing the source file to + be compiled, but the only effect of this is to check if all the + header files listed are available. Any libraries listed in + 'other_libraries' will be included in the link, in case 'library' + has symbols that depend on other libraries. + """ + self._check_compiler() + return self.try_link( + "int main (void) { }", + headers, + include_dirs, + [library] + other_libraries, + library_dirs, + ) + + def check_header(self, header, include_dirs=None, library_dirs=None, lang="c"): + """Determine if the system header file named by 'header_file' + exists and can be found by the preprocessor; return true if so, + false otherwise. + """ + return self.try_cpp( + body="/* No body */", headers=[header], include_dirs=include_dirs + ) + + +def dump_file(filename, head=None): + """Dumps a file content into log.info. + + If head is not None, will be dumped before the file content. + """ + if head is None: + log.info('%s', filename) + else: + log.info(head) + file = open(filename) + try: + log.info(file.read()) + finally: + file.close() diff --git a/libs/common/setuptools/_distutils/command/install.py b/libs/common/setuptools/_distutils/command/install.py new file mode 100644 index 00000000..08d2f881 --- /dev/null +++ b/libs/common/setuptools/_distutils/command/install.py @@ -0,0 +1,814 @@ +"""distutils.command.install + +Implements the Distutils 'install' command.""" + +import sys +import os +import contextlib +import sysconfig +import itertools + +from distutils._log import log +from ..core import Command +from ..debug import DEBUG +from ..sysconfig import get_config_vars +from ..file_util import write_file +from ..util import convert_path, subst_vars, change_root +from ..util import get_platform +from ..errors import DistutilsOptionError, DistutilsPlatformError +from . import _framework_compat as fw +from .. import _collections + +from site import USER_BASE +from site import USER_SITE + +HAS_USER_SITE = True + +WINDOWS_SCHEME = { + 'purelib': '{base}/Lib/site-packages', + 'platlib': '{base}/Lib/site-packages', + 'headers': '{base}/Include/{dist_name}', + 'scripts': '{base}/Scripts', + 'data': '{base}', +} + +INSTALL_SCHEMES = { + 'posix_prefix': { + 'purelib': '{base}/lib/{implementation_lower}{py_version_short}/site-packages', + 'platlib': '{platbase}/{platlibdir}/{implementation_lower}' + '{py_version_short}/site-packages', + 'headers': '{base}/include/{implementation_lower}' + '{py_version_short}{abiflags}/{dist_name}', + 'scripts': '{base}/bin', + 'data': '{base}', + }, + 'posix_home': { + 'purelib': '{base}/lib/{implementation_lower}', + 'platlib': '{base}/{platlibdir}/{implementation_lower}', + 'headers': '{base}/include/{implementation_lower}/{dist_name}', + 'scripts': '{base}/bin', + 'data': '{base}', + }, + 'nt': WINDOWS_SCHEME, + 'pypy': { + 'purelib': '{base}/site-packages', + 'platlib': '{base}/site-packages', + 'headers': '{base}/include/{dist_name}', + 'scripts': '{base}/bin', + 'data': '{base}', + }, + 'pypy_nt': { + 'purelib': '{base}/site-packages', + 'platlib': '{base}/site-packages', + 'headers': '{base}/include/{dist_name}', + 'scripts': '{base}/Scripts', + 'data': '{base}', + }, +} + +# user site schemes +if HAS_USER_SITE: + INSTALL_SCHEMES['nt_user'] = { + 'purelib': '{usersite}', + 'platlib': '{usersite}', + 'headers': '{userbase}/{implementation}{py_version_nodot_plat}' + '/Include/{dist_name}', + 'scripts': '{userbase}/{implementation}{py_version_nodot_plat}/Scripts', + 'data': '{userbase}', + } + + INSTALL_SCHEMES['posix_user'] = { + 'purelib': '{usersite}', + 'platlib': '{usersite}', + 'headers': '{userbase}/include/{implementation_lower}' + '{py_version_short}{abiflags}/{dist_name}', + 'scripts': '{userbase}/bin', + 'data': '{userbase}', + } + + +INSTALL_SCHEMES.update(fw.schemes) + + +# The keys to an installation scheme; if any new types of files are to be +# installed, be sure to add an entry to every installation scheme above, +# and to SCHEME_KEYS here. +SCHEME_KEYS = ('purelib', 'platlib', 'headers', 'scripts', 'data') + + +def _load_sysconfig_schemes(): + with contextlib.suppress(AttributeError): + return { + scheme: sysconfig.get_paths(scheme, expand=False) + for scheme in sysconfig.get_scheme_names() + } + + +def _load_schemes(): + """ + Extend default schemes with schemes from sysconfig. + """ + + sysconfig_schemes = _load_sysconfig_schemes() or {} + + return { + scheme: { + **INSTALL_SCHEMES.get(scheme, {}), + **sysconfig_schemes.get(scheme, {}), + } + for scheme in set(itertools.chain(INSTALL_SCHEMES, sysconfig_schemes)) + } + + +def _get_implementation(): + if hasattr(sys, 'pypy_version_info'): + return 'PyPy' + else: + return 'Python' + + +def _select_scheme(ob, name): + scheme = _inject_headers(name, _load_scheme(_resolve_scheme(name))) + vars(ob).update(_remove_set(ob, _scheme_attrs(scheme))) + + +def _remove_set(ob, attrs): + """ + Include only attrs that are None in ob. + """ + return {key: value for key, value in attrs.items() if getattr(ob, key) is None} + + +def _resolve_scheme(name): + os_name, sep, key = name.partition('_') + try: + resolved = sysconfig.get_preferred_scheme(key) + except Exception: + resolved = fw.scheme(_pypy_hack(name)) + return resolved + + +def _load_scheme(name): + return _load_schemes()[name] + + +def _inject_headers(name, scheme): + """ + Given a scheme name and the resolved scheme, + if the scheme does not include headers, resolve + the fallback scheme for the name and use headers + from it. pypa/distutils#88 + """ + # Bypass the preferred scheme, which may not + # have defined headers. + fallback = _load_scheme(_pypy_hack(name)) + scheme.setdefault('headers', fallback['headers']) + return scheme + + +def _scheme_attrs(scheme): + """Resolve install directories by applying the install schemes.""" + return {f'install_{key}': scheme[key] for key in SCHEME_KEYS} + + +def _pypy_hack(name): + PY37 = sys.version_info < (3, 8) + old_pypy = hasattr(sys, 'pypy_version_info') and PY37 + prefix = not name.endswith(('_user', '_home')) + pypy_name = 'pypy' + '_nt' * (os.name == 'nt') + return pypy_name if old_pypy and prefix else name + + +class install(Command): + + description = "install everything from build directory" + + user_options = [ + # Select installation scheme and set base director(y|ies) + ('prefix=', None, "installation prefix"), + ('exec-prefix=', None, "(Unix only) prefix for platform-specific files"), + ('home=', None, "(Unix only) home directory to install under"), + # Or, just set the base director(y|ies) + ( + 'install-base=', + None, + "base installation directory (instead of --prefix or --home)", + ), + ( + 'install-platbase=', + None, + "base installation directory for platform-specific files " + + "(instead of --exec-prefix or --home)", + ), + ('root=', None, "install everything relative to this alternate root directory"), + # Or, explicitly set the installation scheme + ( + 'install-purelib=', + None, + "installation directory for pure Python module distributions", + ), + ( + 'install-platlib=', + None, + "installation directory for non-pure module distributions", + ), + ( + 'install-lib=', + None, + "installation directory for all module distributions " + + "(overrides --install-purelib and --install-platlib)", + ), + ('install-headers=', None, "installation directory for C/C++ headers"), + ('install-scripts=', None, "installation directory for Python scripts"), + ('install-data=', None, "installation directory for data files"), + # Byte-compilation options -- see install_lib.py for details, as + # these are duplicated from there (but only install_lib does + # anything with them). + ('compile', 'c', "compile .py to .pyc [default]"), + ('no-compile', None, "don't compile .py files"), + ( + 'optimize=', + 'O', + "also compile with optimization: -O1 for \"python -O\", " + "-O2 for \"python -OO\", and -O0 to disable [default: -O0]", + ), + # Miscellaneous control options + ('force', 'f', "force installation (overwrite any existing files)"), + ('skip-build', None, "skip rebuilding everything (for testing/debugging)"), + # Where to install documentation (eventually!) + # ('doc-format=', None, "format of documentation to generate"), + # ('install-man=', None, "directory for Unix man pages"), + # ('install-html=', None, "directory for HTML documentation"), + # ('install-info=', None, "directory for GNU info files"), + ('record=', None, "filename in which to record list of installed files"), + ] + + boolean_options = ['compile', 'force', 'skip-build'] + + if HAS_USER_SITE: + user_options.append( + ('user', None, "install in user site-package '%s'" % USER_SITE) + ) + boolean_options.append('user') + + negative_opt = {'no-compile': 'compile'} + + def initialize_options(self): + """Initializes options.""" + # High-level options: these select both an installation base + # and scheme. + self.prefix = None + self.exec_prefix = None + self.home = None + self.user = 0 + + # These select only the installation base; it's up to the user to + # specify the installation scheme (currently, that means supplying + # the --install-{platlib,purelib,scripts,data} options). + self.install_base = None + self.install_platbase = None + self.root = None + + # These options are the actual installation directories; if not + # supplied by the user, they are filled in using the installation + # scheme implied by prefix/exec-prefix/home and the contents of + # that installation scheme. + self.install_purelib = None # for pure module distributions + self.install_platlib = None # non-pure (dists w/ extensions) + self.install_headers = None # for C/C++ headers + self.install_lib = None # set to either purelib or platlib + self.install_scripts = None + self.install_data = None + self.install_userbase = USER_BASE + self.install_usersite = USER_SITE + + self.compile = None + self.optimize = None + + # Deprecated + # These two are for putting non-packagized distributions into their + # own directory and creating a .pth file if it makes sense. + # 'extra_path' comes from the setup file; 'install_path_file' can + # be turned off if it makes no sense to install a .pth file. (But + # better to install it uselessly than to guess wrong and not + # install it when it's necessary and would be used!) Currently, + # 'install_path_file' is always true unless some outsider meddles + # with it. + self.extra_path = None + self.install_path_file = 1 + + # 'force' forces installation, even if target files are not + # out-of-date. 'skip_build' skips running the "build" command, + # handy if you know it's not necessary. 'warn_dir' (which is *not* + # a user option, it's just there so the bdist_* commands can turn + # it off) determines whether we warn about installing to a + # directory not in sys.path. + self.force = 0 + self.skip_build = 0 + self.warn_dir = 1 + + # These are only here as a conduit from the 'build' command to the + # 'install_*' commands that do the real work. ('build_base' isn't + # actually used anywhere, but it might be useful in future.) They + # are not user options, because if the user told the install + # command where the build directory is, that wouldn't affect the + # build command. + self.build_base = None + self.build_lib = None + + # Not defined yet because we don't know anything about + # documentation yet. + # self.install_man = None + # self.install_html = None + # self.install_info = None + + self.record = None + + # -- Option finalizing methods ------------------------------------- + # (This is rather more involved than for most commands, + # because this is where the policy for installing third- + # party Python modules on various platforms given a wide + # array of user input is decided. Yes, it's quite complex!) + + def finalize_options(self): # noqa: C901 + """Finalizes options.""" + # This method (and its helpers, like 'finalize_unix()', + # 'finalize_other()', and 'select_scheme()') is where the default + # installation directories for modules, extension modules, and + # anything else we care to install from a Python module + # distribution. Thus, this code makes a pretty important policy + # statement about how third-party stuff is added to a Python + # installation! Note that the actual work of installation is done + # by the relatively simple 'install_*' commands; they just take + # their orders from the installation directory options determined + # here. + + # Check for errors/inconsistencies in the options; first, stuff + # that's wrong on any platform. + + if (self.prefix or self.exec_prefix or self.home) and ( + self.install_base or self.install_platbase + ): + raise DistutilsOptionError( + "must supply either prefix/exec-prefix/home or " + + "install-base/install-platbase -- not both" + ) + + if self.home and (self.prefix or self.exec_prefix): + raise DistutilsOptionError( + "must supply either home or prefix/exec-prefix -- not both" + ) + + if self.user and ( + self.prefix + or self.exec_prefix + or self.home + or self.install_base + or self.install_platbase + ): + raise DistutilsOptionError( + "can't combine user with prefix, " + "exec_prefix/home, or install_(plat)base" + ) + + # Next, stuff that's wrong (or dubious) only on certain platforms. + if os.name != "posix": + if self.exec_prefix: + self.warn("exec-prefix option ignored on this platform") + self.exec_prefix = None + + # Now the interesting logic -- so interesting that we farm it out + # to other methods. The goal of these methods is to set the final + # values for the install_{lib,scripts,data,...} options, using as + # input a heady brew of prefix, exec_prefix, home, install_base, + # install_platbase, user-supplied versions of + # install_{purelib,platlib,lib,scripts,data,...}, and the + # install schemes. Phew! + + self.dump_dirs("pre-finalize_{unix,other}") + + if os.name == 'posix': + self.finalize_unix() + else: + self.finalize_other() + + self.dump_dirs("post-finalize_{unix,other}()") + + # Expand configuration variables, tilde, etc. in self.install_base + # and self.install_platbase -- that way, we can use $base or + # $platbase in the other installation directories and not worry + # about needing recursive variable expansion (shudder). + + py_version = sys.version.split()[0] + (prefix, exec_prefix) = get_config_vars('prefix', 'exec_prefix') + try: + abiflags = sys.abiflags + except AttributeError: + # sys.abiflags may not be defined on all platforms. + abiflags = '' + local_vars = { + 'dist_name': self.distribution.get_name(), + 'dist_version': self.distribution.get_version(), + 'dist_fullname': self.distribution.get_fullname(), + 'py_version': py_version, + 'py_version_short': '%d.%d' % sys.version_info[:2], + 'py_version_nodot': '%d%d' % sys.version_info[:2], + 'sys_prefix': prefix, + 'prefix': prefix, + 'sys_exec_prefix': exec_prefix, + 'exec_prefix': exec_prefix, + 'abiflags': abiflags, + 'platlibdir': getattr(sys, 'platlibdir', 'lib'), + 'implementation_lower': _get_implementation().lower(), + 'implementation': _get_implementation(), + } + + # vars for compatibility on older Pythons + compat_vars = dict( + # Python 3.9 and earlier + py_version_nodot_plat=getattr(sys, 'winver', '').replace('.', ''), + ) + + if HAS_USER_SITE: + local_vars['userbase'] = self.install_userbase + local_vars['usersite'] = self.install_usersite + + self.config_vars = _collections.DictStack( + [fw.vars(), compat_vars, sysconfig.get_config_vars(), local_vars] + ) + + self.expand_basedirs() + + self.dump_dirs("post-expand_basedirs()") + + # Now define config vars for the base directories so we can expand + # everything else. + local_vars['base'] = self.install_base + local_vars['platbase'] = self.install_platbase + + if DEBUG: + from pprint import pprint + + print("config vars:") + pprint(dict(self.config_vars)) + + # Expand "~" and configuration variables in the installation + # directories. + self.expand_dirs() + + self.dump_dirs("post-expand_dirs()") + + # Create directories in the home dir: + if self.user: + self.create_home_path() + + # Pick the actual directory to install all modules to: either + # install_purelib or install_platlib, depending on whether this + # module distribution is pure or not. Of course, if the user + # already specified install_lib, use their selection. + if self.install_lib is None: + if self.distribution.has_ext_modules(): # has extensions: non-pure + self.install_lib = self.install_platlib + else: + self.install_lib = self.install_purelib + + # Convert directories from Unix /-separated syntax to the local + # convention. + self.convert_paths( + 'lib', + 'purelib', + 'platlib', + 'scripts', + 'data', + 'headers', + 'userbase', + 'usersite', + ) + + # Deprecated + # Well, we're not actually fully completely finalized yet: we still + # have to deal with 'extra_path', which is the hack for allowing + # non-packagized module distributions (hello, Numerical Python!) to + # get their own directories. + self.handle_extra_path() + self.install_libbase = self.install_lib # needed for .pth file + self.install_lib = os.path.join(self.install_lib, self.extra_dirs) + + # If a new root directory was supplied, make all the installation + # dirs relative to it. + if self.root is not None: + self.change_roots( + 'libbase', 'lib', 'purelib', 'platlib', 'scripts', 'data', 'headers' + ) + + self.dump_dirs("after prepending root") + + # Find out the build directories, ie. where to install from. + self.set_undefined_options( + 'build', ('build_base', 'build_base'), ('build_lib', 'build_lib') + ) + + # Punt on doc directories for now -- after all, we're punting on + # documentation completely! + + def dump_dirs(self, msg): + """Dumps the list of user options.""" + if not DEBUG: + return + from ..fancy_getopt import longopt_xlate + + log.debug(msg + ":") + for opt in self.user_options: + opt_name = opt[0] + if opt_name[-1] == "=": + opt_name = opt_name[0:-1] + if opt_name in self.negative_opt: + opt_name = self.negative_opt[opt_name] + opt_name = opt_name.translate(longopt_xlate) + val = not getattr(self, opt_name) + else: + opt_name = opt_name.translate(longopt_xlate) + val = getattr(self, opt_name) + log.debug(" %s: %s", opt_name, val) + + def finalize_unix(self): + """Finalizes options for posix platforms.""" + if self.install_base is not None or self.install_platbase is not None: + incomplete_scheme = ( + ( + self.install_lib is None + and self.install_purelib is None + and self.install_platlib is None + ) + or self.install_headers is None + or self.install_scripts is None + or self.install_data is None + ) + if incomplete_scheme: + raise DistutilsOptionError( + "install-base or install-platbase supplied, but " + "installation scheme is incomplete" + ) + return + + if self.user: + if self.install_userbase is None: + raise DistutilsPlatformError("User base directory is not specified") + self.install_base = self.install_platbase = self.install_userbase + self.select_scheme("posix_user") + elif self.home is not None: + self.install_base = self.install_platbase = self.home + self.select_scheme("posix_home") + else: + if self.prefix is None: + if self.exec_prefix is not None: + raise DistutilsOptionError( + "must not supply exec-prefix without prefix" + ) + + # Allow Fedora to add components to the prefix + _prefix_addition = getattr(sysconfig, '_prefix_addition', "") + + self.prefix = os.path.normpath(sys.prefix) + _prefix_addition + self.exec_prefix = os.path.normpath(sys.exec_prefix) + _prefix_addition + + else: + if self.exec_prefix is None: + self.exec_prefix = self.prefix + + self.install_base = self.prefix + self.install_platbase = self.exec_prefix + self.select_scheme("posix_prefix") + + def finalize_other(self): + """Finalizes options for non-posix platforms""" + if self.user: + if self.install_userbase is None: + raise DistutilsPlatformError("User base directory is not specified") + self.install_base = self.install_platbase = self.install_userbase + self.select_scheme(os.name + "_user") + elif self.home is not None: + self.install_base = self.install_platbase = self.home + self.select_scheme("posix_home") + else: + if self.prefix is None: + self.prefix = os.path.normpath(sys.prefix) + + self.install_base = self.install_platbase = self.prefix + try: + self.select_scheme(os.name) + except KeyError: + raise DistutilsPlatformError( + "I don't know how to install stuff on '%s'" % os.name + ) + + def select_scheme(self, name): + _select_scheme(self, name) + + def _expand_attrs(self, attrs): + for attr in attrs: + val = getattr(self, attr) + if val is not None: + if os.name == 'posix' or os.name == 'nt': + val = os.path.expanduser(val) + val = subst_vars(val, self.config_vars) + setattr(self, attr, val) + + def expand_basedirs(self): + """Calls `os.path.expanduser` on install_base, install_platbase and + root.""" + self._expand_attrs(['install_base', 'install_platbase', 'root']) + + def expand_dirs(self): + """Calls `os.path.expanduser` on install dirs.""" + self._expand_attrs( + [ + 'install_purelib', + 'install_platlib', + 'install_lib', + 'install_headers', + 'install_scripts', + 'install_data', + ] + ) + + def convert_paths(self, *names): + """Call `convert_path` over `names`.""" + for name in names: + attr = "install_" + name + setattr(self, attr, convert_path(getattr(self, attr))) + + def handle_extra_path(self): + """Set `path_file` and `extra_dirs` using `extra_path`.""" + if self.extra_path is None: + self.extra_path = self.distribution.extra_path + + if self.extra_path is not None: + log.warning( + "Distribution option extra_path is deprecated. " + "See issue27919 for details." + ) + if isinstance(self.extra_path, str): + self.extra_path = self.extra_path.split(',') + + if len(self.extra_path) == 1: + path_file = extra_dirs = self.extra_path[0] + elif len(self.extra_path) == 2: + path_file, extra_dirs = self.extra_path + else: + raise DistutilsOptionError( + "'extra_path' option must be a list, tuple, or " + "comma-separated string with 1 or 2 elements" + ) + + # convert to local form in case Unix notation used (as it + # should be in setup scripts) + extra_dirs = convert_path(extra_dirs) + else: + path_file = None + extra_dirs = '' + + # XXX should we warn if path_file and not extra_dirs? (in which + # case the path file would be harmless but pointless) + self.path_file = path_file + self.extra_dirs = extra_dirs + + def change_roots(self, *names): + """Change the install directories pointed by name using root.""" + for name in names: + attr = "install_" + name + setattr(self, attr, change_root(self.root, getattr(self, attr))) + + def create_home_path(self): + """Create directories under ~.""" + if not self.user: + return + home = convert_path(os.path.expanduser("~")) + for name, path in self.config_vars.items(): + if str(path).startswith(home) and not os.path.isdir(path): + self.debug_print("os.makedirs('%s', 0o700)" % path) + os.makedirs(path, 0o700) + + # -- Command execution methods ------------------------------------- + + def run(self): + """Runs the command.""" + # Obviously have to build before we can install + if not self.skip_build: + self.run_command('build') + # If we built for any other platform, we can't install. + build_plat = self.distribution.get_command_obj('build').plat_name + # check warn_dir - it is a clue that the 'install' is happening + # internally, and not to sys.path, so we don't check the platform + # matches what we are running. + if self.warn_dir and build_plat != get_platform(): + raise DistutilsPlatformError("Can't install when " "cross-compiling") + + # Run all sub-commands (at least those that need to be run) + for cmd_name in self.get_sub_commands(): + self.run_command(cmd_name) + + if self.path_file: + self.create_path_file() + + # write list of installed files, if requested. + if self.record: + outputs = self.get_outputs() + if self.root: # strip any package prefix + root_len = len(self.root) + for counter in range(len(outputs)): + outputs[counter] = outputs[counter][root_len:] + self.execute( + write_file, + (self.record, outputs), + "writing list of installed files to '%s'" % self.record, + ) + + sys_path = map(os.path.normpath, sys.path) + sys_path = map(os.path.normcase, sys_path) + install_lib = os.path.normcase(os.path.normpath(self.install_lib)) + if ( + self.warn_dir + and not (self.path_file and self.install_path_file) + and install_lib not in sys_path + ): + log.debug( + ( + "modules installed to '%s', which is not in " + "Python's module search path (sys.path) -- " + "you'll have to change the search path yourself" + ), + self.install_lib, + ) + + def create_path_file(self): + """Creates the .pth file""" + filename = os.path.join(self.install_libbase, self.path_file + ".pth") + if self.install_path_file: + self.execute( + write_file, (filename, [self.extra_dirs]), "creating %s" % filename + ) + else: + self.warn("path file '%s' not created" % filename) + + # -- Reporting methods --------------------------------------------- + + def get_outputs(self): + """Assembles the outputs of all the sub-commands.""" + outputs = [] + for cmd_name in self.get_sub_commands(): + cmd = self.get_finalized_command(cmd_name) + # Add the contents of cmd.get_outputs(), ensuring + # that outputs doesn't contain duplicate entries + for filename in cmd.get_outputs(): + if filename not in outputs: + outputs.append(filename) + + if self.path_file and self.install_path_file: + outputs.append(os.path.join(self.install_libbase, self.path_file + ".pth")) + + return outputs + + def get_inputs(self): + """Returns the inputs of all the sub-commands""" + # XXX gee, this looks familiar ;-( + inputs = [] + for cmd_name in self.get_sub_commands(): + cmd = self.get_finalized_command(cmd_name) + inputs.extend(cmd.get_inputs()) + + return inputs + + # -- Predicates for sub-command list ------------------------------- + + def has_lib(self): + """Returns true if the current distribution has any Python + modules to install.""" + return ( + self.distribution.has_pure_modules() or self.distribution.has_ext_modules() + ) + + def has_headers(self): + """Returns true if the current distribution has any headers to + install.""" + return self.distribution.has_headers() + + def has_scripts(self): + """Returns true if the current distribution has any scripts to. + install.""" + return self.distribution.has_scripts() + + def has_data(self): + """Returns true if the current distribution has any data to. + install.""" + return self.distribution.has_data_files() + + # 'sub_commands': a list of commands this command might have to run to + # get its work done. See cmd.py for more info. + sub_commands = [ + ('install_lib', has_lib), + ('install_headers', has_headers), + ('install_scripts', has_scripts), + ('install_data', has_data), + ('install_egg_info', lambda self: True), + ] diff --git a/libs/common/setuptools/_distutils/command/install_data.py b/libs/common/setuptools/_distutils/command/install_data.py new file mode 100644 index 00000000..d92ed87a --- /dev/null +++ b/libs/common/setuptools/_distutils/command/install_data.py @@ -0,0 +1,84 @@ +"""distutils.command.install_data + +Implements the Distutils 'install_data' command, for installing +platform-independent data files.""" + +# contributed by Bastian Kleineidam + +import os +from ..core import Command +from ..util import change_root, convert_path + + +class install_data(Command): + + description = "install data files" + + user_options = [ + ( + 'install-dir=', + 'd', + "base directory for installing data files " + "(default: installation base dir)", + ), + ('root=', None, "install everything relative to this alternate root directory"), + ('force', 'f', "force installation (overwrite existing files)"), + ] + + boolean_options = ['force'] + + def initialize_options(self): + self.install_dir = None + self.outfiles = [] + self.root = None + self.force = 0 + self.data_files = self.distribution.data_files + self.warn_dir = 1 + + def finalize_options(self): + self.set_undefined_options( + 'install', + ('install_data', 'install_dir'), + ('root', 'root'), + ('force', 'force'), + ) + + def run(self): + self.mkpath(self.install_dir) + for f in self.data_files: + if isinstance(f, str): + # it's a simple file, so copy it + f = convert_path(f) + if self.warn_dir: + self.warn( + "setup script did not provide a directory for " + "'%s' -- installing right in '%s'" % (f, self.install_dir) + ) + (out, _) = self.copy_file(f, self.install_dir) + self.outfiles.append(out) + else: + # it's a tuple with path to install to and a list of files + dir = convert_path(f[0]) + if not os.path.isabs(dir): + dir = os.path.join(self.install_dir, dir) + elif self.root: + dir = change_root(self.root, dir) + self.mkpath(dir) + + if f[1] == []: + # If there are no files listed, the user must be + # trying to create an empty directory, so add the + # directory to the list of output files. + self.outfiles.append(dir) + else: + # Copy files, adding them to the list of output files. + for data in f[1]: + data = convert_path(data) + (out, _) = self.copy_file(data, dir) + self.outfiles.append(out) + + def get_inputs(self): + return self.data_files or [] + + def get_outputs(self): + return self.outfiles diff --git a/libs/common/setuptools/_distutils/command/install_egg_info.py b/libs/common/setuptools/_distutils/command/install_egg_info.py new file mode 100644 index 00000000..f3e8f344 --- /dev/null +++ b/libs/common/setuptools/_distutils/command/install_egg_info.py @@ -0,0 +1,92 @@ +""" +distutils.command.install_egg_info + +Implements the Distutils 'install_egg_info' command, for installing +a package's PKG-INFO metadata. +""" + +import os +import sys +import re + +from ..cmd import Command +from .. import dir_util +from .._log import log + + +class install_egg_info(Command): + """Install an .egg-info file for the package""" + + description = "Install package's PKG-INFO metadata as an .egg-info file" + user_options = [ + ('install-dir=', 'd', "directory to install to"), + ] + + def initialize_options(self): + self.install_dir = None + + @property + def basename(self): + """ + Allow basename to be overridden by child class. + Ref pypa/distutils#2. + """ + return "%s-%s-py%d.%d.egg-info" % ( + to_filename(safe_name(self.distribution.get_name())), + to_filename(safe_version(self.distribution.get_version())), + *sys.version_info[:2], + ) + + def finalize_options(self): + self.set_undefined_options('install_lib', ('install_dir', 'install_dir')) + self.target = os.path.join(self.install_dir, self.basename) + self.outputs = [self.target] + + def run(self): + target = self.target + if os.path.isdir(target) and not os.path.islink(target): + dir_util.remove_tree(target, dry_run=self.dry_run) + elif os.path.exists(target): + self.execute(os.unlink, (self.target,), "Removing " + target) + elif not os.path.isdir(self.install_dir): + self.execute( + os.makedirs, (self.install_dir,), "Creating " + self.install_dir + ) + log.info("Writing %s", target) + if not self.dry_run: + with open(target, 'w', encoding='UTF-8') as f: + self.distribution.metadata.write_pkg_file(f) + + def get_outputs(self): + return self.outputs + + +# The following routines are taken from setuptools' pkg_resources module and +# can be replaced by importing them from pkg_resources once it is included +# in the stdlib. + + +def safe_name(name): + """Convert an arbitrary string to a standard distribution name + + Any runs of non-alphanumeric/. characters are replaced with a single '-'. + """ + return re.sub('[^A-Za-z0-9.]+', '-', name) + + +def safe_version(version): + """Convert an arbitrary string to a standard version string + + Spaces become dots, and all other non-alphanumeric characters become + dashes, with runs of multiple dashes condensed to a single dash. + """ + version = version.replace(' ', '.') + return re.sub('[^A-Za-z0-9.]+', '-', version) + + +def to_filename(name): + """Convert a project or version name to its filename-escaped form + + Any '-' characters are currently replaced with '_'. + """ + return name.replace('-', '_') diff --git a/libs/common/setuptools/_distutils/command/install_headers.py b/libs/common/setuptools/_distutils/command/install_headers.py new file mode 100644 index 00000000..1cdee823 --- /dev/null +++ b/libs/common/setuptools/_distutils/command/install_headers.py @@ -0,0 +1,45 @@ +"""distutils.command.install_headers + +Implements the Distutils 'install_headers' command, to install C/C++ header +files to the Python include directory.""" + +from ..core import Command + + +# XXX force is never used +class install_headers(Command): + + description = "install C/C++ header files" + + user_options = [ + ('install-dir=', 'd', "directory to install header files to"), + ('force', 'f', "force installation (overwrite existing files)"), + ] + + boolean_options = ['force'] + + def initialize_options(self): + self.install_dir = None + self.force = 0 + self.outfiles = [] + + def finalize_options(self): + self.set_undefined_options( + 'install', ('install_headers', 'install_dir'), ('force', 'force') + ) + + def run(self): + headers = self.distribution.headers + if not headers: + return + + self.mkpath(self.install_dir) + for header in headers: + (out, _) = self.copy_file(header, self.install_dir) + self.outfiles.append(out) + + def get_inputs(self): + return self.distribution.headers or [] + + def get_outputs(self): + return self.outfiles diff --git a/libs/common/setuptools/_distutils/command/install_lib.py b/libs/common/setuptools/_distutils/command/install_lib.py new file mode 100644 index 00000000..840d3403 --- /dev/null +++ b/libs/common/setuptools/_distutils/command/install_lib.py @@ -0,0 +1,238 @@ +"""distutils.command.install_lib + +Implements the Distutils 'install_lib' command +(install all Python modules).""" + +import os +import importlib.util +import sys + +from ..core import Command +from ..errors import DistutilsOptionError + + +# Extension for Python source files. +PYTHON_SOURCE_EXTENSION = ".py" + + +class install_lib(Command): + + description = "install all Python modules (extensions and pure Python)" + + # The byte-compilation options are a tad confusing. Here are the + # possible scenarios: + # 1) no compilation at all (--no-compile --no-optimize) + # 2) compile .pyc only (--compile --no-optimize; default) + # 3) compile .pyc and "opt-1" .pyc (--compile --optimize) + # 4) compile "opt-1" .pyc only (--no-compile --optimize) + # 5) compile .pyc and "opt-2" .pyc (--compile --optimize-more) + # 6) compile "opt-2" .pyc only (--no-compile --optimize-more) + # + # The UI for this is two options, 'compile' and 'optimize'. + # 'compile' is strictly boolean, and only decides whether to + # generate .pyc files. 'optimize' is three-way (0, 1, or 2), and + # decides both whether to generate .pyc files and what level of + # optimization to use. + + user_options = [ + ('install-dir=', 'd', "directory to install to"), + ('build-dir=', 'b', "build directory (where to install from)"), + ('force', 'f', "force installation (overwrite existing files)"), + ('compile', 'c', "compile .py to .pyc [default]"), + ('no-compile', None, "don't compile .py files"), + ( + 'optimize=', + 'O', + "also compile with optimization: -O1 for \"python -O\", " + "-O2 for \"python -OO\", and -O0 to disable [default: -O0]", + ), + ('skip-build', None, "skip the build steps"), + ] + + boolean_options = ['force', 'compile', 'skip-build'] + negative_opt = {'no-compile': 'compile'} + + def initialize_options(self): + # let the 'install' command dictate our installation directory + self.install_dir = None + self.build_dir = None + self.force = 0 + self.compile = None + self.optimize = None + self.skip_build = None + + def finalize_options(self): + # Get all the information we need to install pure Python modules + # from the umbrella 'install' command -- build (source) directory, + # install (target) directory, and whether to compile .py files. + self.set_undefined_options( + 'install', + ('build_lib', 'build_dir'), + ('install_lib', 'install_dir'), + ('force', 'force'), + ('compile', 'compile'), + ('optimize', 'optimize'), + ('skip_build', 'skip_build'), + ) + + if self.compile is None: + self.compile = True + if self.optimize is None: + self.optimize = False + + if not isinstance(self.optimize, int): + try: + self.optimize = int(self.optimize) + if self.optimize not in (0, 1, 2): + raise AssertionError + except (ValueError, AssertionError): + raise DistutilsOptionError("optimize must be 0, 1, or 2") + + def run(self): + # Make sure we have built everything we need first + self.build() + + # Install everything: simply dump the entire contents of the build + # directory to the installation directory (that's the beauty of + # having a build directory!) + outfiles = self.install() + + # (Optionally) compile .py to .pyc + if outfiles is not None and self.distribution.has_pure_modules(): + self.byte_compile(outfiles) + + # -- Top-level worker functions ------------------------------------ + # (called from 'run()') + + def build(self): + if not self.skip_build: + if self.distribution.has_pure_modules(): + self.run_command('build_py') + if self.distribution.has_ext_modules(): + self.run_command('build_ext') + + def install(self): + if os.path.isdir(self.build_dir): + outfiles = self.copy_tree(self.build_dir, self.install_dir) + else: + self.warn( + "'%s' does not exist -- no Python modules to install" % self.build_dir + ) + return + return outfiles + + def byte_compile(self, files): + if sys.dont_write_bytecode: + self.warn('byte-compiling is disabled, skipping.') + return + + from ..util import byte_compile + + # Get the "--root" directory supplied to the "install" command, + # and use it as a prefix to strip off the purported filename + # encoded in bytecode files. This is far from complete, but it + # should at least generate usable bytecode in RPM distributions. + install_root = self.get_finalized_command('install').root + + if self.compile: + byte_compile( + files, + optimize=0, + force=self.force, + prefix=install_root, + dry_run=self.dry_run, + ) + if self.optimize > 0: + byte_compile( + files, + optimize=self.optimize, + force=self.force, + prefix=install_root, + verbose=self.verbose, + dry_run=self.dry_run, + ) + + # -- Utility methods ----------------------------------------------- + + def _mutate_outputs(self, has_any, build_cmd, cmd_option, output_dir): + if not has_any: + return [] + + build_cmd = self.get_finalized_command(build_cmd) + build_files = build_cmd.get_outputs() + build_dir = getattr(build_cmd, cmd_option) + + prefix_len = len(build_dir) + len(os.sep) + outputs = [] + for file in build_files: + outputs.append(os.path.join(output_dir, file[prefix_len:])) + + return outputs + + def _bytecode_filenames(self, py_filenames): + bytecode_files = [] + for py_file in py_filenames: + # Since build_py handles package data installation, the + # list of outputs can contain more than just .py files. + # Make sure we only report bytecode for the .py files. + ext = os.path.splitext(os.path.normcase(py_file))[1] + if ext != PYTHON_SOURCE_EXTENSION: + continue + if self.compile: + bytecode_files.append( + importlib.util.cache_from_source(py_file, optimization='') + ) + if self.optimize > 0: + bytecode_files.append( + importlib.util.cache_from_source( + py_file, optimization=self.optimize + ) + ) + + return bytecode_files + + # -- External interface -------------------------------------------- + # (called by outsiders) + + def get_outputs(self): + """Return the list of files that would be installed if this command + were actually run. Not affected by the "dry-run" flag or whether + modules have actually been built yet. + """ + pure_outputs = self._mutate_outputs( + self.distribution.has_pure_modules(), + 'build_py', + 'build_lib', + self.install_dir, + ) + if self.compile: + bytecode_outputs = self._bytecode_filenames(pure_outputs) + else: + bytecode_outputs = [] + + ext_outputs = self._mutate_outputs( + self.distribution.has_ext_modules(), + 'build_ext', + 'build_lib', + self.install_dir, + ) + + return pure_outputs + bytecode_outputs + ext_outputs + + def get_inputs(self): + """Get the list of files that are input to this command, ie. the + files that get installed as they are named in the build tree. + The files in this list correspond one-to-one to the output + filenames returned by 'get_outputs()'. + """ + inputs = [] + + if self.distribution.has_pure_modules(): + build_py = self.get_finalized_command('build_py') + inputs.extend(build_py.get_outputs()) + + if self.distribution.has_ext_modules(): + build_ext = self.get_finalized_command('build_ext') + inputs.extend(build_ext.get_outputs()) + + return inputs diff --git a/libs/common/setuptools/_distutils/command/install_scripts.py b/libs/common/setuptools/_distutils/command/install_scripts.py new file mode 100644 index 00000000..ec6ec5ac --- /dev/null +++ b/libs/common/setuptools/_distutils/command/install_scripts.py @@ -0,0 +1,61 @@ +"""distutils.command.install_scripts + +Implements the Distutils 'install_scripts' command, for installing +Python scripts.""" + +# contributed by Bastian Kleineidam + +import os +from ..core import Command +from distutils._log import log +from stat import ST_MODE + + +class install_scripts(Command): + + description = "install scripts (Python or otherwise)" + + user_options = [ + ('install-dir=', 'd', "directory to install scripts to"), + ('build-dir=', 'b', "build directory (where to install from)"), + ('force', 'f', "force installation (overwrite existing files)"), + ('skip-build', None, "skip the build steps"), + ] + + boolean_options = ['force', 'skip-build'] + + def initialize_options(self): + self.install_dir = None + self.force = 0 + self.build_dir = None + self.skip_build = None + + def finalize_options(self): + self.set_undefined_options('build', ('build_scripts', 'build_dir')) + self.set_undefined_options( + 'install', + ('install_scripts', 'install_dir'), + ('force', 'force'), + ('skip_build', 'skip_build'), + ) + + def run(self): + if not self.skip_build: + self.run_command('build_scripts') + self.outfiles = self.copy_tree(self.build_dir, self.install_dir) + if os.name == 'posix': + # Set the executable bits (owner, group, and world) on + # all the scripts we just installed. + for file in self.get_outputs(): + if self.dry_run: + log.info("changing mode of %s", file) + else: + mode = ((os.stat(file)[ST_MODE]) | 0o555) & 0o7777 + log.info("changing mode of %s to %o", file, mode) + os.chmod(file, mode) + + def get_inputs(self): + return self.distribution.scripts or [] + + def get_outputs(self): + return self.outfiles or [] diff --git a/libs/common/setuptools/_distutils/command/py37compat.py b/libs/common/setuptools/_distutils/command/py37compat.py new file mode 100644 index 00000000..aa0c0a7f --- /dev/null +++ b/libs/common/setuptools/_distutils/command/py37compat.py @@ -0,0 +1,31 @@ +import sys + + +def _pythonlib_compat(): + """ + On Python 3.7 and earlier, distutils would include the Python + library. See pypa/distutils#9. + """ + from distutils import sysconfig + + if not sysconfig.get_config_var('Py_ENABLED_SHARED'): + return + + yield 'python{}.{}{}'.format( + sys.hexversion >> 24, + (sys.hexversion >> 16) & 0xFF, + sysconfig.get_config_var('ABIFLAGS'), + ) + + +def compose(f1, f2): + return lambda *args, **kwargs: f1(f2(*args, **kwargs)) + + +pythonlib = ( + compose(list, _pythonlib_compat) + if sys.version_info < (3, 8) + and sys.platform != 'darwin' + and sys.platform[:3] != 'aix' + else list +) diff --git a/libs/common/setuptools/_distutils/command/register.py b/libs/common/setuptools/_distutils/command/register.py new file mode 100644 index 00000000..55c1045e --- /dev/null +++ b/libs/common/setuptools/_distutils/command/register.py @@ -0,0 +1,321 @@ +"""distutils.command.register + +Implements the Distutils 'register' command (register with the repository). +""" + +# created 2002/10/21, Richard Jones + +import getpass +import io +import logging +import urllib.parse +import urllib.request +from warnings import warn + +from ..core import PyPIRCCommand +from distutils._log import log + + +class register(PyPIRCCommand): + + description = "register the distribution with the Python package index" + user_options = PyPIRCCommand.user_options + [ + ('list-classifiers', None, 'list the valid Trove classifiers'), + ( + 'strict', + None, + 'Will stop the registering if the meta-data are not fully compliant', + ), + ] + boolean_options = PyPIRCCommand.boolean_options + [ + 'verify', + 'list-classifiers', + 'strict', + ] + + sub_commands = [('check', lambda self: True)] + + def initialize_options(self): + PyPIRCCommand.initialize_options(self) + self.list_classifiers = 0 + self.strict = 0 + + def finalize_options(self): + PyPIRCCommand.finalize_options(self) + # setting options for the `check` subcommand + check_options = { + 'strict': ('register', self.strict), + 'restructuredtext': ('register', 1), + } + self.distribution.command_options['check'] = check_options + + def run(self): + self.finalize_options() + self._set_config() + + # Run sub commands + for cmd_name in self.get_sub_commands(): + self.run_command(cmd_name) + + if self.dry_run: + self.verify_metadata() + elif self.list_classifiers: + self.classifiers() + else: + self.send_metadata() + + def check_metadata(self): + """Deprecated API.""" + warn( + "distutils.command.register.check_metadata is deprecated; " + "use the check command instead", + DeprecationWarning, + ) + check = self.distribution.get_command_obj('check') + check.ensure_finalized() + check.strict = self.strict + check.restructuredtext = 1 + check.run() + + def _set_config(self): + '''Reads the configuration file and set attributes.''' + config = self._read_pypirc() + if config != {}: + self.username = config['username'] + self.password = config['password'] + self.repository = config['repository'] + self.realm = config['realm'] + self.has_config = True + else: + if self.repository not in ('pypi', self.DEFAULT_REPOSITORY): + raise ValueError('%s not found in .pypirc' % self.repository) + if self.repository == 'pypi': + self.repository = self.DEFAULT_REPOSITORY + self.has_config = False + + def classifiers(self): + '''Fetch the list of classifiers from the server.''' + url = self.repository + '?:action=list_classifiers' + response = urllib.request.urlopen(url) + log.info(self._read_pypi_response(response)) + + def verify_metadata(self): + '''Send the metadata to the package index server to be checked.''' + # send the info to the server and report the result + (code, result) = self.post_to_server(self.build_post_data('verify')) + log.info('Server response (%s): %s', code, result) + + def send_metadata(self): # noqa: C901 + '''Send the metadata to the package index server. + + Well, do the following: + 1. figure who the user is, and then + 2. send the data as a Basic auth'ed POST. + + First we try to read the username/password from $HOME/.pypirc, + which is a ConfigParser-formatted file with a section + [distutils] containing username and password entries (both + in clear text). Eg: + + [distutils] + index-servers = + pypi + + [pypi] + username: fred + password: sekrit + + Otherwise, to figure who the user is, we offer the user three + choices: + + 1. use existing login, + 2. register as a new user, or + 3. set the password to a random string and email the user. + + ''' + # see if we can short-cut and get the username/password from the + # config + if self.has_config: + choice = '1' + username = self.username + password = self.password + else: + choice = 'x' + username = password = '' + + # get the user's login info + choices = '1 2 3 4'.split() + while choice not in choices: + self.announce( + '''\ +We need to know who you are, so please choose either: + 1. use your existing login, + 2. register as a new user, + 3. have the server generate a new password for you (and email it to you), or + 4. quit +Your selection [default 1]: ''', + logging.INFO, + ) + choice = input() + if not choice: + choice = '1' + elif choice not in choices: + print('Please choose one of the four options!') + + if choice == '1': + # get the username and password + while not username: + username = input('Username: ') + while not password: + password = getpass.getpass('Password: ') + + # set up the authentication + auth = urllib.request.HTTPPasswordMgr() + host = urllib.parse.urlparse(self.repository)[1] + auth.add_password(self.realm, host, username, password) + # send the info to the server and report the result + code, result = self.post_to_server(self.build_post_data('submit'), auth) + self.announce('Server response ({}): {}'.format(code, result), logging.INFO) + + # possibly save the login + if code == 200: + if self.has_config: + # sharing the password in the distribution instance + # so the upload command can reuse it + self.distribution.password = password + else: + self.announce( + ( + 'I can store your PyPI login so future ' + 'submissions will be faster.' + ), + logging.INFO, + ) + self.announce( + '(the login will be stored in %s)' % self._get_rc_file(), + logging.INFO, + ) + choice = 'X' + while choice.lower() not in 'yn': + choice = input('Save your login (y/N)?') + if not choice: + choice = 'n' + if choice.lower() == 'y': + self._store_pypirc(username, password) + + elif choice == '2': + data = {':action': 'user'} + data['name'] = data['password'] = data['email'] = '' + data['confirm'] = None + while not data['name']: + data['name'] = input('Username: ') + while data['password'] != data['confirm']: + while not data['password']: + data['password'] = getpass.getpass('Password: ') + while not data['confirm']: + data['confirm'] = getpass.getpass(' Confirm: ') + if data['password'] != data['confirm']: + data['password'] = '' + data['confirm'] = None + print("Password and confirm don't match!") + while not data['email']: + data['email'] = input(' EMail: ') + code, result = self.post_to_server(data) + if code != 200: + log.info('Server response (%s): %s', code, result) + else: + log.info('You will receive an email shortly.') + log.info('Follow the instructions in it to ' 'complete registration.') + elif choice == '3': + data = {':action': 'password_reset'} + data['email'] = '' + while not data['email']: + data['email'] = input('Your email address: ') + code, result = self.post_to_server(data) + log.info('Server response (%s): %s', code, result) + + def build_post_data(self, action): + # figure the data to send - the metadata plus some additional + # information used by the package server + meta = self.distribution.metadata + data = { + ':action': action, + 'metadata_version': '1.0', + 'name': meta.get_name(), + 'version': meta.get_version(), + 'summary': meta.get_description(), + 'home_page': meta.get_url(), + 'author': meta.get_contact(), + 'author_email': meta.get_contact_email(), + 'license': meta.get_licence(), + 'description': meta.get_long_description(), + 'keywords': meta.get_keywords(), + 'platform': meta.get_platforms(), + 'classifiers': meta.get_classifiers(), + 'download_url': meta.get_download_url(), + # PEP 314 + 'provides': meta.get_provides(), + 'requires': meta.get_requires(), + 'obsoletes': meta.get_obsoletes(), + } + if data['provides'] or data['requires'] or data['obsoletes']: + data['metadata_version'] = '1.1' + return data + + def post_to_server(self, data, auth=None): # noqa: C901 + '''Post a query to the server, and return a string response.''' + if 'name' in data: + self.announce( + 'Registering {} to {}'.format(data['name'], self.repository), + logging.INFO, + ) + # Build up the MIME payload for the urllib2 POST data + boundary = '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254' + sep_boundary = '\n--' + boundary + end_boundary = sep_boundary + '--' + body = io.StringIO() + for key, value in data.items(): + # handle multiple entries for the same name + if type(value) not in (type([]), type(())): + value = [value] + for value in value: + value = str(value) + body.write(sep_boundary) + body.write('\nContent-Disposition: form-data; name="%s"' % key) + body.write("\n\n") + body.write(value) + if value and value[-1] == '\r': + body.write('\n') # write an extra newline (lurve Macs) + body.write(end_boundary) + body.write("\n") + body = body.getvalue().encode("utf-8") + + # build the Request + headers = { + 'Content-type': 'multipart/form-data; boundary=%s; charset=utf-8' + % boundary, + 'Content-length': str(len(body)), + } + req = urllib.request.Request(self.repository, body, headers) + + # handle HTTP and include the Basic Auth handler + opener = urllib.request.build_opener( + urllib.request.HTTPBasicAuthHandler(password_mgr=auth) + ) + data = '' + try: + result = opener.open(req) + except urllib.error.HTTPError as e: + if self.show_response: + data = e.fp.read() + result = e.code, e.msg + except urllib.error.URLError as e: + result = 500, str(e) + else: + if self.show_response: + data = self._read_pypi_response(result) + result = 200, 'OK' + if self.show_response: + msg = '\n'.join(('-' * 75, data, '-' * 75)) + self.announce(msg, logging.INFO) + return result diff --git a/libs/common/setuptools/_distutils/command/sdist.py b/libs/common/setuptools/_distutils/command/sdist.py new file mode 100644 index 00000000..5cfd4c14 --- /dev/null +++ b/libs/common/setuptools/_distutils/command/sdist.py @@ -0,0 +1,531 @@ +"""distutils.command.sdist + +Implements the Distutils 'sdist' command (create a source distribution).""" + +import os +import sys +from glob import glob +from warnings import warn + +from ..core import Command +from distutils import dir_util +from distutils import file_util +from distutils import archive_util +from ..text_file import TextFile +from ..filelist import FileList +from distutils._log import log +from ..util import convert_path +from ..errors import DistutilsOptionError, DistutilsTemplateError + + +def show_formats(): + """Print all possible values for the 'formats' option (used by + the "--help-formats" command-line option). + """ + from ..fancy_getopt import FancyGetopt + from ..archive_util import ARCHIVE_FORMATS + + formats = [] + for format in ARCHIVE_FORMATS.keys(): + formats.append(("formats=" + format, None, ARCHIVE_FORMATS[format][2])) + formats.sort() + FancyGetopt(formats).print_help("List of available source distribution formats:") + + +class sdist(Command): + + description = "create a source distribution (tarball, zip file, etc.)" + + def checking_metadata(self): + """Callable used for the check sub-command. + + Placed here so user_options can view it""" + return self.metadata_check + + user_options = [ + ('template=', 't', "name of manifest template file [default: MANIFEST.in]"), + ('manifest=', 'm', "name of manifest file [default: MANIFEST]"), + ( + 'use-defaults', + None, + "include the default file set in the manifest " + "[default; disable with --no-defaults]", + ), + ('no-defaults', None, "don't include the default file set"), + ( + 'prune', + None, + "specifically exclude files/directories that should not be " + "distributed (build tree, RCS/CVS dirs, etc.) " + "[default; disable with --no-prune]", + ), + ('no-prune', None, "don't automatically exclude anything"), + ( + 'manifest-only', + 'o', + "just regenerate the manifest and then stop " "(implies --force-manifest)", + ), + ( + 'force-manifest', + 'f', + "forcibly regenerate the manifest and carry on as usual. " + "Deprecated: now the manifest is always regenerated.", + ), + ('formats=', None, "formats for source distribution (comma-separated list)"), + ( + 'keep-temp', + 'k', + "keep the distribution tree around after creating " + "archive file(s)", + ), + ( + 'dist-dir=', + 'd', + "directory to put the source distribution archive(s) in " "[default: dist]", + ), + ( + 'metadata-check', + None, + "Ensure that all required elements of meta-data " + "are supplied. Warn if any missing. [default]", + ), + ( + 'owner=', + 'u', + "Owner name used when creating a tar file [default: current user]", + ), + ( + 'group=', + 'g', + "Group name used when creating a tar file [default: current group]", + ), + ] + + boolean_options = [ + 'use-defaults', + 'prune', + 'manifest-only', + 'force-manifest', + 'keep-temp', + 'metadata-check', + ] + + help_options = [ + ('help-formats', None, "list available distribution formats", show_formats), + ] + + negative_opt = {'no-defaults': 'use-defaults', 'no-prune': 'prune'} + + sub_commands = [('check', checking_metadata)] + + READMES = ('README', 'README.txt', 'README.rst') + + def initialize_options(self): + # 'template' and 'manifest' are, respectively, the names of + # the manifest template and manifest file. + self.template = None + self.manifest = None + + # 'use_defaults': if true, we will include the default file set + # in the manifest + self.use_defaults = 1 + self.prune = 1 + + self.manifest_only = 0 + self.force_manifest = 0 + + self.formats = ['gztar'] + self.keep_temp = 0 + self.dist_dir = None + + self.archive_files = None + self.metadata_check = 1 + self.owner = None + self.group = None + + def finalize_options(self): + if self.manifest is None: + self.manifest = "MANIFEST" + if self.template is None: + self.template = "MANIFEST.in" + + self.ensure_string_list('formats') + + bad_format = archive_util.check_archive_formats(self.formats) + if bad_format: + raise DistutilsOptionError("unknown archive format '%s'" % bad_format) + + if self.dist_dir is None: + self.dist_dir = "dist" + + def run(self): + # 'filelist' contains the list of files that will make up the + # manifest + self.filelist = FileList() + + # Run sub commands + for cmd_name in self.get_sub_commands(): + self.run_command(cmd_name) + + # Do whatever it takes to get the list of files to process + # (process the manifest template, read an existing manifest, + # whatever). File list is accumulated in 'self.filelist'. + self.get_file_list() + + # If user just wanted us to regenerate the manifest, stop now. + if self.manifest_only: + return + + # Otherwise, go ahead and create the source distribution tarball, + # or zipfile, or whatever. + self.make_distribution() + + def check_metadata(self): + """Deprecated API.""" + warn( + "distutils.command.sdist.check_metadata is deprecated, \ + use the check command instead", + PendingDeprecationWarning, + ) + check = self.distribution.get_command_obj('check') + check.ensure_finalized() + check.run() + + def get_file_list(self): + """Figure out the list of files to include in the source + distribution, and put it in 'self.filelist'. This might involve + reading the manifest template (and writing the manifest), or just + reading the manifest, or just using the default file set -- it all + depends on the user's options. + """ + # new behavior when using a template: + # the file list is recalculated every time because + # even if MANIFEST.in or setup.py are not changed + # the user might have added some files in the tree that + # need to be included. + # + # This makes --force the default and only behavior with templates. + template_exists = os.path.isfile(self.template) + if not template_exists and self._manifest_is_not_generated(): + self.read_manifest() + self.filelist.sort() + self.filelist.remove_duplicates() + return + + if not template_exists: + self.warn( + ("manifest template '%s' does not exist " + "(using default file list)") + % self.template + ) + self.filelist.findall() + + if self.use_defaults: + self.add_defaults() + + if template_exists: + self.read_template() + + if self.prune: + self.prune_file_list() + + self.filelist.sort() + self.filelist.remove_duplicates() + self.write_manifest() + + def add_defaults(self): + """Add all the default files to self.filelist: + - README or README.txt + - setup.py + - test/test*.py + - all pure Python modules mentioned in setup script + - all files pointed by package_data (build_py) + - all files defined in data_files. + - all files defined as scripts. + - all C sources listed as part of extensions or C libraries + in the setup script (doesn't catch C headers!) + Warns if (README or README.txt) or setup.py are missing; everything + else is optional. + """ + self._add_defaults_standards() + self._add_defaults_optional() + self._add_defaults_python() + self._add_defaults_data_files() + self._add_defaults_ext() + self._add_defaults_c_libs() + self._add_defaults_scripts() + + @staticmethod + def _cs_path_exists(fspath): + """ + Case-sensitive path existence check + + >>> sdist._cs_path_exists(__file__) + True + >>> sdist._cs_path_exists(__file__.upper()) + False + """ + if not os.path.exists(fspath): + return False + # make absolute so we always have a directory + abspath = os.path.abspath(fspath) + directory, filename = os.path.split(abspath) + return filename in os.listdir(directory) + + def _add_defaults_standards(self): + standards = [self.READMES, self.distribution.script_name] + for fn in standards: + if isinstance(fn, tuple): + alts = fn + got_it = False + for fn in alts: + if self._cs_path_exists(fn): + got_it = True + self.filelist.append(fn) + break + + if not got_it: + self.warn( + "standard file not found: should have one of " + ', '.join(alts) + ) + else: + if self._cs_path_exists(fn): + self.filelist.append(fn) + else: + self.warn("standard file '%s' not found" % fn) + + def _add_defaults_optional(self): + optional = ['test/test*.py', 'setup.cfg'] + for pattern in optional: + files = filter(os.path.isfile, glob(pattern)) + self.filelist.extend(files) + + def _add_defaults_python(self): + # build_py is used to get: + # - python modules + # - files defined in package_data + build_py = self.get_finalized_command('build_py') + + # getting python files + if self.distribution.has_pure_modules(): + self.filelist.extend(build_py.get_source_files()) + + # getting package_data files + # (computed in build_py.data_files by build_py.finalize_options) + for pkg, src_dir, build_dir, filenames in build_py.data_files: + for filename in filenames: + self.filelist.append(os.path.join(src_dir, filename)) + + def _add_defaults_data_files(self): + # getting distribution.data_files + if self.distribution.has_data_files(): + for item in self.distribution.data_files: + if isinstance(item, str): + # plain file + item = convert_path(item) + if os.path.isfile(item): + self.filelist.append(item) + else: + # a (dirname, filenames) tuple + dirname, filenames = item + for f in filenames: + f = convert_path(f) + if os.path.isfile(f): + self.filelist.append(f) + + def _add_defaults_ext(self): + if self.distribution.has_ext_modules(): + build_ext = self.get_finalized_command('build_ext') + self.filelist.extend(build_ext.get_source_files()) + + def _add_defaults_c_libs(self): + if self.distribution.has_c_libraries(): + build_clib = self.get_finalized_command('build_clib') + self.filelist.extend(build_clib.get_source_files()) + + def _add_defaults_scripts(self): + if self.distribution.has_scripts(): + build_scripts = self.get_finalized_command('build_scripts') + self.filelist.extend(build_scripts.get_source_files()) + + def read_template(self): + """Read and parse manifest template file named by self.template. + + (usually "MANIFEST.in") The parsing and processing is done by + 'self.filelist', which updates itself accordingly. + """ + log.info("reading manifest template '%s'", self.template) + template = TextFile( + self.template, + strip_comments=1, + skip_blanks=1, + join_lines=1, + lstrip_ws=1, + rstrip_ws=1, + collapse_join=1, + ) + + try: + while True: + line = template.readline() + if line is None: # end of file + break + + try: + self.filelist.process_template_line(line) + # the call above can raise a DistutilsTemplateError for + # malformed lines, or a ValueError from the lower-level + # convert_path function + except (DistutilsTemplateError, ValueError) as msg: + self.warn( + "%s, line %d: %s" + % (template.filename, template.current_line, msg) + ) + finally: + template.close() + + def prune_file_list(self): + """Prune off branches that might slip into the file list as created + by 'read_template()', but really don't belong there: + * the build tree (typically "build") + * the release tree itself (only an issue if we ran "sdist" + previously with --keep-temp, or it aborted) + * any RCS, CVS, .svn, .hg, .git, .bzr, _darcs directories + """ + build = self.get_finalized_command('build') + base_dir = self.distribution.get_fullname() + + self.filelist.exclude_pattern(None, prefix=build.build_base) + self.filelist.exclude_pattern(None, prefix=base_dir) + + if sys.platform == 'win32': + seps = r'/|\\' + else: + seps = '/' + + vcs_dirs = ['RCS', 'CVS', r'\.svn', r'\.hg', r'\.git', r'\.bzr', '_darcs'] + vcs_ptrn = r'(^|{})({})({}).*'.format(seps, '|'.join(vcs_dirs), seps) + self.filelist.exclude_pattern(vcs_ptrn, is_regex=1) + + def write_manifest(self): + """Write the file list in 'self.filelist' (presumably as filled in + by 'add_defaults()' and 'read_template()') to the manifest file + named by 'self.manifest'. + """ + if self._manifest_is_not_generated(): + log.info( + "not writing to manually maintained " + "manifest file '%s'" % self.manifest + ) + return + + content = self.filelist.files[:] + content.insert(0, '# file GENERATED by distutils, do NOT edit') + self.execute( + file_util.write_file, + (self.manifest, content), + "writing manifest file '%s'" % self.manifest, + ) + + def _manifest_is_not_generated(self): + # check for special comment used in 3.1.3 and higher + if not os.path.isfile(self.manifest): + return False + + fp = open(self.manifest) + try: + first_line = fp.readline() + finally: + fp.close() + return first_line != '# file GENERATED by distutils, do NOT edit\n' + + def read_manifest(self): + """Read the manifest file (named by 'self.manifest') and use it to + fill in 'self.filelist', the list of files to include in the source + distribution. + """ + log.info("reading manifest file '%s'", self.manifest) + with open(self.manifest) as manifest: + for line in manifest: + # ignore comments and blank lines + line = line.strip() + if line.startswith('#') or not line: + continue + self.filelist.append(line) + + def make_release_tree(self, base_dir, files): + """Create the directory tree that will become the source + distribution archive. All directories implied by the filenames in + 'files' are created under 'base_dir', and then we hard link or copy + (if hard linking is unavailable) those files into place. + Essentially, this duplicates the developer's source tree, but in a + directory named after the distribution, containing only the files + to be distributed. + """ + # Create all the directories under 'base_dir' necessary to + # put 'files' there; the 'mkpath()' is just so we don't die + # if the manifest happens to be empty. + self.mkpath(base_dir) + dir_util.create_tree(base_dir, files, dry_run=self.dry_run) + + # And walk over the list of files, either making a hard link (if + # os.link exists) to each one that doesn't already exist in its + # corresponding location under 'base_dir', or copying each file + # that's out-of-date in 'base_dir'. (Usually, all files will be + # out-of-date, because by default we blow away 'base_dir' when + # we're done making the distribution archives.) + + if hasattr(os, 'link'): # can make hard links on this system + link = 'hard' + msg = "making hard links in %s..." % base_dir + else: # nope, have to copy + link = None + msg = "copying files to %s..." % base_dir + + if not files: + log.warning("no files to distribute -- empty manifest?") + else: + log.info(msg) + for file in files: + if not os.path.isfile(file): + log.warning("'%s' not a regular file -- skipping", file) + else: + dest = os.path.join(base_dir, file) + self.copy_file(file, dest, link=link) + + self.distribution.metadata.write_pkg_info(base_dir) + + def make_distribution(self): + """Create the source distribution(s). First, we create the release + tree with 'make_release_tree()'; then, we create all required + archive files (according to 'self.formats') from the release tree. + Finally, we clean up by blowing away the release tree (unless + 'self.keep_temp' is true). The list of archive files created is + stored so it can be retrieved later by 'get_archive_files()'. + """ + # Don't warn about missing meta-data here -- should be (and is!) + # done elsewhere. + base_dir = self.distribution.get_fullname() + base_name = os.path.join(self.dist_dir, base_dir) + + self.make_release_tree(base_dir, self.filelist.files) + archive_files = [] # remember names of files we create + # tar archive must be created last to avoid overwrite and remove + if 'tar' in self.formats: + self.formats.append(self.formats.pop(self.formats.index('tar'))) + + for fmt in self.formats: + file = self.make_archive( + base_name, fmt, base_dir=base_dir, owner=self.owner, group=self.group + ) + archive_files.append(file) + self.distribution.dist_files.append(('sdist', '', file)) + + self.archive_files = archive_files + + if not self.keep_temp: + dir_util.remove_tree(base_dir, dry_run=self.dry_run) + + def get_archive_files(self): + """Return the list of archive files created when the command + was run, or None if the command hasn't run yet. + """ + return self.archive_files diff --git a/libs/common/setuptools/_distutils/command/upload.py b/libs/common/setuptools/_distutils/command/upload.py new file mode 100644 index 00000000..16e15d8b --- /dev/null +++ b/libs/common/setuptools/_distutils/command/upload.py @@ -0,0 +1,207 @@ +""" +distutils.command.upload + +Implements the Distutils 'upload' subcommand (upload package to a package +index). +""" + +import os +import io +import hashlib +import logging +from base64 import standard_b64encode +from urllib.request import urlopen, Request, HTTPError +from urllib.parse import urlparse +from ..errors import DistutilsError, DistutilsOptionError +from ..core import PyPIRCCommand +from ..spawn import spawn + + +# PyPI Warehouse supports MD5, SHA256, and Blake2 (blake2-256) +# https://bugs.python.org/issue40698 +_FILE_CONTENT_DIGESTS = { + "md5_digest": getattr(hashlib, "md5", None), + "sha256_digest": getattr(hashlib, "sha256", None), + "blake2_256_digest": getattr(hashlib, "blake2b", None), +} + + +class upload(PyPIRCCommand): + + description = "upload binary package to PyPI" + + user_options = PyPIRCCommand.user_options + [ + ('sign', 's', 'sign files to upload using gpg'), + ('identity=', 'i', 'GPG identity used to sign files'), + ] + + boolean_options = PyPIRCCommand.boolean_options + ['sign'] + + def initialize_options(self): + PyPIRCCommand.initialize_options(self) + self.username = '' + self.password = '' + self.show_response = 0 + self.sign = False + self.identity = None + + def finalize_options(self): + PyPIRCCommand.finalize_options(self) + if self.identity and not self.sign: + raise DistutilsOptionError("Must use --sign for --identity to have meaning") + config = self._read_pypirc() + if config != {}: + self.username = config['username'] + self.password = config['password'] + self.repository = config['repository'] + self.realm = config['realm'] + + # getting the password from the distribution + # if previously set by the register command + if not self.password and self.distribution.password: + self.password = self.distribution.password + + def run(self): + if not self.distribution.dist_files: + msg = ( + "Must create and upload files in one command " + "(e.g. setup.py sdist upload)" + ) + raise DistutilsOptionError(msg) + for command, pyversion, filename in self.distribution.dist_files: + self.upload_file(command, pyversion, filename) + + def upload_file(self, command, pyversion, filename): # noqa: C901 + # Makes sure the repository URL is compliant + schema, netloc, url, params, query, fragments = urlparse(self.repository) + if params or query or fragments: + raise AssertionError("Incompatible url %s" % self.repository) + + if schema not in ('http', 'https'): + raise AssertionError("unsupported schema " + schema) + + # Sign if requested + if self.sign: + gpg_args = ["gpg", "--detach-sign", "-a", filename] + if self.identity: + gpg_args[2:2] = ["--local-user", self.identity] + spawn(gpg_args, dry_run=self.dry_run) + + # Fill in the data - send all the meta-data in case we need to + # register a new release + f = open(filename, 'rb') + try: + content = f.read() + finally: + f.close() + + meta = self.distribution.metadata + data = { + # action + ':action': 'file_upload', + 'protocol_version': '1', + # identify release + 'name': meta.get_name(), + 'version': meta.get_version(), + # file content + 'content': (os.path.basename(filename), content), + 'filetype': command, + 'pyversion': pyversion, + # additional meta-data + 'metadata_version': '1.0', + 'summary': meta.get_description(), + 'home_page': meta.get_url(), + 'author': meta.get_contact(), + 'author_email': meta.get_contact_email(), + 'license': meta.get_licence(), + 'description': meta.get_long_description(), + 'keywords': meta.get_keywords(), + 'platform': meta.get_platforms(), + 'classifiers': meta.get_classifiers(), + 'download_url': meta.get_download_url(), + # PEP 314 + 'provides': meta.get_provides(), + 'requires': meta.get_requires(), + 'obsoletes': meta.get_obsoletes(), + } + + data['comment'] = '' + + # file content digests + for digest_name, digest_cons in _FILE_CONTENT_DIGESTS.items(): + if digest_cons is None: + continue + try: + data[digest_name] = digest_cons(content).hexdigest() + except ValueError: + # hash digest not available or blocked by security policy + pass + + if self.sign: + with open(filename + ".asc", "rb") as f: + data['gpg_signature'] = (os.path.basename(filename) + ".asc", f.read()) + + # set up the authentication + user_pass = (self.username + ":" + self.password).encode('ascii') + # The exact encoding of the authentication string is debated. + # Anyway PyPI only accepts ascii for both username or password. + auth = "Basic " + standard_b64encode(user_pass).decode('ascii') + + # Build up the MIME payload for the POST data + boundary = '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254' + sep_boundary = b'\r\n--' + boundary.encode('ascii') + end_boundary = sep_boundary + b'--\r\n' + body = io.BytesIO() + for key, value in data.items(): + title = '\r\nContent-Disposition: form-data; name="%s"' % key + # handle multiple entries for the same name + if not isinstance(value, list): + value = [value] + for value in value: + if type(value) is tuple: + title += '; filename="%s"' % value[0] + value = value[1] + else: + value = str(value).encode('utf-8') + body.write(sep_boundary) + body.write(title.encode('utf-8')) + body.write(b"\r\n\r\n") + body.write(value) + body.write(end_boundary) + body = body.getvalue() + + msg = "Submitting {} to {}".format(filename, self.repository) + self.announce(msg, logging.INFO) + + # build the Request + headers = { + 'Content-type': 'multipart/form-data; boundary=%s' % boundary, + 'Content-length': str(len(body)), + 'Authorization': auth, + } + + request = Request(self.repository, data=body, headers=headers) + # send the data + try: + result = urlopen(request) + status = result.getcode() + reason = result.msg + except HTTPError as e: + status = e.code + reason = e.msg + except OSError as e: + self.announce(str(e), logging.ERROR) + raise + + if status == 200: + self.announce( + 'Server response ({}): {}'.format(status, reason), logging.INFO + ) + if self.show_response: + text = self._read_pypi_response(result) + msg = '\n'.join(('-' * 75, text, '-' * 75)) + self.announce(msg, logging.INFO) + else: + msg = 'Upload failed ({}): {}'.format(status, reason) + self.announce(msg, logging.ERROR) + raise DistutilsError(msg) diff --git a/libs/common/setuptools/_distutils/config.py b/libs/common/setuptools/_distutils/config.py new file mode 100644 index 00000000..9a4044ad --- /dev/null +++ b/libs/common/setuptools/_distutils/config.py @@ -0,0 +1,139 @@ +"""distutils.pypirc + +Provides the PyPIRCCommand class, the base class for the command classes +that uses .pypirc in the distutils.command package. +""" +import os +from configparser import RawConfigParser + +from .cmd import Command + +DEFAULT_PYPIRC = """\ +[distutils] +index-servers = + pypi + +[pypi] +username:%s +password:%s +""" + + +class PyPIRCCommand(Command): + """Base command that knows how to handle the .pypirc file""" + + DEFAULT_REPOSITORY = 'https://upload.pypi.org/legacy/' + DEFAULT_REALM = 'pypi' + repository = None + realm = None + + user_options = [ + ('repository=', 'r', "url of repository [default: %s]" % DEFAULT_REPOSITORY), + ('show-response', None, 'display full response text from server'), + ] + + boolean_options = ['show-response'] + + def _get_rc_file(self): + """Returns rc file path.""" + return os.path.join(os.path.expanduser('~'), '.pypirc') + + def _store_pypirc(self, username, password): + """Creates a default .pypirc file.""" + rc = self._get_rc_file() + with os.fdopen(os.open(rc, os.O_CREAT | os.O_WRONLY, 0o600), 'w') as f: + f.write(DEFAULT_PYPIRC % (username, password)) + + def _read_pypirc(self): # noqa: C901 + """Reads the .pypirc file.""" + rc = self._get_rc_file() + if os.path.exists(rc): + self.announce('Using PyPI login from %s' % rc) + repository = self.repository or self.DEFAULT_REPOSITORY + + config = RawConfigParser() + config.read(rc) + sections = config.sections() + if 'distutils' in sections: + # let's get the list of servers + index_servers = config.get('distutils', 'index-servers') + _servers = [ + server.strip() + for server in index_servers.split('\n') + if server.strip() != '' + ] + if _servers == []: + # nothing set, let's try to get the default pypi + if 'pypi' in sections: + _servers = ['pypi'] + else: + # the file is not properly defined, returning + # an empty dict + return {} + for server in _servers: + current = {'server': server} + current['username'] = config.get(server, 'username') + + # optional params + for key, default in ( + ('repository', self.DEFAULT_REPOSITORY), + ('realm', self.DEFAULT_REALM), + ('password', None), + ): + if config.has_option(server, key): + current[key] = config.get(server, key) + else: + current[key] = default + + # work around people having "repository" for the "pypi" + # section of their config set to the HTTP (rather than + # HTTPS) URL + if server == 'pypi' and repository in ( + self.DEFAULT_REPOSITORY, + 'pypi', + ): + current['repository'] = self.DEFAULT_REPOSITORY + return current + + if ( + current['server'] == repository + or current['repository'] == repository + ): + return current + elif 'server-login' in sections: + # old format + server = 'server-login' + if config.has_option(server, 'repository'): + repository = config.get(server, 'repository') + else: + repository = self.DEFAULT_REPOSITORY + return { + 'username': config.get(server, 'username'), + 'password': config.get(server, 'password'), + 'repository': repository, + 'server': server, + 'realm': self.DEFAULT_REALM, + } + + return {} + + def _read_pypi_response(self, response): + """Read and decode a PyPI HTTP response.""" + import cgi + + content_type = response.getheader('content-type', 'text/plain') + encoding = cgi.parse_header(content_type)[1].get('charset', 'ascii') + return response.read().decode(encoding) + + def initialize_options(self): + """Initialize options.""" + self.repository = None + self.realm = None + self.show_response = 0 + + def finalize_options(self): + """Finalizes options.""" + if self.repository is None: + self.repository = self.DEFAULT_REPOSITORY + if self.realm is None: + self.realm = self.DEFAULT_REALM diff --git a/libs/common/setuptools/_distutils/core.py b/libs/common/setuptools/_distutils/core.py new file mode 100644 index 00000000..34cafbce --- /dev/null +++ b/libs/common/setuptools/_distutils/core.py @@ -0,0 +1,291 @@ +"""distutils.core + +The only module that needs to be imported to use the Distutils; provides +the 'setup' function (which is to be called from the setup script). Also +indirectly provides the Distribution and Command classes, although they are +really defined in distutils.dist and distutils.cmd. +""" + +import os +import sys +import tokenize + +from .debug import DEBUG +from .errors import ( + DistutilsSetupError, + DistutilsError, + CCompilerError, + DistutilsArgError, +) + +# Mainly import these so setup scripts can "from distutils.core import" them. +from .dist import Distribution +from .cmd import Command +from .config import PyPIRCCommand +from .extension import Extension + + +__all__ = ['Distribution', 'Command', 'PyPIRCCommand', 'Extension', 'setup'] + +# This is a barebones help message generated displayed when the user +# runs the setup script with no arguments at all. More useful help +# is generated with various --help options: global help, list commands, +# and per-command help. +USAGE = """\ +usage: %(script)s [global_opts] cmd1 [cmd1_opts] [cmd2 [cmd2_opts] ...] + or: %(script)s --help [cmd1 cmd2 ...] + or: %(script)s --help-commands + or: %(script)s cmd --help +""" + + +def gen_usage(script_name): + script = os.path.basename(script_name) + return USAGE % locals() + + +# Some mild magic to control the behaviour of 'setup()' from 'run_setup()'. +_setup_stop_after = None +_setup_distribution = None + +# Legal keyword arguments for the setup() function +setup_keywords = ( + 'distclass', + 'script_name', + 'script_args', + 'options', + 'name', + 'version', + 'author', + 'author_email', + 'maintainer', + 'maintainer_email', + 'url', + 'license', + 'description', + 'long_description', + 'keywords', + 'platforms', + 'classifiers', + 'download_url', + 'requires', + 'provides', + 'obsoletes', +) + +# Legal keyword arguments for the Extension constructor +extension_keywords = ( + 'name', + 'sources', + 'include_dirs', + 'define_macros', + 'undef_macros', + 'library_dirs', + 'libraries', + 'runtime_library_dirs', + 'extra_objects', + 'extra_compile_args', + 'extra_link_args', + 'swig_opts', + 'export_symbols', + 'depends', + 'language', +) + + +def setup(**attrs): # noqa: C901 + """The gateway to the Distutils: do everything your setup script needs + to do, in a highly flexible and user-driven way. Briefly: create a + Distribution instance; find and parse config files; parse the command + line; run each Distutils command found there, customized by the options + supplied to 'setup()' (as keyword arguments), in config files, and on + the command line. + + The Distribution instance might be an instance of a class supplied via + the 'distclass' keyword argument to 'setup'; if no such class is + supplied, then the Distribution class (in dist.py) is instantiated. + All other arguments to 'setup' (except for 'cmdclass') are used to set + attributes of the Distribution instance. + + The 'cmdclass' argument, if supplied, is a dictionary mapping command + names to command classes. Each command encountered on the command line + will be turned into a command class, which is in turn instantiated; any + class found in 'cmdclass' is used in place of the default, which is + (for command 'foo_bar') class 'foo_bar' in module + 'distutils.command.foo_bar'. The command class must provide a + 'user_options' attribute which is a list of option specifiers for + 'distutils.fancy_getopt'. Any command-line options between the current + and the next command are used to set attributes of the current command + object. + + When the entire command-line has been successfully parsed, calls the + 'run()' method on each command object in turn. This method will be + driven entirely by the Distribution object (which each command object + has a reference to, thanks to its constructor), and the + command-specific options that became attributes of each command + object. + """ + + global _setup_stop_after, _setup_distribution + + # Determine the distribution class -- either caller-supplied or + # our Distribution (see below). + klass = attrs.get('distclass') + if klass: + del attrs['distclass'] + else: + klass = Distribution + + if 'script_name' not in attrs: + attrs['script_name'] = os.path.basename(sys.argv[0]) + if 'script_args' not in attrs: + attrs['script_args'] = sys.argv[1:] + + # Create the Distribution instance, using the remaining arguments + # (ie. everything except distclass) to initialize it + try: + _setup_distribution = dist = klass(attrs) + except DistutilsSetupError as msg: + if 'name' not in attrs: + raise SystemExit("error in setup command: %s" % msg) + else: + raise SystemExit("error in {} setup command: {}".format(attrs['name'], msg)) + + if _setup_stop_after == "init": + return dist + + # Find and parse the config file(s): they will override options from + # the setup script, but be overridden by the command line. + dist.parse_config_files() + + if DEBUG: + print("options (after parsing config files):") + dist.dump_option_dicts() + + if _setup_stop_after == "config": + return dist + + # Parse the command line and override config files; any + # command-line errors are the end user's fault, so turn them into + # SystemExit to suppress tracebacks. + try: + ok = dist.parse_command_line() + except DistutilsArgError as msg: + raise SystemExit(gen_usage(dist.script_name) + "\nerror: %s" % msg) + + if DEBUG: + print("options (after parsing command line):") + dist.dump_option_dicts() + + if _setup_stop_after == "commandline": + return dist + + # And finally, run all the commands found on the command line. + if ok: + return run_commands(dist) + + return dist + + +# setup () + + +def run_commands(dist): + """Given a Distribution object run all the commands, + raising ``SystemExit`` errors in the case of failure. + + This function assumes that either ``sys.argv`` or ``dist.script_args`` + is already set accordingly. + """ + try: + dist.run_commands() + except KeyboardInterrupt: + raise SystemExit("interrupted") + except OSError as exc: + if DEBUG: + sys.stderr.write("error: {}\n".format(exc)) + raise + else: + raise SystemExit("error: {}".format(exc)) + + except (DistutilsError, CCompilerError) as msg: + if DEBUG: + raise + else: + raise SystemExit("error: " + str(msg)) + + return dist + + +def run_setup(script_name, script_args=None, stop_after="run"): + """Run a setup script in a somewhat controlled environment, and + return the Distribution instance that drives things. This is useful + if you need to find out the distribution meta-data (passed as + keyword args from 'script' to 'setup()', or the contents of the + config files or command-line. + + 'script_name' is a file that will be read and run with 'exec()'; + 'sys.argv[0]' will be replaced with 'script' for the duration of the + call. 'script_args' is a list of strings; if supplied, + 'sys.argv[1:]' will be replaced by 'script_args' for the duration of + the call. + + 'stop_after' tells 'setup()' when to stop processing; possible + values: + init + stop after the Distribution instance has been created and + populated with the keyword arguments to 'setup()' + config + stop after config files have been parsed (and their data + stored in the Distribution instance) + commandline + stop after the command-line ('sys.argv[1:]' or 'script_args') + have been parsed (and the data stored in the Distribution) + run [default] + stop after all commands have been run (the same as if 'setup()' + had been called in the usual way + + Returns the Distribution instance, which provides all information + used to drive the Distutils. + """ + if stop_after not in ('init', 'config', 'commandline', 'run'): + raise ValueError("invalid value for 'stop_after': {!r}".format(stop_after)) + + global _setup_stop_after, _setup_distribution + _setup_stop_after = stop_after + + save_argv = sys.argv.copy() + g = {'__file__': script_name, '__name__': '__main__'} + try: + try: + sys.argv[0] = script_name + if script_args is not None: + sys.argv[1:] = script_args + # tokenize.open supports automatic encoding detection + with tokenize.open(script_name) as f: + code = f.read().replace(r'\r\n', r'\n') + exec(code, g) + finally: + sys.argv = save_argv + _setup_stop_after = None + except SystemExit: + # Hmm, should we do something if exiting with a non-zero code + # (ie. error)? + pass + + if _setup_distribution is None: + raise RuntimeError( + ( + "'distutils.core.setup()' was never called -- " + "perhaps '%s' is not a Distutils setup script?" + ) + % script_name + ) + + # I wonder if the setup script's namespace -- g and l -- would be of + # any interest to callers? + # print "_setup_distribution:", _setup_distribution + return _setup_distribution + + +# run_setup () diff --git a/libs/common/setuptools/_distutils/cygwinccompiler.py b/libs/common/setuptools/_distutils/cygwinccompiler.py new file mode 100644 index 00000000..f15b8eee --- /dev/null +++ b/libs/common/setuptools/_distutils/cygwinccompiler.py @@ -0,0 +1,358 @@ +"""distutils.cygwinccompiler + +Provides the CygwinCCompiler class, a subclass of UnixCCompiler that +handles the Cygwin port of the GNU C compiler to Windows. It also contains +the Mingw32CCompiler class which handles the mingw32 port of GCC (same as +cygwin in no-cygwin mode). +""" + +import os +import re +import sys +import copy +import shlex +import warnings +from subprocess import check_output + +from .unixccompiler import UnixCCompiler +from .file_util import write_file +from .errors import ( + DistutilsExecError, + DistutilsPlatformError, + CCompilerError, + CompileError, +) +from .version import LooseVersion, suppress_known_deprecation +from ._collections import RangeMap + + +_msvcr_lookup = RangeMap.left( + { + # MSVC 7.0 + 1300: ['msvcr70'], + # MSVC 7.1 + 1310: ['msvcr71'], + # VS2005 / MSVC 8.0 + 1400: ['msvcr80'], + # VS2008 / MSVC 9.0 + 1500: ['msvcr90'], + # VS2010 / MSVC 10.0 + 1600: ['msvcr100'], + # VS2012 / MSVC 11.0 + 1700: ['msvcr110'], + # VS2013 / MSVC 12.0 + 1800: ['msvcr120'], + # VS2015 / MSVC 14.0 + 1900: ['ucrt', 'vcruntime140'], + 2000: RangeMap.undefined_value, + }, +) + + +def get_msvcr(): + """Include the appropriate MSVC runtime library if Python was built + with MSVC 7.0 or later. + """ + match = re.search(r'MSC v\.(\d{4})', sys.version) + try: + msc_ver = int(match.group(1)) + except AttributeError: + return + try: + return _msvcr_lookup[msc_ver] + except KeyError: + raise ValueError("Unknown MS Compiler version %s " % msc_ver) + + +_runtime_library_dirs_msg = ( + "Unable to set runtime library search path on Windows, " + "usually indicated by `runtime_library_dirs` parameter to Extension" +) + + +class CygwinCCompiler(UnixCCompiler): + """Handles the Cygwin port of the GNU C compiler to Windows.""" + + compiler_type = 'cygwin' + obj_extension = ".o" + static_lib_extension = ".a" + shared_lib_extension = ".dll.a" + dylib_lib_extension = ".dll" + static_lib_format = "lib%s%s" + shared_lib_format = "lib%s%s" + dylib_lib_format = "cyg%s%s" + exe_extension = ".exe" + + def __init__(self, verbose=0, dry_run=0, force=0): + + super().__init__(verbose, dry_run, force) + + status, details = check_config_h() + self.debug_print( + "Python's GCC status: {} (details: {})".format(status, details) + ) + if status is not CONFIG_H_OK: + self.warn( + "Python's pyconfig.h doesn't seem to support your compiler. " + "Reason: %s. " + "Compiling may fail because of undefined preprocessor macros." % details + ) + + self.cc = os.environ.get('CC', 'gcc') + self.cxx = os.environ.get('CXX', 'g++') + + self.linker_dll = self.cc + shared_option = "-shared" + + self.set_executables( + compiler='%s -mcygwin -O -Wall' % self.cc, + compiler_so='%s -mcygwin -mdll -O -Wall' % self.cc, + compiler_cxx='%s -mcygwin -O -Wall' % self.cxx, + linker_exe='%s -mcygwin' % self.cc, + linker_so=('{} -mcygwin {}'.format(self.linker_dll, shared_option)), + ) + + # Include the appropriate MSVC runtime library if Python was built + # with MSVC 7.0 or later. + self.dll_libraries = get_msvcr() + + @property + def gcc_version(self): + # Older numpy dependend on this existing to check for ancient + # gcc versions. This doesn't make much sense with clang etc so + # just hardcode to something recent. + # https://github.com/numpy/numpy/pull/20333 + warnings.warn( + "gcc_version attribute of CygwinCCompiler is deprecated. " + "Instead of returning actual gcc version a fixed value 11.2.0 is returned.", + DeprecationWarning, + stacklevel=2, + ) + with suppress_known_deprecation(): + return LooseVersion("11.2.0") + + def _compile(self, obj, src, ext, cc_args, extra_postargs, pp_opts): + """Compiles the source by spawning GCC and windres if needed.""" + if ext == '.rc' or ext == '.res': + # gcc needs '.res' and '.rc' compiled to object files !!! + try: + self.spawn(["windres", "-i", src, "-o", obj]) + except DistutilsExecError as msg: + raise CompileError(msg) + else: # for other files use the C-compiler + try: + self.spawn( + self.compiler_so + cc_args + [src, '-o', obj] + extra_postargs + ) + except DistutilsExecError as msg: + raise CompileError(msg) + + def link( + self, + target_desc, + objects, + output_filename, + output_dir=None, + libraries=None, + library_dirs=None, + runtime_library_dirs=None, + export_symbols=None, + debug=0, + extra_preargs=None, + extra_postargs=None, + build_temp=None, + target_lang=None, + ): + """Link the objects.""" + # use separate copies, so we can modify the lists + extra_preargs = copy.copy(extra_preargs or []) + libraries = copy.copy(libraries or []) + objects = copy.copy(objects or []) + + if runtime_library_dirs: + self.warn(_runtime_library_dirs_msg) + + # Additional libraries + libraries.extend(self.dll_libraries) + + # handle export symbols by creating a def-file + # with executables this only works with gcc/ld as linker + if (export_symbols is not None) and ( + target_desc != self.EXECUTABLE or self.linker_dll == "gcc" + ): + # (The linker doesn't do anything if output is up-to-date. + # So it would probably better to check if we really need this, + # but for this we had to insert some unchanged parts of + # UnixCCompiler, and this is not what we want.) + + # we want to put some files in the same directory as the + # object files are, build_temp doesn't help much + # where are the object files + temp_dir = os.path.dirname(objects[0]) + # name of dll to give the helper files the same base name + (dll_name, dll_extension) = os.path.splitext( + os.path.basename(output_filename) + ) + + # generate the filenames for these files + def_file = os.path.join(temp_dir, dll_name + ".def") + + # Generate .def file + contents = ["LIBRARY %s" % os.path.basename(output_filename), "EXPORTS"] + for sym in export_symbols: + contents.append(sym) + self.execute(write_file, (def_file, contents), "writing %s" % def_file) + + # next add options for def-file + + # for gcc/ld the def-file is specified as any object files + objects.append(def_file) + + # end: if ((export_symbols is not None) and + # (target_desc != self.EXECUTABLE or self.linker_dll == "gcc")): + + # who wants symbols and a many times larger output file + # should explicitly switch the debug mode on + # otherwise we let ld strip the output file + # (On my machine: 10KiB < stripped_file < ??100KiB + # unstripped_file = stripped_file + XXX KiB + # ( XXX=254 for a typical python extension)) + if not debug: + extra_preargs.append("-s") + + UnixCCompiler.link( + self, + target_desc, + objects, + output_filename, + output_dir, + libraries, + library_dirs, + runtime_library_dirs, + None, # export_symbols, we do this in our def-file + debug, + extra_preargs, + extra_postargs, + build_temp, + target_lang, + ) + + def runtime_library_dir_option(self, dir): + # cygwin doesn't support rpath. While in theory we could error + # out like MSVC does, code might expect it to work like on Unix, so + # just warn and hope for the best. + self.warn(_runtime_library_dirs_msg) + return [] + + # -- Miscellaneous methods ----------------------------------------- + + def _make_out_path(self, output_dir, strip_dir, src_name): + # use normcase to make sure '.rc' is really '.rc' and not '.RC' + norm_src_name = os.path.normcase(src_name) + return super()._make_out_path(output_dir, strip_dir, norm_src_name) + + @property + def out_extensions(self): + """ + Add support for rc and res files. + """ + return { + **super().out_extensions, + **{ext: ext + self.obj_extension for ext in ('.res', '.rc')}, + } + + +# the same as cygwin plus some additional parameters +class Mingw32CCompiler(CygwinCCompiler): + """Handles the Mingw32 port of the GNU C compiler to Windows.""" + + compiler_type = 'mingw32' + + def __init__(self, verbose=0, dry_run=0, force=0): + + super().__init__(verbose, dry_run, force) + + shared_option = "-shared" + + if is_cygwincc(self.cc): + raise CCompilerError('Cygwin gcc cannot be used with --compiler=mingw32') + + self.set_executables( + compiler='%s -O -Wall' % self.cc, + compiler_so='%s -mdll -O -Wall' % self.cc, + compiler_cxx='%s -O -Wall' % self.cxx, + linker_exe='%s' % self.cc, + linker_so='{} {}'.format(self.linker_dll, shared_option), + ) + + def runtime_library_dir_option(self, dir): + raise DistutilsPlatformError(_runtime_library_dirs_msg) + + +# Because these compilers aren't configured in Python's pyconfig.h file by +# default, we should at least warn the user if he is using an unmodified +# version. + +CONFIG_H_OK = "ok" +CONFIG_H_NOTOK = "not ok" +CONFIG_H_UNCERTAIN = "uncertain" + + +def check_config_h(): + """Check if the current Python installation appears amenable to building + extensions with GCC. + + Returns a tuple (status, details), where 'status' is one of the following + constants: + + - CONFIG_H_OK: all is well, go ahead and compile + - CONFIG_H_NOTOK: doesn't look good + - CONFIG_H_UNCERTAIN: not sure -- unable to read pyconfig.h + + 'details' is a human-readable string explaining the situation. + + Note there are two ways to conclude "OK": either 'sys.version' contains + the string "GCC" (implying that this Python was built with GCC), or the + installed "pyconfig.h" contains the string "__GNUC__". + """ + + # XXX since this function also checks sys.version, it's not strictly a + # "pyconfig.h" check -- should probably be renamed... + + from distutils import sysconfig + + # if sys.version contains GCC then python was compiled with GCC, and the + # pyconfig.h file should be OK + if "GCC" in sys.version: + return CONFIG_H_OK, "sys.version mentions 'GCC'" + + # Clang would also work + if "Clang" in sys.version: + return CONFIG_H_OK, "sys.version mentions 'Clang'" + + # let's see if __GNUC__ is mentioned in python.h + fn = sysconfig.get_config_h_filename() + try: + config_h = open(fn) + try: + if "__GNUC__" in config_h.read(): + return CONFIG_H_OK, "'%s' mentions '__GNUC__'" % fn + else: + return CONFIG_H_NOTOK, "'%s' does not mention '__GNUC__'" % fn + finally: + config_h.close() + except OSError as exc: + return (CONFIG_H_UNCERTAIN, "couldn't read '{}': {}".format(fn, exc.strerror)) + + +def is_cygwincc(cc): + '''Try to determine if the compiler that would be used is from cygwin.''' + out_string = check_output(shlex.split(cc) + ['-dumpmachine']) + return out_string.strip().endswith(b'cygwin') + + +get_versions = None +""" +A stand-in for the previous get_versions() function to prevent failures +when monkeypatched. See pypa/setuptools#2969. +""" diff --git a/libs/common/setuptools/_distutils/debug.py b/libs/common/setuptools/_distutils/debug.py new file mode 100644 index 00000000..daf1660f --- /dev/null +++ b/libs/common/setuptools/_distutils/debug.py @@ -0,0 +1,5 @@ +import os + +# If DISTUTILS_DEBUG is anything other than the empty string, we run in +# debug mode. +DEBUG = os.environ.get('DISTUTILS_DEBUG') diff --git a/libs/common/setuptools/_distutils/dep_util.py b/libs/common/setuptools/_distutils/dep_util.py new file mode 100644 index 00000000..48da8641 --- /dev/null +++ b/libs/common/setuptools/_distutils/dep_util.py @@ -0,0 +1,96 @@ +"""distutils.dep_util + +Utility functions for simple, timestamp-based dependency of files +and groups of files; also, function based entirely on such +timestamp dependency analysis.""" + +import os +from .errors import DistutilsFileError + + +def newer(source, target): + """Return true if 'source' exists and is more recently modified than + 'target', or if 'source' exists and 'target' doesn't. Return false if + both exist and 'target' is the same age or younger than 'source'. + Raise DistutilsFileError if 'source' does not exist. + """ + if not os.path.exists(source): + raise DistutilsFileError("file '%s' does not exist" % os.path.abspath(source)) + if not os.path.exists(target): + return 1 + + from stat import ST_MTIME + + mtime1 = os.stat(source)[ST_MTIME] + mtime2 = os.stat(target)[ST_MTIME] + + return mtime1 > mtime2 + + +# newer () + + +def newer_pairwise(sources, targets): + """Walk two filename lists in parallel, testing if each source is newer + than its corresponding target. Return a pair of lists (sources, + targets) where source is newer than target, according to the semantics + of 'newer()'. + """ + if len(sources) != len(targets): + raise ValueError("'sources' and 'targets' must be same length") + + # build a pair of lists (sources, targets) where source is newer + n_sources = [] + n_targets = [] + for i in range(len(sources)): + if newer(sources[i], targets[i]): + n_sources.append(sources[i]) + n_targets.append(targets[i]) + + return (n_sources, n_targets) + + +# newer_pairwise () + + +def newer_group(sources, target, missing='error'): + """Return true if 'target' is out-of-date with respect to any file + listed in 'sources'. In other words, if 'target' exists and is newer + than every file in 'sources', return false; otherwise return true. + 'missing' controls what we do when a source file is missing; the + default ("error") is to blow up with an OSError from inside 'stat()'; + if it is "ignore", we silently drop any missing source files; if it is + "newer", any missing source files make us assume that 'target' is + out-of-date (this is handy in "dry-run" mode: it'll make you pretend to + carry out commands that wouldn't work because inputs are missing, but + that doesn't matter because you're not actually going to run the + commands). + """ + # If the target doesn't even exist, then it's definitely out-of-date. + if not os.path.exists(target): + return 1 + + # Otherwise we have to find out the hard way: if *any* source file + # is more recent than 'target', then 'target' is out-of-date and + # we can immediately return true. If we fall through to the end + # of the loop, then 'target' is up-to-date and we return false. + from stat import ST_MTIME + + target_mtime = os.stat(target)[ST_MTIME] + for source in sources: + if not os.path.exists(source): + if missing == 'error': # blow up when we stat() the file + pass + elif missing == 'ignore': # missing source dropped from + continue # target's dependency list + elif missing == 'newer': # missing source means target is + return 1 # out-of-date + + source_mtime = os.stat(source)[ST_MTIME] + if source_mtime > target_mtime: + return 1 + else: + return 0 + + +# newer_group () diff --git a/libs/common/setuptools/_distutils/dir_util.py b/libs/common/setuptools/_distutils/dir_util.py new file mode 100644 index 00000000..80f77649 --- /dev/null +++ b/libs/common/setuptools/_distutils/dir_util.py @@ -0,0 +1,243 @@ +"""distutils.dir_util + +Utility functions for manipulating directories and directory trees.""" + +import os +import errno +from .errors import DistutilsInternalError, DistutilsFileError +from ._log import log + +# cache for by mkpath() -- in addition to cheapening redundant calls, +# eliminates redundant "creating /foo/bar/baz" messages in dry-run mode +_path_created = {} + + +def mkpath(name, mode=0o777, verbose=1, dry_run=0): # noqa: C901 + """Create a directory and any missing ancestor directories. + + If the directory already exists (or if 'name' is the empty string, which + means the current directory, which of course exists), then do nothing. + Raise DistutilsFileError if unable to create some directory along the way + (eg. some sub-path exists, but is a file rather than a directory). + If 'verbose' is true, print a one-line summary of each mkdir to stdout. + Return the list of directories actually created. + + os.makedirs is not used because: + + a) It's new to Python 1.5.2, and + b) it blows up if the directory already exists (in which case it should + silently succeed). + """ + + global _path_created + + # Detect a common bug -- name is None + if not isinstance(name, str): + raise DistutilsInternalError( + "mkpath: 'name' must be a string (got {!r})".format(name) + ) + + # XXX what's the better way to handle verbosity? print as we create + # each directory in the path (the current behaviour), or only announce + # the creation of the whole path? (quite easy to do the latter since + # we're not using a recursive algorithm) + + name = os.path.normpath(name) + created_dirs = [] + if os.path.isdir(name) or name == '': + return created_dirs + if _path_created.get(os.path.abspath(name)): + return created_dirs + + (head, tail) = os.path.split(name) + tails = [tail] # stack of lone dirs to create + + while head and tail and not os.path.isdir(head): + (head, tail) = os.path.split(head) + tails.insert(0, tail) # push next higher dir onto stack + + # now 'head' contains the deepest directory that already exists + # (that is, the child of 'head' in 'name' is the highest directory + # that does *not* exist) + for d in tails: + # print "head = %s, d = %s: " % (head, d), + head = os.path.join(head, d) + abs_head = os.path.abspath(head) + + if _path_created.get(abs_head): + continue + + if verbose >= 1: + log.info("creating %s", head) + + if not dry_run: + try: + os.mkdir(head, mode) + except OSError as exc: + if not (exc.errno == errno.EEXIST and os.path.isdir(head)): + raise DistutilsFileError( + "could not create '{}': {}".format(head, exc.args[-1]) + ) + created_dirs.append(head) + + _path_created[abs_head] = 1 + return created_dirs + + +def create_tree(base_dir, files, mode=0o777, verbose=1, dry_run=0): + """Create all the empty directories under 'base_dir' needed to put 'files' + there. + + 'base_dir' is just the name of a directory which doesn't necessarily + exist yet; 'files' is a list of filenames to be interpreted relative to + 'base_dir'. 'base_dir' + the directory portion of every file in 'files' + will be created if it doesn't already exist. 'mode', 'verbose' and + 'dry_run' flags are as for 'mkpath()'. + """ + # First get the list of directories to create + need_dir = set() + for file in files: + need_dir.add(os.path.join(base_dir, os.path.dirname(file))) + + # Now create them + for dir in sorted(need_dir): + mkpath(dir, mode, verbose=verbose, dry_run=dry_run) + + +def copy_tree( # noqa: C901 + src, + dst, + preserve_mode=1, + preserve_times=1, + preserve_symlinks=0, + update=0, + verbose=1, + dry_run=0, +): + """Copy an entire directory tree 'src' to a new location 'dst'. + + Both 'src' and 'dst' must be directory names. If 'src' is not a + directory, raise DistutilsFileError. If 'dst' does not exist, it is + created with 'mkpath()'. The end result of the copy is that every + file in 'src' is copied to 'dst', and directories under 'src' are + recursively copied to 'dst'. Return the list of files that were + copied or might have been copied, using their output name. The + return value is unaffected by 'update' or 'dry_run': it is simply + the list of all files under 'src', with the names changed to be + under 'dst'. + + 'preserve_mode' and 'preserve_times' are the same as for + 'copy_file'; note that they only apply to regular files, not to + directories. If 'preserve_symlinks' is true, symlinks will be + copied as symlinks (on platforms that support them!); otherwise + (the default), the destination of the symlink will be copied. + 'update' and 'verbose' are the same as for 'copy_file'. + """ + from distutils.file_util import copy_file + + if not dry_run and not os.path.isdir(src): + raise DistutilsFileError("cannot copy tree '%s': not a directory" % src) + try: + names = os.listdir(src) + except OSError as e: + if dry_run: + names = [] + else: + raise DistutilsFileError( + "error listing files in '{}': {}".format(src, e.strerror) + ) + + if not dry_run: + mkpath(dst, verbose=verbose) + + outputs = [] + + for n in names: + src_name = os.path.join(src, n) + dst_name = os.path.join(dst, n) + + if n.startswith('.nfs'): + # skip NFS rename files + continue + + if preserve_symlinks and os.path.islink(src_name): + link_dest = os.readlink(src_name) + if verbose >= 1: + log.info("linking %s -> %s", dst_name, link_dest) + if not dry_run: + os.symlink(link_dest, dst_name) + outputs.append(dst_name) + + elif os.path.isdir(src_name): + outputs.extend( + copy_tree( + src_name, + dst_name, + preserve_mode, + preserve_times, + preserve_symlinks, + update, + verbose=verbose, + dry_run=dry_run, + ) + ) + else: + copy_file( + src_name, + dst_name, + preserve_mode, + preserve_times, + update, + verbose=verbose, + dry_run=dry_run, + ) + outputs.append(dst_name) + + return outputs + + +def _build_cmdtuple(path, cmdtuples): + """Helper for remove_tree().""" + for f in os.listdir(path): + real_f = os.path.join(path, f) + if os.path.isdir(real_f) and not os.path.islink(real_f): + _build_cmdtuple(real_f, cmdtuples) + else: + cmdtuples.append((os.remove, real_f)) + cmdtuples.append((os.rmdir, path)) + + +def remove_tree(directory, verbose=1, dry_run=0): + """Recursively remove an entire directory tree. + + Any errors are ignored (apart from being reported to stdout if 'verbose' + is true). + """ + global _path_created + + if verbose >= 1: + log.info("removing '%s' (and everything under it)", directory) + if dry_run: + return + cmdtuples = [] + _build_cmdtuple(directory, cmdtuples) + for cmd in cmdtuples: + try: + cmd[0](cmd[1]) + # remove dir from cache if it's already there + abspath = os.path.abspath(cmd[1]) + if abspath in _path_created: + del _path_created[abspath] + except OSError as exc: + log.warning("error removing %s: %s", directory, exc) + + +def ensure_relative(path): + """Take the full path 'path', and make it a relative path. + + This is useful to make 'path' the second argument to os.path.join(). + """ + drive, path = os.path.splitdrive(path) + if path[0:1] == os.sep: + path = drive + path[1:] + return path diff --git a/libs/common/setuptools/_distutils/dist.py b/libs/common/setuptools/_distutils/dist.py new file mode 100644 index 00000000..d7458a05 --- /dev/null +++ b/libs/common/setuptools/_distutils/dist.py @@ -0,0 +1,1287 @@ +"""distutils.dist + +Provides the Distribution class, which represents the module distribution +being built/installed/distributed. +""" + +import sys +import os +import re +import pathlib +import contextlib +import logging +from email import message_from_file + +try: + import warnings +except ImportError: + warnings = None + +from .errors import ( + DistutilsOptionError, + DistutilsModuleError, + DistutilsArgError, + DistutilsClassError, +) +from .fancy_getopt import FancyGetopt, translate_longopt +from .util import check_environ, strtobool, rfc822_escape +from ._log import log +from .debug import DEBUG + +# Regex to define acceptable Distutils command names. This is not *quite* +# the same as a Python NAME -- I don't allow leading underscores. The fact +# that they're very similar is no coincidence; the default naming scheme is +# to look for a Python module named after the command. +command_re = re.compile(r'^[a-zA-Z]([a-zA-Z0-9_]*)$') + + +def _ensure_list(value, fieldname): + if isinstance(value, str): + # a string containing comma separated values is okay. It will + # be converted to a list by Distribution.finalize_options(). + pass + elif not isinstance(value, list): + # passing a tuple or an iterator perhaps, warn and convert + typename = type(value).__name__ + msg = "Warning: '{fieldname}' should be a list, got type '{typename}'" + msg = msg.format(**locals()) + log.warning(msg) + value = list(value) + return value + + +class Distribution: + """The core of the Distutils. Most of the work hiding behind 'setup' + is really done within a Distribution instance, which farms the work out + to the Distutils commands specified on the command line. + + Setup scripts will almost never instantiate Distribution directly, + unless the 'setup()' function is totally inadequate to their needs. + However, it is conceivable that a setup script might wish to subclass + Distribution for some specialized purpose, and then pass the subclass + to 'setup()' as the 'distclass' keyword argument. If so, it is + necessary to respect the expectations that 'setup' has of Distribution. + See the code for 'setup()', in core.py, for details. + """ + + # 'global_options' describes the command-line options that may be + # supplied to the setup script prior to any actual commands. + # Eg. "./setup.py -n" or "./setup.py --quiet" both take advantage of + # these global options. This list should be kept to a bare minimum, + # since every global option is also valid as a command option -- and we + # don't want to pollute the commands with too many options that they + # have minimal control over. + # The fourth entry for verbose means that it can be repeated. + global_options = [ + ('verbose', 'v', "run verbosely (default)", 1), + ('quiet', 'q', "run quietly (turns verbosity off)"), + ('dry-run', 'n', "don't actually do anything"), + ('help', 'h', "show detailed help message"), + ('no-user-cfg', None, 'ignore pydistutils.cfg in your home directory'), + ] + + # 'common_usage' is a short (2-3 line) string describing the common + # usage of the setup script. + common_usage = """\ +Common commands: (see '--help-commands' for more) + + setup.py build will build the package underneath 'build/' + setup.py install will install the package +""" + + # options that are not propagated to the commands + display_options = [ + ('help-commands', None, "list all available commands"), + ('name', None, "print package name"), + ('version', 'V', "print package version"), + ('fullname', None, "print -"), + ('author', None, "print the author's name"), + ('author-email', None, "print the author's email address"), + ('maintainer', None, "print the maintainer's name"), + ('maintainer-email', None, "print the maintainer's email address"), + ('contact', None, "print the maintainer's name if known, else the author's"), + ( + 'contact-email', + None, + "print the maintainer's email address if known, else the author's", + ), + ('url', None, "print the URL for this package"), + ('license', None, "print the license of the package"), + ('licence', None, "alias for --license"), + ('description', None, "print the package description"), + ('long-description', None, "print the long package description"), + ('platforms', None, "print the list of platforms"), + ('classifiers', None, "print the list of classifiers"), + ('keywords', None, "print the list of keywords"), + ('provides', None, "print the list of packages/modules provided"), + ('requires', None, "print the list of packages/modules required"), + ('obsoletes', None, "print the list of packages/modules made obsolete"), + ] + display_option_names = [translate_longopt(x[0]) for x in display_options] + + # negative options are options that exclude other options + negative_opt = {'quiet': 'verbose'} + + # -- Creation/initialization methods ------------------------------- + + def __init__(self, attrs=None): # noqa: C901 + """Construct a new Distribution instance: initialize all the + attributes of a Distribution, and then use 'attrs' (a dictionary + mapping attribute names to values) to assign some of those + attributes their "real" values. (Any attributes not mentioned in + 'attrs' will be assigned to some null value: 0, None, an empty list + or dictionary, etc.) Most importantly, initialize the + 'command_obj' attribute to the empty dictionary; this will be + filled in with real command objects by 'parse_command_line()'. + """ + + # Default values for our command-line options + self.verbose = 1 + self.dry_run = 0 + self.help = 0 + for attr in self.display_option_names: + setattr(self, attr, 0) + + # Store the distribution meta-data (name, version, author, and so + # forth) in a separate object -- we're getting to have enough + # information here (and enough command-line options) that it's + # worth it. Also delegate 'get_XXX()' methods to the 'metadata' + # object in a sneaky and underhanded (but efficient!) way. + self.metadata = DistributionMetadata() + for basename in self.metadata._METHOD_BASENAMES: + method_name = "get_" + basename + setattr(self, method_name, getattr(self.metadata, method_name)) + + # 'cmdclass' maps command names to class objects, so we + # can 1) quickly figure out which class to instantiate when + # we need to create a new command object, and 2) have a way + # for the setup script to override command classes + self.cmdclass = {} + + # 'command_packages' is a list of packages in which commands + # are searched for. The factory for command 'foo' is expected + # to be named 'foo' in the module 'foo' in one of the packages + # named here. This list is searched from the left; an error + # is raised if no named package provides the command being + # searched for. (Always access using get_command_packages().) + self.command_packages = None + + # 'script_name' and 'script_args' are usually set to sys.argv[0] + # and sys.argv[1:], but they can be overridden when the caller is + # not necessarily a setup script run from the command-line. + self.script_name = None + self.script_args = None + + # 'command_options' is where we store command options between + # parsing them (from config files, the command-line, etc.) and when + # they are actually needed -- ie. when the command in question is + # instantiated. It is a dictionary of dictionaries of 2-tuples: + # command_options = { command_name : { option : (source, value) } } + self.command_options = {} + + # 'dist_files' is the list of (command, pyversion, file) that + # have been created by any dist commands run so far. This is + # filled regardless of whether the run is dry or not. pyversion + # gives sysconfig.get_python_version() if the dist file is + # specific to a Python version, 'any' if it is good for all + # Python versions on the target platform, and '' for a source + # file. pyversion should not be used to specify minimum or + # maximum required Python versions; use the metainfo for that + # instead. + self.dist_files = [] + + # These options are really the business of various commands, rather + # than of the Distribution itself. We provide aliases for them in + # Distribution as a convenience to the developer. + self.packages = None + self.package_data = {} + self.package_dir = None + self.py_modules = None + self.libraries = None + self.headers = None + self.ext_modules = None + self.ext_package = None + self.include_dirs = None + self.extra_path = None + self.scripts = None + self.data_files = None + self.password = '' + + # And now initialize bookkeeping stuff that can't be supplied by + # the caller at all. 'command_obj' maps command names to + # Command instances -- that's how we enforce that every command + # class is a singleton. + self.command_obj = {} + + # 'have_run' maps command names to boolean values; it keeps track + # of whether we have actually run a particular command, to make it + # cheap to "run" a command whenever we think we might need to -- if + # it's already been done, no need for expensive filesystem + # operations, we just check the 'have_run' dictionary and carry on. + # It's only safe to query 'have_run' for a command class that has + # been instantiated -- a false value will be inserted when the + # command object is created, and replaced with a true value when + # the command is successfully run. Thus it's probably best to use + # '.get()' rather than a straight lookup. + self.have_run = {} + + # Now we'll use the attrs dictionary (ultimately, keyword args from + # the setup script) to possibly override any or all of these + # distribution options. + + if attrs: + # Pull out the set of command options and work on them + # specifically. Note that this order guarantees that aliased + # command options will override any supplied redundantly + # through the general options dictionary. + options = attrs.get('options') + if options is not None: + del attrs['options'] + for (command, cmd_options) in options.items(): + opt_dict = self.get_option_dict(command) + for (opt, val) in cmd_options.items(): + opt_dict[opt] = ("setup script", val) + + if 'licence' in attrs: + attrs['license'] = attrs['licence'] + del attrs['licence'] + msg = "'licence' distribution option is deprecated; use 'license'" + if warnings is not None: + warnings.warn(msg) + else: + sys.stderr.write(msg + "\n") + + # Now work on the rest of the attributes. Any attribute that's + # not already defined is invalid! + for (key, val) in attrs.items(): + if hasattr(self.metadata, "set_" + key): + getattr(self.metadata, "set_" + key)(val) + elif hasattr(self.metadata, key): + setattr(self.metadata, key, val) + elif hasattr(self, key): + setattr(self, key, val) + else: + msg = "Unknown distribution option: %s" % repr(key) + warnings.warn(msg) + + # no-user-cfg is handled before other command line args + # because other args override the config files, and this + # one is needed before we can load the config files. + # If attrs['script_args'] wasn't passed, assume false. + # + # This also make sure we just look at the global options + self.want_user_cfg = True + + if self.script_args is not None: + for arg in self.script_args: + if not arg.startswith('-'): + break + if arg == '--no-user-cfg': + self.want_user_cfg = False + break + + self.finalize_options() + + def get_option_dict(self, command): + """Get the option dictionary for a given command. If that + command's option dictionary hasn't been created yet, then create it + and return the new dictionary; otherwise, return the existing + option dictionary. + """ + dict = self.command_options.get(command) + if dict is None: + dict = self.command_options[command] = {} + return dict + + def dump_option_dicts(self, header=None, commands=None, indent=""): + from pprint import pformat + + if commands is None: # dump all command option dicts + commands = sorted(self.command_options.keys()) + + if header is not None: + self.announce(indent + header) + indent = indent + " " + + if not commands: + self.announce(indent + "no commands known yet") + return + + for cmd_name in commands: + opt_dict = self.command_options.get(cmd_name) + if opt_dict is None: + self.announce(indent + "no option dict for '%s' command" % cmd_name) + else: + self.announce(indent + "option dict for '%s' command:" % cmd_name) + out = pformat(opt_dict) + for line in out.split('\n'): + self.announce(indent + " " + line) + + # -- Config file finding/parsing methods --------------------------- + + def find_config_files(self): + """Find as many configuration files as should be processed for this + platform, and return a list of filenames in the order in which they + should be parsed. The filenames returned are guaranteed to exist + (modulo nasty race conditions). + + There are multiple possible config files: + - distutils.cfg in the Distutils installation directory (i.e. + where the top-level Distutils __inst__.py file lives) + - a file in the user's home directory named .pydistutils.cfg + on Unix and pydistutils.cfg on Windows/Mac; may be disabled + with the ``--no-user-cfg`` option + - setup.cfg in the current directory + - a file named by an environment variable + """ + check_environ() + files = [str(path) for path in self._gen_paths() if os.path.isfile(path)] + + if DEBUG: + self.announce("using config files: %s" % ', '.join(files)) + + return files + + def _gen_paths(self): + # The system-wide Distutils config file + sys_dir = pathlib.Path(sys.modules['distutils'].__file__).parent + yield sys_dir / "distutils.cfg" + + # The per-user config file + prefix = '.' * (os.name == 'posix') + filename = prefix + 'pydistutils.cfg' + if self.want_user_cfg: + yield pathlib.Path('~').expanduser() / filename + + # All platforms support local setup.cfg + yield pathlib.Path('setup.cfg') + + # Additional config indicated in the environment + with contextlib.suppress(TypeError): + yield pathlib.Path(os.getenv("DIST_EXTRA_CONFIG")) + + def parse_config_files(self, filenames=None): # noqa: C901 + from configparser import ConfigParser + + # Ignore install directory options if we have a venv + if sys.prefix != sys.base_prefix: + ignore_options = [ + 'install-base', + 'install-platbase', + 'install-lib', + 'install-platlib', + 'install-purelib', + 'install-headers', + 'install-scripts', + 'install-data', + 'prefix', + 'exec-prefix', + 'home', + 'user', + 'root', + ] + else: + ignore_options = [] + + ignore_options = frozenset(ignore_options) + + if filenames is None: + filenames = self.find_config_files() + + if DEBUG: + self.announce("Distribution.parse_config_files():") + + parser = ConfigParser() + for filename in filenames: + if DEBUG: + self.announce(" reading %s" % filename) + parser.read(filename) + for section in parser.sections(): + options = parser.options(section) + opt_dict = self.get_option_dict(section) + + for opt in options: + if opt != '__name__' and opt not in ignore_options: + val = parser.get(section, opt) + opt = opt.replace('-', '_') + opt_dict[opt] = (filename, val) + + # Make the ConfigParser forget everything (so we retain + # the original filenames that options come from) + parser.__init__() + + # If there was a "global" section in the config file, use it + # to set Distribution options. + + if 'global' in self.command_options: + for (opt, (src, val)) in self.command_options['global'].items(): + alias = self.negative_opt.get(opt) + try: + if alias: + setattr(self, alias, not strtobool(val)) + elif opt in ('verbose', 'dry_run'): # ugh! + setattr(self, opt, strtobool(val)) + else: + setattr(self, opt, val) + except ValueError as msg: + raise DistutilsOptionError(msg) + + # -- Command-line parsing methods ---------------------------------- + + def parse_command_line(self): + """Parse the setup script's command line, taken from the + 'script_args' instance attribute (which defaults to 'sys.argv[1:]' + -- see 'setup()' in core.py). This list is first processed for + "global options" -- options that set attributes of the Distribution + instance. Then, it is alternately scanned for Distutils commands + and options for that command. Each new command terminates the + options for the previous command. The allowed options for a + command are determined by the 'user_options' attribute of the + command class -- thus, we have to be able to load command classes + in order to parse the command line. Any error in that 'options' + attribute raises DistutilsGetoptError; any error on the + command-line raises DistutilsArgError. If no Distutils commands + were found on the command line, raises DistutilsArgError. Return + true if command-line was successfully parsed and we should carry + on with executing commands; false if no errors but we shouldn't + execute commands (currently, this only happens if user asks for + help). + """ + # + # We now have enough information to show the Macintosh dialog + # that allows the user to interactively specify the "command line". + # + toplevel_options = self._get_toplevel_options() + + # We have to parse the command line a bit at a time -- global + # options, then the first command, then its options, and so on -- + # because each command will be handled by a different class, and + # the options that are valid for a particular class aren't known + # until we have loaded the command class, which doesn't happen + # until we know what the command is. + + self.commands = [] + parser = FancyGetopt(toplevel_options + self.display_options) + parser.set_negative_aliases(self.negative_opt) + parser.set_aliases({'licence': 'license'}) + args = parser.getopt(args=self.script_args, object=self) + option_order = parser.get_option_order() + logging.getLogger().setLevel(logging.WARN - 10 * self.verbose) + + # for display options we return immediately + if self.handle_display_options(option_order): + return + while args: + args = self._parse_command_opts(parser, args) + if args is None: # user asked for help (and got it) + return + + # Handle the cases of --help as a "global" option, ie. + # "setup.py --help" and "setup.py --help command ...". For the + # former, we show global options (--verbose, --dry-run, etc.) + # and display-only options (--name, --version, etc.); for the + # latter, we omit the display-only options and show help for + # each command listed on the command line. + if self.help: + self._show_help( + parser, display_options=len(self.commands) == 0, commands=self.commands + ) + return + + # Oops, no commands found -- an end-user error + if not self.commands: + raise DistutilsArgError("no commands supplied") + + # All is well: return true + return True + + def _get_toplevel_options(self): + """Return the non-display options recognized at the top level. + + This includes options that are recognized *only* at the top + level as well as options recognized for commands. + """ + return self.global_options + [ + ( + "command-packages=", + None, + "list of packages that provide distutils commands", + ), + ] + + def _parse_command_opts(self, parser, args): # noqa: C901 + """Parse the command-line options for a single command. + 'parser' must be a FancyGetopt instance; 'args' must be the list + of arguments, starting with the current command (whose options + we are about to parse). Returns a new version of 'args' with + the next command at the front of the list; will be the empty + list if there are no more commands on the command line. Returns + None if the user asked for help on this command. + """ + # late import because of mutual dependence between these modules + from distutils.cmd import Command + + # Pull the current command from the head of the command line + command = args[0] + if not command_re.match(command): + raise SystemExit("invalid command name '%s'" % command) + self.commands.append(command) + + # Dig up the command class that implements this command, so we + # 1) know that it's a valid command, and 2) know which options + # it takes. + try: + cmd_class = self.get_command_class(command) + except DistutilsModuleError as msg: + raise DistutilsArgError(msg) + + # Require that the command class be derived from Command -- want + # to be sure that the basic "command" interface is implemented. + if not issubclass(cmd_class, Command): + raise DistutilsClassError( + "command class %s must subclass Command" % cmd_class + ) + + # Also make sure that the command object provides a list of its + # known options. + if not ( + hasattr(cmd_class, 'user_options') + and isinstance(cmd_class.user_options, list) + ): + msg = ( + "command class %s must provide " + "'user_options' attribute (a list of tuples)" + ) + raise DistutilsClassError(msg % cmd_class) + + # If the command class has a list of negative alias options, + # merge it in with the global negative aliases. + negative_opt = self.negative_opt + if hasattr(cmd_class, 'negative_opt'): + negative_opt = negative_opt.copy() + negative_opt.update(cmd_class.negative_opt) + + # Check for help_options in command class. They have a different + # format (tuple of four) so we need to preprocess them here. + if hasattr(cmd_class, 'help_options') and isinstance( + cmd_class.help_options, list + ): + help_options = fix_help_options(cmd_class.help_options) + else: + help_options = [] + + # All commands support the global options too, just by adding + # in 'global_options'. + parser.set_option_table( + self.global_options + cmd_class.user_options + help_options + ) + parser.set_negative_aliases(negative_opt) + (args, opts) = parser.getopt(args[1:]) + if hasattr(opts, 'help') and opts.help: + self._show_help(parser, display_options=0, commands=[cmd_class]) + return + + if hasattr(cmd_class, 'help_options') and isinstance( + cmd_class.help_options, list + ): + help_option_found = 0 + for (help_option, short, desc, func) in cmd_class.help_options: + if hasattr(opts, parser.get_attr_name(help_option)): + help_option_found = 1 + if callable(func): + func() + else: + raise DistutilsClassError( + "invalid help function %r for help option '%s': " + "must be a callable object (function, etc.)" + % (func, help_option) + ) + + if help_option_found: + return + + # Put the options from the command-line into their official + # holding pen, the 'command_options' dictionary. + opt_dict = self.get_option_dict(command) + for (name, value) in vars(opts).items(): + opt_dict[name] = ("command line", value) + + return args + + def finalize_options(self): + """Set final values for all the options on the Distribution + instance, analogous to the .finalize_options() method of Command + objects. + """ + for attr in ('keywords', 'platforms'): + value = getattr(self.metadata, attr) + if value is None: + continue + if isinstance(value, str): + value = [elm.strip() for elm in value.split(',')] + setattr(self.metadata, attr, value) + + def _show_help(self, parser, global_options=1, display_options=1, commands=[]): + """Show help for the setup script command-line in the form of + several lists of command-line options. 'parser' should be a + FancyGetopt instance; do not expect it to be returned in the + same state, as its option table will be reset to make it + generate the correct help text. + + If 'global_options' is true, lists the global options: + --verbose, --dry-run, etc. If 'display_options' is true, lists + the "display-only" options: --name, --version, etc. Finally, + lists per-command help for every command name or command class + in 'commands'. + """ + # late import because of mutual dependence between these modules + from distutils.core import gen_usage + from distutils.cmd import Command + + if global_options: + if display_options: + options = self._get_toplevel_options() + else: + options = self.global_options + parser.set_option_table(options) + parser.print_help(self.common_usage + "\nGlobal options:") + print('') + + if display_options: + parser.set_option_table(self.display_options) + parser.print_help( + "Information display options (just display " + + "information, ignore any commands)" + ) + print('') + + for command in self.commands: + if isinstance(command, type) and issubclass(command, Command): + klass = command + else: + klass = self.get_command_class(command) + if hasattr(klass, 'help_options') and isinstance(klass.help_options, list): + parser.set_option_table( + klass.user_options + fix_help_options(klass.help_options) + ) + else: + parser.set_option_table(klass.user_options) + parser.print_help("Options for '%s' command:" % klass.__name__) + print('') + + print(gen_usage(self.script_name)) + + def handle_display_options(self, option_order): + """If there were any non-global "display-only" options + (--help-commands or the metadata display options) on the command + line, display the requested info and return true; else return + false. + """ + from distutils.core import gen_usage + + # User just wants a list of commands -- we'll print it out and stop + # processing now (ie. if they ran "setup --help-commands foo bar", + # we ignore "foo bar"). + if self.help_commands: + self.print_commands() + print('') + print(gen_usage(self.script_name)) + return 1 + + # If user supplied any of the "display metadata" options, then + # display that metadata in the order in which the user supplied the + # metadata options. + any_display_options = 0 + is_display_option = {} + for option in self.display_options: + is_display_option[option[0]] = 1 + + for (opt, val) in option_order: + if val and is_display_option.get(opt): + opt = translate_longopt(opt) + value = getattr(self.metadata, "get_" + opt)() + if opt in ['keywords', 'platforms']: + print(','.join(value)) + elif opt in ('classifiers', 'provides', 'requires', 'obsoletes'): + print('\n'.join(value)) + else: + print(value) + any_display_options = 1 + + return any_display_options + + def print_command_list(self, commands, header, max_length): + """Print a subset of the list of all commands -- used by + 'print_commands()'. + """ + print(header + ":") + + for cmd in commands: + klass = self.cmdclass.get(cmd) + if not klass: + klass = self.get_command_class(cmd) + try: + description = klass.description + except AttributeError: + description = "(no description available)" + + print(" %-*s %s" % (max_length, cmd, description)) + + def print_commands(self): + """Print out a help message listing all available commands with a + description of each. The list is divided into "standard commands" + (listed in distutils.command.__all__) and "extra commands" + (mentioned in self.cmdclass, but not a standard command). The + descriptions come from the command class attribute + 'description'. + """ + import distutils.command + + std_commands = distutils.command.__all__ + is_std = {} + for cmd in std_commands: + is_std[cmd] = 1 + + extra_commands = [] + for cmd in self.cmdclass.keys(): + if not is_std.get(cmd): + extra_commands.append(cmd) + + max_length = 0 + for cmd in std_commands + extra_commands: + if len(cmd) > max_length: + max_length = len(cmd) + + self.print_command_list(std_commands, "Standard commands", max_length) + if extra_commands: + print() + self.print_command_list(extra_commands, "Extra commands", max_length) + + def get_command_list(self): + """Get a list of (command, description) tuples. + The list is divided into "standard commands" (listed in + distutils.command.__all__) and "extra commands" (mentioned in + self.cmdclass, but not a standard command). The descriptions come + from the command class attribute 'description'. + """ + # Currently this is only used on Mac OS, for the Mac-only GUI + # Distutils interface (by Jack Jansen) + import distutils.command + + std_commands = distutils.command.__all__ + is_std = {} + for cmd in std_commands: + is_std[cmd] = 1 + + extra_commands = [] + for cmd in self.cmdclass.keys(): + if not is_std.get(cmd): + extra_commands.append(cmd) + + rv = [] + for cmd in std_commands + extra_commands: + klass = self.cmdclass.get(cmd) + if not klass: + klass = self.get_command_class(cmd) + try: + description = klass.description + except AttributeError: + description = "(no description available)" + rv.append((cmd, description)) + return rv + + # -- Command class/object methods ---------------------------------- + + def get_command_packages(self): + """Return a list of packages from which commands are loaded.""" + pkgs = self.command_packages + if not isinstance(pkgs, list): + if pkgs is None: + pkgs = '' + pkgs = [pkg.strip() for pkg in pkgs.split(',') if pkg != ''] + if "distutils.command" not in pkgs: + pkgs.insert(0, "distutils.command") + self.command_packages = pkgs + return pkgs + + def get_command_class(self, command): + """Return the class that implements the Distutils command named by + 'command'. First we check the 'cmdclass' dictionary; if the + command is mentioned there, we fetch the class object from the + dictionary and return it. Otherwise we load the command module + ("distutils.command." + command) and fetch the command class from + the module. The loaded class is also stored in 'cmdclass' + to speed future calls to 'get_command_class()'. + + Raises DistutilsModuleError if the expected module could not be + found, or if that module does not define the expected class. + """ + klass = self.cmdclass.get(command) + if klass: + return klass + + for pkgname in self.get_command_packages(): + module_name = "{}.{}".format(pkgname, command) + klass_name = command + + try: + __import__(module_name) + module = sys.modules[module_name] + except ImportError: + continue + + try: + klass = getattr(module, klass_name) + except AttributeError: + raise DistutilsModuleError( + "invalid command '%s' (no class '%s' in module '%s')" + % (command, klass_name, module_name) + ) + + self.cmdclass[command] = klass + return klass + + raise DistutilsModuleError("invalid command '%s'" % command) + + def get_command_obj(self, command, create=1): + """Return the command object for 'command'. Normally this object + is cached on a previous call to 'get_command_obj()'; if no command + object for 'command' is in the cache, then we either create and + return it (if 'create' is true) or return None. + """ + cmd_obj = self.command_obj.get(command) + if not cmd_obj and create: + if DEBUG: + self.announce( + "Distribution.get_command_obj(): " + "creating '%s' command object" % command + ) + + klass = self.get_command_class(command) + cmd_obj = self.command_obj[command] = klass(self) + self.have_run[command] = 0 + + # Set any options that were supplied in config files + # or on the command line. (NB. support for error + # reporting is lame here: any errors aren't reported + # until 'finalize_options()' is called, which means + # we won't report the source of the error.) + options = self.command_options.get(command) + if options: + self._set_command_options(cmd_obj, options) + + return cmd_obj + + def _set_command_options(self, command_obj, option_dict=None): # noqa: C901 + """Set the options for 'command_obj' from 'option_dict'. Basically + this means copying elements of a dictionary ('option_dict') to + attributes of an instance ('command'). + + 'command_obj' must be a Command instance. If 'option_dict' is not + supplied, uses the standard option dictionary for this command + (from 'self.command_options'). + """ + command_name = command_obj.get_command_name() + if option_dict is None: + option_dict = self.get_option_dict(command_name) + + if DEBUG: + self.announce(" setting options for '%s' command:" % command_name) + for (option, (source, value)) in option_dict.items(): + if DEBUG: + self.announce(" {} = {} (from {})".format(option, value, source)) + try: + bool_opts = [translate_longopt(o) for o in command_obj.boolean_options] + except AttributeError: + bool_opts = [] + try: + neg_opt = command_obj.negative_opt + except AttributeError: + neg_opt = {} + + try: + is_string = isinstance(value, str) + if option in neg_opt and is_string: + setattr(command_obj, neg_opt[option], not strtobool(value)) + elif option in bool_opts and is_string: + setattr(command_obj, option, strtobool(value)) + elif hasattr(command_obj, option): + setattr(command_obj, option, value) + else: + raise DistutilsOptionError( + "error in %s: command '%s' has no such option '%s'" + % (source, command_name, option) + ) + except ValueError as msg: + raise DistutilsOptionError(msg) + + def reinitialize_command(self, command, reinit_subcommands=0): + """Reinitializes a command to the state it was in when first + returned by 'get_command_obj()': ie., initialized but not yet + finalized. This provides the opportunity to sneak option + values in programmatically, overriding or supplementing + user-supplied values from the config files and command line. + You'll have to re-finalize the command object (by calling + 'finalize_options()' or 'ensure_finalized()') before using it for + real. + + 'command' should be a command name (string) or command object. If + 'reinit_subcommands' is true, also reinitializes the command's + sub-commands, as declared by the 'sub_commands' class attribute (if + it has one). See the "install" command for an example. Only + reinitializes the sub-commands that actually matter, ie. those + whose test predicates return true. + + Returns the reinitialized command object. + """ + from distutils.cmd import Command + + if not isinstance(command, Command): + command_name = command + command = self.get_command_obj(command_name) + else: + command_name = command.get_command_name() + + if not command.finalized: + return command + command.initialize_options() + command.finalized = 0 + self.have_run[command_name] = 0 + self._set_command_options(command) + + if reinit_subcommands: + for sub in command.get_sub_commands(): + self.reinitialize_command(sub, reinit_subcommands) + + return command + + # -- Methods that operate on the Distribution ---------------------- + + def announce(self, msg, level=logging.INFO): + log.log(level, msg) + + def run_commands(self): + """Run each command that was seen on the setup script command line. + Uses the list of commands found and cache of command objects + created by 'get_command_obj()'. + """ + for cmd in self.commands: + self.run_command(cmd) + + # -- Methods that operate on its Commands -------------------------- + + def run_command(self, command): + """Do whatever it takes to run a command (including nothing at all, + if the command has already been run). Specifically: if we have + already created and run the command named by 'command', return + silently without doing anything. If the command named by 'command' + doesn't even have a command object yet, create one. Then invoke + 'run()' on that command object (or an existing one). + """ + # Already been here, done that? then return silently. + if self.have_run.get(command): + return + + log.info("running %s", command) + cmd_obj = self.get_command_obj(command) + cmd_obj.ensure_finalized() + cmd_obj.run() + self.have_run[command] = 1 + + # -- Distribution query methods ------------------------------------ + + def has_pure_modules(self): + return len(self.packages or self.py_modules or []) > 0 + + def has_ext_modules(self): + return self.ext_modules and len(self.ext_modules) > 0 + + def has_c_libraries(self): + return self.libraries and len(self.libraries) > 0 + + def has_modules(self): + return self.has_pure_modules() or self.has_ext_modules() + + def has_headers(self): + return self.headers and len(self.headers) > 0 + + def has_scripts(self): + return self.scripts and len(self.scripts) > 0 + + def has_data_files(self): + return self.data_files and len(self.data_files) > 0 + + def is_pure(self): + return ( + self.has_pure_modules() + and not self.has_ext_modules() + and not self.has_c_libraries() + ) + + # -- Metadata query methods ---------------------------------------- + + # If you're looking for 'get_name()', 'get_version()', and so forth, + # they are defined in a sneaky way: the constructor binds self.get_XXX + # to self.metadata.get_XXX. The actual code is in the + # DistributionMetadata class, below. + + +class DistributionMetadata: + """Dummy class to hold the distribution meta-data: name, version, + author, and so forth. + """ + + _METHOD_BASENAMES = ( + "name", + "version", + "author", + "author_email", + "maintainer", + "maintainer_email", + "url", + "license", + "description", + "long_description", + "keywords", + "platforms", + "fullname", + "contact", + "contact_email", + "classifiers", + "download_url", + # PEP 314 + "provides", + "requires", + "obsoletes", + ) + + def __init__(self, path=None): + if path is not None: + self.read_pkg_file(open(path)) + else: + self.name = None + self.version = None + self.author = None + self.author_email = None + self.maintainer = None + self.maintainer_email = None + self.url = None + self.license = None + self.description = None + self.long_description = None + self.keywords = None + self.platforms = None + self.classifiers = None + self.download_url = None + # PEP 314 + self.provides = None + self.requires = None + self.obsoletes = None + + def read_pkg_file(self, file): + """Reads the metadata values from a file object.""" + msg = message_from_file(file) + + def _read_field(name): + value = msg[name] + if value and value != "UNKNOWN": + return value + + def _read_list(name): + values = msg.get_all(name, None) + if values == []: + return None + return values + + metadata_version = msg['metadata-version'] + self.name = _read_field('name') + self.version = _read_field('version') + self.description = _read_field('summary') + # we are filling author only. + self.author = _read_field('author') + self.maintainer = None + self.author_email = _read_field('author-email') + self.maintainer_email = None + self.url = _read_field('home-page') + self.license = _read_field('license') + + if 'download-url' in msg: + self.download_url = _read_field('download-url') + else: + self.download_url = None + + self.long_description = _read_field('description') + self.description = _read_field('summary') + + if 'keywords' in msg: + self.keywords = _read_field('keywords').split(',') + + self.platforms = _read_list('platform') + self.classifiers = _read_list('classifier') + + # PEP 314 - these fields only exist in 1.1 + if metadata_version == '1.1': + self.requires = _read_list('requires') + self.provides = _read_list('provides') + self.obsoletes = _read_list('obsoletes') + else: + self.requires = None + self.provides = None + self.obsoletes = None + + def write_pkg_info(self, base_dir): + """Write the PKG-INFO file into the release tree.""" + with open( + os.path.join(base_dir, 'PKG-INFO'), 'w', encoding='UTF-8' + ) as pkg_info: + self.write_pkg_file(pkg_info) + + def write_pkg_file(self, file): + """Write the PKG-INFO format data to a file object.""" + version = '1.0' + if ( + self.provides + or self.requires + or self.obsoletes + or self.classifiers + or self.download_url + ): + version = '1.1' + + # required fields + file.write('Metadata-Version: %s\n' % version) + file.write('Name: %s\n' % self.get_name()) + file.write('Version: %s\n' % self.get_version()) + + def maybe_write(header, val): + if val: + file.write(f"{header}: {val}\n") + + # optional fields + maybe_write("Summary", self.get_description()) + maybe_write("Home-page", self.get_url()) + maybe_write("Author", self.get_contact()) + maybe_write("Author-email", self.get_contact_email()) + maybe_write("License", self.get_license()) + maybe_write("Download-URL", self.download_url) + maybe_write("Description", rfc822_escape(self.get_long_description() or "")) + maybe_write("Keywords", ",".join(self.get_keywords())) + + self._write_list(file, 'Platform', self.get_platforms()) + self._write_list(file, 'Classifier', self.get_classifiers()) + + # PEP 314 + self._write_list(file, 'Requires', self.get_requires()) + self._write_list(file, 'Provides', self.get_provides()) + self._write_list(file, 'Obsoletes', self.get_obsoletes()) + + def _write_list(self, file, name, values): + values = values or [] + for value in values: + file.write('{}: {}\n'.format(name, value)) + + # -- Metadata query methods ---------------------------------------- + + def get_name(self): + return self.name or "UNKNOWN" + + def get_version(self): + return self.version or "0.0.0" + + def get_fullname(self): + return "{}-{}".format(self.get_name(), self.get_version()) + + def get_author(self): + return self.author + + def get_author_email(self): + return self.author_email + + def get_maintainer(self): + return self.maintainer + + def get_maintainer_email(self): + return self.maintainer_email + + def get_contact(self): + return self.maintainer or self.author + + def get_contact_email(self): + return self.maintainer_email or self.author_email + + def get_url(self): + return self.url + + def get_license(self): + return self.license + + get_licence = get_license + + def get_description(self): + return self.description + + def get_long_description(self): + return self.long_description + + def get_keywords(self): + return self.keywords or [] + + def set_keywords(self, value): + self.keywords = _ensure_list(value, 'keywords') + + def get_platforms(self): + return self.platforms + + def set_platforms(self, value): + self.platforms = _ensure_list(value, 'platforms') + + def get_classifiers(self): + return self.classifiers or [] + + def set_classifiers(self, value): + self.classifiers = _ensure_list(value, 'classifiers') + + def get_download_url(self): + return self.download_url + + # PEP 314 + def get_requires(self): + return self.requires or [] + + def set_requires(self, value): + import distutils.versionpredicate + + for v in value: + distutils.versionpredicate.VersionPredicate(v) + self.requires = list(value) + + def get_provides(self): + return self.provides or [] + + def set_provides(self, value): + value = [v.strip() for v in value] + for v in value: + import distutils.versionpredicate + + distutils.versionpredicate.split_provision(v) + self.provides = value + + def get_obsoletes(self): + return self.obsoletes or [] + + def set_obsoletes(self, value): + import distutils.versionpredicate + + for v in value: + distutils.versionpredicate.VersionPredicate(v) + self.obsoletes = list(value) + + +def fix_help_options(options): + """Convert a 4-tuple 'help_options' list as found in various command + classes to the 3-tuple form required by FancyGetopt. + """ + new_options = [] + for help_tuple in options: + new_options.append(help_tuple[0:3]) + return new_options diff --git a/libs/common/setuptools/_distutils/errors.py b/libs/common/setuptools/_distutils/errors.py new file mode 100644 index 00000000..626254c3 --- /dev/null +++ b/libs/common/setuptools/_distutils/errors.py @@ -0,0 +1,127 @@ +"""distutils.errors + +Provides exceptions used by the Distutils modules. Note that Distutils +modules may raise standard exceptions; in particular, SystemExit is +usually raised for errors that are obviously the end-user's fault +(eg. bad command-line arguments). + +This module is safe to use in "from ... import *" mode; it only exports +symbols whose names start with "Distutils" and end with "Error".""" + + +class DistutilsError(Exception): + """The root of all Distutils evil.""" + + pass + + +class DistutilsModuleError(DistutilsError): + """Unable to load an expected module, or to find an expected class + within some module (in particular, command modules and classes).""" + + pass + + +class DistutilsClassError(DistutilsError): + """Some command class (or possibly distribution class, if anyone + feels a need to subclass Distribution) is found not to be holding + up its end of the bargain, ie. implementing some part of the + "command "interface.""" + + pass + + +class DistutilsGetoptError(DistutilsError): + """The option table provided to 'fancy_getopt()' is bogus.""" + + pass + + +class DistutilsArgError(DistutilsError): + """Raised by fancy_getopt in response to getopt.error -- ie. an + error in the command line usage.""" + + pass + + +class DistutilsFileError(DistutilsError): + """Any problems in the filesystem: expected file not found, etc. + Typically this is for problems that we detect before OSError + could be raised.""" + + pass + + +class DistutilsOptionError(DistutilsError): + """Syntactic/semantic errors in command options, such as use of + mutually conflicting options, or inconsistent options, + badly-spelled values, etc. No distinction is made between option + values originating in the setup script, the command line, config + files, or what-have-you -- but if we *know* something originated in + the setup script, we'll raise DistutilsSetupError instead.""" + + pass + + +class DistutilsSetupError(DistutilsError): + """For errors that can be definitely blamed on the setup script, + such as invalid keyword arguments to 'setup()'.""" + + pass + + +class DistutilsPlatformError(DistutilsError): + """We don't know how to do something on the current platform (but + we do know how to do it on some platform) -- eg. trying to compile + C files on a platform not supported by a CCompiler subclass.""" + + pass + + +class DistutilsExecError(DistutilsError): + """Any problems executing an external program (such as the C + compiler, when compiling C files).""" + + pass + + +class DistutilsInternalError(DistutilsError): + """Internal inconsistencies or impossibilities (obviously, this + should never be seen if the code is working!).""" + + pass + + +class DistutilsTemplateError(DistutilsError): + """Syntax error in a file list template.""" + + +class DistutilsByteCompileError(DistutilsError): + """Byte compile error.""" + + +# Exception classes used by the CCompiler implementation classes +class CCompilerError(Exception): + """Some compile/link operation failed.""" + + +class PreprocessError(CCompilerError): + """Failure to preprocess one or more C/C++ files.""" + + +class CompileError(CCompilerError): + """Failure to compile one or more C/C++ source files.""" + + +class LibError(CCompilerError): + """Failure to create a static library from one or more C/C++ object + files.""" + + +class LinkError(CCompilerError): + """Failure to link one or more C/C++ object files into an executable + or shared library file.""" + + +class UnknownFileError(CCompilerError): + """Attempt to process an unknown file type.""" diff --git a/libs/common/setuptools/_distutils/extension.py b/libs/common/setuptools/_distutils/extension.py new file mode 100644 index 00000000..6b8575de --- /dev/null +++ b/libs/common/setuptools/_distutils/extension.py @@ -0,0 +1,248 @@ +"""distutils.extension + +Provides the Extension class, used to describe C/C++ extension +modules in setup scripts.""" + +import os +import warnings + +# This class is really only used by the "build_ext" command, so it might +# make sense to put it in distutils.command.build_ext. However, that +# module is already big enough, and I want to make this class a bit more +# complex to simplify some common cases ("foo" module in "foo.c") and do +# better error-checking ("foo.c" actually exists). +# +# Also, putting this in build_ext.py means every setup script would have to +# import that large-ish module (indirectly, through distutils.core) in +# order to do anything. + + +class Extension: + """Just a collection of attributes that describes an extension + module and everything needed to build it (hopefully in a portable + way, but there are hooks that let you be as unportable as you need). + + Instance attributes: + name : string + the full name of the extension, including any packages -- ie. + *not* a filename or pathname, but Python dotted name + sources : [string] + list of source filenames, relative to the distribution root + (where the setup script lives), in Unix form (slash-separated) + for portability. Source files may be C, C++, SWIG (.i), + platform-specific resource files, or whatever else is recognized + by the "build_ext" command as source for a Python extension. + include_dirs : [string] + list of directories to search for C/C++ header files (in Unix + form for portability) + define_macros : [(name : string, value : string|None)] + list of macros to define; each macro is defined using a 2-tuple, + where 'value' is either the string to define it to or None to + define it without a particular value (equivalent of "#define + FOO" in source or -DFOO on Unix C compiler command line) + undef_macros : [string] + list of macros to undefine explicitly + library_dirs : [string] + list of directories to search for C/C++ libraries at link time + libraries : [string] + list of library names (not filenames or paths) to link against + runtime_library_dirs : [string] + list of directories to search for C/C++ libraries at run time + (for shared extensions, this is when the extension is loaded) + extra_objects : [string] + list of extra files to link with (eg. object files not implied + by 'sources', static library that must be explicitly specified, + binary resource files, etc.) + extra_compile_args : [string] + any extra platform- and compiler-specific information to use + when compiling the source files in 'sources'. For platforms and + compilers where "command line" makes sense, this is typically a + list of command-line arguments, but for other platforms it could + be anything. + extra_link_args : [string] + any extra platform- and compiler-specific information to use + when linking object files together to create the extension (or + to create a new static Python interpreter). Similar + interpretation as for 'extra_compile_args'. + export_symbols : [string] + list of symbols to be exported from a shared extension. Not + used on all platforms, and not generally necessary for Python + extensions, which typically export exactly one symbol: "init" + + extension_name. + swig_opts : [string] + any extra options to pass to SWIG if a source file has the .i + extension. + depends : [string] + list of files that the extension depends on + language : string + extension language (i.e. "c", "c++", "objc"). Will be detected + from the source extensions if not provided. + optional : boolean + specifies that a build failure in the extension should not abort the + build process, but simply not install the failing extension. + """ + + # When adding arguments to this constructor, be sure to update + # setup_keywords in core.py. + def __init__( + self, + name, + sources, + include_dirs=None, + define_macros=None, + undef_macros=None, + library_dirs=None, + libraries=None, + runtime_library_dirs=None, + extra_objects=None, + extra_compile_args=None, + extra_link_args=None, + export_symbols=None, + swig_opts=None, + depends=None, + language=None, + optional=None, + **kw # To catch unknown keywords + ): + if not isinstance(name, str): + raise AssertionError("'name' must be a string") + if not (isinstance(sources, list) and all(isinstance(v, str) for v in sources)): + raise AssertionError("'sources' must be a list of strings") + + self.name = name + self.sources = sources + self.include_dirs = include_dirs or [] + self.define_macros = define_macros or [] + self.undef_macros = undef_macros or [] + self.library_dirs = library_dirs or [] + self.libraries = libraries or [] + self.runtime_library_dirs = runtime_library_dirs or [] + self.extra_objects = extra_objects or [] + self.extra_compile_args = extra_compile_args or [] + self.extra_link_args = extra_link_args or [] + self.export_symbols = export_symbols or [] + self.swig_opts = swig_opts or [] + self.depends = depends or [] + self.language = language + self.optional = optional + + # If there are unknown keyword options, warn about them + if len(kw) > 0: + options = [repr(option) for option in kw] + options = ', '.join(sorted(options)) + msg = "Unknown Extension options: %s" % options + warnings.warn(msg) + + def __repr__(self): + return '<{}.{}({!r}) at {:#x}>'.format( + self.__class__.__module__, + self.__class__.__qualname__, + self.name, + id(self), + ) + + +def read_setup_file(filename): # noqa: C901 + """Reads a Setup file and returns Extension instances.""" + from distutils.sysconfig import parse_makefile, expand_makefile_vars, _variable_rx + + from distutils.text_file import TextFile + from distutils.util import split_quoted + + # First pass over the file to gather "VAR = VALUE" assignments. + vars = parse_makefile(filename) + + # Second pass to gobble up the real content: lines of the form + # ... [ ...] [ ...] [ ...] + file = TextFile( + filename, + strip_comments=1, + skip_blanks=1, + join_lines=1, + lstrip_ws=1, + rstrip_ws=1, + ) + try: + extensions = [] + + while True: + line = file.readline() + if line is None: # eof + break + if _variable_rx.match(line): # VAR=VALUE, handled in first pass + continue + + if line[0] == line[-1] == "*": + file.warn("'%s' lines not handled yet" % line) + continue + + line = expand_makefile_vars(line, vars) + words = split_quoted(line) + + # NB. this parses a slightly different syntax than the old + # makesetup script: here, there must be exactly one extension per + # line, and it must be the first word of the line. I have no idea + # why the old syntax supported multiple extensions per line, as + # they all wind up being the same. + + module = words[0] + ext = Extension(module, []) + append_next_word = None + + for word in words[1:]: + if append_next_word is not None: + append_next_word.append(word) + append_next_word = None + continue + + suffix = os.path.splitext(word)[1] + switch = word[0:2] + value = word[2:] + + if suffix in (".c", ".cc", ".cpp", ".cxx", ".c++", ".m", ".mm"): + # hmm, should we do something about C vs. C++ sources? + # or leave it up to the CCompiler implementation to + # worry about? + ext.sources.append(word) + elif switch == "-I": + ext.include_dirs.append(value) + elif switch == "-D": + equals = value.find("=") + if equals == -1: # bare "-DFOO" -- no value + ext.define_macros.append((value, None)) + else: # "-DFOO=blah" + ext.define_macros.append((value[0:equals], value[equals + 2 :])) + elif switch == "-U": + ext.undef_macros.append(value) + elif switch == "-C": # only here 'cause makesetup has it! + ext.extra_compile_args.append(word) + elif switch == "-l": + ext.libraries.append(value) + elif switch == "-L": + ext.library_dirs.append(value) + elif switch == "-R": + ext.runtime_library_dirs.append(value) + elif word == "-rpath": + append_next_word = ext.runtime_library_dirs + elif word == "-Xlinker": + append_next_word = ext.extra_link_args + elif word == "-Xcompiler": + append_next_word = ext.extra_compile_args + elif switch == "-u": + ext.extra_link_args.append(word) + if not value: + append_next_word = ext.extra_link_args + elif suffix in (".a", ".so", ".sl", ".o", ".dylib"): + # NB. a really faithful emulation of makesetup would + # append a .o file to extra_objects only if it + # had a slash in it; otherwise, it would s/.o/.c/ + # and append it to sources. Hmmmm. + ext.extra_objects.append(word) + else: + file.warn("unrecognized argument '%s'" % word) + + extensions.append(ext) + finally: + file.close() + + return extensions diff --git a/libs/common/setuptools/_distutils/fancy_getopt.py b/libs/common/setuptools/_distutils/fancy_getopt.py new file mode 100644 index 00000000..6abb884d --- /dev/null +++ b/libs/common/setuptools/_distutils/fancy_getopt.py @@ -0,0 +1,470 @@ +"""distutils.fancy_getopt + +Wrapper around the standard getopt module that provides the following +additional features: + * short and long options are tied together + * options have help strings, so fancy_getopt could potentially + create a complete usage summary + * options set attributes of a passed-in object +""" + +import sys +import string +import re +import getopt +from .errors import DistutilsGetoptError, DistutilsArgError + +# Much like command_re in distutils.core, this is close to but not quite +# the same as a Python NAME -- except, in the spirit of most GNU +# utilities, we use '-' in place of '_'. (The spirit of LISP lives on!) +# The similarities to NAME are again not a coincidence... +longopt_pat = r'[a-zA-Z](?:[a-zA-Z0-9-]*)' +longopt_re = re.compile(r'^%s$' % longopt_pat) + +# For recognizing "negative alias" options, eg. "quiet=!verbose" +neg_alias_re = re.compile("^({})=!({})$".format(longopt_pat, longopt_pat)) + +# This is used to translate long options to legitimate Python identifiers +# (for use as attributes of some object). +longopt_xlate = str.maketrans('-', '_') + + +class FancyGetopt: + """Wrapper around the standard 'getopt()' module that provides some + handy extra functionality: + * short and long options are tied together + * options have help strings, and help text can be assembled + from them + * options set attributes of a passed-in object + * boolean options can have "negative aliases" -- eg. if + --quiet is the "negative alias" of --verbose, then "--quiet" + on the command line sets 'verbose' to false + """ + + def __init__(self, option_table=None): + # The option table is (currently) a list of tuples. The + # tuples may have 3 or four values: + # (long_option, short_option, help_string [, repeatable]) + # if an option takes an argument, its long_option should have '=' + # appended; short_option should just be a single character, no ':' + # in any case. If a long_option doesn't have a corresponding + # short_option, short_option should be None. All option tuples + # must have long options. + self.option_table = option_table + + # 'option_index' maps long option names to entries in the option + # table (ie. those 3-tuples). + self.option_index = {} + if self.option_table: + self._build_index() + + # 'alias' records (duh) alias options; {'foo': 'bar'} means + # --foo is an alias for --bar + self.alias = {} + + # 'negative_alias' keeps track of options that are the boolean + # opposite of some other option + self.negative_alias = {} + + # These keep track of the information in the option table. We + # don't actually populate these structures until we're ready to + # parse the command-line, since the 'option_table' passed in here + # isn't necessarily the final word. + self.short_opts = [] + self.long_opts = [] + self.short2long = {} + self.attr_name = {} + self.takes_arg = {} + + # And 'option_order' is filled up in 'getopt()'; it records the + # original order of options (and their values) on the command-line, + # but expands short options, converts aliases, etc. + self.option_order = [] + + def _build_index(self): + self.option_index.clear() + for option in self.option_table: + self.option_index[option[0]] = option + + def set_option_table(self, option_table): + self.option_table = option_table + self._build_index() + + def add_option(self, long_option, short_option=None, help_string=None): + if long_option in self.option_index: + raise DistutilsGetoptError( + "option conflict: already an option '%s'" % long_option + ) + else: + option = (long_option, short_option, help_string) + self.option_table.append(option) + self.option_index[long_option] = option + + def has_option(self, long_option): + """Return true if the option table for this parser has an + option with long name 'long_option'.""" + return long_option in self.option_index + + def get_attr_name(self, long_option): + """Translate long option name 'long_option' to the form it + has as an attribute of some object: ie., translate hyphens + to underscores.""" + return long_option.translate(longopt_xlate) + + def _check_alias_dict(self, aliases, what): + assert isinstance(aliases, dict) + for (alias, opt) in aliases.items(): + if alias not in self.option_index: + raise DistutilsGetoptError( + ("invalid %s '%s': " "option '%s' not defined") + % (what, alias, alias) + ) + if opt not in self.option_index: + raise DistutilsGetoptError( + ("invalid %s '%s': " "aliased option '%s' not defined") + % (what, alias, opt) + ) + + def set_aliases(self, alias): + """Set the aliases for this option parser.""" + self._check_alias_dict(alias, "alias") + self.alias = alias + + def set_negative_aliases(self, negative_alias): + """Set the negative aliases for this option parser. + 'negative_alias' should be a dictionary mapping option names to + option names, both the key and value must already be defined + in the option table.""" + self._check_alias_dict(negative_alias, "negative alias") + self.negative_alias = negative_alias + + def _grok_option_table(self): # noqa: C901 + """Populate the various data structures that keep tabs on the + option table. Called by 'getopt()' before it can do anything + worthwhile. + """ + self.long_opts = [] + self.short_opts = [] + self.short2long.clear() + self.repeat = {} + + for option in self.option_table: + if len(option) == 3: + long, short, help = option + repeat = 0 + elif len(option) == 4: + long, short, help, repeat = option + else: + # the option table is part of the code, so simply + # assert that it is correct + raise ValueError("invalid option tuple: {!r}".format(option)) + + # Type- and value-check the option names + if not isinstance(long, str) or len(long) < 2: + raise DistutilsGetoptError( + ("invalid long option '%s': " "must be a string of length >= 2") + % long + ) + + if not ((short is None) or (isinstance(short, str) and len(short) == 1)): + raise DistutilsGetoptError( + "invalid short option '%s': " + "must a single character or None" % short + ) + + self.repeat[long] = repeat + self.long_opts.append(long) + + if long[-1] == '=': # option takes an argument? + if short: + short = short + ':' + long = long[0:-1] + self.takes_arg[long] = 1 + else: + # Is option is a "negative alias" for some other option (eg. + # "quiet" == "!verbose")? + alias_to = self.negative_alias.get(long) + if alias_to is not None: + if self.takes_arg[alias_to]: + raise DistutilsGetoptError( + "invalid negative alias '%s': " + "aliased option '%s' takes a value" % (long, alias_to) + ) + + self.long_opts[-1] = long # XXX redundant?! + self.takes_arg[long] = 0 + + # If this is an alias option, make sure its "takes arg" flag is + # the same as the option it's aliased to. + alias_to = self.alias.get(long) + if alias_to is not None: + if self.takes_arg[long] != self.takes_arg[alias_to]: + raise DistutilsGetoptError( + "invalid alias '%s': inconsistent with " + "aliased option '%s' (one of them takes a value, " + "the other doesn't" % (long, alias_to) + ) + + # Now enforce some bondage on the long option name, so we can + # later translate it to an attribute name on some object. Have + # to do this a bit late to make sure we've removed any trailing + # '='. + if not longopt_re.match(long): + raise DistutilsGetoptError( + "invalid long option name '%s' " + "(must be letters, numbers, hyphens only" % long + ) + + self.attr_name[long] = self.get_attr_name(long) + if short: + self.short_opts.append(short) + self.short2long[short[0]] = long + + def getopt(self, args=None, object=None): # noqa: C901 + """Parse command-line options in args. Store as attributes on object. + + If 'args' is None or not supplied, uses 'sys.argv[1:]'. If + 'object' is None or not supplied, creates a new OptionDummy + object, stores option values there, and returns a tuple (args, + object). If 'object' is supplied, it is modified in place and + 'getopt()' just returns 'args'; in both cases, the returned + 'args' is a modified copy of the passed-in 'args' list, which + is left untouched. + """ + if args is None: + args = sys.argv[1:] + if object is None: + object = OptionDummy() + created_object = True + else: + created_object = False + + self._grok_option_table() + + short_opts = ' '.join(self.short_opts) + try: + opts, args = getopt.getopt(args, short_opts, self.long_opts) + except getopt.error as msg: + raise DistutilsArgError(msg) + + for opt, val in opts: + if len(opt) == 2 and opt[0] == '-': # it's a short option + opt = self.short2long[opt[1]] + else: + assert len(opt) > 2 and opt[:2] == '--' + opt = opt[2:] + + alias = self.alias.get(opt) + if alias: + opt = alias + + if not self.takes_arg[opt]: # boolean option? + assert val == '', "boolean option can't have value" + alias = self.negative_alias.get(opt) + if alias: + opt = alias + val = 0 + else: + val = 1 + + attr = self.attr_name[opt] + # The only repeating option at the moment is 'verbose'. + # It has a negative option -q quiet, which should set verbose = 0. + if val and self.repeat.get(attr) is not None: + val = getattr(object, attr, 0) + 1 + setattr(object, attr, val) + self.option_order.append((opt, val)) + + # for opts + if created_object: + return args, object + else: + return args + + def get_option_order(self): + """Returns the list of (option, value) tuples processed by the + previous run of 'getopt()'. Raises RuntimeError if + 'getopt()' hasn't been called yet. + """ + if self.option_order is None: + raise RuntimeError("'getopt()' hasn't been called yet") + else: + return self.option_order + + def generate_help(self, header=None): # noqa: C901 + """Generate help text (a list of strings, one per suggested line of + output) from the option table for this FancyGetopt object. + """ + # Blithely assume the option table is good: probably wouldn't call + # 'generate_help()' unless you've already called 'getopt()'. + + # First pass: determine maximum length of long option names + max_opt = 0 + for option in self.option_table: + long = option[0] + short = option[1] + ell = len(long) + if long[-1] == '=': + ell = ell - 1 + if short is not None: + ell = ell + 5 # " (-x)" where short == 'x' + if ell > max_opt: + max_opt = ell + + opt_width = max_opt + 2 + 2 + 2 # room for indent + dashes + gutter + + # Typical help block looks like this: + # --foo controls foonabulation + # Help block for longest option looks like this: + # --flimflam set the flim-flam level + # and with wrapped text: + # --flimflam set the flim-flam level (must be between + # 0 and 100, except on Tuesdays) + # Options with short names will have the short name shown (but + # it doesn't contribute to max_opt): + # --foo (-f) controls foonabulation + # If adding the short option would make the left column too wide, + # we push the explanation off to the next line + # --flimflam (-l) + # set the flim-flam level + # Important parameters: + # - 2 spaces before option block start lines + # - 2 dashes for each long option name + # - min. 2 spaces between option and explanation (gutter) + # - 5 characters (incl. space) for short option name + + # Now generate lines of help text. (If 80 columns were good enough + # for Jesus, then 78 columns are good enough for me!) + line_width = 78 + text_width = line_width - opt_width + big_indent = ' ' * opt_width + if header: + lines = [header] + else: + lines = ['Option summary:'] + + for option in self.option_table: + long, short, help = option[:3] + text = wrap_text(help, text_width) + if long[-1] == '=': + long = long[0:-1] + + # Case 1: no short option at all (makes life easy) + if short is None: + if text: + lines.append(" --%-*s %s" % (max_opt, long, text[0])) + else: + lines.append(" --%-*s " % (max_opt, long)) + + # Case 2: we have a short option, so we have to include it + # just after the long option + else: + opt_names = "{} (-{})".format(long, short) + if text: + lines.append(" --%-*s %s" % (max_opt, opt_names, text[0])) + else: + lines.append(" --%-*s" % opt_names) + + for ell in text[1:]: + lines.append(big_indent + ell) + return lines + + def print_help(self, header=None, file=None): + if file is None: + file = sys.stdout + for line in self.generate_help(header): + file.write(line + "\n") + + +def fancy_getopt(options, negative_opt, object, args): + parser = FancyGetopt(options) + parser.set_negative_aliases(negative_opt) + return parser.getopt(args, object) + + +WS_TRANS = {ord(_wschar): ' ' for _wschar in string.whitespace} + + +def wrap_text(text, width): + """wrap_text(text : string, width : int) -> [string] + + Split 'text' into multiple lines of no more than 'width' characters + each, and return the list of strings that results. + """ + if text is None: + return [] + if len(text) <= width: + return [text] + + text = text.expandtabs() + text = text.translate(WS_TRANS) + chunks = re.split(r'( +|-+)', text) + chunks = [ch for ch in chunks if ch] # ' - ' results in empty strings + lines = [] + + while chunks: + cur_line = [] # list of chunks (to-be-joined) + cur_len = 0 # length of current line + + while chunks: + ell = len(chunks[0]) + if cur_len + ell <= width: # can squeeze (at least) this chunk in + cur_line.append(chunks[0]) + del chunks[0] + cur_len = cur_len + ell + else: # this line is full + # drop last chunk if all space + if cur_line and cur_line[-1][0] == ' ': + del cur_line[-1] + break + + if chunks: # any chunks left to process? + # if the current line is still empty, then we had a single + # chunk that's too big too fit on a line -- so we break + # down and break it up at the line width + if cur_len == 0: + cur_line.append(chunks[0][0:width]) + chunks[0] = chunks[0][width:] + + # all-whitespace chunks at the end of a line can be discarded + # (and we know from the re.split above that if a chunk has + # *any* whitespace, it is *all* whitespace) + if chunks[0][0] == ' ': + del chunks[0] + + # and store this line in the list-of-all-lines -- as a single + # string, of course! + lines.append(''.join(cur_line)) + + return lines + + +def translate_longopt(opt): + """Convert a long option name to a valid Python identifier by + changing "-" to "_". + """ + return opt.translate(longopt_xlate) + + +class OptionDummy: + """Dummy class just used as a place to hold command-line option + values as instance attributes.""" + + def __init__(self, options=[]): + """Create a new OptionDummy instance. The attributes listed in + 'options' will be initialized to None.""" + for opt in options: + setattr(self, opt, None) + + +if __name__ == "__main__": + text = """\ +Tra-la-la, supercalifragilisticexpialidocious. +How *do* you spell that odd word, anyways? +(Someone ask Mary -- she'll know [or she'll +say, "How should I know?"].)""" + + for w in (10, 20, 30, 40): + print("width: %d" % w) + print("\n".join(wrap_text(text, w))) + print() diff --git a/libs/common/setuptools/_distutils/file_util.py b/libs/common/setuptools/_distutils/file_util.py new file mode 100644 index 00000000..1b7cd53b --- /dev/null +++ b/libs/common/setuptools/_distutils/file_util.py @@ -0,0 +1,249 @@ +"""distutils.file_util + +Utility functions for operating on single files. +""" + +import os +from .errors import DistutilsFileError +from ._log import log + +# for generating verbose output in 'copy_file()' +_copy_action = {None: 'copying', 'hard': 'hard linking', 'sym': 'symbolically linking'} + + +def _copy_file_contents(src, dst, buffer_size=16 * 1024): # noqa: C901 + """Copy the file 'src' to 'dst'; both must be filenames. Any error + opening either file, reading from 'src', or writing to 'dst', raises + DistutilsFileError. Data is read/written in chunks of 'buffer_size' + bytes (default 16k). No attempt is made to handle anything apart from + regular files. + """ + # Stolen from shutil module in the standard library, but with + # custom error-handling added. + fsrc = None + fdst = None + try: + try: + fsrc = open(src, 'rb') + except OSError as e: + raise DistutilsFileError("could not open '{}': {}".format(src, e.strerror)) + + if os.path.exists(dst): + try: + os.unlink(dst) + except OSError as e: + raise DistutilsFileError( + "could not delete '{}': {}".format(dst, e.strerror) + ) + + try: + fdst = open(dst, 'wb') + except OSError as e: + raise DistutilsFileError( + "could not create '{}': {}".format(dst, e.strerror) + ) + + while True: + try: + buf = fsrc.read(buffer_size) + except OSError as e: + raise DistutilsFileError( + "could not read from '{}': {}".format(src, e.strerror) + ) + + if not buf: + break + + try: + fdst.write(buf) + except OSError as e: + raise DistutilsFileError( + "could not write to '{}': {}".format(dst, e.strerror) + ) + finally: + if fdst: + fdst.close() + if fsrc: + fsrc.close() + + +def copy_file( # noqa: C901 + src, + dst, + preserve_mode=1, + preserve_times=1, + update=0, + link=None, + verbose=1, + dry_run=0, +): + """Copy a file 'src' to 'dst'. If 'dst' is a directory, then 'src' is + copied there with the same name; otherwise, it must be a filename. (If + the file exists, it will be ruthlessly clobbered.) If 'preserve_mode' + is true (the default), the file's mode (type and permission bits, or + whatever is analogous on the current platform) is copied. If + 'preserve_times' is true (the default), the last-modified and + last-access times are copied as well. If 'update' is true, 'src' will + only be copied if 'dst' does not exist, or if 'dst' does exist but is + older than 'src'. + + 'link' allows you to make hard links (os.link) or symbolic links + (os.symlink) instead of copying: set it to "hard" or "sym"; if it is + None (the default), files are copied. Don't set 'link' on systems that + don't support it: 'copy_file()' doesn't check if hard or symbolic + linking is available. If hardlink fails, falls back to + _copy_file_contents(). + + Under Mac OS, uses the native file copy function in macostools; on + other systems, uses '_copy_file_contents()' to copy file contents. + + Return a tuple (dest_name, copied): 'dest_name' is the actual name of + the output file, and 'copied' is true if the file was copied (or would + have been copied, if 'dry_run' true). + """ + # XXX if the destination file already exists, we clobber it if + # copying, but blow up if linking. Hmmm. And I don't know what + # macostools.copyfile() does. Should definitely be consistent, and + # should probably blow up if destination exists and we would be + # changing it (ie. it's not already a hard/soft link to src OR + # (not update) and (src newer than dst). + + from distutils.dep_util import newer + from stat import ST_ATIME, ST_MTIME, ST_MODE, S_IMODE + + if not os.path.isfile(src): + raise DistutilsFileError( + "can't copy '%s': doesn't exist or not a regular file" % src + ) + + if os.path.isdir(dst): + dir = dst + dst = os.path.join(dst, os.path.basename(src)) + else: + dir = os.path.dirname(dst) + + if update and not newer(src, dst): + if verbose >= 1: + log.debug("not copying %s (output up-to-date)", src) + return (dst, 0) + + try: + action = _copy_action[link] + except KeyError: + raise ValueError("invalid value '%s' for 'link' argument" % link) + + if verbose >= 1: + if os.path.basename(dst) == os.path.basename(src): + log.info("%s %s -> %s", action, src, dir) + else: + log.info("%s %s -> %s", action, src, dst) + + if dry_run: + return (dst, 1) + + # If linking (hard or symbolic), use the appropriate system call + # (Unix only, of course, but that's the caller's responsibility) + elif link == 'hard': + if not (os.path.exists(dst) and os.path.samefile(src, dst)): + try: + os.link(src, dst) + return (dst, 1) + except OSError: + # If hard linking fails, fall back on copying file + # (some special filesystems don't support hard linking + # even under Unix, see issue #8876). + pass + elif link == 'sym': + if not (os.path.exists(dst) and os.path.samefile(src, dst)): + os.symlink(src, dst) + return (dst, 1) + + # Otherwise (non-Mac, not linking), copy the file contents and + # (optionally) copy the times and mode. + _copy_file_contents(src, dst) + if preserve_mode or preserve_times: + st = os.stat(src) + + # According to David Ascher , utime() should be done + # before chmod() (at least under NT). + if preserve_times: + os.utime(dst, (st[ST_ATIME], st[ST_MTIME])) + if preserve_mode: + os.chmod(dst, S_IMODE(st[ST_MODE])) + + return (dst, 1) + + +# XXX I suspect this is Unix-specific -- need porting help! +def move_file(src, dst, verbose=1, dry_run=0): # noqa: C901 + + """Move a file 'src' to 'dst'. If 'dst' is a directory, the file will + be moved into it with the same name; otherwise, 'src' is just renamed + to 'dst'. Return the new full name of the file. + + Handles cross-device moves on Unix using 'copy_file()'. What about + other systems??? + """ + from os.path import exists, isfile, isdir, basename, dirname + import errno + + if verbose >= 1: + log.info("moving %s -> %s", src, dst) + + if dry_run: + return dst + + if not isfile(src): + raise DistutilsFileError("can't move '%s': not a regular file" % src) + + if isdir(dst): + dst = os.path.join(dst, basename(src)) + elif exists(dst): + raise DistutilsFileError( + "can't move '{}': destination '{}' already exists".format(src, dst) + ) + + if not isdir(dirname(dst)): + raise DistutilsFileError( + "can't move '{}': destination '{}' not a valid path".format(src, dst) + ) + + copy_it = False + try: + os.rename(src, dst) + except OSError as e: + (num, msg) = e.args + if num == errno.EXDEV: + copy_it = True + else: + raise DistutilsFileError( + "couldn't move '{}' to '{}': {}".format(src, dst, msg) + ) + + if copy_it: + copy_file(src, dst, verbose=verbose) + try: + os.unlink(src) + except OSError as e: + (num, msg) = e.args + try: + os.unlink(dst) + except OSError: + pass + raise DistutilsFileError( + "couldn't move '%s' to '%s' by copy/delete: " + "delete '%s' failed: %s" % (src, dst, src, msg) + ) + return dst + + +def write_file(filename, contents): + """Create a file with the specified name and write 'contents' (a + sequence of strings without line terminators) to it. + """ + f = open(filename, "w") + try: + for line in contents: + f.write(line + "\n") + finally: + f.close() diff --git a/libs/common/setuptools/_distutils/filelist.py b/libs/common/setuptools/_distutils/filelist.py new file mode 100644 index 00000000..6dadf923 --- /dev/null +++ b/libs/common/setuptools/_distutils/filelist.py @@ -0,0 +1,371 @@ +"""distutils.filelist + +Provides the FileList class, used for poking about the filesystem +and building lists of files. +""" + +import os +import re +import fnmatch +import functools + +from .util import convert_path +from .errors import DistutilsTemplateError, DistutilsInternalError +from ._log import log + + +class FileList: + """A list of files built by on exploring the filesystem and filtered by + applying various patterns to what we find there. + + Instance attributes: + dir + directory from which files will be taken -- only used if + 'allfiles' not supplied to constructor + files + list of filenames currently being built/filtered/manipulated + allfiles + complete list of files under consideration (ie. without any + filtering applied) + """ + + def __init__(self, warn=None, debug_print=None): + # ignore argument to FileList, but keep them for backwards + # compatibility + self.allfiles = None + self.files = [] + + def set_allfiles(self, allfiles): + self.allfiles = allfiles + + def findall(self, dir=os.curdir): + self.allfiles = findall(dir) + + def debug_print(self, msg): + """Print 'msg' to stdout if the global DEBUG (taken from the + DISTUTILS_DEBUG environment variable) flag is true. + """ + from distutils.debug import DEBUG + + if DEBUG: + print(msg) + + # Collection methods + + def append(self, item): + self.files.append(item) + + def extend(self, items): + self.files.extend(items) + + def sort(self): + # Not a strict lexical sort! + sortable_files = sorted(map(os.path.split, self.files)) + self.files = [] + for sort_tuple in sortable_files: + self.files.append(os.path.join(*sort_tuple)) + + # Other miscellaneous utility methods + + def remove_duplicates(self): + # Assumes list has been sorted! + for i in range(len(self.files) - 1, 0, -1): + if self.files[i] == self.files[i - 1]: + del self.files[i] + + # "File template" methods + + def _parse_template_line(self, line): + words = line.split() + action = words[0] + + patterns = dir = dir_pattern = None + + if action in ('include', 'exclude', 'global-include', 'global-exclude'): + if len(words) < 2: + raise DistutilsTemplateError( + "'%s' expects ..." % action + ) + patterns = [convert_path(w) for w in words[1:]] + elif action in ('recursive-include', 'recursive-exclude'): + if len(words) < 3: + raise DistutilsTemplateError( + "'%s' expects ..." % action + ) + dir = convert_path(words[1]) + patterns = [convert_path(w) for w in words[2:]] + elif action in ('graft', 'prune'): + if len(words) != 2: + raise DistutilsTemplateError( + "'%s' expects a single " % action + ) + dir_pattern = convert_path(words[1]) + else: + raise DistutilsTemplateError("unknown action '%s'" % action) + + return (action, patterns, dir, dir_pattern) + + def process_template_line(self, line): # noqa: C901 + # Parse the line: split it up, make sure the right number of words + # is there, and return the relevant words. 'action' is always + # defined: it's the first word of the line. Which of the other + # three are defined depends on the action; it'll be either + # patterns, (dir and patterns), or (dir_pattern). + (action, patterns, dir, dir_pattern) = self._parse_template_line(line) + + # OK, now we know that the action is valid and we have the + # right number of words on the line for that action -- so we + # can proceed with minimal error-checking. + if action == 'include': + self.debug_print("include " + ' '.join(patterns)) + for pattern in patterns: + if not self.include_pattern(pattern, anchor=1): + log.warning("warning: no files found matching '%s'", pattern) + + elif action == 'exclude': + self.debug_print("exclude " + ' '.join(patterns)) + for pattern in patterns: + if not self.exclude_pattern(pattern, anchor=1): + log.warning( + ( + "warning: no previously-included files " + "found matching '%s'" + ), + pattern, + ) + + elif action == 'global-include': + self.debug_print("global-include " + ' '.join(patterns)) + for pattern in patterns: + if not self.include_pattern(pattern, anchor=0): + log.warning( + ( + "warning: no files found matching '%s' " + "anywhere in distribution" + ), + pattern, + ) + + elif action == 'global-exclude': + self.debug_print("global-exclude " + ' '.join(patterns)) + for pattern in patterns: + if not self.exclude_pattern(pattern, anchor=0): + log.warning( + ( + "warning: no previously-included files matching " + "'%s' found anywhere in distribution" + ), + pattern, + ) + + elif action == 'recursive-include': + self.debug_print("recursive-include {} {}".format(dir, ' '.join(patterns))) + for pattern in patterns: + if not self.include_pattern(pattern, prefix=dir): + msg = ( + "warning: no files found matching '%s' " "under directory '%s'" + ) + log.warning(msg, pattern, dir) + + elif action == 'recursive-exclude': + self.debug_print("recursive-exclude {} {}".format(dir, ' '.join(patterns))) + for pattern in patterns: + if not self.exclude_pattern(pattern, prefix=dir): + log.warning( + ( + "warning: no previously-included files matching " + "'%s' found under directory '%s'" + ), + pattern, + dir, + ) + + elif action == 'graft': + self.debug_print("graft " + dir_pattern) + if not self.include_pattern(None, prefix=dir_pattern): + log.warning("warning: no directories found matching '%s'", dir_pattern) + + elif action == 'prune': + self.debug_print("prune " + dir_pattern) + if not self.exclude_pattern(None, prefix=dir_pattern): + log.warning( + ("no previously-included directories found " "matching '%s'"), + dir_pattern, + ) + else: + raise DistutilsInternalError( + "this cannot happen: invalid action '%s'" % action + ) + + # Filtering/selection methods + + def include_pattern(self, pattern, anchor=1, prefix=None, is_regex=0): + """Select strings (presumably filenames) from 'self.files' that + match 'pattern', a Unix-style wildcard (glob) pattern. Patterns + are not quite the same as implemented by the 'fnmatch' module: '*' + and '?' match non-special characters, where "special" is platform- + dependent: slash on Unix; colon, slash, and backslash on + DOS/Windows; and colon on Mac OS. + + If 'anchor' is true (the default), then the pattern match is more + stringent: "*.py" will match "foo.py" but not "foo/bar.py". If + 'anchor' is false, both of these will match. + + If 'prefix' is supplied, then only filenames starting with 'prefix' + (itself a pattern) and ending with 'pattern', with anything in between + them, will match. 'anchor' is ignored in this case. + + If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and + 'pattern' is assumed to be either a string containing a regex or a + regex object -- no translation is done, the regex is just compiled + and used as-is. + + Selected strings will be added to self.files. + + Return True if files are found, False otherwise. + """ + # XXX docstring lying about what the special chars are? + files_found = False + pattern_re = translate_pattern(pattern, anchor, prefix, is_regex) + self.debug_print("include_pattern: applying regex r'%s'" % pattern_re.pattern) + + # delayed loading of allfiles list + if self.allfiles is None: + self.findall() + + for name in self.allfiles: + if pattern_re.search(name): + self.debug_print(" adding " + name) + self.files.append(name) + files_found = True + return files_found + + def exclude_pattern(self, pattern, anchor=1, prefix=None, is_regex=0): + """Remove strings (presumably filenames) from 'files' that match + 'pattern'. Other parameters are the same as for + 'include_pattern()', above. + The list 'self.files' is modified in place. + Return True if files are found, False otherwise. + """ + files_found = False + pattern_re = translate_pattern(pattern, anchor, prefix, is_regex) + self.debug_print("exclude_pattern: applying regex r'%s'" % pattern_re.pattern) + for i in range(len(self.files) - 1, -1, -1): + if pattern_re.search(self.files[i]): + self.debug_print(" removing " + self.files[i]) + del self.files[i] + files_found = True + return files_found + + +# Utility functions + + +def _find_all_simple(path): + """ + Find all files under 'path' + """ + all_unique = _UniqueDirs.filter(os.walk(path, followlinks=True)) + results = ( + os.path.join(base, file) for base, dirs, files in all_unique for file in files + ) + return filter(os.path.isfile, results) + + +class _UniqueDirs(set): + """ + Exclude previously-seen dirs from walk results, + avoiding infinite recursion. + Ref https://bugs.python.org/issue44497. + """ + + def __call__(self, walk_item): + """ + Given an item from an os.walk result, determine + if the item represents a unique dir for this instance + and if not, prevent further traversal. + """ + base, dirs, files = walk_item + stat = os.stat(base) + candidate = stat.st_dev, stat.st_ino + found = candidate in self + if found: + del dirs[:] + self.add(candidate) + return not found + + @classmethod + def filter(cls, items): + return filter(cls(), items) + + +def findall(dir=os.curdir): + """ + Find all files under 'dir' and return the list of full filenames. + Unless dir is '.', return full filenames with dir prepended. + """ + files = _find_all_simple(dir) + if dir == os.curdir: + make_rel = functools.partial(os.path.relpath, start=dir) + files = map(make_rel, files) + return list(files) + + +def glob_to_re(pattern): + """Translate a shell-like glob pattern to a regular expression; return + a string containing the regex. Differs from 'fnmatch.translate()' in + that '*' does not match "special characters" (which are + platform-specific). + """ + pattern_re = fnmatch.translate(pattern) + + # '?' and '*' in the glob pattern become '.' and '.*' in the RE, which + # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix, + # and by extension they shouldn't match such "special characters" under + # any OS. So change all non-escaped dots in the RE to match any + # character except the special characters (currently: just os.sep). + sep = os.sep + if os.sep == '\\': + # we're using a regex to manipulate a regex, so we need + # to escape the backslash twice + sep = r'\\\\' + escaped = r'\1[^%s]' % sep + pattern_re = re.sub(r'((?= 2: + set_threshold(logging.DEBUG) + + +class Log(logging.Logger): + """distutils.log.Log is deprecated, please use an alternative from `logging`.""" + + def __init__(self, threshold=WARN): + warnings.warn(Log.__doc__) # avoid DeprecationWarning to ensure warn is shown + super().__init__(__name__, level=threshold) + + @property + def threshold(self): + return self.level + + @threshold.setter + def threshold(self, level): + self.setLevel(level) + + warn = logging.Logger.warning diff --git a/libs/common/setuptools/_distutils/msvc9compiler.py b/libs/common/setuptools/_distutils/msvc9compiler.py new file mode 100644 index 00000000..a4714a55 --- /dev/null +++ b/libs/common/setuptools/_distutils/msvc9compiler.py @@ -0,0 +1,832 @@ +"""distutils.msvc9compiler + +Contains MSVCCompiler, an implementation of the abstract CCompiler class +for the Microsoft Visual Studio 2008. + +The module is compatible with VS 2005 and VS 2008. You can find legacy support +for older versions of VS in distutils.msvccompiler. +""" + +# Written by Perry Stoll +# hacked by Robin Becker and Thomas Heller to do a better job of +# finding DevStudio (through the registry) +# ported to VS2005 and VS 2008 by Christian Heimes + +import os +import subprocess +import sys +import re +import warnings + +from .errors import ( + DistutilsExecError, + DistutilsPlatformError, + CompileError, + LibError, + LinkError, +) +from .ccompiler import CCompiler, gen_lib_options +from ._log import log +from .util import get_platform + +import winreg + +warnings.warn( + "msvc9compiler is deprecated and slated to be removed " + "in the future. Please discontinue use or file an issue " + "with pypa/distutils describing your use case.", + DeprecationWarning, +) + +RegOpenKeyEx = winreg.OpenKeyEx +RegEnumKey = winreg.EnumKey +RegEnumValue = winreg.EnumValue +RegError = winreg.error + +HKEYS = ( + winreg.HKEY_USERS, + winreg.HKEY_CURRENT_USER, + winreg.HKEY_LOCAL_MACHINE, + winreg.HKEY_CLASSES_ROOT, +) + +NATIVE_WIN64 = sys.platform == 'win32' and sys.maxsize > 2**32 +if NATIVE_WIN64: + # Visual C++ is a 32-bit application, so we need to look in + # the corresponding registry branch, if we're running a + # 64-bit Python on Win64 + VS_BASE = r"Software\Wow6432Node\Microsoft\VisualStudio\%0.1f" + WINSDK_BASE = r"Software\Wow6432Node\Microsoft\Microsoft SDKs\Windows" + NET_BASE = r"Software\Wow6432Node\Microsoft\.NETFramework" +else: + VS_BASE = r"Software\Microsoft\VisualStudio\%0.1f" + WINSDK_BASE = r"Software\Microsoft\Microsoft SDKs\Windows" + NET_BASE = r"Software\Microsoft\.NETFramework" + +# A map keyed by get_platform() return values to values accepted by +# 'vcvarsall.bat'. Note a cross-compile may combine these (eg, 'x86_amd64' is +# the param to cross-compile on x86 targeting amd64.) +PLAT_TO_VCVARS = { + 'win32': 'x86', + 'win-amd64': 'amd64', +} + + +class Reg: + """Helper class to read values from the registry""" + + def get_value(cls, path, key): + for base in HKEYS: + d = cls.read_values(base, path) + if d and key in d: + return d[key] + raise KeyError(key) + + get_value = classmethod(get_value) + + def read_keys(cls, base, key): + """Return list of registry keys.""" + try: + handle = RegOpenKeyEx(base, key) + except RegError: + return None + L = [] + i = 0 + while True: + try: + k = RegEnumKey(handle, i) + except RegError: + break + L.append(k) + i += 1 + return L + + read_keys = classmethod(read_keys) + + def read_values(cls, base, key): + """Return dict of registry keys and values. + + All names are converted to lowercase. + """ + try: + handle = RegOpenKeyEx(base, key) + except RegError: + return None + d = {} + i = 0 + while True: + try: + name, value, type = RegEnumValue(handle, i) + except RegError: + break + name = name.lower() + d[cls.convert_mbcs(name)] = cls.convert_mbcs(value) + i += 1 + return d + + read_values = classmethod(read_values) + + def convert_mbcs(s): + dec = getattr(s, "decode", None) + if dec is not None: + try: + s = dec("mbcs") + except UnicodeError: + pass + return s + + convert_mbcs = staticmethod(convert_mbcs) + + +class MacroExpander: + def __init__(self, version): + self.macros = {} + self.vsbase = VS_BASE % version + self.load_macros(version) + + def set_macro(self, macro, path, key): + self.macros["$(%s)" % macro] = Reg.get_value(path, key) + + def load_macros(self, version): + self.set_macro("VCInstallDir", self.vsbase + r"\Setup\VC", "productdir") + self.set_macro("VSInstallDir", self.vsbase + r"\Setup\VS", "productdir") + self.set_macro("FrameworkDir", NET_BASE, "installroot") + try: + if version >= 8.0: + self.set_macro("FrameworkSDKDir", NET_BASE, "sdkinstallrootv2.0") + else: + raise KeyError("sdkinstallrootv2.0") + except KeyError: + raise DistutilsPlatformError( + """Python was built with Visual Studio 2008; +extensions must be built with a compiler than can generate compatible binaries. +Visual Studio 2008 was not found on this system. If you have Cygwin installed, +you can try compiling with MingW32, by passing "-c mingw32" to setup.py.""" + ) + + if version >= 9.0: + self.set_macro("FrameworkVersion", self.vsbase, "clr version") + self.set_macro("WindowsSdkDir", WINSDK_BASE, "currentinstallfolder") + else: + p = r"Software\Microsoft\NET Framework Setup\Product" + for base in HKEYS: + try: + h = RegOpenKeyEx(base, p) + except RegError: + continue + key = RegEnumKey(h, 0) + d = Reg.get_value(base, r"{}\{}".format(p, key)) + self.macros["$(FrameworkVersion)"] = d["version"] + + def sub(self, s): + for k, v in self.macros.items(): + s = s.replace(k, v) + return s + + +def get_build_version(): + """Return the version of MSVC that was used to build Python. + + For Python 2.3 and up, the version number is included in + sys.version. For earlier versions, assume the compiler is MSVC 6. + """ + prefix = "MSC v." + i = sys.version.find(prefix) + if i == -1: + return 6 + i = i + len(prefix) + s, rest = sys.version[i:].split(" ", 1) + majorVersion = int(s[:-2]) - 6 + if majorVersion >= 13: + # v13 was skipped and should be v14 + majorVersion += 1 + minorVersion = int(s[2:3]) / 10.0 + # I don't think paths are affected by minor version in version 6 + if majorVersion == 6: + minorVersion = 0 + if majorVersion >= 6: + return majorVersion + minorVersion + # else we don't know what version of the compiler this is + return None + + +def normalize_and_reduce_paths(paths): + """Return a list of normalized paths with duplicates removed. + + The current order of paths is maintained. + """ + # Paths are normalized so things like: /a and /a/ aren't both preserved. + reduced_paths = [] + for p in paths: + np = os.path.normpath(p) + # XXX(nnorwitz): O(n**2), if reduced_paths gets long perhaps use a set. + if np not in reduced_paths: + reduced_paths.append(np) + return reduced_paths + + +def removeDuplicates(variable): + """Remove duplicate values of an environment variable.""" + oldList = variable.split(os.pathsep) + newList = [] + for i in oldList: + if i not in newList: + newList.append(i) + newVariable = os.pathsep.join(newList) + return newVariable + + +def find_vcvarsall(version): + """Find the vcvarsall.bat file + + At first it tries to find the productdir of VS 2008 in the registry. If + that fails it falls back to the VS90COMNTOOLS env var. + """ + vsbase = VS_BASE % version + try: + productdir = Reg.get_value(r"%s\Setup\VC" % vsbase, "productdir") + except KeyError: + log.debug("Unable to find productdir in registry") + productdir = None + + if not productdir or not os.path.isdir(productdir): + toolskey = "VS%0.f0COMNTOOLS" % version + toolsdir = os.environ.get(toolskey, None) + + if toolsdir and os.path.isdir(toolsdir): + productdir = os.path.join(toolsdir, os.pardir, os.pardir, "VC") + productdir = os.path.abspath(productdir) + if not os.path.isdir(productdir): + log.debug("%s is not a valid directory" % productdir) + return None + else: + log.debug("Env var %s is not set or invalid" % toolskey) + if not productdir: + log.debug("No productdir found") + return None + vcvarsall = os.path.join(productdir, "vcvarsall.bat") + if os.path.isfile(vcvarsall): + return vcvarsall + log.debug("Unable to find vcvarsall.bat") + return None + + +def query_vcvarsall(version, arch="x86"): + """Launch vcvarsall.bat and read the settings from its environment""" + vcvarsall = find_vcvarsall(version) + interesting = {"include", "lib", "libpath", "path"} + result = {} + + if vcvarsall is None: + raise DistutilsPlatformError("Unable to find vcvarsall.bat") + log.debug("Calling 'vcvarsall.bat %s' (version=%s)", arch, version) + popen = subprocess.Popen( + '"{}" {} & set'.format(vcvarsall, arch), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + try: + stdout, stderr = popen.communicate() + if popen.wait() != 0: + raise DistutilsPlatformError(stderr.decode("mbcs")) + + stdout = stdout.decode("mbcs") + for line in stdout.split("\n"): + line = Reg.convert_mbcs(line) + if '=' not in line: + continue + line = line.strip() + key, value = line.split('=', 1) + key = key.lower() + if key in interesting: + if value.endswith(os.pathsep): + value = value[:-1] + result[key] = removeDuplicates(value) + + finally: + popen.stdout.close() + popen.stderr.close() + + if len(result) != len(interesting): + raise ValueError(str(list(result.keys()))) + + return result + + +# More globals +VERSION = get_build_version() +# MACROS = MacroExpander(VERSION) + + +class MSVCCompiler(CCompiler): + """Concrete class that implements an interface to Microsoft Visual C++, + as defined by the CCompiler abstract class.""" + + compiler_type = 'msvc' + + # Just set this so CCompiler's constructor doesn't barf. We currently + # don't use the 'set_executables()' bureaucracy provided by CCompiler, + # as it really isn't necessary for this sort of single-compiler class. + # Would be nice to have a consistent interface with UnixCCompiler, + # though, so it's worth thinking about. + executables = {} + + # Private class data (need to distinguish C from C++ source for compiler) + _c_extensions = ['.c'] + _cpp_extensions = ['.cc', '.cpp', '.cxx'] + _rc_extensions = ['.rc'] + _mc_extensions = ['.mc'] + + # Needed for the filename generation methods provided by the + # base class, CCompiler. + src_extensions = _c_extensions + _cpp_extensions + _rc_extensions + _mc_extensions + res_extension = '.res' + obj_extension = '.obj' + static_lib_extension = '.lib' + shared_lib_extension = '.dll' + static_lib_format = shared_lib_format = '%s%s' + exe_extension = '.exe' + + def __init__(self, verbose=0, dry_run=0, force=0): + super().__init__(verbose, dry_run, force) + self.__version = VERSION + self.__root = r"Software\Microsoft\VisualStudio" + # self.__macros = MACROS + self.__paths = [] + # target platform (.plat_name is consistent with 'bdist') + self.plat_name = None + self.__arch = None # deprecated name + self.initialized = False + + def initialize(self, plat_name=None): # noqa: C901 + # multi-init means we would need to check platform same each time... + assert not self.initialized, "don't init multiple times" + if self.__version < 8.0: + raise DistutilsPlatformError( + "VC %0.1f is not supported by this module" % self.__version + ) + if plat_name is None: + plat_name = get_platform() + # sanity check for platforms to prevent obscure errors later. + ok_plats = 'win32', 'win-amd64' + if plat_name not in ok_plats: + raise DistutilsPlatformError( + "--plat-name must be one of {}".format(ok_plats) + ) + + if ( + "DISTUTILS_USE_SDK" in os.environ + and "MSSdk" in os.environ + and self.find_exe("cl.exe") + ): + # Assume that the SDK set up everything alright; don't try to be + # smarter + self.cc = "cl.exe" + self.linker = "link.exe" + self.lib = "lib.exe" + self.rc = "rc.exe" + self.mc = "mc.exe" + else: + # On x86, 'vcvars32.bat amd64' creates an env that doesn't work; + # to cross compile, you use 'x86_amd64'. + # On AMD64, 'vcvars32.bat amd64' is a native build env; to cross + # compile use 'x86' (ie, it runs the x86 compiler directly) + if plat_name == get_platform() or plat_name == 'win32': + # native build or cross-compile to win32 + plat_spec = PLAT_TO_VCVARS[plat_name] + else: + # cross compile from win32 -> some 64bit + plat_spec = ( + PLAT_TO_VCVARS[get_platform()] + '_' + PLAT_TO_VCVARS[plat_name] + ) + + vc_env = query_vcvarsall(VERSION, plat_spec) + + self.__paths = vc_env['path'].split(os.pathsep) + os.environ['lib'] = vc_env['lib'] + os.environ['include'] = vc_env['include'] + + if len(self.__paths) == 0: + raise DistutilsPlatformError( + "Python was built with %s, " + "and extensions need to be built with the same " + "version of the compiler, but it isn't installed." % self.__product + ) + + self.cc = self.find_exe("cl.exe") + self.linker = self.find_exe("link.exe") + self.lib = self.find_exe("lib.exe") + self.rc = self.find_exe("rc.exe") # resource compiler + self.mc = self.find_exe("mc.exe") # message compiler + # self.set_path_env_var('lib') + # self.set_path_env_var('include') + + # extend the MSVC path with the current path + try: + for p in os.environ['path'].split(';'): + self.__paths.append(p) + except KeyError: + pass + self.__paths = normalize_and_reduce_paths(self.__paths) + os.environ['path'] = ";".join(self.__paths) + + self.preprocess_options = None + if self.__arch == "x86": + self.compile_options = ['/nologo', '/O2', '/MD', '/W3', '/DNDEBUG'] + self.compile_options_debug = [ + '/nologo', + '/Od', + '/MDd', + '/W3', + '/Z7', + '/D_DEBUG', + ] + else: + # Win64 + self.compile_options = ['/nologo', '/O2', '/MD', '/W3', '/GS-', '/DNDEBUG'] + self.compile_options_debug = [ + '/nologo', + '/Od', + '/MDd', + '/W3', + '/GS-', + '/Z7', + '/D_DEBUG', + ] + + self.ldflags_shared = ['/DLL', '/nologo', '/INCREMENTAL:NO'] + if self.__version >= 7: + self.ldflags_shared_debug = ['/DLL', '/nologo', '/INCREMENTAL:no', '/DEBUG'] + self.ldflags_static = ['/nologo'] + + self.initialized = True + + # -- Worker methods ------------------------------------------------ + + def object_filenames(self, source_filenames, strip_dir=0, output_dir=''): + # Copied from ccompiler.py, extended to return .res as 'object'-file + # for .rc input file + if output_dir is None: + output_dir = '' + obj_names = [] + for src_name in source_filenames: + (base, ext) = os.path.splitext(src_name) + base = os.path.splitdrive(base)[1] # Chop off the drive + base = base[os.path.isabs(base) :] # If abs, chop off leading / + if ext not in self.src_extensions: + # Better to raise an exception instead of silently continuing + # and later complain about sources and targets having + # different lengths + raise CompileError("Don't know how to compile %s" % src_name) + if strip_dir: + base = os.path.basename(base) + if ext in self._rc_extensions: + obj_names.append(os.path.join(output_dir, base + self.res_extension)) + elif ext in self._mc_extensions: + obj_names.append(os.path.join(output_dir, base + self.res_extension)) + else: + obj_names.append(os.path.join(output_dir, base + self.obj_extension)) + return obj_names + + def compile( # noqa: C901 + self, + sources, + output_dir=None, + macros=None, + include_dirs=None, + debug=0, + extra_preargs=None, + extra_postargs=None, + depends=None, + ): + + if not self.initialized: + self.initialize() + compile_info = self._setup_compile( + output_dir, macros, include_dirs, sources, depends, extra_postargs + ) + macros, objects, extra_postargs, pp_opts, build = compile_info + + compile_opts = extra_preargs or [] + compile_opts.append('/c') + if debug: + compile_opts.extend(self.compile_options_debug) + else: + compile_opts.extend(self.compile_options) + + for obj in objects: + try: + src, ext = build[obj] + except KeyError: + continue + if debug: + # pass the full pathname to MSVC in debug mode, + # this allows the debugger to find the source file + # without asking the user to browse for it + src = os.path.abspath(src) + + if ext in self._c_extensions: + input_opt = "/Tc" + src + elif ext in self._cpp_extensions: + input_opt = "/Tp" + src + elif ext in self._rc_extensions: + # compile .RC to .RES file + input_opt = src + output_opt = "/fo" + obj + try: + self.spawn([self.rc] + pp_opts + [output_opt] + [input_opt]) + except DistutilsExecError as msg: + raise CompileError(msg) + continue + elif ext in self._mc_extensions: + # Compile .MC to .RC file to .RES file. + # * '-h dir' specifies the directory for the + # generated include file + # * '-r dir' specifies the target directory of the + # generated RC file and the binary message resource + # it includes + # + # For now (since there are no options to change this), + # we use the source-directory for the include file and + # the build directory for the RC file and message + # resources. This works at least for win32all. + h_dir = os.path.dirname(src) + rc_dir = os.path.dirname(obj) + try: + # first compile .MC to .RC and .H file + self.spawn([self.mc] + ['-h', h_dir, '-r', rc_dir] + [src]) + base, _ = os.path.splitext(os.path.basename(src)) + rc_file = os.path.join(rc_dir, base + '.rc') + # then compile .RC to .RES file + self.spawn([self.rc] + ["/fo" + obj] + [rc_file]) + + except DistutilsExecError as msg: + raise CompileError(msg) + continue + else: + # how to handle this file? + raise CompileError( + "Don't know how to compile {} to {}".format(src, obj) + ) + + output_opt = "/Fo" + obj + try: + self.spawn( + [self.cc] + + compile_opts + + pp_opts + + [input_opt, output_opt] + + extra_postargs + ) + except DistutilsExecError as msg: + raise CompileError(msg) + + return objects + + def create_static_lib( + self, objects, output_libname, output_dir=None, debug=0, target_lang=None + ): + + if not self.initialized: + self.initialize() + (objects, output_dir) = self._fix_object_args(objects, output_dir) + output_filename = self.library_filename(output_libname, output_dir=output_dir) + + if self._need_link(objects, output_filename): + lib_args = objects + ['/OUT:' + output_filename] + if debug: + pass # XXX what goes here? + try: + self.spawn([self.lib] + lib_args) + except DistutilsExecError as msg: + raise LibError(msg) + else: + log.debug("skipping %s (up-to-date)", output_filename) + + def link( # noqa: C901 + self, + target_desc, + objects, + output_filename, + output_dir=None, + libraries=None, + library_dirs=None, + runtime_library_dirs=None, + export_symbols=None, + debug=0, + extra_preargs=None, + extra_postargs=None, + build_temp=None, + target_lang=None, + ): + + if not self.initialized: + self.initialize() + (objects, output_dir) = self._fix_object_args(objects, output_dir) + fixed_args = self._fix_lib_args(libraries, library_dirs, runtime_library_dirs) + (libraries, library_dirs, runtime_library_dirs) = fixed_args + + if runtime_library_dirs: + self.warn( + "I don't know what to do with 'runtime_library_dirs': " + + str(runtime_library_dirs) + ) + + lib_opts = gen_lib_options(self, library_dirs, runtime_library_dirs, libraries) + if output_dir is not None: + output_filename = os.path.join(output_dir, output_filename) + + if self._need_link(objects, output_filename): + if target_desc == CCompiler.EXECUTABLE: + if debug: + ldflags = self.ldflags_shared_debug[1:] + else: + ldflags = self.ldflags_shared[1:] + else: + if debug: + ldflags = self.ldflags_shared_debug + else: + ldflags = self.ldflags_shared + + export_opts = [] + for sym in export_symbols or []: + export_opts.append("/EXPORT:" + sym) + + ld_args = ( + ldflags + lib_opts + export_opts + objects + ['/OUT:' + output_filename] + ) + + # The MSVC linker generates .lib and .exp files, which cannot be + # suppressed by any linker switches. The .lib files may even be + # needed! Make sure they are generated in the temporary build + # directory. Since they have different names for debug and release + # builds, they can go into the same directory. + build_temp = os.path.dirname(objects[0]) + if export_symbols is not None: + (dll_name, dll_ext) = os.path.splitext( + os.path.basename(output_filename) + ) + implib_file = os.path.join(build_temp, self.library_filename(dll_name)) + ld_args.append('/IMPLIB:' + implib_file) + + self.manifest_setup_ldargs(output_filename, build_temp, ld_args) + + if extra_preargs: + ld_args[:0] = extra_preargs + if extra_postargs: + ld_args.extend(extra_postargs) + + self.mkpath(os.path.dirname(output_filename)) + try: + self.spawn([self.linker] + ld_args) + except DistutilsExecError as msg: + raise LinkError(msg) + + # embed the manifest + # XXX - this is somewhat fragile - if mt.exe fails, distutils + # will still consider the DLL up-to-date, but it will not have a + # manifest. Maybe we should link to a temp file? OTOH, that + # implies a build environment error that shouldn't go undetected. + mfinfo = self.manifest_get_embed_info(target_desc, ld_args) + if mfinfo is not None: + mffilename, mfid = mfinfo + out_arg = '-outputresource:{};{}'.format(output_filename, mfid) + try: + self.spawn(['mt.exe', '-nologo', '-manifest', mffilename, out_arg]) + except DistutilsExecError as msg: + raise LinkError(msg) + else: + log.debug("skipping %s (up-to-date)", output_filename) + + def manifest_setup_ldargs(self, output_filename, build_temp, ld_args): + # If we need a manifest at all, an embedded manifest is recommended. + # See MSDN article titled + # "How to: Embed a Manifest Inside a C/C++ Application" + # (currently at http://msdn2.microsoft.com/en-us/library/ms235591(VS.80).aspx) + # Ask the linker to generate the manifest in the temp dir, so + # we can check it, and possibly embed it, later. + temp_manifest = os.path.join( + build_temp, os.path.basename(output_filename) + ".manifest" + ) + ld_args.append('/MANIFESTFILE:' + temp_manifest) + + def manifest_get_embed_info(self, target_desc, ld_args): + # If a manifest should be embedded, return a tuple of + # (manifest_filename, resource_id). Returns None if no manifest + # should be embedded. See http://bugs.python.org/issue7833 for why + # we want to avoid any manifest for extension modules if we can) + for arg in ld_args: + if arg.startswith("/MANIFESTFILE:"): + temp_manifest = arg.split(":", 1)[1] + break + else: + # no /MANIFESTFILE so nothing to do. + return None + if target_desc == CCompiler.EXECUTABLE: + # by default, executables always get the manifest with the + # CRT referenced. + mfid = 1 + else: + # Extension modules try and avoid any manifest if possible. + mfid = 2 + temp_manifest = self._remove_visual_c_ref(temp_manifest) + if temp_manifest is None: + return None + return temp_manifest, mfid + + def _remove_visual_c_ref(self, manifest_file): + try: + # Remove references to the Visual C runtime, so they will + # fall through to the Visual C dependency of Python.exe. + # This way, when installed for a restricted user (e.g. + # runtimes are not in WinSxS folder, but in Python's own + # folder), the runtimes do not need to be in every folder + # with .pyd's. + # Returns either the filename of the modified manifest or + # None if no manifest should be embedded. + manifest_f = open(manifest_file) + try: + manifest_buf = manifest_f.read() + finally: + manifest_f.close() + pattern = re.compile( + r"""|)""", + re.DOTALL, + ) + manifest_buf = re.sub(pattern, "", manifest_buf) + pattern = r"\s*" + manifest_buf = re.sub(pattern, "", manifest_buf) + # Now see if any other assemblies are referenced - if not, we + # don't want a manifest embedded. + pattern = re.compile( + r"""|)""", + re.DOTALL, + ) + if re.search(pattern, manifest_buf) is None: + return None + + manifest_f = open(manifest_file, 'w') + try: + manifest_f.write(manifest_buf) + return manifest_file + finally: + manifest_f.close() + except OSError: + pass + + # -- Miscellaneous methods ----------------------------------------- + # These are all used by the 'gen_lib_options() function, in + # ccompiler.py. + + def library_dir_option(self, dir): + return "/LIBPATH:" + dir + + def runtime_library_dir_option(self, dir): + raise DistutilsPlatformError( + "don't know how to set runtime library search path for MSVC++" + ) + + def library_option(self, lib): + return self.library_filename(lib) + + def find_library_file(self, dirs, lib, debug=0): + # Prefer a debugging library if found (and requested), but deal + # with it if we don't have one. + if debug: + try_names = [lib + "_d", lib] + else: + try_names = [lib] + for dir in dirs: + for name in try_names: + libfile = os.path.join(dir, self.library_filename(name)) + if os.path.exists(libfile): + return libfile + else: + # Oops, didn't find it in *any* of 'dirs' + return None + + # Helper methods for using the MSVC registry settings + + def find_exe(self, exe): + """Return path to an MSVC executable program. + + Tries to find the program in several places: first, one of the + MSVC program search paths from the registry; next, the directories + in the PATH environment variable. If any of those work, return an + absolute path that is known to exist. If none of them work, just + return the original program name, 'exe'. + """ + for p in self.__paths: + fn = os.path.join(os.path.abspath(p), exe) + if os.path.isfile(fn): + return fn + + # didn't find it; try existing path + for p in os.environ['Path'].split(';'): + fn = os.path.join(os.path.abspath(p), exe) + if os.path.isfile(fn): + return fn + + return exe diff --git a/libs/common/setuptools/_distutils/msvccompiler.py b/libs/common/setuptools/_distutils/msvccompiler.py new file mode 100644 index 00000000..59ebe99c --- /dev/null +++ b/libs/common/setuptools/_distutils/msvccompiler.py @@ -0,0 +1,695 @@ +"""distutils.msvccompiler + +Contains MSVCCompiler, an implementation of the abstract CCompiler class +for the Microsoft Visual Studio. +""" + +# Written by Perry Stoll +# hacked by Robin Becker and Thomas Heller to do a better job of +# finding DevStudio (through the registry) + +import sys +import os +import warnings +from .errors import ( + DistutilsExecError, + DistutilsPlatformError, + CompileError, + LibError, + LinkError, +) +from .ccompiler import CCompiler, gen_lib_options +from ._log import log + +_can_read_reg = False +try: + import winreg + + _can_read_reg = True + hkey_mod = winreg + + RegOpenKeyEx = winreg.OpenKeyEx + RegEnumKey = winreg.EnumKey + RegEnumValue = winreg.EnumValue + RegError = winreg.error + +except ImportError: + try: + import win32api + import win32con + + _can_read_reg = True + hkey_mod = win32con + + RegOpenKeyEx = win32api.RegOpenKeyEx + RegEnumKey = win32api.RegEnumKey + RegEnumValue = win32api.RegEnumValue + RegError = win32api.error + except ImportError: + log.info( + "Warning: Can't read registry to find the " + "necessary compiler setting\n" + "Make sure that Python modules winreg, " + "win32api or win32con are installed." + ) + pass + +if _can_read_reg: + HKEYS = ( + hkey_mod.HKEY_USERS, + hkey_mod.HKEY_CURRENT_USER, + hkey_mod.HKEY_LOCAL_MACHINE, + hkey_mod.HKEY_CLASSES_ROOT, + ) + + +warnings.warn( + "msvccompiler is deprecated and slated to be removed " + "in the future. Please discontinue use or file an issue " + "with pypa/distutils describing your use case.", + DeprecationWarning, +) + + +def read_keys(base, key): + """Return list of registry keys.""" + try: + handle = RegOpenKeyEx(base, key) + except RegError: + return None + L = [] + i = 0 + while True: + try: + k = RegEnumKey(handle, i) + except RegError: + break + L.append(k) + i += 1 + return L + + +def read_values(base, key): + """Return dict of registry keys and values. + + All names are converted to lowercase. + """ + try: + handle = RegOpenKeyEx(base, key) + except RegError: + return None + d = {} + i = 0 + while True: + try: + name, value, type = RegEnumValue(handle, i) + except RegError: + break + name = name.lower() + d[convert_mbcs(name)] = convert_mbcs(value) + i += 1 + return d + + +def convert_mbcs(s): + dec = getattr(s, "decode", None) + if dec is not None: + try: + s = dec("mbcs") + except UnicodeError: + pass + return s + + +class MacroExpander: + def __init__(self, version): + self.macros = {} + self.load_macros(version) + + def set_macro(self, macro, path, key): + for base in HKEYS: + d = read_values(base, path) + if d: + self.macros["$(%s)" % macro] = d[key] + break + + def load_macros(self, version): + vsbase = r"Software\Microsoft\VisualStudio\%0.1f" % version + self.set_macro("VCInstallDir", vsbase + r"\Setup\VC", "productdir") + self.set_macro("VSInstallDir", vsbase + r"\Setup\VS", "productdir") + net = r"Software\Microsoft\.NETFramework" + self.set_macro("FrameworkDir", net, "installroot") + try: + if version > 7.0: + self.set_macro("FrameworkSDKDir", net, "sdkinstallrootv1.1") + else: + self.set_macro("FrameworkSDKDir", net, "sdkinstallroot") + except KeyError: + raise DistutilsPlatformError( + """Python was built with Visual Studio 2003; +extensions must be built with a compiler than can generate compatible binaries. +Visual Studio 2003 was not found on this system. If you have Cygwin installed, +you can try compiling with MingW32, by passing "-c mingw32" to setup.py.""" + ) + + p = r"Software\Microsoft\NET Framework Setup\Product" + for base in HKEYS: + try: + h = RegOpenKeyEx(base, p) + except RegError: + continue + key = RegEnumKey(h, 0) + d = read_values(base, r"{}\{}".format(p, key)) + self.macros["$(FrameworkVersion)"] = d["version"] + + def sub(self, s): + for k, v in self.macros.items(): + s = s.replace(k, v) + return s + + +def get_build_version(): + """Return the version of MSVC that was used to build Python. + + For Python 2.3 and up, the version number is included in + sys.version. For earlier versions, assume the compiler is MSVC 6. + """ + prefix = "MSC v." + i = sys.version.find(prefix) + if i == -1: + return 6 + i = i + len(prefix) + s, rest = sys.version[i:].split(" ", 1) + majorVersion = int(s[:-2]) - 6 + if majorVersion >= 13: + # v13 was skipped and should be v14 + majorVersion += 1 + minorVersion = int(s[2:3]) / 10.0 + # I don't think paths are affected by minor version in version 6 + if majorVersion == 6: + minorVersion = 0 + if majorVersion >= 6: + return majorVersion + minorVersion + # else we don't know what version of the compiler this is + return None + + +def get_build_architecture(): + """Return the processor architecture. + + Possible results are "Intel" or "AMD64". + """ + + prefix = " bit (" + i = sys.version.find(prefix) + if i == -1: + return "Intel" + j = sys.version.find(")", i) + return sys.version[i + len(prefix) : j] + + +def normalize_and_reduce_paths(paths): + """Return a list of normalized paths with duplicates removed. + + The current order of paths is maintained. + """ + # Paths are normalized so things like: /a and /a/ aren't both preserved. + reduced_paths = [] + for p in paths: + np = os.path.normpath(p) + # XXX(nnorwitz): O(n**2), if reduced_paths gets long perhaps use a set. + if np not in reduced_paths: + reduced_paths.append(np) + return reduced_paths + + +class MSVCCompiler(CCompiler): + """Concrete class that implements an interface to Microsoft Visual C++, + as defined by the CCompiler abstract class.""" + + compiler_type = 'msvc' + + # Just set this so CCompiler's constructor doesn't barf. We currently + # don't use the 'set_executables()' bureaucracy provided by CCompiler, + # as it really isn't necessary for this sort of single-compiler class. + # Would be nice to have a consistent interface with UnixCCompiler, + # though, so it's worth thinking about. + executables = {} + + # Private class data (need to distinguish C from C++ source for compiler) + _c_extensions = ['.c'] + _cpp_extensions = ['.cc', '.cpp', '.cxx'] + _rc_extensions = ['.rc'] + _mc_extensions = ['.mc'] + + # Needed for the filename generation methods provided by the + # base class, CCompiler. + src_extensions = _c_extensions + _cpp_extensions + _rc_extensions + _mc_extensions + res_extension = '.res' + obj_extension = '.obj' + static_lib_extension = '.lib' + shared_lib_extension = '.dll' + static_lib_format = shared_lib_format = '%s%s' + exe_extension = '.exe' + + def __init__(self, verbose=0, dry_run=0, force=0): + super().__init__(verbose, dry_run, force) + self.__version = get_build_version() + self.__arch = get_build_architecture() + if self.__arch == "Intel": + # x86 + if self.__version >= 7: + self.__root = r"Software\Microsoft\VisualStudio" + self.__macros = MacroExpander(self.__version) + else: + self.__root = r"Software\Microsoft\Devstudio" + self.__product = "Visual Studio version %s" % self.__version + else: + # Win64. Assume this was built with the platform SDK + self.__product = "Microsoft SDK compiler %s" % (self.__version + 6) + + self.initialized = False + + def initialize(self): + self.__paths = [] + if ( + "DISTUTILS_USE_SDK" in os.environ + and "MSSdk" in os.environ + and self.find_exe("cl.exe") + ): + # Assume that the SDK set up everything alright; don't try to be + # smarter + self.cc = "cl.exe" + self.linker = "link.exe" + self.lib = "lib.exe" + self.rc = "rc.exe" + self.mc = "mc.exe" + else: + self.__paths = self.get_msvc_paths("path") + + if len(self.__paths) == 0: + raise DistutilsPlatformError( + "Python was built with %s, " + "and extensions need to be built with the same " + "version of the compiler, but it isn't installed." % self.__product + ) + + self.cc = self.find_exe("cl.exe") + self.linker = self.find_exe("link.exe") + self.lib = self.find_exe("lib.exe") + self.rc = self.find_exe("rc.exe") # resource compiler + self.mc = self.find_exe("mc.exe") # message compiler + self.set_path_env_var('lib') + self.set_path_env_var('include') + + # extend the MSVC path with the current path + try: + for p in os.environ['path'].split(';'): + self.__paths.append(p) + except KeyError: + pass + self.__paths = normalize_and_reduce_paths(self.__paths) + os.environ['path'] = ";".join(self.__paths) + + self.preprocess_options = None + if self.__arch == "Intel": + self.compile_options = ['/nologo', '/O2', '/MD', '/W3', '/GX', '/DNDEBUG'] + self.compile_options_debug = [ + '/nologo', + '/Od', + '/MDd', + '/W3', + '/GX', + '/Z7', + '/D_DEBUG', + ] + else: + # Win64 + self.compile_options = ['/nologo', '/O2', '/MD', '/W3', '/GS-', '/DNDEBUG'] + self.compile_options_debug = [ + '/nologo', + '/Od', + '/MDd', + '/W3', + '/GS-', + '/Z7', + '/D_DEBUG', + ] + + self.ldflags_shared = ['/DLL', '/nologo', '/INCREMENTAL:NO'] + if self.__version >= 7: + self.ldflags_shared_debug = ['/DLL', '/nologo', '/INCREMENTAL:no', '/DEBUG'] + else: + self.ldflags_shared_debug = [ + '/DLL', + '/nologo', + '/INCREMENTAL:no', + '/pdb:None', + '/DEBUG', + ] + self.ldflags_static = ['/nologo'] + + self.initialized = True + + # -- Worker methods ------------------------------------------------ + + def object_filenames(self, source_filenames, strip_dir=0, output_dir=''): + # Copied from ccompiler.py, extended to return .res as 'object'-file + # for .rc input file + if output_dir is None: + output_dir = '' + obj_names = [] + for src_name in source_filenames: + (base, ext) = os.path.splitext(src_name) + base = os.path.splitdrive(base)[1] # Chop off the drive + base = base[os.path.isabs(base) :] # If abs, chop off leading / + if ext not in self.src_extensions: + # Better to raise an exception instead of silently continuing + # and later complain about sources and targets having + # different lengths + raise CompileError("Don't know how to compile %s" % src_name) + if strip_dir: + base = os.path.basename(base) + if ext in self._rc_extensions: + obj_names.append(os.path.join(output_dir, base + self.res_extension)) + elif ext in self._mc_extensions: + obj_names.append(os.path.join(output_dir, base + self.res_extension)) + else: + obj_names.append(os.path.join(output_dir, base + self.obj_extension)) + return obj_names + + def compile( # noqa: C901 + self, + sources, + output_dir=None, + macros=None, + include_dirs=None, + debug=0, + extra_preargs=None, + extra_postargs=None, + depends=None, + ): + + if not self.initialized: + self.initialize() + compile_info = self._setup_compile( + output_dir, macros, include_dirs, sources, depends, extra_postargs + ) + macros, objects, extra_postargs, pp_opts, build = compile_info + + compile_opts = extra_preargs or [] + compile_opts.append('/c') + if debug: + compile_opts.extend(self.compile_options_debug) + else: + compile_opts.extend(self.compile_options) + + for obj in objects: + try: + src, ext = build[obj] + except KeyError: + continue + if debug: + # pass the full pathname to MSVC in debug mode, + # this allows the debugger to find the source file + # without asking the user to browse for it + src = os.path.abspath(src) + + if ext in self._c_extensions: + input_opt = "/Tc" + src + elif ext in self._cpp_extensions: + input_opt = "/Tp" + src + elif ext in self._rc_extensions: + # compile .RC to .RES file + input_opt = src + output_opt = "/fo" + obj + try: + self.spawn([self.rc] + pp_opts + [output_opt] + [input_opt]) + except DistutilsExecError as msg: + raise CompileError(msg) + continue + elif ext in self._mc_extensions: + # Compile .MC to .RC file to .RES file. + # * '-h dir' specifies the directory for the + # generated include file + # * '-r dir' specifies the target directory of the + # generated RC file and the binary message resource + # it includes + # + # For now (since there are no options to change this), + # we use the source-directory for the include file and + # the build directory for the RC file and message + # resources. This works at least for win32all. + h_dir = os.path.dirname(src) + rc_dir = os.path.dirname(obj) + try: + # first compile .MC to .RC and .H file + self.spawn([self.mc] + ['-h', h_dir, '-r', rc_dir] + [src]) + base, _ = os.path.splitext(os.path.basename(src)) + rc_file = os.path.join(rc_dir, base + '.rc') + # then compile .RC to .RES file + self.spawn([self.rc] + ["/fo" + obj] + [rc_file]) + + except DistutilsExecError as msg: + raise CompileError(msg) + continue + else: + # how to handle this file? + raise CompileError( + "Don't know how to compile {} to {}".format(src, obj) + ) + + output_opt = "/Fo" + obj + try: + self.spawn( + [self.cc] + + compile_opts + + pp_opts + + [input_opt, output_opt] + + extra_postargs + ) + except DistutilsExecError as msg: + raise CompileError(msg) + + return objects + + def create_static_lib( + self, objects, output_libname, output_dir=None, debug=0, target_lang=None + ): + + if not self.initialized: + self.initialize() + (objects, output_dir) = self._fix_object_args(objects, output_dir) + output_filename = self.library_filename(output_libname, output_dir=output_dir) + + if self._need_link(objects, output_filename): + lib_args = objects + ['/OUT:' + output_filename] + if debug: + pass # XXX what goes here? + try: + self.spawn([self.lib] + lib_args) + except DistutilsExecError as msg: + raise LibError(msg) + else: + log.debug("skipping %s (up-to-date)", output_filename) + + def link( # noqa: C901 + self, + target_desc, + objects, + output_filename, + output_dir=None, + libraries=None, + library_dirs=None, + runtime_library_dirs=None, + export_symbols=None, + debug=0, + extra_preargs=None, + extra_postargs=None, + build_temp=None, + target_lang=None, + ): + + if not self.initialized: + self.initialize() + (objects, output_dir) = self._fix_object_args(objects, output_dir) + fixed_args = self._fix_lib_args(libraries, library_dirs, runtime_library_dirs) + (libraries, library_dirs, runtime_library_dirs) = fixed_args + + if runtime_library_dirs: + self.warn( + "I don't know what to do with 'runtime_library_dirs': " + + str(runtime_library_dirs) + ) + + lib_opts = gen_lib_options(self, library_dirs, runtime_library_dirs, libraries) + if output_dir is not None: + output_filename = os.path.join(output_dir, output_filename) + + if self._need_link(objects, output_filename): + if target_desc == CCompiler.EXECUTABLE: + if debug: + ldflags = self.ldflags_shared_debug[1:] + else: + ldflags = self.ldflags_shared[1:] + else: + if debug: + ldflags = self.ldflags_shared_debug + else: + ldflags = self.ldflags_shared + + export_opts = [] + for sym in export_symbols or []: + export_opts.append("/EXPORT:" + sym) + + ld_args = ( + ldflags + lib_opts + export_opts + objects + ['/OUT:' + output_filename] + ) + + # The MSVC linker generates .lib and .exp files, which cannot be + # suppressed by any linker switches. The .lib files may even be + # needed! Make sure they are generated in the temporary build + # directory. Since they have different names for debug and release + # builds, they can go into the same directory. + if export_symbols is not None: + (dll_name, dll_ext) = os.path.splitext( + os.path.basename(output_filename) + ) + implib_file = os.path.join( + os.path.dirname(objects[0]), self.library_filename(dll_name) + ) + ld_args.append('/IMPLIB:' + implib_file) + + if extra_preargs: + ld_args[:0] = extra_preargs + if extra_postargs: + ld_args.extend(extra_postargs) + + self.mkpath(os.path.dirname(output_filename)) + try: + self.spawn([self.linker] + ld_args) + except DistutilsExecError as msg: + raise LinkError(msg) + + else: + log.debug("skipping %s (up-to-date)", output_filename) + + # -- Miscellaneous methods ----------------------------------------- + # These are all used by the 'gen_lib_options() function, in + # ccompiler.py. + + def library_dir_option(self, dir): + return "/LIBPATH:" + dir + + def runtime_library_dir_option(self, dir): + raise DistutilsPlatformError( + "don't know how to set runtime library search path for MSVC++" + ) + + def library_option(self, lib): + return self.library_filename(lib) + + def find_library_file(self, dirs, lib, debug=0): + # Prefer a debugging library if found (and requested), but deal + # with it if we don't have one. + if debug: + try_names = [lib + "_d", lib] + else: + try_names = [lib] + for dir in dirs: + for name in try_names: + libfile = os.path.join(dir, self.library_filename(name)) + if os.path.exists(libfile): + return libfile + else: + # Oops, didn't find it in *any* of 'dirs' + return None + + # Helper methods for using the MSVC registry settings + + def find_exe(self, exe): + """Return path to an MSVC executable program. + + Tries to find the program in several places: first, one of the + MSVC program search paths from the registry; next, the directories + in the PATH environment variable. If any of those work, return an + absolute path that is known to exist. If none of them work, just + return the original program name, 'exe'. + """ + for p in self.__paths: + fn = os.path.join(os.path.abspath(p), exe) + if os.path.isfile(fn): + return fn + + # didn't find it; try existing path + for p in os.environ['Path'].split(';'): + fn = os.path.join(os.path.abspath(p), exe) + if os.path.isfile(fn): + return fn + + return exe + + def get_msvc_paths(self, path, platform='x86'): + """Get a list of devstudio directories (include, lib or path). + + Return a list of strings. The list will be empty if unable to + access the registry or appropriate registry keys not found. + """ + if not _can_read_reg: + return [] + + path = path + " dirs" + if self.__version >= 7: + key = r"{}\{:0.1f}\VC\VC_OBJECTS_PLATFORM_INFO\Win32\Directories".format( + self.__root, + self.__version, + ) + else: + key = ( + r"%s\6.0\Build System\Components\Platforms" + r"\Win32 (%s)\Directories" % (self.__root, platform) + ) + + for base in HKEYS: + d = read_values(base, key) + if d: + if self.__version >= 7: + return self.__macros.sub(d[path]).split(";") + else: + return d[path].split(";") + # MSVC 6 seems to create the registry entries we need only when + # the GUI is run. + if self.__version == 6: + for base in HKEYS: + if read_values(base, r"%s\6.0" % self.__root) is not None: + self.warn( + "It seems you have Visual Studio 6 installed, " + "but the expected registry settings are not present.\n" + "You must at least run the Visual Studio GUI once " + "so that these entries are created." + ) + break + return [] + + def set_path_env_var(self, name): + """Set environment variable 'name' to an MSVC path type value. + + This is equivalent to a SET command prior to execution of spawned + commands. + """ + + if name == "lib": + p = self.get_msvc_paths("library") + else: + p = self.get_msvc_paths(name) + if p: + os.environ[name] = ';'.join(p) + + +if get_build_version() >= 8.0: + log.debug("Importing new compiler from distutils.msvc9compiler") + OldMSVCCompiler = MSVCCompiler + from distutils.msvc9compiler import MSVCCompiler + + # get_build_architecture not really relevant now we support cross-compile + from distutils.msvc9compiler import MacroExpander # noqa: F811 diff --git a/libs/common/setuptools/_distutils/py38compat.py b/libs/common/setuptools/_distutils/py38compat.py new file mode 100644 index 00000000..59224e71 --- /dev/null +++ b/libs/common/setuptools/_distutils/py38compat.py @@ -0,0 +1,8 @@ +def aix_platform(osname, version, release): + try: + import _aix_support + + return _aix_support.aix_platform() + except ImportError: + pass + return "{}-{}.{}".format(osname, version, release) diff --git a/libs/common/setuptools/_distutils/py39compat.py b/libs/common/setuptools/_distutils/py39compat.py new file mode 100644 index 00000000..c43e5f10 --- /dev/null +++ b/libs/common/setuptools/_distutils/py39compat.py @@ -0,0 +1,22 @@ +import sys +import platform + + +def add_ext_suffix_39(vars): + """ + Ensure vars contains 'EXT_SUFFIX'. pypa/distutils#130 + """ + import _imp + + ext_suffix = _imp.extension_suffixes()[0] + vars.update( + EXT_SUFFIX=ext_suffix, + # sysconfig sets SO to match EXT_SUFFIX, so maintain + # that expectation. + # https://github.com/python/cpython/blob/785cc6770588de087d09e89a69110af2542be208/Lib/sysconfig.py#L671-L673 + SO=ext_suffix, + ) + + +needs_ext_suffix = sys.version_info < (3, 10) and platform.system() == 'Windows' +add_ext_suffix = add_ext_suffix_39 if needs_ext_suffix else lambda vars: None diff --git a/libs/common/setuptools/_distutils/spawn.py b/libs/common/setuptools/_distutils/spawn.py new file mode 100644 index 00000000..afefe525 --- /dev/null +++ b/libs/common/setuptools/_distutils/spawn.py @@ -0,0 +1,109 @@ +"""distutils.spawn + +Provides the 'spawn()' function, a front-end to various platform- +specific functions for launching another program in a sub-process. +Also provides the 'find_executable()' to search the path for a given +executable name. +""" + +import sys +import os +import subprocess + +from .errors import DistutilsExecError +from .debug import DEBUG +from ._log import log + + +def spawn(cmd, search_path=1, verbose=0, dry_run=0, env=None): # noqa: C901 + """Run another program, specified as a command list 'cmd', in a new process. + + 'cmd' is just the argument list for the new process, ie. + cmd[0] is the program to run and cmd[1:] are the rest of its arguments. + There is no way to run a program with a name different from that of its + executable. + + If 'search_path' is true (the default), the system's executable + search path will be used to find the program; otherwise, cmd[0] + must be the exact path to the executable. If 'dry_run' is true, + the command will not actually be run. + + Raise DistutilsExecError if running the program fails in any way; just + return on success. + """ + # cmd is documented as a list, but just in case some code passes a tuple + # in, protect our %-formatting code against horrible death + cmd = list(cmd) + + log.info(subprocess.list2cmdline(cmd)) + if dry_run: + return + + if search_path: + executable = find_executable(cmd[0]) + if executable is not None: + cmd[0] = executable + + env = env if env is not None else dict(os.environ) + + if sys.platform == 'darwin': + from distutils.util import MACOSX_VERSION_VAR, get_macosx_target_ver + + macosx_target_ver = get_macosx_target_ver() + if macosx_target_ver: + env[MACOSX_VERSION_VAR] = macosx_target_ver + + try: + proc = subprocess.Popen(cmd, env=env) + proc.wait() + exitcode = proc.returncode + except OSError as exc: + if not DEBUG: + cmd = cmd[0] + raise DistutilsExecError( + "command {!r} failed: {}".format(cmd, exc.args[-1]) + ) from exc + + if exitcode: + if not DEBUG: + cmd = cmd[0] + raise DistutilsExecError( + "command {!r} failed with exit code {}".format(cmd, exitcode) + ) + + +def find_executable(executable, path=None): + """Tries to find 'executable' in the directories listed in 'path'. + + A string listing directories separated by 'os.pathsep'; defaults to + os.environ['PATH']. Returns the complete filename or None if not found. + """ + _, ext = os.path.splitext(executable) + if (sys.platform == 'win32') and (ext != '.exe'): + executable = executable + '.exe' + + if os.path.isfile(executable): + return executable + + if path is None: + path = os.environ.get('PATH', None) + if path is None: + try: + path = os.confstr("CS_PATH") + except (AttributeError, ValueError): + # os.confstr() or CS_PATH is not available + path = os.defpath + # bpo-35755: Don't use os.defpath if the PATH environment variable is + # set to an empty string + + # PATH='' doesn't match, whereas PATH=':' looks in the current directory + if not path: + return None + + paths = path.split(os.pathsep) + for p in paths: + f = os.path.join(p, executable) + if os.path.isfile(f): + # the file exists, we have a shot at spawn working + return f + return None diff --git a/libs/common/setuptools/_distutils/sysconfig.py b/libs/common/setuptools/_distutils/sysconfig.py new file mode 100644 index 00000000..0ec69366 --- /dev/null +++ b/libs/common/setuptools/_distutils/sysconfig.py @@ -0,0 +1,552 @@ +"""Provide access to Python's configuration information. The specific +configuration variables available depend heavily on the platform and +configuration. The values may be retrieved using +get_config_var(name), and the list of variables is available via +get_config_vars().keys(). Additional convenience functions are also +available. + +Written by: Fred L. Drake, Jr. +Email: +""" + +import os +import re +import sys +import sysconfig +import pathlib + +from .errors import DistutilsPlatformError +from . import py39compat +from ._functools import pass_none + +IS_PYPY = '__pypy__' in sys.builtin_module_names + +# These are needed in a couple of spots, so just compute them once. +PREFIX = os.path.normpath(sys.prefix) +EXEC_PREFIX = os.path.normpath(sys.exec_prefix) +BASE_PREFIX = os.path.normpath(sys.base_prefix) +BASE_EXEC_PREFIX = os.path.normpath(sys.base_exec_prefix) + +# Path to the base directory of the project. On Windows the binary may +# live in project/PCbuild/win32 or project/PCbuild/amd64. +# set for cross builds +if "_PYTHON_PROJECT_BASE" in os.environ: + project_base = os.path.abspath(os.environ["_PYTHON_PROJECT_BASE"]) +else: + if sys.executable: + project_base = os.path.dirname(os.path.abspath(sys.executable)) + else: + # sys.executable can be empty if argv[0] has been changed and Python is + # unable to retrieve the real program name + project_base = os.getcwd() + + +def _is_python_source_dir(d): + """ + Return True if the target directory appears to point to an + un-installed Python. + """ + modules = pathlib.Path(d).joinpath('Modules') + return any(modules.joinpath(fn).is_file() for fn in ('Setup', 'Setup.local')) + + +_sys_home = getattr(sys, '_home', None) + + +def _is_parent(dir_a, dir_b): + """ + Return True if a is a parent of b. + """ + return os.path.normcase(dir_a).startswith(os.path.normcase(dir_b)) + + +if os.name == 'nt': + + @pass_none + def _fix_pcbuild(d): + # In a venv, sys._home will be inside BASE_PREFIX rather than PREFIX. + prefixes = PREFIX, BASE_PREFIX + matched = ( + prefix + for prefix in prefixes + if _is_parent(d, os.path.join(prefix, "PCbuild")) + ) + return next(matched, d) + + project_base = _fix_pcbuild(project_base) + _sys_home = _fix_pcbuild(_sys_home) + + +def _python_build(): + if _sys_home: + return _is_python_source_dir(_sys_home) + return _is_python_source_dir(project_base) + + +python_build = _python_build() + + +# Calculate the build qualifier flags if they are defined. Adding the flags +# to the include and lib directories only makes sense for an installation, not +# an in-source build. +build_flags = '' +try: + if not python_build: + build_flags = sys.abiflags +except AttributeError: + # It's not a configure-based build, so the sys module doesn't have + # this attribute, which is fine. + pass + + +def get_python_version(): + """Return a string containing the major and minor Python version, + leaving off the patchlevel. Sample return values could be '1.5' + or '2.2'. + """ + return '%d.%d' % sys.version_info[:2] + + +def get_python_inc(plat_specific=0, prefix=None): + """Return the directory containing installed Python header files. + + If 'plat_specific' is false (the default), this is the path to the + non-platform-specific header files, i.e. Python.h and so on; + otherwise, this is the path to platform-specific header files + (namely pyconfig.h). + + If 'prefix' is supplied, use it instead of sys.base_prefix or + sys.base_exec_prefix -- i.e., ignore 'plat_specific'. + """ + default_prefix = BASE_EXEC_PREFIX if plat_specific else BASE_PREFIX + resolved_prefix = prefix if prefix is not None else default_prefix + try: + getter = globals()[f'_get_python_inc_{os.name}'] + except KeyError: + raise DistutilsPlatformError( + "I don't know where Python installs its C header files " + "on platform '%s'" % os.name + ) + return getter(resolved_prefix, prefix, plat_specific) + + +def _get_python_inc_posix(prefix, spec_prefix, plat_specific): + if IS_PYPY and sys.version_info < (3, 8): + return os.path.join(prefix, 'include') + return ( + _get_python_inc_posix_python(plat_specific) + or _get_python_inc_from_config(plat_specific, spec_prefix) + or _get_python_inc_posix_prefix(prefix) + ) + + +def _get_python_inc_posix_python(plat_specific): + """ + Assume the executable is in the build directory. The + pyconfig.h file should be in the same directory. Since + the build directory may not be the source directory, + use "srcdir" from the makefile to find the "Include" + directory. + """ + if not python_build: + return + if plat_specific: + return _sys_home or project_base + incdir = os.path.join(get_config_var('srcdir'), 'Include') + return os.path.normpath(incdir) + + +def _get_python_inc_from_config(plat_specific, spec_prefix): + """ + If no prefix was explicitly specified, provide the include + directory from the config vars. Useful when + cross-compiling, since the config vars may come from + the host + platform Python installation, while the current Python + executable is from the build platform installation. + + >>> monkeypatch = getfixture('monkeypatch') + >>> gpifc = _get_python_inc_from_config + >>> monkeypatch.setitem(gpifc.__globals__, 'get_config_var', str.lower) + >>> gpifc(False, '/usr/bin/') + >>> gpifc(False, '') + >>> gpifc(False, None) + 'includepy' + >>> gpifc(True, None) + 'confincludepy' + """ + if spec_prefix is None: + return get_config_var('CONF' * plat_specific + 'INCLUDEPY') + + +def _get_python_inc_posix_prefix(prefix): + implementation = 'pypy' if IS_PYPY else 'python' + python_dir = implementation + get_python_version() + build_flags + return os.path.join(prefix, "include", python_dir) + + +def _get_python_inc_nt(prefix, spec_prefix, plat_specific): + if python_build: + # Include both the include and PC dir to ensure we can find + # pyconfig.h + return ( + os.path.join(prefix, "include") + + os.path.pathsep + + os.path.join(prefix, "PC") + ) + return os.path.join(prefix, "include") + + +# allow this behavior to be monkey-patched. Ref pypa/distutils#2. +def _posix_lib(standard_lib, libpython, early_prefix, prefix): + if standard_lib: + return libpython + else: + return os.path.join(libpython, "site-packages") + + +def get_python_lib(plat_specific=0, standard_lib=0, prefix=None): + """Return the directory containing the Python library (standard or + site additions). + + If 'plat_specific' is true, return the directory containing + platform-specific modules, i.e. any module from a non-pure-Python + module distribution; otherwise, return the platform-shared library + directory. If 'standard_lib' is true, return the directory + containing standard Python library modules; otherwise, return the + directory for site-specific modules. + + If 'prefix' is supplied, use it instead of sys.base_prefix or + sys.base_exec_prefix -- i.e., ignore 'plat_specific'. + """ + + if IS_PYPY and sys.version_info < (3, 8): + # PyPy-specific schema + if prefix is None: + prefix = PREFIX + if standard_lib: + return os.path.join(prefix, "lib-python", sys.version[0]) + return os.path.join(prefix, 'site-packages') + + early_prefix = prefix + + if prefix is None: + if standard_lib: + prefix = plat_specific and BASE_EXEC_PREFIX or BASE_PREFIX + else: + prefix = plat_specific and EXEC_PREFIX or PREFIX + + if os.name == "posix": + if plat_specific or standard_lib: + # Platform-specific modules (any module from a non-pure-Python + # module distribution) or standard Python library modules. + libdir = getattr(sys, "platlibdir", "lib") + else: + # Pure Python + libdir = "lib" + implementation = 'pypy' if IS_PYPY else 'python' + libpython = os.path.join(prefix, libdir, implementation + get_python_version()) + return _posix_lib(standard_lib, libpython, early_prefix, prefix) + elif os.name == "nt": + if standard_lib: + return os.path.join(prefix, "Lib") + else: + return os.path.join(prefix, "Lib", "site-packages") + else: + raise DistutilsPlatformError( + "I don't know where Python installs its library " + "on platform '%s'" % os.name + ) + + +def customize_compiler(compiler): # noqa: C901 + """Do any platform-specific customization of a CCompiler instance. + + Mainly needed on Unix, so we can plug in the information that + varies across Unices and is stored in Python's Makefile. + """ + if compiler.compiler_type == "unix": + if sys.platform == "darwin": + # Perform first-time customization of compiler-related + # config vars on OS X now that we know we need a compiler. + # This is primarily to support Pythons from binary + # installers. The kind and paths to build tools on + # the user system may vary significantly from the system + # that Python itself was built on. Also the user OS + # version and build tools may not support the same set + # of CPU architectures for universal builds. + global _config_vars + # Use get_config_var() to ensure _config_vars is initialized. + if not get_config_var('CUSTOMIZED_OSX_COMPILER'): + import _osx_support + + _osx_support.customize_compiler(_config_vars) + _config_vars['CUSTOMIZED_OSX_COMPILER'] = 'True' + + ( + cc, + cxx, + cflags, + ccshared, + ldshared, + shlib_suffix, + ar, + ar_flags, + ) = get_config_vars( + 'CC', + 'CXX', + 'CFLAGS', + 'CCSHARED', + 'LDSHARED', + 'SHLIB_SUFFIX', + 'AR', + 'ARFLAGS', + ) + + if 'CC' in os.environ: + newcc = os.environ['CC'] + if 'LDSHARED' not in os.environ and ldshared.startswith(cc): + # If CC is overridden, use that as the default + # command for LDSHARED as well + ldshared = newcc + ldshared[len(cc) :] + cc = newcc + if 'CXX' in os.environ: + cxx = os.environ['CXX'] + if 'LDSHARED' in os.environ: + ldshared = os.environ['LDSHARED'] + if 'CPP' in os.environ: + cpp = os.environ['CPP'] + else: + cpp = cc + " -E" # not always + if 'LDFLAGS' in os.environ: + ldshared = ldshared + ' ' + os.environ['LDFLAGS'] + if 'CFLAGS' in os.environ: + cflags = cflags + ' ' + os.environ['CFLAGS'] + ldshared = ldshared + ' ' + os.environ['CFLAGS'] + if 'CPPFLAGS' in os.environ: + cpp = cpp + ' ' + os.environ['CPPFLAGS'] + cflags = cflags + ' ' + os.environ['CPPFLAGS'] + ldshared = ldshared + ' ' + os.environ['CPPFLAGS'] + if 'AR' in os.environ: + ar = os.environ['AR'] + if 'ARFLAGS' in os.environ: + archiver = ar + ' ' + os.environ['ARFLAGS'] + else: + archiver = ar + ' ' + ar_flags + + cc_cmd = cc + ' ' + cflags + compiler.set_executables( + preprocessor=cpp, + compiler=cc_cmd, + compiler_so=cc_cmd + ' ' + ccshared, + compiler_cxx=cxx, + linker_so=ldshared, + linker_exe=cc, + archiver=archiver, + ) + + if 'RANLIB' in os.environ and compiler.executables.get('ranlib', None): + compiler.set_executables(ranlib=os.environ['RANLIB']) + + compiler.shared_lib_extension = shlib_suffix + + +def get_config_h_filename(): + """Return full pathname of installed pyconfig.h file.""" + if python_build: + if os.name == "nt": + inc_dir = os.path.join(_sys_home or project_base, "PC") + else: + inc_dir = _sys_home or project_base + return os.path.join(inc_dir, 'pyconfig.h') + else: + return sysconfig.get_config_h_filename() + + +def get_makefile_filename(): + """Return full pathname of installed Makefile from the Python build.""" + return sysconfig.get_makefile_filename() + + +def parse_config_h(fp, g=None): + """Parse a config.h-style file. + + A dictionary containing name/value pairs is returned. If an + optional dictionary is passed in as the second argument, it is + used instead of a new dictionary. + """ + return sysconfig.parse_config_h(fp, vars=g) + + +# Regexes needed for parsing Makefile (and similar syntaxes, +# like old-style Setup files). +_variable_rx = re.compile(r"([a-zA-Z][a-zA-Z0-9_]+)\s*=\s*(.*)") +_findvar1_rx = re.compile(r"\$\(([A-Za-z][A-Za-z0-9_]*)\)") +_findvar2_rx = re.compile(r"\${([A-Za-z][A-Za-z0-9_]*)}") + + +def parse_makefile(fn, g=None): # noqa: C901 + """Parse a Makefile-style file. + + A dictionary containing name/value pairs is returned. If an + optional dictionary is passed in as the second argument, it is + used instead of a new dictionary. + """ + from distutils.text_file import TextFile + + fp = TextFile( + fn, strip_comments=1, skip_blanks=1, join_lines=1, errors="surrogateescape" + ) + + if g is None: + g = {} + done = {} + notdone = {} + + while True: + line = fp.readline() + if line is None: # eof + break + m = _variable_rx.match(line) + if m: + n, v = m.group(1, 2) + v = v.strip() + # `$$' is a literal `$' in make + tmpv = v.replace('$$', '') + + if "$" in tmpv: + notdone[n] = v + else: + try: + v = int(v) + except ValueError: + # insert literal `$' + done[n] = v.replace('$$', '$') + else: + done[n] = v + + # Variables with a 'PY_' prefix in the makefile. These need to + # be made available without that prefix through sysconfig. + # Special care is needed to ensure that variable expansion works, even + # if the expansion uses the name without a prefix. + renamed_variables = ('CFLAGS', 'LDFLAGS', 'CPPFLAGS') + + # do variable interpolation here + while notdone: + for name in list(notdone): + value = notdone[name] + m = _findvar1_rx.search(value) or _findvar2_rx.search(value) + if m: + n = m.group(1) + found = True + if n in done: + item = str(done[n]) + elif n in notdone: + # get it on a subsequent round + found = False + elif n in os.environ: + # do it like make: fall back to environment + item = os.environ[n] + + elif n in renamed_variables: + if name.startswith('PY_') and name[3:] in renamed_variables: + item = "" + + elif 'PY_' + n in notdone: + found = False + + else: + item = str(done['PY_' + n]) + else: + done[n] = item = "" + if found: + after = value[m.end() :] + value = value[: m.start()] + item + after + if "$" in after: + notdone[name] = value + else: + try: + value = int(value) + except ValueError: + done[name] = value.strip() + else: + done[name] = value + del notdone[name] + + if name.startswith('PY_') and name[3:] in renamed_variables: + + name = name[3:] + if name not in done: + done[name] = value + else: + # bogus variable reference; just drop it since we can't deal + del notdone[name] + + fp.close() + + # strip spurious spaces + for k, v in done.items(): + if isinstance(v, str): + done[k] = v.strip() + + # save the results in the global dictionary + g.update(done) + return g + + +def expand_makefile_vars(s, vars): + """Expand Makefile-style variables -- "${foo}" or "$(foo)" -- in + 'string' according to 'vars' (a dictionary mapping variable names to + values). Variables not present in 'vars' are silently expanded to the + empty string. The variable values in 'vars' should not contain further + variable expansions; if 'vars' is the output of 'parse_makefile()', + you're fine. Returns a variable-expanded version of 's'. + """ + + # This algorithm does multiple expansion, so if vars['foo'] contains + # "${bar}", it will expand ${foo} to ${bar}, and then expand + # ${bar}... and so forth. This is fine as long as 'vars' comes from + # 'parse_makefile()', which takes care of such expansions eagerly, + # according to make's variable expansion semantics. + + while True: + m = _findvar1_rx.search(s) or _findvar2_rx.search(s) + if m: + (beg, end) = m.span() + s = s[0:beg] + vars.get(m.group(1)) + s[end:] + else: + break + return s + + +_config_vars = None + + +def get_config_vars(*args): + """With no arguments, return a dictionary of all configuration + variables relevant for the current platform. Generally this includes + everything needed to build extensions and install both pure modules and + extensions. On Unix, this means every variable defined in Python's + installed Makefile; on Windows it's a much smaller set. + + With arguments, return a list of values that result from looking up + each argument in the configuration variable dictionary. + """ + global _config_vars + if _config_vars is None: + _config_vars = sysconfig.get_config_vars().copy() + py39compat.add_ext_suffix(_config_vars) + + return [_config_vars.get(name) for name in args] if args else _config_vars + + +def get_config_var(name): + """Return the value of a single variable using the dictionary + returned by 'get_config_vars()'. Equivalent to + get_config_vars().get(name) + """ + if name == 'SO': + import warnings + + warnings.warn('SO is deprecated, use EXT_SUFFIX', DeprecationWarning, 2) + return get_config_vars().get(name) diff --git a/libs/common/setuptools/_distutils/text_file.py b/libs/common/setuptools/_distutils/text_file.py new file mode 100644 index 00000000..7274d4b1 --- /dev/null +++ b/libs/common/setuptools/_distutils/text_file.py @@ -0,0 +1,287 @@ +"""text_file + +provides the TextFile class, which gives an interface to text files +that (optionally) takes care of stripping comments, ignoring blank +lines, and joining lines with backslashes.""" + +import sys + + +class TextFile: + """Provides a file-like object that takes care of all the things you + commonly want to do when processing a text file that has some + line-by-line syntax: strip comments (as long as "#" is your + comment character), skip blank lines, join adjacent lines by + escaping the newline (ie. backslash at end of line), strip + leading and/or trailing whitespace. All of these are optional + and independently controllable. + + Provides a 'warn()' method so you can generate warning messages that + report physical line number, even if the logical line in question + spans multiple physical lines. Also provides 'unreadline()' for + implementing line-at-a-time lookahead. + + Constructor is called as: + + TextFile (filename=None, file=None, **options) + + It bombs (RuntimeError) if both 'filename' and 'file' are None; + 'filename' should be a string, and 'file' a file object (or + something that provides 'readline()' and 'close()' methods). It is + recommended that you supply at least 'filename', so that TextFile + can include it in warning messages. If 'file' is not supplied, + TextFile creates its own using 'io.open()'. + + The options are all boolean, and affect the value returned by + 'readline()': + strip_comments [default: true] + strip from "#" to end-of-line, as well as any whitespace + leading up to the "#" -- unless it is escaped by a backslash + lstrip_ws [default: false] + strip leading whitespace from each line before returning it + rstrip_ws [default: true] + strip trailing whitespace (including line terminator!) from + each line before returning it + skip_blanks [default: true} + skip lines that are empty *after* stripping comments and + whitespace. (If both lstrip_ws and rstrip_ws are false, + then some lines may consist of solely whitespace: these will + *not* be skipped, even if 'skip_blanks' is true.) + join_lines [default: false] + if a backslash is the last non-newline character on a line + after stripping comments and whitespace, join the following line + to it to form one "logical line"; if N consecutive lines end + with a backslash, then N+1 physical lines will be joined to + form one logical line. + collapse_join [default: false] + strip leading whitespace from lines that are joined to their + predecessor; only matters if (join_lines and not lstrip_ws) + errors [default: 'strict'] + error handler used to decode the file content + + Note that since 'rstrip_ws' can strip the trailing newline, the + semantics of 'readline()' must differ from those of the builtin file + object's 'readline()' method! In particular, 'readline()' returns + None for end-of-file: an empty string might just be a blank line (or + an all-whitespace line), if 'rstrip_ws' is true but 'skip_blanks' is + not.""" + + default_options = { + 'strip_comments': 1, + 'skip_blanks': 1, + 'lstrip_ws': 0, + 'rstrip_ws': 1, + 'join_lines': 0, + 'collapse_join': 0, + 'errors': 'strict', + } + + def __init__(self, filename=None, file=None, **options): + """Construct a new TextFile object. At least one of 'filename' + (a string) and 'file' (a file-like object) must be supplied. + They keyword argument options are described above and affect + the values returned by 'readline()'.""" + if filename is None and file is None: + raise RuntimeError( + "you must supply either or both of 'filename' and 'file'" + ) + + # set values for all options -- either from client option hash + # or fallback to default_options + for opt in self.default_options.keys(): + if opt in options: + setattr(self, opt, options[opt]) + else: + setattr(self, opt, self.default_options[opt]) + + # sanity check client option hash + for opt in options.keys(): + if opt not in self.default_options: + raise KeyError("invalid TextFile option '%s'" % opt) + + if file is None: + self.open(filename) + else: + self.filename = filename + self.file = file + self.current_line = 0 # assuming that file is at BOF! + + # 'linebuf' is a stack of lines that will be emptied before we + # actually read from the file; it's only populated by an + # 'unreadline()' operation + self.linebuf = [] + + def open(self, filename): + """Open a new file named 'filename'. This overrides both the + 'filename' and 'file' arguments to the constructor.""" + self.filename = filename + self.file = open(self.filename, errors=self.errors) + self.current_line = 0 + + def close(self): + """Close the current file and forget everything we know about it + (filename, current line number).""" + file = self.file + self.file = None + self.filename = None + self.current_line = None + file.close() + + def gen_error(self, msg, line=None): + outmsg = [] + if line is None: + line = self.current_line + outmsg.append(self.filename + ", ") + if isinstance(line, (list, tuple)): + outmsg.append("lines %d-%d: " % tuple(line)) + else: + outmsg.append("line %d: " % line) + outmsg.append(str(msg)) + return "".join(outmsg) + + def error(self, msg, line=None): + raise ValueError("error: " + self.gen_error(msg, line)) + + def warn(self, msg, line=None): + """Print (to stderr) a warning message tied to the current logical + line in the current file. If the current logical line in the + file spans multiple physical lines, the warning refers to the + whole range, eg. "lines 3-5". If 'line' supplied, it overrides + the current line number; it may be a list or tuple to indicate a + range of physical lines, or an integer for a single physical + line.""" + sys.stderr.write("warning: " + self.gen_error(msg, line) + "\n") + + def readline(self): # noqa: C901 + """Read and return a single logical line from the current file (or + from an internal buffer if lines have previously been "unread" + with 'unreadline()'). If the 'join_lines' option is true, this + may involve reading multiple physical lines concatenated into a + single string. Updates the current line number, so calling + 'warn()' after 'readline()' emits a warning about the physical + line(s) just read. Returns None on end-of-file, since the empty + string can occur if 'rstrip_ws' is true but 'strip_blanks' is + not.""" + # If any "unread" lines waiting in 'linebuf', return the top + # one. (We don't actually buffer read-ahead data -- lines only + # get put in 'linebuf' if the client explicitly does an + # 'unreadline()'. + if self.linebuf: + line = self.linebuf[-1] + del self.linebuf[-1] + return line + + buildup_line = '' + + while True: + # read the line, make it None if EOF + line = self.file.readline() + if line == '': + line = None + + if self.strip_comments and line: + + # Look for the first "#" in the line. If none, never + # mind. If we find one and it's the first character, or + # is not preceded by "\", then it starts a comment -- + # strip the comment, strip whitespace before it, and + # carry on. Otherwise, it's just an escaped "#", so + # unescape it (and any other escaped "#"'s that might be + # lurking in there) and otherwise leave the line alone. + + pos = line.find("#") + if pos == -1: # no "#" -- no comments + pass + + # It's definitely a comment -- either "#" is the first + # character, or it's elsewhere and unescaped. + elif pos == 0 or line[pos - 1] != "\\": + # Have to preserve the trailing newline, because it's + # the job of a later step (rstrip_ws) to remove it -- + # and if rstrip_ws is false, we'd better preserve it! + # (NB. this means that if the final line is all comment + # and has no trailing newline, we will think that it's + # EOF; I think that's OK.) + eol = (line[-1] == '\n') and '\n' or '' + line = line[0:pos] + eol + + # If all that's left is whitespace, then skip line + # *now*, before we try to join it to 'buildup_line' -- + # that way constructs like + # hello \\ + # # comment that should be ignored + # there + # result in "hello there". + if line.strip() == "": + continue + else: # it's an escaped "#" + line = line.replace("\\#", "#") + + # did previous line end with a backslash? then accumulate + if self.join_lines and buildup_line: + # oops: end of file + if line is None: + self.warn("continuation line immediately precedes " "end-of-file") + return buildup_line + + if self.collapse_join: + line = line.lstrip() + line = buildup_line + line + + # careful: pay attention to line number when incrementing it + if isinstance(self.current_line, list): + self.current_line[1] = self.current_line[1] + 1 + else: + self.current_line = [self.current_line, self.current_line + 1] + # just an ordinary line, read it as usual + else: + if line is None: # eof + return None + + # still have to be careful about incrementing the line number! + if isinstance(self.current_line, list): + self.current_line = self.current_line[1] + 1 + else: + self.current_line = self.current_line + 1 + + # strip whitespace however the client wants (leading and + # trailing, or one or the other, or neither) + if self.lstrip_ws and self.rstrip_ws: + line = line.strip() + elif self.lstrip_ws: + line = line.lstrip() + elif self.rstrip_ws: + line = line.rstrip() + + # blank line (whether we rstrip'ed or not)? skip to next line + # if appropriate + if (line == '' or line == '\n') and self.skip_blanks: + continue + + if self.join_lines: + if line[-1] == '\\': + buildup_line = line[:-1] + continue + + if line[-2:] == '\\\n': + buildup_line = line[0:-2] + '\n' + continue + + # well, I guess there's some actual content there: return it + return line + + def readlines(self): + """Read and return the list of all logical lines remaining in the + current file.""" + lines = [] + while True: + line = self.readline() + if line is None: + return lines + lines.append(line) + + def unreadline(self, line): + """Push 'line' (a string) onto an internal buffer that will be + checked by future 'readline()' calls. Handy for implementing + a parser with line-at-a-time lookahead.""" + self.linebuf.append(line) diff --git a/libs/common/setuptools/_distutils/unixccompiler.py b/libs/common/setuptools/_distutils/unixccompiler.py new file mode 100644 index 00000000..4bf2e6a6 --- /dev/null +++ b/libs/common/setuptools/_distutils/unixccompiler.py @@ -0,0 +1,401 @@ +"""distutils.unixccompiler + +Contains the UnixCCompiler class, a subclass of CCompiler that handles +the "typical" Unix-style command-line C compiler: + * macros defined with -Dname[=value] + * macros undefined with -Uname + * include search directories specified with -Idir + * libraries specified with -lllib + * library search directories specified with -Ldir + * compile handled by 'cc' (or similar) executable with -c option: + compiles .c to .o + * link static library handled by 'ar' command (possibly with 'ranlib') + * link shared library handled by 'cc -shared' +""" + +import os +import sys +import re +import shlex +import itertools + +from . import sysconfig +from .dep_util import newer +from .ccompiler import CCompiler, gen_preprocess_options, gen_lib_options +from .errors import DistutilsExecError, CompileError, LibError, LinkError +from ._log import log +from ._macos_compat import compiler_fixup + +# XXX Things not currently handled: +# * optimization/debug/warning flags; we just use whatever's in Python's +# Makefile and live with it. Is this adequate? If not, we might +# have to have a bunch of subclasses GNUCCompiler, SGICCompiler, +# SunCCompiler, and I suspect down that road lies madness. +# * even if we don't know a warning flag from an optimization flag, +# we need some way for outsiders to feed preprocessor/compiler/linker +# flags in to us -- eg. a sysadmin might want to mandate certain flags +# via a site config file, or a user might want to set something for +# compiling this module distribution only via the setup.py command +# line, whatever. As long as these options come from something on the +# current system, they can be as system-dependent as they like, and we +# should just happily stuff them into the preprocessor/compiler/linker +# options and carry on. + + +def _split_env(cmd): + """ + For macOS, split command into 'env' portion (if any) + and the rest of the linker command. + + >>> _split_env(['a', 'b', 'c']) + ([], ['a', 'b', 'c']) + >>> _split_env(['/usr/bin/env', 'A=3', 'gcc']) + (['/usr/bin/env', 'A=3'], ['gcc']) + """ + pivot = 0 + if os.path.basename(cmd[0]) == "env": + pivot = 1 + while '=' in cmd[pivot]: + pivot += 1 + return cmd[:pivot], cmd[pivot:] + + +def _split_aix(cmd): + """ + AIX platforms prefix the compiler with the ld_so_aix + script, so split that from the linker command. + + >>> _split_aix(['a', 'b', 'c']) + ([], ['a', 'b', 'c']) + >>> _split_aix(['/bin/foo/ld_so_aix', 'gcc']) + (['/bin/foo/ld_so_aix'], ['gcc']) + """ + pivot = os.path.basename(cmd[0]) == 'ld_so_aix' + return cmd[:pivot], cmd[pivot:] + + +def _linker_params(linker_cmd, compiler_cmd): + """ + The linker command usually begins with the compiler + command (possibly multiple elements), followed by zero or more + params for shared library building. + + If the LDSHARED env variable overrides the linker command, + however, the commands may not match. + + Return the best guess of the linker parameters by stripping + the linker command. If the compiler command does not + match the linker command, assume the linker command is + just the first element. + + >>> _linker_params('gcc foo bar'.split(), ['gcc']) + ['foo', 'bar'] + >>> _linker_params('gcc foo bar'.split(), ['other']) + ['foo', 'bar'] + >>> _linker_params('ccache gcc foo bar'.split(), 'ccache gcc'.split()) + ['foo', 'bar'] + >>> _linker_params(['gcc'], ['gcc']) + [] + """ + c_len = len(compiler_cmd) + pivot = c_len if linker_cmd[:c_len] == compiler_cmd else 1 + return linker_cmd[pivot:] + + +class UnixCCompiler(CCompiler): + + compiler_type = 'unix' + + # These are used by CCompiler in two places: the constructor sets + # instance attributes 'preprocessor', 'compiler', etc. from them, and + # 'set_executable()' allows any of these to be set. The defaults here + # are pretty generic; they will probably have to be set by an outsider + # (eg. using information discovered by the sysconfig about building + # Python extensions). + executables = { + 'preprocessor': None, + 'compiler': ["cc"], + 'compiler_so': ["cc"], + 'compiler_cxx': ["cc"], + 'linker_so': ["cc", "-shared"], + 'linker_exe': ["cc"], + 'archiver': ["ar", "-cr"], + 'ranlib': None, + } + + if sys.platform[:6] == "darwin": + executables['ranlib'] = ["ranlib"] + + # Needed for the filename generation methods provided by the base + # class, CCompiler. NB. whoever instantiates/uses a particular + # UnixCCompiler instance should set 'shared_lib_ext' -- we set a + # reasonable common default here, but it's not necessarily used on all + # Unices! + + src_extensions = [".c", ".C", ".cc", ".cxx", ".cpp", ".m"] + obj_extension = ".o" + static_lib_extension = ".a" + shared_lib_extension = ".so" + dylib_lib_extension = ".dylib" + xcode_stub_lib_extension = ".tbd" + static_lib_format = shared_lib_format = dylib_lib_format = "lib%s%s" + xcode_stub_lib_format = dylib_lib_format + if sys.platform == "cygwin": + exe_extension = ".exe" + + def preprocess( + self, + source, + output_file=None, + macros=None, + include_dirs=None, + extra_preargs=None, + extra_postargs=None, + ): + fixed_args = self._fix_compile_args(None, macros, include_dirs) + ignore, macros, include_dirs = fixed_args + pp_opts = gen_preprocess_options(macros, include_dirs) + pp_args = self.preprocessor + pp_opts + if output_file: + pp_args.extend(['-o', output_file]) + if extra_preargs: + pp_args[:0] = extra_preargs + if extra_postargs: + pp_args.extend(extra_postargs) + pp_args.append(source) + + # reasons to preprocess: + # - force is indicated + # - output is directed to stdout + # - source file is newer than the target + preprocess = self.force or output_file is None or newer(source, output_file) + if not preprocess: + return + + if output_file: + self.mkpath(os.path.dirname(output_file)) + + try: + self.spawn(pp_args) + except DistutilsExecError as msg: + raise CompileError(msg) + + def _compile(self, obj, src, ext, cc_args, extra_postargs, pp_opts): + compiler_so = compiler_fixup(self.compiler_so, cc_args + extra_postargs) + try: + self.spawn(compiler_so + cc_args + [src, '-o', obj] + extra_postargs) + except DistutilsExecError as msg: + raise CompileError(msg) + + def create_static_lib( + self, objects, output_libname, output_dir=None, debug=0, target_lang=None + ): + objects, output_dir = self._fix_object_args(objects, output_dir) + + output_filename = self.library_filename(output_libname, output_dir=output_dir) + + if self._need_link(objects, output_filename): + self.mkpath(os.path.dirname(output_filename)) + self.spawn(self.archiver + [output_filename] + objects + self.objects) + + # Not many Unices required ranlib anymore -- SunOS 4.x is, I + # think the only major Unix that does. Maybe we need some + # platform intelligence here to skip ranlib if it's not + # needed -- or maybe Python's configure script took care of + # it for us, hence the check for leading colon. + if self.ranlib: + try: + self.spawn(self.ranlib + [output_filename]) + except DistutilsExecError as msg: + raise LibError(msg) + else: + log.debug("skipping %s (up-to-date)", output_filename) + + def link( + self, + target_desc, + objects, + output_filename, + output_dir=None, + libraries=None, + library_dirs=None, + runtime_library_dirs=None, + export_symbols=None, + debug=0, + extra_preargs=None, + extra_postargs=None, + build_temp=None, + target_lang=None, + ): + objects, output_dir = self._fix_object_args(objects, output_dir) + fixed_args = self._fix_lib_args(libraries, library_dirs, runtime_library_dirs) + libraries, library_dirs, runtime_library_dirs = fixed_args + + lib_opts = gen_lib_options(self, library_dirs, runtime_library_dirs, libraries) + if not isinstance(output_dir, (str, type(None))): + raise TypeError("'output_dir' must be a string or None") + if output_dir is not None: + output_filename = os.path.join(output_dir, output_filename) + + if self._need_link(objects, output_filename): + ld_args = objects + self.objects + lib_opts + ['-o', output_filename] + if debug: + ld_args[:0] = ['-g'] + if extra_preargs: + ld_args[:0] = extra_preargs + if extra_postargs: + ld_args.extend(extra_postargs) + self.mkpath(os.path.dirname(output_filename)) + try: + # Select a linker based on context: linker_exe when + # building an executable or linker_so (with shared options) + # when building a shared library. + building_exe = target_desc == CCompiler.EXECUTABLE + linker = (self.linker_exe if building_exe else self.linker_so)[:] + + if target_lang == "c++" and self.compiler_cxx: + env, linker_ne = _split_env(linker) + aix, linker_na = _split_aix(linker_ne) + _, compiler_cxx_ne = _split_env(self.compiler_cxx) + _, linker_exe_ne = _split_env(self.linker_exe) + + params = _linker_params(linker_na, linker_exe_ne) + linker = env + aix + compiler_cxx_ne + params + + linker = compiler_fixup(linker, ld_args) + + self.spawn(linker + ld_args) + except DistutilsExecError as msg: + raise LinkError(msg) + else: + log.debug("skipping %s (up-to-date)", output_filename) + + # -- Miscellaneous methods ----------------------------------------- + # These are all used by the 'gen_lib_options() function, in + # ccompiler.py. + + def library_dir_option(self, dir): + return "-L" + dir + + def _is_gcc(self): + cc_var = sysconfig.get_config_var("CC") + compiler = os.path.basename(shlex.split(cc_var)[0]) + return "gcc" in compiler or "g++" in compiler + + def runtime_library_dir_option(self, dir): + # XXX Hackish, at the very least. See Python bug #445902: + # http://sourceforge.net/tracker/index.php + # ?func=detail&aid=445902&group_id=5470&atid=105470 + # Linkers on different platforms need different options to + # specify that directories need to be added to the list of + # directories searched for dependencies when a dynamic library + # is sought. GCC on GNU systems (Linux, FreeBSD, ...) has to + # be told to pass the -R option through to the linker, whereas + # other compilers and gcc on other systems just know this. + # Other compilers may need something slightly different. At + # this time, there's no way to determine this information from + # the configuration data stored in the Python installation, so + # we use this hack. + if sys.platform[:6] == "darwin": + from distutils.util import get_macosx_target_ver, split_version + + macosx_target_ver = get_macosx_target_ver() + if macosx_target_ver and split_version(macosx_target_ver) >= [10, 5]: + return "-Wl,-rpath," + dir + else: # no support for -rpath on earlier macOS versions + return "-L" + dir + elif sys.platform[:7] == "freebsd": + return "-Wl,-rpath=" + dir + elif sys.platform[:5] == "hp-ux": + return [ + "-Wl,+s" if self._is_gcc() else "+s", + "-L" + dir, + ] + + # For all compilers, `-Wl` is the presumed way to + # pass a compiler option to the linker and `-R` is + # the way to pass an RPATH. + if sysconfig.get_config_var("GNULD") == "yes": + # GNU ld needs an extra option to get a RUNPATH + # instead of just an RPATH. + return "-Wl,--enable-new-dtags,-R" + dir + else: + return "-Wl,-R" + dir + + def library_option(self, lib): + return "-l" + lib + + @staticmethod + def _library_root(dir): + """ + macOS users can specify an alternate SDK using'-isysroot'. + Calculate the SDK root if it is specified. + + Note that, as of Xcode 7, Apple SDKs may contain textual stub + libraries with .tbd extensions rather than the normal .dylib + shared libraries installed in /. The Apple compiler tool + chain handles this transparently but it can cause problems + for programs that are being built with an SDK and searching + for specific libraries. Callers of find_library_file need to + keep in mind that the base filename of the returned SDK library + file might have a different extension from that of the library + file installed on the running system, for example: + /Applications/Xcode.app/Contents/Developer/Platforms/ + MacOSX.platform/Developer/SDKs/MacOSX10.11.sdk/ + usr/lib/libedit.tbd + vs + /usr/lib/libedit.dylib + """ + cflags = sysconfig.get_config_var('CFLAGS') + match = re.search(r'-isysroot\s*(\S+)', cflags) + + apply_root = ( + sys.platform == 'darwin' + and match + and ( + dir.startswith('/System/') + or (dir.startswith('/usr/') and not dir.startswith('/usr/local/')) + ) + ) + + return os.path.join(match.group(1), dir[1:]) if apply_root else dir + + def find_library_file(self, dirs, lib, debug=0): + r""" + Second-guess the linker with not much hard + data to go on: GCC seems to prefer the shared library, so + assume that *all* Unix C compilers do, + ignoring even GCC's "-static" option. + + >>> compiler = UnixCCompiler() + >>> compiler._library_root = lambda dir: dir + >>> monkeypatch = getfixture('monkeypatch') + >>> monkeypatch.setattr(os.path, 'exists', lambda d: 'existing' in d) + >>> dirs = ('/foo/bar/missing', '/foo/bar/existing') + >>> compiler.find_library_file(dirs, 'abc').replace('\\', '/') + '/foo/bar/existing/libabc.dylib' + >>> compiler.find_library_file(reversed(dirs), 'abc').replace('\\', '/') + '/foo/bar/existing/libabc.dylib' + >>> monkeypatch.setattr(os.path, 'exists', + ... lambda d: 'existing' in d and '.a' in d) + >>> compiler.find_library_file(dirs, 'abc').replace('\\', '/') + '/foo/bar/existing/libabc.a' + >>> compiler.find_library_file(reversed(dirs), 'abc').replace('\\', '/') + '/foo/bar/existing/libabc.a' + """ + lib_names = ( + self.library_filename(lib, lib_type=type) + for type in 'dylib xcode_stub shared static'.split() + ) + + roots = map(self._library_root, dirs) + + searched = ( + os.path.join(root, lib_name) + for root, lib_name in itertools.product(roots, lib_names) + ) + + found = filter(os.path.exists, searched) + + # Return None if it could not be found in any dir. + return next(found, None) diff --git a/libs/common/setuptools/_distutils/util.py b/libs/common/setuptools/_distutils/util.py new file mode 100644 index 00000000..8668b436 --- /dev/null +++ b/libs/common/setuptools/_distutils/util.py @@ -0,0 +1,513 @@ +"""distutils.util + +Miscellaneous utility functions -- anything that doesn't fit into +one of the other *util.py modules. +""" + +import importlib.util +import os +import re +import string +import subprocess +import sys +import sysconfig +import functools + +from .errors import DistutilsPlatformError, DistutilsByteCompileError +from .dep_util import newer +from .spawn import spawn +from ._log import log + + +def get_host_platform(): + """ + Return a string that identifies the current platform. Use this + function to distinguish platform-specific build directories and + platform-specific built distributions. + """ + + # This function initially exposed platforms as defined in Python 3.9 + # even with older Python versions when distutils was split out. + # Now it delegates to stdlib sysconfig, but maintains compatibility. + + if sys.version_info < (3, 8): + if os.name == 'nt': + if '(arm)' in sys.version.lower(): + return 'win-arm32' + if '(arm64)' in sys.version.lower(): + return 'win-arm64' + + if sys.version_info < (3, 9): + if os.name == "posix" and hasattr(os, 'uname'): + osname, host, release, version, machine = os.uname() + if osname[:3] == "aix": + from .py38compat import aix_platform + + return aix_platform(osname, version, release) + + return sysconfig.get_platform() + + +def get_platform(): + if os.name == 'nt': + TARGET_TO_PLAT = { + 'x86': 'win32', + 'x64': 'win-amd64', + 'arm': 'win-arm32', + 'arm64': 'win-arm64', + } + target = os.environ.get('VSCMD_ARG_TGT_ARCH') + return TARGET_TO_PLAT.get(target) or get_host_platform() + return get_host_platform() + + +if sys.platform == 'darwin': + _syscfg_macosx_ver = None # cache the version pulled from sysconfig +MACOSX_VERSION_VAR = 'MACOSX_DEPLOYMENT_TARGET' + + +def _clear_cached_macosx_ver(): + """For testing only. Do not call.""" + global _syscfg_macosx_ver + _syscfg_macosx_ver = None + + +def get_macosx_target_ver_from_syscfg(): + """Get the version of macOS latched in the Python interpreter configuration. + Returns the version as a string or None if can't obtain one. Cached.""" + global _syscfg_macosx_ver + if _syscfg_macosx_ver is None: + from distutils import sysconfig + + ver = sysconfig.get_config_var(MACOSX_VERSION_VAR) or '' + if ver: + _syscfg_macosx_ver = ver + return _syscfg_macosx_ver + + +def get_macosx_target_ver(): + """Return the version of macOS for which we are building. + + The target version defaults to the version in sysconfig latched at time + the Python interpreter was built, unless overridden by an environment + variable. If neither source has a value, then None is returned""" + + syscfg_ver = get_macosx_target_ver_from_syscfg() + env_ver = os.environ.get(MACOSX_VERSION_VAR) + + if env_ver: + # Validate overridden version against sysconfig version, if have both. + # Ensure that the deployment target of the build process is not less + # than 10.3 if the interpreter was built for 10.3 or later. This + # ensures extension modules are built with correct compatibility + # values, specifically LDSHARED which can use + # '-undefined dynamic_lookup' which only works on >= 10.3. + if ( + syscfg_ver + and split_version(syscfg_ver) >= [10, 3] + and split_version(env_ver) < [10, 3] + ): + my_msg = ( + '$' + MACOSX_VERSION_VAR + ' mismatch: ' + 'now "%s" but "%s" during configure; ' + 'must use 10.3 or later' % (env_ver, syscfg_ver) + ) + raise DistutilsPlatformError(my_msg) + return env_ver + return syscfg_ver + + +def split_version(s): + """Convert a dot-separated string into a list of numbers for comparisons""" + return [int(n) for n in s.split('.')] + + +def convert_path(pathname): + """Return 'pathname' as a name that will work on the native filesystem, + i.e. split it on '/' and put it back together again using the current + directory separator. Needed because filenames in the setup script are + always supplied in Unix style, and have to be converted to the local + convention before we can actually use them in the filesystem. Raises + ValueError on non-Unix-ish systems if 'pathname' either starts or + ends with a slash. + """ + if os.sep == '/': + return pathname + if not pathname: + return pathname + if pathname[0] == '/': + raise ValueError("path '%s' cannot be absolute" % pathname) + if pathname[-1] == '/': + raise ValueError("path '%s' cannot end with '/'" % pathname) + + paths = pathname.split('/') + while '.' in paths: + paths.remove('.') + if not paths: + return os.curdir + return os.path.join(*paths) + + +# convert_path () + + +def change_root(new_root, pathname): + """Return 'pathname' with 'new_root' prepended. If 'pathname' is + relative, this is equivalent to "os.path.join(new_root,pathname)". + Otherwise, it requires making 'pathname' relative and then joining the + two, which is tricky on DOS/Windows and Mac OS. + """ + if os.name == 'posix': + if not os.path.isabs(pathname): + return os.path.join(new_root, pathname) + else: + return os.path.join(new_root, pathname[1:]) + + elif os.name == 'nt': + (drive, path) = os.path.splitdrive(pathname) + if path[0] == '\\': + path = path[1:] + return os.path.join(new_root, path) + + raise DistutilsPlatformError(f"nothing known about platform '{os.name}'") + + +@functools.lru_cache() +def check_environ(): + """Ensure that 'os.environ' has all the environment variables we + guarantee that users can use in config files, command-line options, + etc. Currently this includes: + HOME - user's home directory (Unix only) + PLAT - description of the current platform, including hardware + and OS (see 'get_platform()') + """ + if os.name == 'posix' and 'HOME' not in os.environ: + try: + import pwd + + os.environ['HOME'] = pwd.getpwuid(os.getuid())[5] + except (ImportError, KeyError): + # bpo-10496: if the current user identifier doesn't exist in the + # password database, do nothing + pass + + if 'PLAT' not in os.environ: + os.environ['PLAT'] = get_platform() + + +def subst_vars(s, local_vars): + """ + Perform variable substitution on 'string'. + Variables are indicated by format-style braces ("{var}"). + Variable is substituted by the value found in the 'local_vars' + dictionary or in 'os.environ' if it's not in 'local_vars'. + 'os.environ' is first checked/augmented to guarantee that it contains + certain values: see 'check_environ()'. Raise ValueError for any + variables not found in either 'local_vars' or 'os.environ'. + """ + check_environ() + lookup = dict(os.environ) + lookup.update((name, str(value)) for name, value in local_vars.items()) + try: + return _subst_compat(s).format_map(lookup) + except KeyError as var: + raise ValueError(f"invalid variable {var}") + + +def _subst_compat(s): + """ + Replace shell/Perl-style variable substitution with + format-style. For compatibility. + """ + + def _subst(match): + return f'{{{match.group(1)}}}' + + repl = re.sub(r'\$([a-zA-Z_][a-zA-Z_0-9]*)', _subst, s) + if repl != s: + import warnings + + warnings.warn( + "shell/Perl-style substitions are deprecated", + DeprecationWarning, + ) + return repl + + +def grok_environment_error(exc, prefix="error: "): + # Function kept for backward compatibility. + # Used to try clever things with EnvironmentErrors, + # but nowadays str(exception) produces good messages. + return prefix + str(exc) + + +# Needed by 'split_quoted()' +_wordchars_re = _squote_re = _dquote_re = None + + +def _init_regex(): + global _wordchars_re, _squote_re, _dquote_re + _wordchars_re = re.compile(r'[^\\\'\"%s ]*' % string.whitespace) + _squote_re = re.compile(r"'(?:[^'\\]|\\.)*'") + _dquote_re = re.compile(r'"(?:[^"\\]|\\.)*"') + + +def split_quoted(s): + """Split a string up according to Unix shell-like rules for quotes and + backslashes. In short: words are delimited by spaces, as long as those + spaces are not escaped by a backslash, or inside a quoted string. + Single and double quotes are equivalent, and the quote characters can + be backslash-escaped. The backslash is stripped from any two-character + escape sequence, leaving only the escaped character. The quote + characters are stripped from any quoted string. Returns a list of + words. + """ + + # This is a nice algorithm for splitting up a single string, since it + # doesn't require character-by-character examination. It was a little + # bit of a brain-bender to get it working right, though... + if _wordchars_re is None: + _init_regex() + + s = s.strip() + words = [] + pos = 0 + + while s: + m = _wordchars_re.match(s, pos) + end = m.end() + if end == len(s): + words.append(s[:end]) + break + + if s[end] in string.whitespace: + # unescaped, unquoted whitespace: now + # we definitely have a word delimiter + words.append(s[:end]) + s = s[end:].lstrip() + pos = 0 + + elif s[end] == '\\': + # preserve whatever is being escaped; + # will become part of the current word + s = s[:end] + s[end + 1 :] + pos = end + 1 + + else: + if s[end] == "'": # slurp singly-quoted string + m = _squote_re.match(s, end) + elif s[end] == '"': # slurp doubly-quoted string + m = _dquote_re.match(s, end) + else: + raise RuntimeError("this can't happen (bad char '%c')" % s[end]) + + if m is None: + raise ValueError("bad string (mismatched %s quotes?)" % s[end]) + + (beg, end) = m.span() + s = s[:beg] + s[beg + 1 : end - 1] + s[end:] + pos = m.end() - 2 + + if pos >= len(s): + words.append(s) + break + + return words + + +# split_quoted () + + +def execute(func, args, msg=None, verbose=0, dry_run=0): + """Perform some action that affects the outside world (eg. by + writing to the filesystem). Such actions are special because they + are disabled by the 'dry_run' flag. This method takes care of all + that bureaucracy for you; all you have to do is supply the + function to call and an argument tuple for it (to embody the + "external action" being performed), and an optional message to + print. + """ + if msg is None: + msg = "{}{!r}".format(func.__name__, args) + if msg[-2:] == ',)': # correct for singleton tuple + msg = msg[0:-2] + ')' + + log.info(msg) + if not dry_run: + func(*args) + + +def strtobool(val): + """Convert a string representation of truth to true (1) or false (0). + + True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values + are 'n', 'no', 'f', 'false', 'off', and '0'. Raises ValueError if + 'val' is anything else. + """ + val = val.lower() + if val in ('y', 'yes', 't', 'true', 'on', '1'): + return 1 + elif val in ('n', 'no', 'f', 'false', 'off', '0'): + return 0 + else: + raise ValueError("invalid truth value {!r}".format(val)) + + +def byte_compile( # noqa: C901 + py_files, + optimize=0, + force=0, + prefix=None, + base_dir=None, + verbose=1, + dry_run=0, + direct=None, +): + """Byte-compile a collection of Python source files to .pyc + files in a __pycache__ subdirectory. 'py_files' is a list + of files to compile; any files that don't end in ".py" are silently + skipped. 'optimize' must be one of the following: + 0 - don't optimize + 1 - normal optimization (like "python -O") + 2 - extra optimization (like "python -OO") + If 'force' is true, all files are recompiled regardless of + timestamps. + + The source filename encoded in each bytecode file defaults to the + filenames listed in 'py_files'; you can modify these with 'prefix' and + 'basedir'. 'prefix' is a string that will be stripped off of each + source filename, and 'base_dir' is a directory name that will be + prepended (after 'prefix' is stripped). You can supply either or both + (or neither) of 'prefix' and 'base_dir', as you wish. + + If 'dry_run' is true, doesn't actually do anything that would + affect the filesystem. + + Byte-compilation is either done directly in this interpreter process + with the standard py_compile module, or indirectly by writing a + temporary script and executing it. Normally, you should let + 'byte_compile()' figure out to use direct compilation or not (see + the source for details). The 'direct' flag is used by the script + generated in indirect mode; unless you know what you're doing, leave + it set to None. + """ + + # nothing is done if sys.dont_write_bytecode is True + if sys.dont_write_bytecode: + raise DistutilsByteCompileError('byte-compiling is disabled.') + + # First, if the caller didn't force us into direct or indirect mode, + # figure out which mode we should be in. We take a conservative + # approach: choose direct mode *only* if the current interpreter is + # in debug mode and optimize is 0. If we're not in debug mode (-O + # or -OO), we don't know which level of optimization this + # interpreter is running with, so we can't do direct + # byte-compilation and be certain that it's the right thing. Thus, + # always compile indirectly if the current interpreter is in either + # optimize mode, or if either optimization level was requested by + # the caller. + if direct is None: + direct = __debug__ and optimize == 0 + + # "Indirect" byte-compilation: write a temporary script and then + # run it with the appropriate flags. + if not direct: + try: + from tempfile import mkstemp + + (script_fd, script_name) = mkstemp(".py") + except ImportError: + from tempfile import mktemp + + (script_fd, script_name) = None, mktemp(".py") + log.info("writing byte-compilation script '%s'", script_name) + if not dry_run: + if script_fd is not None: + script = os.fdopen(script_fd, "w") + else: + script = open(script_name, "w") + + with script: + script.write( + """\ +from distutils.util import byte_compile +files = [ +""" + ) + + # XXX would be nice to write absolute filenames, just for + # safety's sake (script should be more robust in the face of + # chdir'ing before running it). But this requires abspath'ing + # 'prefix' as well, and that breaks the hack in build_lib's + # 'byte_compile()' method that carefully tacks on a trailing + # slash (os.sep really) to make sure the prefix here is "just + # right". This whole prefix business is rather delicate -- the + # problem is that it's really a directory, but I'm treating it + # as a dumb string, so trailing slashes and so forth matter. + + script.write(",\n".join(map(repr, py_files)) + "]\n") + script.write( + """ +byte_compile(files, optimize=%r, force=%r, + prefix=%r, base_dir=%r, + verbose=%r, dry_run=0, + direct=1) +""" + % (optimize, force, prefix, base_dir, verbose) + ) + + cmd = [sys.executable] + cmd.extend(subprocess._optim_args_from_interpreter_flags()) + cmd.append(script_name) + spawn(cmd, dry_run=dry_run) + execute(os.remove, (script_name,), "removing %s" % script_name, dry_run=dry_run) + + # "Direct" byte-compilation: use the py_compile module to compile + # right here, right now. Note that the script generated in indirect + # mode simply calls 'byte_compile()' in direct mode, a weird sort of + # cross-process recursion. Hey, it works! + else: + from py_compile import compile + + for file in py_files: + if file[-3:] != ".py": + # This lets us be lazy and not filter filenames in + # the "install_lib" command. + continue + + # Terminology from the py_compile module: + # cfile - byte-compiled file + # dfile - purported source filename (same as 'file' by default) + if optimize >= 0: + opt = '' if optimize == 0 else optimize + cfile = importlib.util.cache_from_source(file, optimization=opt) + else: + cfile = importlib.util.cache_from_source(file) + dfile = file + if prefix: + if file[: len(prefix)] != prefix: + raise ValueError( + "invalid prefix: filename %r doesn't start with %r" + % (file, prefix) + ) + dfile = dfile[len(prefix) :] + if base_dir: + dfile = os.path.join(base_dir, dfile) + + cfile_base = os.path.basename(cfile) + if direct: + if force or newer(file, cfile): + log.info("byte-compiling %s to %s", file, cfile_base) + if not dry_run: + compile(file, cfile, dfile) + else: + log.debug("skipping byte-compilation of %s to %s", file, cfile_base) + + +def rfc822_escape(header): + """Return a version of the string escaped for inclusion in an + RFC-822 header, by ensuring there are 8 spaces space after each newline. + """ + lines = header.split('\n') + sep = '\n' + 8 * ' ' + return sep.join(lines) diff --git a/libs/common/setuptools/_distutils/version.py b/libs/common/setuptools/_distutils/version.py new file mode 100644 index 00000000..e29e2657 --- /dev/null +++ b/libs/common/setuptools/_distutils/version.py @@ -0,0 +1,358 @@ +# +# distutils/version.py +# +# Implements multiple version numbering conventions for the +# Python Module Distribution Utilities. +# +# $Id$ +# + +"""Provides classes to represent module version numbers (one class for +each style of version numbering). There are currently two such classes +implemented: StrictVersion and LooseVersion. + +Every version number class implements the following interface: + * the 'parse' method takes a string and parses it to some internal + representation; if the string is an invalid version number, + 'parse' raises a ValueError exception + * the class constructor takes an optional string argument which, + if supplied, is passed to 'parse' + * __str__ reconstructs the string that was passed to 'parse' (or + an equivalent string -- ie. one that will generate an equivalent + version number instance) + * __repr__ generates Python code to recreate the version number instance + * _cmp compares the current instance with either another instance + of the same class or a string (which will be parsed to an instance + of the same class, thus must follow the same rules) +""" + +import re +import warnings +import contextlib + + +@contextlib.contextmanager +def suppress_known_deprecation(): + with warnings.catch_warnings(record=True) as ctx: + warnings.filterwarnings( + action='default', + category=DeprecationWarning, + message="distutils Version classes are deprecated.", + ) + yield ctx + + +class Version: + """Abstract base class for version numbering classes. Just provides + constructor (__init__) and reproducer (__repr__), because those + seem to be the same for all version numbering classes; and route + rich comparisons to _cmp. + """ + + def __init__(self, vstring=None): + if vstring: + self.parse(vstring) + warnings.warn( + "distutils Version classes are deprecated. " + "Use packaging.version instead.", + DeprecationWarning, + stacklevel=2, + ) + + def __repr__(self): + return "{} ('{}')".format(self.__class__.__name__, str(self)) + + def __eq__(self, other): + c = self._cmp(other) + if c is NotImplemented: + return c + return c == 0 + + def __lt__(self, other): + c = self._cmp(other) + if c is NotImplemented: + return c + return c < 0 + + def __le__(self, other): + c = self._cmp(other) + if c is NotImplemented: + return c + return c <= 0 + + def __gt__(self, other): + c = self._cmp(other) + if c is NotImplemented: + return c + return c > 0 + + def __ge__(self, other): + c = self._cmp(other) + if c is NotImplemented: + return c + return c >= 0 + + +# Interface for version-number classes -- must be implemented +# by the following classes (the concrete ones -- Version should +# be treated as an abstract class). +# __init__ (string) - create and take same action as 'parse' +# (string parameter is optional) +# parse (string) - convert a string representation to whatever +# internal representation is appropriate for +# this style of version numbering +# __str__ (self) - convert back to a string; should be very similar +# (if not identical to) the string supplied to parse +# __repr__ (self) - generate Python code to recreate +# the instance +# _cmp (self, other) - compare two version numbers ('other' may +# be an unparsed version string, or another +# instance of your version class) + + +class StrictVersion(Version): + + """Version numbering for anal retentives and software idealists. + Implements the standard interface for version number classes as + described above. A version number consists of two or three + dot-separated numeric components, with an optional "pre-release" tag + on the end. The pre-release tag consists of the letter 'a' or 'b' + followed by a number. If the numeric components of two version + numbers are equal, then one with a pre-release tag will always + be deemed earlier (lesser) than one without. + + The following are valid version numbers (shown in the order that + would be obtained by sorting according to the supplied cmp function): + + 0.4 0.4.0 (these two are equivalent) + 0.4.1 + 0.5a1 + 0.5b3 + 0.5 + 0.9.6 + 1.0 + 1.0.4a3 + 1.0.4b1 + 1.0.4 + + The following are examples of invalid version numbers: + + 1 + 2.7.2.2 + 1.3.a4 + 1.3pl1 + 1.3c4 + + The rationale for this version numbering system will be explained + in the distutils documentation. + """ + + version_re = re.compile( + r'^(\d+) \. (\d+) (\. (\d+))? ([ab](\d+))?$', re.VERBOSE | re.ASCII + ) + + def parse(self, vstring): + match = self.version_re.match(vstring) + if not match: + raise ValueError("invalid version number '%s'" % vstring) + + (major, minor, patch, prerelease, prerelease_num) = match.group(1, 2, 4, 5, 6) + + if patch: + self.version = tuple(map(int, [major, minor, patch])) + else: + self.version = tuple(map(int, [major, minor])) + (0,) + + if prerelease: + self.prerelease = (prerelease[0], int(prerelease_num)) + else: + self.prerelease = None + + def __str__(self): + + if self.version[2] == 0: + vstring = '.'.join(map(str, self.version[0:2])) + else: + vstring = '.'.join(map(str, self.version)) + + if self.prerelease: + vstring = vstring + self.prerelease[0] + str(self.prerelease[1]) + + return vstring + + def _cmp(self, other): # noqa: C901 + if isinstance(other, str): + with suppress_known_deprecation(): + other = StrictVersion(other) + elif not isinstance(other, StrictVersion): + return NotImplemented + + if self.version != other.version: + # numeric versions don't match + # prerelease stuff doesn't matter + if self.version < other.version: + return -1 + else: + return 1 + + # have to compare prerelease + # case 1: neither has prerelease; they're equal + # case 2: self has prerelease, other doesn't; other is greater + # case 3: self doesn't have prerelease, other does: self is greater + # case 4: both have prerelease: must compare them! + + if not self.prerelease and not other.prerelease: + return 0 + elif self.prerelease and not other.prerelease: + return -1 + elif not self.prerelease and other.prerelease: + return 1 + elif self.prerelease and other.prerelease: + if self.prerelease == other.prerelease: + return 0 + elif self.prerelease < other.prerelease: + return -1 + else: + return 1 + else: + assert False, "never get here" + + +# end class StrictVersion + + +# The rules according to Greg Stein: +# 1) a version number has 1 or more numbers separated by a period or by +# sequences of letters. If only periods, then these are compared +# left-to-right to determine an ordering. +# 2) sequences of letters are part of the tuple for comparison and are +# compared lexicographically +# 3) recognize the numeric components may have leading zeroes +# +# The LooseVersion class below implements these rules: a version number +# string is split up into a tuple of integer and string components, and +# comparison is a simple tuple comparison. This means that version +# numbers behave in a predictable and obvious way, but a way that might +# not necessarily be how people *want* version numbers to behave. There +# wouldn't be a problem if people could stick to purely numeric version +# numbers: just split on period and compare the numbers as tuples. +# However, people insist on putting letters into their version numbers; +# the most common purpose seems to be: +# - indicating a "pre-release" version +# ('alpha', 'beta', 'a', 'b', 'pre', 'p') +# - indicating a post-release patch ('p', 'pl', 'patch') +# but of course this can't cover all version number schemes, and there's +# no way to know what a programmer means without asking him. +# +# The problem is what to do with letters (and other non-numeric +# characters) in a version number. The current implementation does the +# obvious and predictable thing: keep them as strings and compare +# lexically within a tuple comparison. This has the desired effect if +# an appended letter sequence implies something "post-release": +# eg. "0.99" < "0.99pl14" < "1.0", and "5.001" < "5.001m" < "5.002". +# +# However, if letters in a version number imply a pre-release version, +# the "obvious" thing isn't correct. Eg. you would expect that +# "1.5.1" < "1.5.2a2" < "1.5.2", but under the tuple/lexical comparison +# implemented here, this just isn't so. +# +# Two possible solutions come to mind. The first is to tie the +# comparison algorithm to a particular set of semantic rules, as has +# been done in the StrictVersion class above. This works great as long +# as everyone can go along with bondage and discipline. Hopefully a +# (large) subset of Python module programmers will agree that the +# particular flavour of bondage and discipline provided by StrictVersion +# provides enough benefit to be worth using, and will submit their +# version numbering scheme to its domination. The free-thinking +# anarchists in the lot will never give in, though, and something needs +# to be done to accommodate them. +# +# Perhaps a "moderately strict" version class could be implemented that +# lets almost anything slide (syntactically), and makes some heuristic +# assumptions about non-digits in version number strings. This could +# sink into special-case-hell, though; if I was as talented and +# idiosyncratic as Larry Wall, I'd go ahead and implement a class that +# somehow knows that "1.2.1" < "1.2.2a2" < "1.2.2" < "1.2.2pl3", and is +# just as happy dealing with things like "2g6" and "1.13++". I don't +# think I'm smart enough to do it right though. +# +# In any case, I've coded the test suite for this module (see +# ../test/test_version.py) specifically to fail on things like comparing +# "1.2a2" and "1.2". That's not because the *code* is doing anything +# wrong, it's because the simple, obvious design doesn't match my +# complicated, hairy expectations for real-world version numbers. It +# would be a snap to fix the test suite to say, "Yep, LooseVersion does +# the Right Thing" (ie. the code matches the conception). But I'd rather +# have a conception that matches common notions about version numbers. + + +class LooseVersion(Version): + + """Version numbering for anarchists and software realists. + Implements the standard interface for version number classes as + described above. A version number consists of a series of numbers, + separated by either periods or strings of letters. When comparing + version numbers, the numeric components will be compared + numerically, and the alphabetic components lexically. The following + are all valid version numbers, in no particular order: + + 1.5.1 + 1.5.2b2 + 161 + 3.10a + 8.02 + 3.4j + 1996.07.12 + 3.2.pl0 + 3.1.1.6 + 2g6 + 11g + 0.960923 + 2.2beta29 + 1.13++ + 5.5.kw + 2.0b1pl0 + + In fact, there is no such thing as an invalid version number under + this scheme; the rules for comparison are simple and predictable, + but may not always give the results you want (for some definition + of "want"). + """ + + component_re = re.compile(r'(\d+ | [a-z]+ | \.)', re.VERBOSE) + + def parse(self, vstring): + # I've given up on thinking I can reconstruct the version string + # from the parsed tuple -- so I just store the string here for + # use by __str__ + self.vstring = vstring + components = [x for x in self.component_re.split(vstring) if x and x != '.'] + for i, obj in enumerate(components): + try: + components[i] = int(obj) + except ValueError: + pass + + self.version = components + + def __str__(self): + return self.vstring + + def __repr__(self): + return "LooseVersion ('%s')" % str(self) + + def _cmp(self, other): + if isinstance(other, str): + other = LooseVersion(other) + elif not isinstance(other, LooseVersion): + return NotImplemented + + if self.version == other.version: + return 0 + if self.version < other.version: + return -1 + if self.version > other.version: + return 1 + + +# end class LooseVersion diff --git a/libs/common/setuptools/_distutils/versionpredicate.py b/libs/common/setuptools/_distutils/versionpredicate.py new file mode 100644 index 00000000..d6c0c007 --- /dev/null +++ b/libs/common/setuptools/_distutils/versionpredicate.py @@ -0,0 +1,175 @@ +"""Module for parsing and testing package version predicate strings. +""" +import re +from . import version +import operator + + +re_validPackage = re.compile(r"(?i)^\s*([a-z_]\w*(?:\.[a-z_]\w*)*)(.*)", re.ASCII) +# (package) (rest) + +re_paren = re.compile(r"^\s*\((.*)\)\s*$") # (list) inside of parentheses +re_splitComparison = re.compile(r"^\s*(<=|>=|<|>|!=|==)\s*([^\s,]+)\s*$") +# (comp) (version) + + +def splitUp(pred): + """Parse a single version comparison. + + Return (comparison string, StrictVersion) + """ + res = re_splitComparison.match(pred) + if not res: + raise ValueError("bad package restriction syntax: %r" % pred) + comp, verStr = res.groups() + with version.suppress_known_deprecation(): + other = version.StrictVersion(verStr) + return (comp, other) + + +compmap = { + "<": operator.lt, + "<=": operator.le, + "==": operator.eq, + ">": operator.gt, + ">=": operator.ge, + "!=": operator.ne, +} + + +class VersionPredicate: + """Parse and test package version predicates. + + >>> v = VersionPredicate('pyepat.abc (>1.0, <3333.3a1, !=1555.1b3)') + + The `name` attribute provides the full dotted name that is given:: + + >>> v.name + 'pyepat.abc' + + The str() of a `VersionPredicate` provides a normalized + human-readable version of the expression:: + + >>> print(v) + pyepat.abc (> 1.0, < 3333.3a1, != 1555.1b3) + + The `satisfied_by()` method can be used to determine with a given + version number is included in the set described by the version + restrictions:: + + >>> v.satisfied_by('1.1') + True + >>> v.satisfied_by('1.4') + True + >>> v.satisfied_by('1.0') + False + >>> v.satisfied_by('4444.4') + False + >>> v.satisfied_by('1555.1b3') + False + + `VersionPredicate` is flexible in accepting extra whitespace:: + + >>> v = VersionPredicate(' pat( == 0.1 ) ') + >>> v.name + 'pat' + >>> v.satisfied_by('0.1') + True + >>> v.satisfied_by('0.2') + False + + If any version numbers passed in do not conform to the + restrictions of `StrictVersion`, a `ValueError` is raised:: + + >>> v = VersionPredicate('p1.p2.p3.p4(>=1.0, <=1.3a1, !=1.2zb3)') + Traceback (most recent call last): + ... + ValueError: invalid version number '1.2zb3' + + It the module or package name given does not conform to what's + allowed as a legal module or package name, `ValueError` is + raised:: + + >>> v = VersionPredicate('foo-bar') + Traceback (most recent call last): + ... + ValueError: expected parenthesized list: '-bar' + + >>> v = VersionPredicate('foo bar (12.21)') + Traceback (most recent call last): + ... + ValueError: expected parenthesized list: 'bar (12.21)' + + """ + + def __init__(self, versionPredicateStr): + """Parse a version predicate string.""" + # Fields: + # name: package name + # pred: list of (comparison string, StrictVersion) + + versionPredicateStr = versionPredicateStr.strip() + if not versionPredicateStr: + raise ValueError("empty package restriction") + match = re_validPackage.match(versionPredicateStr) + if not match: + raise ValueError("bad package name in %r" % versionPredicateStr) + self.name, paren = match.groups() + paren = paren.strip() + if paren: + match = re_paren.match(paren) + if not match: + raise ValueError("expected parenthesized list: %r" % paren) + str = match.groups()[0] + self.pred = [splitUp(aPred) for aPred in str.split(",")] + if not self.pred: + raise ValueError("empty parenthesized list in %r" % versionPredicateStr) + else: + self.pred = [] + + def __str__(self): + if self.pred: + seq = [cond + " " + str(ver) for cond, ver in self.pred] + return self.name + " (" + ", ".join(seq) + ")" + else: + return self.name + + def satisfied_by(self, version): + """True if version is compatible with all the predicates in self. + The parameter version must be acceptable to the StrictVersion + constructor. It may be either a string or StrictVersion. + """ + for cond, ver in self.pred: + if not compmap[cond](version, ver): + return False + return True + + +_provision_rx = None + + +def split_provision(value): + """Return the name and optional version number of a provision. + + The version number, if given, will be returned as a `StrictVersion` + instance, otherwise it will be `None`. + + >>> split_provision('mypkg') + ('mypkg', None) + >>> split_provision(' mypkg( 1.2 ) ') + ('mypkg', StrictVersion ('1.2')) + """ + global _provision_rx + if _provision_rx is None: + _provision_rx = re.compile( + r"([a-zA-Z_]\w*(?:\.[a-zA-Z_]\w*)*)(?:\s*\(\s*([^)\s]+)\s*\))?$", re.ASCII + ) + value = value.strip() + m = _provision_rx.match(value) + if not m: + raise ValueError("illegal provides specification: %r" % value) + ver = m.group(2) or None + if ver: + with version.suppress_known_deprecation(): + ver = version.StrictVersion(ver) + return m.group(1), ver diff --git a/libs/common/setuptools/_entry_points.py b/libs/common/setuptools/_entry_points.py new file mode 100644 index 00000000..a2346342 --- /dev/null +++ b/libs/common/setuptools/_entry_points.py @@ -0,0 +1,94 @@ +import functools +import operator +import itertools + +from .errors import OptionError +from .extern.jaraco.text import yield_lines +from .extern.jaraco.functools import pass_none +from ._importlib import metadata +from ._itertools import ensure_unique +from .extern.more_itertools import consume + + +def ensure_valid(ep): + """ + Exercise one of the dynamic properties to trigger + the pattern match. + """ + try: + ep.extras + except AttributeError as ex: + msg = ( + f"Problems to parse {ep}.\nPlease ensure entry-point follows the spec: " + "https://packaging.python.org/en/latest/specifications/entry-points/" + ) + raise OptionError(msg) from ex + + +def load_group(value, group): + """ + Given a value of an entry point or series of entry points, + return each as an EntryPoint. + """ + # normalize to a single sequence of lines + lines = yield_lines(value) + text = f'[{group}]\n' + '\n'.join(lines) + return metadata.EntryPoints._from_text(text) + + +def by_group_and_name(ep): + return ep.group, ep.name + + +def validate(eps: metadata.EntryPoints): + """ + Ensure entry points are unique by group and name and validate each. + """ + consume(map(ensure_valid, ensure_unique(eps, key=by_group_and_name))) + return eps + + +@functools.singledispatch +def load(eps): + """ + Given a Distribution.entry_points, produce EntryPoints. + """ + groups = itertools.chain.from_iterable( + load_group(value, group) + for group, value in eps.items()) + return validate(metadata.EntryPoints(groups)) + + +@load.register(str) +def _(eps): + r""" + >>> ep, = load('[console_scripts]\nfoo=bar') + >>> ep.group + 'console_scripts' + >>> ep.name + 'foo' + >>> ep.value + 'bar' + """ + return validate(metadata.EntryPoints(metadata.EntryPoints._from_text(eps))) + + +load.register(type(None), lambda x: x) + + +@pass_none +def render(eps: metadata.EntryPoints): + by_group = operator.attrgetter('group') + groups = itertools.groupby(sorted(eps, key=by_group), by_group) + + return '\n'.join( + f'[{group}]\n{render_items(items)}\n' + for group, items in groups + ) + + +def render_items(eps): + return '\n'.join( + f'{ep.name} = {ep.value}' + for ep in sorted(eps) + ) diff --git a/libs/common/setuptools/_imp.py b/libs/common/setuptools/_imp.py index a3cce9b2..47efd792 100644 --- a/libs/common/setuptools/_imp.py +++ b/libs/common/setuptools/_imp.py @@ -17,9 +17,18 @@ C_BUILTIN = 6 PY_FROZEN = 7 +def find_spec(module, paths): + finder = ( + importlib.machinery.PathFinder().find_spec + if isinstance(paths, list) else + importlib.util.find_spec + ) + return finder(module, paths) + + def find_module(module, paths=None): """Just like 'imp.find_module()', but with package support""" - spec = importlib.util.find_spec(module, paths) + spec = find_spec(module, paths) if spec is None: raise ImportError("Can't find %s" % module) if not spec.has_location and hasattr(spec, 'submodule_search_locations'): @@ -32,12 +41,12 @@ def find_module(module, paths=None): spec.loader, importlib.machinery.FrozenImporter): kind = PY_FROZEN path = None # imp compabilty - suffix = mode = '' # imp compability + suffix = mode = '' # imp compatibility elif spec.origin == 'built-in' or static and issubclass( spec.loader, importlib.machinery.BuiltinImporter): kind = C_BUILTIN path = None # imp compabilty - suffix = mode = '' # imp compability + suffix = mode = '' # imp compatibility elif spec.has_location: path = spec.origin suffix = os.path.splitext(path)[1] @@ -60,14 +69,14 @@ def find_module(module, paths=None): def get_frozen_object(module, paths=None): - spec = importlib.util.find_spec(module, paths) + spec = find_spec(module, paths) if not spec: raise ImportError("Can't find %s" % module) return spec.loader.get_code(module) def get_module(module, paths, info): - spec = importlib.util.find_spec(module, paths) + spec = find_spec(module, paths) if not spec: raise ImportError("Can't find %s" % module) return module_from_spec(spec) diff --git a/libs/common/setuptools/_importlib.py b/libs/common/setuptools/_importlib.py new file mode 100644 index 00000000..819bf5d3 --- /dev/null +++ b/libs/common/setuptools/_importlib.py @@ -0,0 +1,47 @@ +import sys + + +def disable_importlib_metadata_finder(metadata): + """ + Ensure importlib_metadata doesn't provide older, incompatible + Distributions. + + Workaround for #3102. + """ + try: + import importlib_metadata + except ImportError: + return + except AttributeError: + import warnings + + msg = ( + "`importlib-metadata` version is incompatible with `setuptools`.\n" + "This problem is likely to be solved by installing an updated version of " + "`importlib-metadata`." + ) + warnings.warn(msg) # Ensure a descriptive message is shown. + raise # This exception can be suppressed by _distutils_hack + + if importlib_metadata is metadata: + return + to_remove = [ + ob + for ob in sys.meta_path + if isinstance(ob, importlib_metadata.MetadataPathFinder) + ] + for item in to_remove: + sys.meta_path.remove(item) + + +if sys.version_info < (3, 10): + from setuptools.extern import importlib_metadata as metadata + disable_importlib_metadata_finder(metadata) +else: + import importlib.metadata as metadata # noqa: F401 + + +if sys.version_info < (3, 9): + from setuptools.extern import importlib_resources as resources +else: + import importlib.resources as resources # noqa: F401 diff --git a/libs/common/setuptools/_itertools.py b/libs/common/setuptools/_itertools.py new file mode 100644 index 00000000..b8bf6d21 --- /dev/null +++ b/libs/common/setuptools/_itertools.py @@ -0,0 +1,23 @@ +from setuptools.extern.more_itertools import consume # noqa: F401 + + +# copied from jaraco.itertools 6.1 +def ensure_unique(iterable, key=lambda x: x): + """ + Wrap an iterable to raise a ValueError if non-unique values are encountered. + + >>> list(ensure_unique('abc')) + ['a', 'b', 'c'] + >>> consume(ensure_unique('abca')) + Traceback (most recent call last): + ... + ValueError: Duplicate element 'a' encountered. + """ + seen = set() + seen_add = seen.add + for element in iterable: + k = key(element) + if k in seen: + raise ValueError(f"Duplicate element {element!r} encountered.") + seen_add(k) + yield element diff --git a/libs/common/setuptools/_path.py b/libs/common/setuptools/_path.py new file mode 100644 index 00000000..3767523b --- /dev/null +++ b/libs/common/setuptools/_path.py @@ -0,0 +1,29 @@ +import os +from typing import Union + +_Path = Union[str, os.PathLike] + + +def ensure_directory(path): + """Ensure that the parent directory of `path` exists""" + dirname = os.path.dirname(path) + os.makedirs(dirname, exist_ok=True) + + +def same_path(p1: _Path, p2: _Path) -> bool: + """Differs from os.path.samefile because it does not require paths to exist. + Purely string based (no comparison between i-nodes). + >>> same_path("a/b", "./a/b") + True + >>> same_path("a/b", "a/./b") + True + >>> same_path("a/b", "././a/b") + True + >>> same_path("a/b", "./a/b/c/..") + True + >>> same_path("a/b", "../a/b/c") + False + >>> same_path("a", "a/b") + False + """ + return os.path.normpath(p1) == os.path.normpath(p2) diff --git a/libs/common/setuptools/_reqs.py b/libs/common/setuptools/_reqs.py new file mode 100644 index 00000000..ca724174 --- /dev/null +++ b/libs/common/setuptools/_reqs.py @@ -0,0 +1,19 @@ +import setuptools.extern.jaraco.text as text + +from pkg_resources import Requirement + + +def parse_strings(strs): + """ + Yield requirement strings for each specification in `strs`. + + `strs` must be a string, or a (possibly-nested) iterable thereof. + """ + return text.join_continuation(map(text.drop_comment, text.yield_lines(strs))) + + +def parse(strs): + """ + Deprecated drop-in replacement for pkg_resources.parse_requirements. + """ + return map(Requirement, parse_strings(strs)) diff --git a/libs/common/setuptools/_vendor/importlib_metadata/__init__.py b/libs/common/setuptools/_vendor/importlib_metadata/__init__.py new file mode 100644 index 00000000..292e0c6d --- /dev/null +++ b/libs/common/setuptools/_vendor/importlib_metadata/__init__.py @@ -0,0 +1,1047 @@ +import os +import re +import abc +import csv +import sys +from .. import zipp +import email +import pathlib +import operator +import textwrap +import warnings +import functools +import itertools +import posixpath +import collections + +from . import _adapters, _meta +from ._collections import FreezableDefaultDict, Pair +from ._compat import ( + NullFinder, + install, + pypy_partial, +) +from ._functools import method_cache, pass_none +from ._itertools import always_iterable, unique_everseen +from ._meta import PackageMetadata, SimplePath + +from contextlib import suppress +from importlib import import_module +from importlib.abc import MetaPathFinder +from itertools import starmap +from typing import List, Mapping, Optional, Union + + +__all__ = [ + 'Distribution', + 'DistributionFinder', + 'PackageMetadata', + 'PackageNotFoundError', + 'distribution', + 'distributions', + 'entry_points', + 'files', + 'metadata', + 'packages_distributions', + 'requires', + 'version', +] + + +class PackageNotFoundError(ModuleNotFoundError): + """The package was not found.""" + + def __str__(self): + return f"No package metadata was found for {self.name}" + + @property + def name(self): + (name,) = self.args + return name + + +class Sectioned: + """ + A simple entry point config parser for performance + + >>> for item in Sectioned.read(Sectioned._sample): + ... print(item) + Pair(name='sec1', value='# comments ignored') + Pair(name='sec1', value='a = 1') + Pair(name='sec1', value='b = 2') + Pair(name='sec2', value='a = 2') + + >>> res = Sectioned.section_pairs(Sectioned._sample) + >>> item = next(res) + >>> item.name + 'sec1' + >>> item.value + Pair(name='a', value='1') + >>> item = next(res) + >>> item.value + Pair(name='b', value='2') + >>> item = next(res) + >>> item.name + 'sec2' + >>> item.value + Pair(name='a', value='2') + >>> list(res) + [] + """ + + _sample = textwrap.dedent( + """ + [sec1] + # comments ignored + a = 1 + b = 2 + + [sec2] + a = 2 + """ + ).lstrip() + + @classmethod + def section_pairs(cls, text): + return ( + section._replace(value=Pair.parse(section.value)) + for section in cls.read(text, filter_=cls.valid) + if section.name is not None + ) + + @staticmethod + def read(text, filter_=None): + lines = filter(filter_, map(str.strip, text.splitlines())) + name = None + for value in lines: + section_match = value.startswith('[') and value.endswith(']') + if section_match: + name = value.strip('[]') + continue + yield Pair(name, value) + + @staticmethod + def valid(line): + return line and not line.startswith('#') + + +class DeprecatedTuple: + """ + Provide subscript item access for backward compatibility. + + >>> recwarn = getfixture('recwarn') + >>> ep = EntryPoint(name='name', value='value', group='group') + >>> ep[:] + ('name', 'value', 'group') + >>> ep[0] + 'name' + >>> len(recwarn) + 1 + """ + + _warn = functools.partial( + warnings.warn, + "EntryPoint tuple interface is deprecated. Access members by name.", + DeprecationWarning, + stacklevel=pypy_partial(2), + ) + + def __getitem__(self, item): + self._warn() + return self._key()[item] + + +class EntryPoint(DeprecatedTuple): + """An entry point as defined by Python packaging conventions. + + See `the packaging docs on entry points + `_ + for more information. + """ + + pattern = re.compile( + r'(?P[\w.]+)\s*' + r'(:\s*(?P[\w.]+)\s*)?' + r'((?P\[.*\])\s*)?$' + ) + """ + A regular expression describing the syntax for an entry point, + which might look like: + + - module + - package.module + - package.module:attribute + - package.module:object.attribute + - package.module:attr [extra1, extra2] + + Other combinations are possible as well. + + The expression is lenient about whitespace around the ':', + following the attr, and following any extras. + """ + + dist: Optional['Distribution'] = None + + def __init__(self, name, value, group): + vars(self).update(name=name, value=value, group=group) + + def load(self): + """Load the entry point from its definition. If only a module + is indicated by the value, return that module. Otherwise, + return the named object. + """ + match = self.pattern.match(self.value) + module = import_module(match.group('module')) + attrs = filter(None, (match.group('attr') or '').split('.')) + return functools.reduce(getattr, attrs, module) + + @property + def module(self): + match = self.pattern.match(self.value) + return match.group('module') + + @property + def attr(self): + match = self.pattern.match(self.value) + return match.group('attr') + + @property + def extras(self): + match = self.pattern.match(self.value) + return list(re.finditer(r'\w+', match.group('extras') or '')) + + def _for(self, dist): + vars(self).update(dist=dist) + return self + + def __iter__(self): + """ + Supply iter so one may construct dicts of EntryPoints by name. + """ + msg = ( + "Construction of dict of EntryPoints is deprecated in " + "favor of EntryPoints." + ) + warnings.warn(msg, DeprecationWarning) + return iter((self.name, self)) + + def matches(self, **params): + attrs = (getattr(self, param) for param in params) + return all(map(operator.eq, params.values(), attrs)) + + def _key(self): + return self.name, self.value, self.group + + def __lt__(self, other): + return self._key() < other._key() + + def __eq__(self, other): + return self._key() == other._key() + + def __setattr__(self, name, value): + raise AttributeError("EntryPoint objects are immutable.") + + def __repr__(self): + return ( + f'EntryPoint(name={self.name!r}, value={self.value!r}, ' + f'group={self.group!r})' + ) + + def __hash__(self): + return hash(self._key()) + + +class DeprecatedList(list): + """ + Allow an otherwise immutable object to implement mutability + for compatibility. + + >>> recwarn = getfixture('recwarn') + >>> dl = DeprecatedList(range(3)) + >>> dl[0] = 1 + >>> dl.append(3) + >>> del dl[3] + >>> dl.reverse() + >>> dl.sort() + >>> dl.extend([4]) + >>> dl.pop(-1) + 4 + >>> dl.remove(1) + >>> dl += [5] + >>> dl + [6] + [1, 2, 5, 6] + >>> dl + (6,) + [1, 2, 5, 6] + >>> dl.insert(0, 0) + >>> dl + [0, 1, 2, 5] + >>> dl == [0, 1, 2, 5] + True + >>> dl == (0, 1, 2, 5) + True + >>> len(recwarn) + 1 + """ + + __slots__ = () + + _warn = functools.partial( + warnings.warn, + "EntryPoints list interface is deprecated. Cast to list if needed.", + DeprecationWarning, + stacklevel=pypy_partial(2), + ) + + def _wrap_deprecated_method(method_name: str): # type: ignore + def wrapped(self, *args, **kwargs): + self._warn() + return getattr(super(), method_name)(*args, **kwargs) + + return method_name, wrapped + + locals().update( + map( + _wrap_deprecated_method, + '__setitem__ __delitem__ append reverse extend pop remove ' + '__iadd__ insert sort'.split(), + ) + ) + + def __add__(self, other): + if not isinstance(other, tuple): + self._warn() + other = tuple(other) + return self.__class__(tuple(self) + other) + + def __eq__(self, other): + if not isinstance(other, tuple): + self._warn() + other = tuple(other) + + return tuple(self).__eq__(other) + + +class EntryPoints(DeprecatedList): + """ + An immutable collection of selectable EntryPoint objects. + """ + + __slots__ = () + + def __getitem__(self, name): # -> EntryPoint: + """ + Get the EntryPoint in self matching name. + """ + if isinstance(name, int): + warnings.warn( + "Accessing entry points by index is deprecated. " + "Cast to tuple if needed.", + DeprecationWarning, + stacklevel=2, + ) + return super().__getitem__(name) + try: + return next(iter(self.select(name=name))) + except StopIteration: + raise KeyError(name) + + def select(self, **params): + """ + Select entry points from self that match the + given parameters (typically group and/or name). + """ + return EntryPoints(ep for ep in self if ep.matches(**params)) + + @property + def names(self): + """ + Return the set of all names of all entry points. + """ + return {ep.name for ep in self} + + @property + def groups(self): + """ + Return the set of all groups of all entry points. + + For coverage while SelectableGroups is present. + >>> EntryPoints().groups + set() + """ + return {ep.group for ep in self} + + @classmethod + def _from_text_for(cls, text, dist): + return cls(ep._for(dist) for ep in cls._from_text(text)) + + @staticmethod + def _from_text(text): + return ( + EntryPoint(name=item.value.name, value=item.value.value, group=item.name) + for item in Sectioned.section_pairs(text or '') + ) + + +class Deprecated: + """ + Compatibility add-in for mapping to indicate that + mapping behavior is deprecated. + + >>> recwarn = getfixture('recwarn') + >>> class DeprecatedDict(Deprecated, dict): pass + >>> dd = DeprecatedDict(foo='bar') + >>> dd.get('baz', None) + >>> dd['foo'] + 'bar' + >>> list(dd) + ['foo'] + >>> list(dd.keys()) + ['foo'] + >>> 'foo' in dd + True + >>> list(dd.values()) + ['bar'] + >>> len(recwarn) + 1 + """ + + _warn = functools.partial( + warnings.warn, + "SelectableGroups dict interface is deprecated. Use select.", + DeprecationWarning, + stacklevel=pypy_partial(2), + ) + + def __getitem__(self, name): + self._warn() + return super().__getitem__(name) + + def get(self, name, default=None): + self._warn() + return super().get(name, default) + + def __iter__(self): + self._warn() + return super().__iter__() + + def __contains__(self, *args): + self._warn() + return super().__contains__(*args) + + def keys(self): + self._warn() + return super().keys() + + def values(self): + self._warn() + return super().values() + + +class SelectableGroups(Deprecated, dict): + """ + A backward- and forward-compatible result from + entry_points that fully implements the dict interface. + """ + + @classmethod + def load(cls, eps): + by_group = operator.attrgetter('group') + ordered = sorted(eps, key=by_group) + grouped = itertools.groupby(ordered, by_group) + return cls((group, EntryPoints(eps)) for group, eps in grouped) + + @property + def _all(self): + """ + Reconstruct a list of all entrypoints from the groups. + """ + groups = super(Deprecated, self).values() + return EntryPoints(itertools.chain.from_iterable(groups)) + + @property + def groups(self): + return self._all.groups + + @property + def names(self): + """ + for coverage: + >>> SelectableGroups().names + set() + """ + return self._all.names + + def select(self, **params): + if not params: + return self + return self._all.select(**params) + + +class PackagePath(pathlib.PurePosixPath): + """A reference to a path in a package""" + + def read_text(self, encoding='utf-8'): + with self.locate().open(encoding=encoding) as stream: + return stream.read() + + def read_binary(self): + with self.locate().open('rb') as stream: + return stream.read() + + def locate(self): + """Return a path-like object for this path""" + return self.dist.locate_file(self) + + +class FileHash: + def __init__(self, spec): + self.mode, _, self.value = spec.partition('=') + + def __repr__(self): + return f'' + + +class Distribution: + """A Python distribution package.""" + + @abc.abstractmethod + def read_text(self, filename): + """Attempt to load metadata file given by the name. + + :param filename: The name of the file in the distribution info. + :return: The text if found, otherwise None. + """ + + @abc.abstractmethod + def locate_file(self, path): + """ + Given a path to a file in this distribution, return a path + to it. + """ + + @classmethod + def from_name(cls, name): + """Return the Distribution for the given package name. + + :param name: The name of the distribution package to search for. + :return: The Distribution instance (or subclass thereof) for the named + package, if found. + :raises PackageNotFoundError: When the named package's distribution + metadata cannot be found. + """ + for resolver in cls._discover_resolvers(): + dists = resolver(DistributionFinder.Context(name=name)) + dist = next(iter(dists), None) + if dist is not None: + return dist + else: + raise PackageNotFoundError(name) + + @classmethod + def discover(cls, **kwargs): + """Return an iterable of Distribution objects for all packages. + + Pass a ``context`` or pass keyword arguments for constructing + a context. + + :context: A ``DistributionFinder.Context`` object. + :return: Iterable of Distribution objects for all packages. + """ + context = kwargs.pop('context', None) + if context and kwargs: + raise ValueError("cannot accept context and kwargs") + context = context or DistributionFinder.Context(**kwargs) + return itertools.chain.from_iterable( + resolver(context) for resolver in cls._discover_resolvers() + ) + + @staticmethod + def at(path): + """Return a Distribution for the indicated metadata path + + :param path: a string or path-like object + :return: a concrete Distribution instance for the path + """ + return PathDistribution(pathlib.Path(path)) + + @staticmethod + def _discover_resolvers(): + """Search the meta_path for resolvers.""" + declared = ( + getattr(finder, 'find_distributions', None) for finder in sys.meta_path + ) + return filter(None, declared) + + @property + def metadata(self) -> _meta.PackageMetadata: + """Return the parsed metadata for this Distribution. + + The returned object will have keys that name the various bits of + metadata. See PEP 566 for details. + """ + text = ( + self.read_text('METADATA') + or self.read_text('PKG-INFO') + # This last clause is here to support old egg-info files. Its + # effect is to just end up using the PathDistribution's self._path + # (which points to the egg-info file) attribute unchanged. + or self.read_text('') + ) + return _adapters.Message(email.message_from_string(text)) + + @property + def name(self): + """Return the 'Name' metadata for the distribution package.""" + return self.metadata['Name'] + + @property + def _normalized_name(self): + """Return a normalized version of the name.""" + return Prepared.normalize(self.name) + + @property + def version(self): + """Return the 'Version' metadata for the distribution package.""" + return self.metadata['Version'] + + @property + def entry_points(self): + return EntryPoints._from_text_for(self.read_text('entry_points.txt'), self) + + @property + def files(self): + """Files in this distribution. + + :return: List of PackagePath for this distribution or None + + Result is `None` if the metadata file that enumerates files + (i.e. RECORD for dist-info or SOURCES.txt for egg-info) is + missing. + Result may be empty if the metadata exists but is empty. + """ + + def make_file(name, hash=None, size_str=None): + result = PackagePath(name) + result.hash = FileHash(hash) if hash else None + result.size = int(size_str) if size_str else None + result.dist = self + return result + + @pass_none + def make_files(lines): + return list(starmap(make_file, csv.reader(lines))) + + return make_files(self._read_files_distinfo() or self._read_files_egginfo()) + + def _read_files_distinfo(self): + """ + Read the lines of RECORD + """ + text = self.read_text('RECORD') + return text and text.splitlines() + + def _read_files_egginfo(self): + """ + SOURCES.txt might contain literal commas, so wrap each line + in quotes. + """ + text = self.read_text('SOURCES.txt') + return text and map('"{}"'.format, text.splitlines()) + + @property + def requires(self): + """Generated requirements specified for this Distribution""" + reqs = self._read_dist_info_reqs() or self._read_egg_info_reqs() + return reqs and list(reqs) + + def _read_dist_info_reqs(self): + return self.metadata.get_all('Requires-Dist') + + def _read_egg_info_reqs(self): + source = self.read_text('requires.txt') + return pass_none(self._deps_from_requires_text)(source) + + @classmethod + def _deps_from_requires_text(cls, source): + return cls._convert_egg_info_reqs_to_simple_reqs(Sectioned.read(source)) + + @staticmethod + def _convert_egg_info_reqs_to_simple_reqs(sections): + """ + Historically, setuptools would solicit and store 'extra' + requirements, including those with environment markers, + in separate sections. More modern tools expect each + dependency to be defined separately, with any relevant + extras and environment markers attached directly to that + requirement. This method converts the former to the + latter. See _test_deps_from_requires_text for an example. + """ + + def make_condition(name): + return name and f'extra == "{name}"' + + def quoted_marker(section): + section = section or '' + extra, sep, markers = section.partition(':') + if extra and markers: + markers = f'({markers})' + conditions = list(filter(None, [markers, make_condition(extra)])) + return '; ' + ' and '.join(conditions) if conditions else '' + + def url_req_space(req): + """ + PEP 508 requires a space between the url_spec and the quoted_marker. + Ref python/importlib_metadata#357. + """ + # '@' is uniquely indicative of a url_req. + return ' ' * ('@' in req) + + for section in sections: + space = url_req_space(section.value) + yield section.value + space + quoted_marker(section.name) + + +class DistributionFinder(MetaPathFinder): + """ + A MetaPathFinder capable of discovering installed distributions. + """ + + class Context: + """ + Keyword arguments presented by the caller to + ``distributions()`` or ``Distribution.discover()`` + to narrow the scope of a search for distributions + in all DistributionFinders. + + Each DistributionFinder may expect any parameters + and should attempt to honor the canonical + parameters defined below when appropriate. + """ + + name = None + """ + Specific name for which a distribution finder should match. + A name of ``None`` matches all distributions. + """ + + def __init__(self, **kwargs): + vars(self).update(kwargs) + + @property + def path(self): + """ + The sequence of directory path that a distribution finder + should search. + + Typically refers to Python installed package paths such as + "site-packages" directories and defaults to ``sys.path``. + """ + return vars(self).get('path', sys.path) + + @abc.abstractmethod + def find_distributions(self, context=Context()): + """ + Find distributions. + + Return an iterable of all Distribution instances capable of + loading the metadata for packages matching the ``context``, + a DistributionFinder.Context instance. + """ + + +class FastPath: + """ + Micro-optimized class for searching a path for + children. + + >>> FastPath('').children() + ['...'] + """ + + @functools.lru_cache() # type: ignore + def __new__(cls, root): + return super().__new__(cls) + + def __init__(self, root): + self.root = str(root) + + def joinpath(self, child): + return pathlib.Path(self.root, child) + + def children(self): + with suppress(Exception): + return os.listdir(self.root or '.') + with suppress(Exception): + return self.zip_children() + return [] + + def zip_children(self): + zip_path = zipp.Path(self.root) + names = zip_path.root.namelist() + self.joinpath = zip_path.joinpath + + return dict.fromkeys(child.split(posixpath.sep, 1)[0] for child in names) + + def search(self, name): + return self.lookup(self.mtime).search(name) + + @property + def mtime(self): + with suppress(OSError): + return os.stat(self.root).st_mtime + self.lookup.cache_clear() + + @method_cache + def lookup(self, mtime): + return Lookup(self) + + +class Lookup: + def __init__(self, path: FastPath): + base = os.path.basename(path.root).lower() + base_is_egg = base.endswith(".egg") + self.infos = FreezableDefaultDict(list) + self.eggs = FreezableDefaultDict(list) + + for child in path.children(): + low = child.lower() + if low.endswith((".dist-info", ".egg-info")): + # rpartition is faster than splitext and suitable for this purpose. + name = low.rpartition(".")[0].partition("-")[0] + normalized = Prepared.normalize(name) + self.infos[normalized].append(path.joinpath(child)) + elif base_is_egg and low == "egg-info": + name = base.rpartition(".")[0].partition("-")[0] + legacy_normalized = Prepared.legacy_normalize(name) + self.eggs[legacy_normalized].append(path.joinpath(child)) + + self.infos.freeze() + self.eggs.freeze() + + def search(self, prepared): + infos = ( + self.infos[prepared.normalized] + if prepared + else itertools.chain.from_iterable(self.infos.values()) + ) + eggs = ( + self.eggs[prepared.legacy_normalized] + if prepared + else itertools.chain.from_iterable(self.eggs.values()) + ) + return itertools.chain(infos, eggs) + + +class Prepared: + """ + A prepared search for metadata on a possibly-named package. + """ + + normalized = None + legacy_normalized = None + + def __init__(self, name): + self.name = name + if name is None: + return + self.normalized = self.normalize(name) + self.legacy_normalized = self.legacy_normalize(name) + + @staticmethod + def normalize(name): + """ + PEP 503 normalization plus dashes as underscores. + """ + return re.sub(r"[-_.]+", "-", name).lower().replace('-', '_') + + @staticmethod + def legacy_normalize(name): + """ + Normalize the package name as found in the convention in + older packaging tools versions and specs. + """ + return name.lower().replace('-', '_') + + def __bool__(self): + return bool(self.name) + + +@install +class MetadataPathFinder(NullFinder, DistributionFinder): + """A degenerate finder for distribution packages on the file system. + + This finder supplies only a find_distributions() method for versions + of Python that do not have a PathFinder find_distributions(). + """ + + def find_distributions(self, context=DistributionFinder.Context()): + """ + Find distributions. + + Return an iterable of all Distribution instances capable of + loading the metadata for packages matching ``context.name`` + (or all names if ``None`` indicated) along the paths in the list + of directories ``context.path``. + """ + found = self._search_paths(context.name, context.path) + return map(PathDistribution, found) + + @classmethod + def _search_paths(cls, name, paths): + """Find metadata directories in paths heuristically.""" + prepared = Prepared(name) + return itertools.chain.from_iterable( + path.search(prepared) for path in map(FastPath, paths) + ) + + def invalidate_caches(cls): + FastPath.__new__.cache_clear() + + +class PathDistribution(Distribution): + def __init__(self, path: SimplePath): + """Construct a distribution. + + :param path: SimplePath indicating the metadata directory. + """ + self._path = path + + def read_text(self, filename): + with suppress( + FileNotFoundError, + IsADirectoryError, + KeyError, + NotADirectoryError, + PermissionError, + ): + return self._path.joinpath(filename).read_text(encoding='utf-8') + + read_text.__doc__ = Distribution.read_text.__doc__ + + def locate_file(self, path): + return self._path.parent / path + + @property + def _normalized_name(self): + """ + Performance optimization: where possible, resolve the + normalized name from the file system path. + """ + stem = os.path.basename(str(self._path)) + return self._name_from_stem(stem) or super()._normalized_name + + def _name_from_stem(self, stem): + name, ext = os.path.splitext(stem) + if ext not in ('.dist-info', '.egg-info'): + return + name, sep, rest = stem.partition('-') + return name + + +def distribution(distribution_name): + """Get the ``Distribution`` instance for the named package. + + :param distribution_name: The name of the distribution package as a string. + :return: A ``Distribution`` instance (or subclass thereof). + """ + return Distribution.from_name(distribution_name) + + +def distributions(**kwargs): + """Get all ``Distribution`` instances in the current environment. + + :return: An iterable of ``Distribution`` instances. + """ + return Distribution.discover(**kwargs) + + +def metadata(distribution_name) -> _meta.PackageMetadata: + """Get the metadata for the named package. + + :param distribution_name: The name of the distribution package to query. + :return: A PackageMetadata containing the parsed metadata. + """ + return Distribution.from_name(distribution_name).metadata + + +def version(distribution_name): + """Get the version string for the named package. + + :param distribution_name: The name of the distribution package to query. + :return: The version string for the package as defined in the package's + "Version" metadata key. + """ + return distribution(distribution_name).version + + +def entry_points(**params) -> Union[EntryPoints, SelectableGroups]: + """Return EntryPoint objects for all installed packages. + + Pass selection parameters (group or name) to filter the + result to entry points matching those properties (see + EntryPoints.select()). + + For compatibility, returns ``SelectableGroups`` object unless + selection parameters are supplied. In the future, this function + will return ``EntryPoints`` instead of ``SelectableGroups`` + even when no selection parameters are supplied. + + For maximum future compatibility, pass selection parameters + or invoke ``.select`` with parameters on the result. + + :return: EntryPoints or SelectableGroups for all installed packages. + """ + norm_name = operator.attrgetter('_normalized_name') + unique = functools.partial(unique_everseen, key=norm_name) + eps = itertools.chain.from_iterable( + dist.entry_points for dist in unique(distributions()) + ) + return SelectableGroups.load(eps).select(**params) + + +def files(distribution_name): + """Return a list of files for the named package. + + :param distribution_name: The name of the distribution package to query. + :return: List of files composing the distribution. + """ + return distribution(distribution_name).files + + +def requires(distribution_name): + """ + Return a list of requirements for the named package. + + :return: An iterator of requirements, suitable for + packaging.requirement.Requirement. + """ + return distribution(distribution_name).requires + + +def packages_distributions() -> Mapping[str, List[str]]: + """ + Return a mapping of top-level packages to their + distributions. + + >>> import collections.abc + >>> pkgs = packages_distributions() + >>> all(isinstance(dist, collections.abc.Sequence) for dist in pkgs.values()) + True + """ + pkg_to_dist = collections.defaultdict(list) + for dist in distributions(): + for pkg in _top_level_declared(dist) or _top_level_inferred(dist): + pkg_to_dist[pkg].append(dist.metadata['Name']) + return dict(pkg_to_dist) + + +def _top_level_declared(dist): + return (dist.read_text('top_level.txt') or '').split() + + +def _top_level_inferred(dist): + return { + f.parts[0] if len(f.parts) > 1 else f.with_suffix('').name + for f in always_iterable(dist.files) + if f.suffix == ".py" + } diff --git a/libs/common/setuptools/_vendor/importlib_metadata/_adapters.py b/libs/common/setuptools/_vendor/importlib_metadata/_adapters.py new file mode 100644 index 00000000..aa460d3e --- /dev/null +++ b/libs/common/setuptools/_vendor/importlib_metadata/_adapters.py @@ -0,0 +1,68 @@ +import re +import textwrap +import email.message + +from ._text import FoldedCase + + +class Message(email.message.Message): + multiple_use_keys = set( + map( + FoldedCase, + [ + 'Classifier', + 'Obsoletes-Dist', + 'Platform', + 'Project-URL', + 'Provides-Dist', + 'Provides-Extra', + 'Requires-Dist', + 'Requires-External', + 'Supported-Platform', + 'Dynamic', + ], + ) + ) + """ + Keys that may be indicated multiple times per PEP 566. + """ + + def __new__(cls, orig: email.message.Message): + res = super().__new__(cls) + vars(res).update(vars(orig)) + return res + + def __init__(self, *args, **kwargs): + self._headers = self._repair_headers() + + # suppress spurious error from mypy + def __iter__(self): + return super().__iter__() + + def _repair_headers(self): + def redent(value): + "Correct for RFC822 indentation" + if not value or '\n' not in value: + return value + return textwrap.dedent(' ' * 8 + value) + + headers = [(key, redent(value)) for key, value in vars(self)['_headers']] + if self._payload: + headers.append(('Description', self.get_payload())) + return headers + + @property + def json(self): + """ + Convert PackageMetadata to a JSON-compatible format + per PEP 0566. + """ + + def transform(key): + value = self.get_all(key) if key in self.multiple_use_keys else self[key] + if key == 'Keywords': + value = re.split(r'\s+', value) + tk = key.lower().replace('-', '_') + return tk, value + + return dict(map(transform, map(FoldedCase, self))) diff --git a/libs/common/setuptools/_vendor/importlib_metadata/_collections.py b/libs/common/setuptools/_vendor/importlib_metadata/_collections.py new file mode 100644 index 00000000..cf0954e1 --- /dev/null +++ b/libs/common/setuptools/_vendor/importlib_metadata/_collections.py @@ -0,0 +1,30 @@ +import collections + + +# from jaraco.collections 3.3 +class FreezableDefaultDict(collections.defaultdict): + """ + Often it is desirable to prevent the mutation of + a default dict after its initial construction, such + as to prevent mutation during iteration. + + >>> dd = FreezableDefaultDict(list) + >>> dd[0].append('1') + >>> dd.freeze() + >>> dd[1] + [] + >>> len(dd) + 1 + """ + + def __missing__(self, key): + return getattr(self, '_frozen', super().__missing__)(key) + + def freeze(self): + self._frozen = lambda key: self.default_factory() + + +class Pair(collections.namedtuple('Pair', 'name value')): + @classmethod + def parse(cls, text): + return cls(*map(str.strip, text.split("=", 1))) diff --git a/libs/common/setuptools/_vendor/importlib_metadata/_compat.py b/libs/common/setuptools/_vendor/importlib_metadata/_compat.py new file mode 100644 index 00000000..ef3136f8 --- /dev/null +++ b/libs/common/setuptools/_vendor/importlib_metadata/_compat.py @@ -0,0 +1,71 @@ +import sys +import platform + + +__all__ = ['install', 'NullFinder', 'Protocol'] + + +try: + from typing import Protocol +except ImportError: # pragma: no cover + from ..typing_extensions import Protocol # type: ignore + + +def install(cls): + """ + Class decorator for installation on sys.meta_path. + + Adds the backport DistributionFinder to sys.meta_path and + attempts to disable the finder functionality of the stdlib + DistributionFinder. + """ + sys.meta_path.append(cls()) + disable_stdlib_finder() + return cls + + +def disable_stdlib_finder(): + """ + Give the backport primacy for discovering path-based distributions + by monkey-patching the stdlib O_O. + + See #91 for more background for rationale on this sketchy + behavior. + """ + + def matches(finder): + return getattr( + finder, '__module__', None + ) == '_frozen_importlib_external' and hasattr(finder, 'find_distributions') + + for finder in filter(matches, sys.meta_path): # pragma: nocover + del finder.find_distributions + + +class NullFinder: + """ + A "Finder" (aka "MetaClassFinder") that never finds any modules, + but may find distributions. + """ + + @staticmethod + def find_spec(*args, **kwargs): + return None + + # In Python 2, the import system requires finders + # to have a find_module() method, but this usage + # is deprecated in Python 3 in favor of find_spec(). + # For the purposes of this finder (i.e. being present + # on sys.meta_path but having no other import + # system functionality), the two methods are identical. + find_module = find_spec + + +def pypy_partial(val): + """ + Adjust for variable stacklevel on partial under PyPy. + + Workaround for #327. + """ + is_pypy = platform.python_implementation() == 'PyPy' + return val + is_pypy diff --git a/libs/common/setuptools/_vendor/importlib_metadata/_functools.py b/libs/common/setuptools/_vendor/importlib_metadata/_functools.py new file mode 100644 index 00000000..71f66bd0 --- /dev/null +++ b/libs/common/setuptools/_vendor/importlib_metadata/_functools.py @@ -0,0 +1,104 @@ +import types +import functools + + +# from jaraco.functools 3.3 +def method_cache(method, cache_wrapper=None): + """ + Wrap lru_cache to support storing the cache data in the object instances. + + Abstracts the common paradigm where the method explicitly saves an + underscore-prefixed protected property on first call and returns that + subsequently. + + >>> class MyClass: + ... calls = 0 + ... + ... @method_cache + ... def method(self, value): + ... self.calls += 1 + ... return value + + >>> a = MyClass() + >>> a.method(3) + 3 + >>> for x in range(75): + ... res = a.method(x) + >>> a.calls + 75 + + Note that the apparent behavior will be exactly like that of lru_cache + except that the cache is stored on each instance, so values in one + instance will not flush values from another, and when an instance is + deleted, so are the cached values for that instance. + + >>> b = MyClass() + >>> for x in range(35): + ... res = b.method(x) + >>> b.calls + 35 + >>> a.method(0) + 0 + >>> a.calls + 75 + + Note that if method had been decorated with ``functools.lru_cache()``, + a.calls would have been 76 (due to the cached value of 0 having been + flushed by the 'b' instance). + + Clear the cache with ``.cache_clear()`` + + >>> a.method.cache_clear() + + Same for a method that hasn't yet been called. + + >>> c = MyClass() + >>> c.method.cache_clear() + + Another cache wrapper may be supplied: + + >>> cache = functools.lru_cache(maxsize=2) + >>> MyClass.method2 = method_cache(lambda self: 3, cache_wrapper=cache) + >>> a = MyClass() + >>> a.method2() + 3 + + Caution - do not subsequently wrap the method with another decorator, such + as ``@property``, which changes the semantics of the function. + + See also + http://code.activestate.com/recipes/577452-a-memoize-decorator-for-instance-methods/ + for another implementation and additional justification. + """ + cache_wrapper = cache_wrapper or functools.lru_cache() + + def wrapper(self, *args, **kwargs): + # it's the first call, replace the method with a cached, bound method + bound_method = types.MethodType(method, self) + cached_method = cache_wrapper(bound_method) + setattr(self, method.__name__, cached_method) + return cached_method(*args, **kwargs) + + # Support cache clear even before cache has been created. + wrapper.cache_clear = lambda: None + + return wrapper + + +# From jaraco.functools 3.3 +def pass_none(func): + """ + Wrap func so it's not called if its first param is None + + >>> print_text = pass_none(print) + >>> print_text('text') + text + >>> print_text(None) + """ + + @functools.wraps(func) + def wrapper(param, *args, **kwargs): + if param is not None: + return func(param, *args, **kwargs) + + return wrapper diff --git a/libs/common/setuptools/_vendor/importlib_metadata/_itertools.py b/libs/common/setuptools/_vendor/importlib_metadata/_itertools.py new file mode 100644 index 00000000..d4ca9b91 --- /dev/null +++ b/libs/common/setuptools/_vendor/importlib_metadata/_itertools.py @@ -0,0 +1,73 @@ +from itertools import filterfalse + + +def unique_everseen(iterable, key=None): + "List unique elements, preserving order. Remember all elements ever seen." + # unique_everseen('AAAABBBCCDAABBB') --> A B C D + # unique_everseen('ABBCcAD', str.lower) --> A B C D + seen = set() + seen_add = seen.add + if key is None: + for element in filterfalse(seen.__contains__, iterable): + seen_add(element) + yield element + else: + for element in iterable: + k = key(element) + if k not in seen: + seen_add(k) + yield element + + +# copied from more_itertools 8.8 +def always_iterable(obj, base_type=(str, bytes)): + """If *obj* is iterable, return an iterator over its items:: + + >>> obj = (1, 2, 3) + >>> list(always_iterable(obj)) + [1, 2, 3] + + If *obj* is not iterable, return a one-item iterable containing *obj*:: + + >>> obj = 1 + >>> list(always_iterable(obj)) + [1] + + If *obj* is ``None``, return an empty iterable: + + >>> obj = None + >>> list(always_iterable(None)) + [] + + By default, binary and text strings are not considered iterable:: + + >>> obj = 'foo' + >>> list(always_iterable(obj)) + ['foo'] + + If *base_type* is set, objects for which ``isinstance(obj, base_type)`` + returns ``True`` won't be considered iterable. + + >>> obj = {'a': 1} + >>> list(always_iterable(obj)) # Iterate over the dict's keys + ['a'] + >>> list(always_iterable(obj, base_type=dict)) # Treat dicts as a unit + [{'a': 1}] + + Set *base_type* to ``None`` to avoid any special handling and treat objects + Python considers iterable as iterable: + + >>> obj = 'foo' + >>> list(always_iterable(obj, base_type=None)) + ['f', 'o', 'o'] + """ + if obj is None: + return iter(()) + + if (base_type is not None) and isinstance(obj, base_type): + return iter((obj,)) + + try: + return iter(obj) + except TypeError: + return iter((obj,)) diff --git a/libs/common/setuptools/_vendor/importlib_metadata/_meta.py b/libs/common/setuptools/_vendor/importlib_metadata/_meta.py new file mode 100644 index 00000000..37ee43e6 --- /dev/null +++ b/libs/common/setuptools/_vendor/importlib_metadata/_meta.py @@ -0,0 +1,48 @@ +from ._compat import Protocol +from typing import Any, Dict, Iterator, List, TypeVar, Union + + +_T = TypeVar("_T") + + +class PackageMetadata(Protocol): + def __len__(self) -> int: + ... # pragma: no cover + + def __contains__(self, item: str) -> bool: + ... # pragma: no cover + + def __getitem__(self, key: str) -> str: + ... # pragma: no cover + + def __iter__(self) -> Iterator[str]: + ... # pragma: no cover + + def get_all(self, name: str, failobj: _T = ...) -> Union[List[Any], _T]: + """ + Return all values associated with a possibly multi-valued key. + """ + + @property + def json(self) -> Dict[str, Union[str, List[str]]]: + """ + A JSON-compatible form of the metadata. + """ + + +class SimplePath(Protocol): + """ + A minimal subset of pathlib.Path required by PathDistribution. + """ + + def joinpath(self) -> 'SimplePath': + ... # pragma: no cover + + def __truediv__(self) -> 'SimplePath': + ... # pragma: no cover + + def parent(self) -> 'SimplePath': + ... # pragma: no cover + + def read_text(self) -> str: + ... # pragma: no cover diff --git a/libs/common/setuptools/_vendor/importlib_metadata/_text.py b/libs/common/setuptools/_vendor/importlib_metadata/_text.py new file mode 100644 index 00000000..c88cfbb2 --- /dev/null +++ b/libs/common/setuptools/_vendor/importlib_metadata/_text.py @@ -0,0 +1,99 @@ +import re + +from ._functools import method_cache + + +# from jaraco.text 3.5 +class FoldedCase(str): + """ + A case insensitive string class; behaves just like str + except compares equal when the only variation is case. + + >>> s = FoldedCase('hello world') + + >>> s == 'Hello World' + True + + >>> 'Hello World' == s + True + + >>> s != 'Hello World' + False + + >>> s.index('O') + 4 + + >>> s.split('O') + ['hell', ' w', 'rld'] + + >>> sorted(map(FoldedCase, ['GAMMA', 'alpha', 'Beta'])) + ['alpha', 'Beta', 'GAMMA'] + + Sequence membership is straightforward. + + >>> "Hello World" in [s] + True + >>> s in ["Hello World"] + True + + You may test for set inclusion, but candidate and elements + must both be folded. + + >>> FoldedCase("Hello World") in {s} + True + >>> s in {FoldedCase("Hello World")} + True + + String inclusion works as long as the FoldedCase object + is on the right. + + >>> "hello" in FoldedCase("Hello World") + True + + But not if the FoldedCase object is on the left: + + >>> FoldedCase('hello') in 'Hello World' + False + + In that case, use in_: + + >>> FoldedCase('hello').in_('Hello World') + True + + >>> FoldedCase('hello') > FoldedCase('Hello') + False + """ + + def __lt__(self, other): + return self.lower() < other.lower() + + def __gt__(self, other): + return self.lower() > other.lower() + + def __eq__(self, other): + return self.lower() == other.lower() + + def __ne__(self, other): + return self.lower() != other.lower() + + def __hash__(self): + return hash(self.lower()) + + def __contains__(self, other): + return super().lower().__contains__(other.lower()) + + def in_(self, other): + "Does self appear in other?" + return self in FoldedCase(other) + + # cache lower since it's likely to be called frequently. + @method_cache + def lower(self): + return super().lower() + + def index(self, sub): + return self.lower().index(sub.lower()) + + def split(self, splitter=' ', maxsplit=0): + pattern = re.compile(re.escape(splitter), re.I) + return pattern.split(self, maxsplit) diff --git a/libs/common/setuptools/_vendor/importlib_resources/__init__.py b/libs/common/setuptools/_vendor/importlib_resources/__init__.py new file mode 100644 index 00000000..34e3a995 --- /dev/null +++ b/libs/common/setuptools/_vendor/importlib_resources/__init__.py @@ -0,0 +1,36 @@ +"""Read resources contained within a package.""" + +from ._common import ( + as_file, + files, + Package, +) + +from ._legacy import ( + contents, + open_binary, + read_binary, + open_text, + read_text, + is_resource, + path, + Resource, +) + +from .abc import ResourceReader + + +__all__ = [ + 'Package', + 'Resource', + 'ResourceReader', + 'as_file', + 'contents', + 'files', + 'is_resource', + 'open_binary', + 'open_text', + 'path', + 'read_binary', + 'read_text', +] diff --git a/libs/common/setuptools/_vendor/importlib_resources/_adapters.py b/libs/common/setuptools/_vendor/importlib_resources/_adapters.py new file mode 100644 index 00000000..ea363d86 --- /dev/null +++ b/libs/common/setuptools/_vendor/importlib_resources/_adapters.py @@ -0,0 +1,170 @@ +from contextlib import suppress +from io import TextIOWrapper + +from . import abc + + +class SpecLoaderAdapter: + """ + Adapt a package spec to adapt the underlying loader. + """ + + def __init__(self, spec, adapter=lambda spec: spec.loader): + self.spec = spec + self.loader = adapter(spec) + + def __getattr__(self, name): + return getattr(self.spec, name) + + +class TraversableResourcesLoader: + """ + Adapt a loader to provide TraversableResources. + """ + + def __init__(self, spec): + self.spec = spec + + def get_resource_reader(self, name): + return CompatibilityFiles(self.spec)._native() + + +def _io_wrapper(file, mode='r', *args, **kwargs): + if mode == 'r': + return TextIOWrapper(file, *args, **kwargs) + elif mode == 'rb': + return file + raise ValueError( + "Invalid mode value '{}', only 'r' and 'rb' are supported".format(mode) + ) + + +class CompatibilityFiles: + """ + Adapter for an existing or non-existent resource reader + to provide a compatibility .files(). + """ + + class SpecPath(abc.Traversable): + """ + Path tied to a module spec. + Can be read and exposes the resource reader children. + """ + + def __init__(self, spec, reader): + self._spec = spec + self._reader = reader + + def iterdir(self): + if not self._reader: + return iter(()) + return iter( + CompatibilityFiles.ChildPath(self._reader, path) + for path in self._reader.contents() + ) + + def is_file(self): + return False + + is_dir = is_file + + def joinpath(self, other): + if not self._reader: + return CompatibilityFiles.OrphanPath(other) + return CompatibilityFiles.ChildPath(self._reader, other) + + @property + def name(self): + return self._spec.name + + def open(self, mode='r', *args, **kwargs): + return _io_wrapper(self._reader.open_resource(None), mode, *args, **kwargs) + + class ChildPath(abc.Traversable): + """ + Path tied to a resource reader child. + Can be read but doesn't expose any meaningful children. + """ + + def __init__(self, reader, name): + self._reader = reader + self._name = name + + def iterdir(self): + return iter(()) + + def is_file(self): + return self._reader.is_resource(self.name) + + def is_dir(self): + return not self.is_file() + + def joinpath(self, other): + return CompatibilityFiles.OrphanPath(self.name, other) + + @property + def name(self): + return self._name + + def open(self, mode='r', *args, **kwargs): + return _io_wrapper( + self._reader.open_resource(self.name), mode, *args, **kwargs + ) + + class OrphanPath(abc.Traversable): + """ + Orphan path, not tied to a module spec or resource reader. + Can't be read and doesn't expose any meaningful children. + """ + + def __init__(self, *path_parts): + if len(path_parts) < 1: + raise ValueError('Need at least one path part to construct a path') + self._path = path_parts + + def iterdir(self): + return iter(()) + + def is_file(self): + return False + + is_dir = is_file + + def joinpath(self, other): + return CompatibilityFiles.OrphanPath(*self._path, other) + + @property + def name(self): + return self._path[-1] + + def open(self, mode='r', *args, **kwargs): + raise FileNotFoundError("Can't open orphan path") + + def __init__(self, spec): + self.spec = spec + + @property + def _reader(self): + with suppress(AttributeError): + return self.spec.loader.get_resource_reader(self.spec.name) + + def _native(self): + """ + Return the native reader if it supports files(). + """ + reader = self._reader + return reader if hasattr(reader, 'files') else self + + def __getattr__(self, attr): + return getattr(self._reader, attr) + + def files(self): + return CompatibilityFiles.SpecPath(self.spec, self._reader) + + +def wrap_spec(package): + """ + Construct a package spec with traversable compatibility + on the spec/loader/reader. + """ + return SpecLoaderAdapter(package.__spec__, TraversableResourcesLoader) diff --git a/libs/common/setuptools/_vendor/importlib_resources/_common.py b/libs/common/setuptools/_vendor/importlib_resources/_common.py new file mode 100644 index 00000000..a12e2c75 --- /dev/null +++ b/libs/common/setuptools/_vendor/importlib_resources/_common.py @@ -0,0 +1,104 @@ +import os +import pathlib +import tempfile +import functools +import contextlib +import types +import importlib + +from typing import Union, Optional +from .abc import ResourceReader, Traversable + +from ._compat import wrap_spec + +Package = Union[types.ModuleType, str] + + +def files(package): + # type: (Package) -> Traversable + """ + Get a Traversable resource from a package + """ + return from_package(get_package(package)) + + +def get_resource_reader(package): + # type: (types.ModuleType) -> Optional[ResourceReader] + """ + Return the package's loader if it's a ResourceReader. + """ + # We can't use + # a issubclass() check here because apparently abc.'s __subclasscheck__() + # hook wants to create a weak reference to the object, but + # zipimport.zipimporter does not support weak references, resulting in a + # TypeError. That seems terrible. + spec = package.__spec__ + reader = getattr(spec.loader, 'get_resource_reader', None) # type: ignore + if reader is None: + return None + return reader(spec.name) # type: ignore + + +def resolve(cand): + # type: (Package) -> types.ModuleType + return cand if isinstance(cand, types.ModuleType) else importlib.import_module(cand) + + +def get_package(package): + # type: (Package) -> types.ModuleType + """Take a package name or module object and return the module. + + Raise an exception if the resolved module is not a package. + """ + resolved = resolve(package) + if wrap_spec(resolved).submodule_search_locations is None: + raise TypeError(f'{package!r} is not a package') + return resolved + + +def from_package(package): + """ + Return a Traversable object for the given package. + + """ + spec = wrap_spec(package) + reader = spec.loader.get_resource_reader(spec.name) + return reader.files() + + +@contextlib.contextmanager +def _tempfile(reader, suffix=''): + # Not using tempfile.NamedTemporaryFile as it leads to deeper 'try' + # blocks due to the need to close the temporary file to work on Windows + # properly. + fd, raw_path = tempfile.mkstemp(suffix=suffix) + try: + try: + os.write(fd, reader()) + finally: + os.close(fd) + del reader + yield pathlib.Path(raw_path) + finally: + try: + os.remove(raw_path) + except FileNotFoundError: + pass + + +@functools.singledispatch +def as_file(path): + """ + Given a Traversable object, return that object as a + path on the local file system in a context manager. + """ + return _tempfile(path.read_bytes, suffix=path.name) + + +@as_file.register(pathlib.Path) +@contextlib.contextmanager +def _(path): + """ + Degenerate behavior for pathlib.Path objects. + """ + yield path diff --git a/libs/common/setuptools/_vendor/importlib_resources/_compat.py b/libs/common/setuptools/_vendor/importlib_resources/_compat.py new file mode 100644 index 00000000..cb9fc820 --- /dev/null +++ b/libs/common/setuptools/_vendor/importlib_resources/_compat.py @@ -0,0 +1,98 @@ +# flake8: noqa + +import abc +import sys +import pathlib +from contextlib import suppress + +if sys.version_info >= (3, 10): + from zipfile import Path as ZipPath # type: ignore +else: + from ..zipp import Path as ZipPath # type: ignore + + +try: + from typing import runtime_checkable # type: ignore +except ImportError: + + def runtime_checkable(cls): # type: ignore + return cls + + +try: + from typing import Protocol # type: ignore +except ImportError: + Protocol = abc.ABC # type: ignore + + +class TraversableResourcesLoader: + """ + Adapt loaders to provide TraversableResources and other + compatibility. + + Used primarily for Python 3.9 and earlier where the native + loaders do not yet implement TraversableResources. + """ + + def __init__(self, spec): + self.spec = spec + + @property + def path(self): + return self.spec.origin + + def get_resource_reader(self, name): + from . import readers, _adapters + + def _zip_reader(spec): + with suppress(AttributeError): + return readers.ZipReader(spec.loader, spec.name) + + def _namespace_reader(spec): + with suppress(AttributeError, ValueError): + return readers.NamespaceReader(spec.submodule_search_locations) + + def _available_reader(spec): + with suppress(AttributeError): + return spec.loader.get_resource_reader(spec.name) + + def _native_reader(spec): + reader = _available_reader(spec) + return reader if hasattr(reader, 'files') else None + + def _file_reader(spec): + try: + path = pathlib.Path(self.path) + except TypeError: + return None + if path.exists(): + return readers.FileReader(self) + + return ( + # native reader if it supplies 'files' + _native_reader(self.spec) + or + # local ZipReader if a zip module + _zip_reader(self.spec) + or + # local NamespaceReader if a namespace module + _namespace_reader(self.spec) + or + # local FileReader + _file_reader(self.spec) + # fallback - adapt the spec ResourceReader to TraversableReader + or _adapters.CompatibilityFiles(self.spec) + ) + + +def wrap_spec(package): + """ + Construct a package spec with traversable compatibility + on the spec/loader/reader. + + Supersedes _adapters.wrap_spec to use TraversableResourcesLoader + from above for older Python compatibility (<3.10). + """ + from . import _adapters + + return _adapters.SpecLoaderAdapter(package.__spec__, TraversableResourcesLoader) diff --git a/libs/common/setuptools/_vendor/importlib_resources/_itertools.py b/libs/common/setuptools/_vendor/importlib_resources/_itertools.py new file mode 100644 index 00000000..cce05582 --- /dev/null +++ b/libs/common/setuptools/_vendor/importlib_resources/_itertools.py @@ -0,0 +1,35 @@ +from itertools import filterfalse + +from typing import ( + Callable, + Iterable, + Iterator, + Optional, + Set, + TypeVar, + Union, +) + +# Type and type variable definitions +_T = TypeVar('_T') +_U = TypeVar('_U') + + +def unique_everseen( + iterable: Iterable[_T], key: Optional[Callable[[_T], _U]] = None +) -> Iterator[_T]: + "List unique elements, preserving order. Remember all elements ever seen." + # unique_everseen('AAAABBBCCDAABBB') --> A B C D + # unique_everseen('ABBCcAD', str.lower) --> A B C D + seen: Set[Union[_T, _U]] = set() + seen_add = seen.add + if key is None: + for element in filterfalse(seen.__contains__, iterable): + seen_add(element) + yield element + else: + for element in iterable: + k = key(element) + if k not in seen: + seen_add(k) + yield element diff --git a/libs/common/setuptools/_vendor/importlib_resources/_legacy.py b/libs/common/setuptools/_vendor/importlib_resources/_legacy.py new file mode 100644 index 00000000..1d5d3f1f --- /dev/null +++ b/libs/common/setuptools/_vendor/importlib_resources/_legacy.py @@ -0,0 +1,121 @@ +import functools +import os +import pathlib +import types +import warnings + +from typing import Union, Iterable, ContextManager, BinaryIO, TextIO, Any + +from . import _common + +Package = Union[types.ModuleType, str] +Resource = str + + +def deprecated(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + warnings.warn( + f"{func.__name__} is deprecated. Use files() instead. " + "Refer to https://importlib-resources.readthedocs.io" + "/en/latest/using.html#migrating-from-legacy for migration advice.", + DeprecationWarning, + stacklevel=2, + ) + return func(*args, **kwargs) + + return wrapper + + +def normalize_path(path): + # type: (Any) -> str + """Normalize a path by ensuring it is a string. + + If the resulting string contains path separators, an exception is raised. + """ + str_path = str(path) + parent, file_name = os.path.split(str_path) + if parent: + raise ValueError(f'{path!r} must be only a file name') + return file_name + + +@deprecated +def open_binary(package: Package, resource: Resource) -> BinaryIO: + """Return a file-like object opened for binary reading of the resource.""" + return (_common.files(package) / normalize_path(resource)).open('rb') + + +@deprecated +def read_binary(package: Package, resource: Resource) -> bytes: + """Return the binary contents of the resource.""" + return (_common.files(package) / normalize_path(resource)).read_bytes() + + +@deprecated +def open_text( + package: Package, + resource: Resource, + encoding: str = 'utf-8', + errors: str = 'strict', +) -> TextIO: + """Return a file-like object opened for text reading of the resource.""" + return (_common.files(package) / normalize_path(resource)).open( + 'r', encoding=encoding, errors=errors + ) + + +@deprecated +def read_text( + package: Package, + resource: Resource, + encoding: str = 'utf-8', + errors: str = 'strict', +) -> str: + """Return the decoded string of the resource. + + The decoding-related arguments have the same semantics as those of + bytes.decode(). + """ + with open_text(package, resource, encoding, errors) as fp: + return fp.read() + + +@deprecated +def contents(package: Package) -> Iterable[str]: + """Return an iterable of entries in `package`. + + Note that not all entries are resources. Specifically, directories are + not considered resources. Use `is_resource()` on each entry returned here + to check if it is a resource or not. + """ + return [path.name for path in _common.files(package).iterdir()] + + +@deprecated +def is_resource(package: Package, name: str) -> bool: + """True if `name` is a resource inside `package`. + + Directories are *not* resources. + """ + resource = normalize_path(name) + return any( + traversable.name == resource and traversable.is_file() + for traversable in _common.files(package).iterdir() + ) + + +@deprecated +def path( + package: Package, + resource: Resource, +) -> ContextManager[pathlib.Path]: + """A context manager providing a file path object to the resource. + + If the resource does not already exist on its own on the file system, + a temporary file will be created. If the file was created, the file + will be deleted upon exiting the context manager (no exception is + raised if the file was deleted prior to the context manager + exiting). + """ + return _common.as_file(_common.files(package) / normalize_path(resource)) diff --git a/libs/common/setuptools/_vendor/importlib_resources/abc.py b/libs/common/setuptools/_vendor/importlib_resources/abc.py new file mode 100644 index 00000000..d39dc1ad --- /dev/null +++ b/libs/common/setuptools/_vendor/importlib_resources/abc.py @@ -0,0 +1,137 @@ +import abc +from typing import BinaryIO, Iterable, Text + +from ._compat import runtime_checkable, Protocol + + +class ResourceReader(metaclass=abc.ABCMeta): + """Abstract base class for loaders to provide resource reading support.""" + + @abc.abstractmethod + def open_resource(self, resource: Text) -> BinaryIO: + """Return an opened, file-like object for binary reading. + + The 'resource' argument is expected to represent only a file name. + If the resource cannot be found, FileNotFoundError is raised. + """ + # This deliberately raises FileNotFoundError instead of + # NotImplementedError so that if this method is accidentally called, + # it'll still do the right thing. + raise FileNotFoundError + + @abc.abstractmethod + def resource_path(self, resource: Text) -> Text: + """Return the file system path to the specified resource. + + The 'resource' argument is expected to represent only a file name. + If the resource does not exist on the file system, raise + FileNotFoundError. + """ + # This deliberately raises FileNotFoundError instead of + # NotImplementedError so that if this method is accidentally called, + # it'll still do the right thing. + raise FileNotFoundError + + @abc.abstractmethod + def is_resource(self, path: Text) -> bool: + """Return True if the named 'path' is a resource. + + Files are resources, directories are not. + """ + raise FileNotFoundError + + @abc.abstractmethod + def contents(self) -> Iterable[str]: + """Return an iterable of entries in `package`.""" + raise FileNotFoundError + + +@runtime_checkable +class Traversable(Protocol): + """ + An object with a subset of pathlib.Path methods suitable for + traversing directories and opening files. + """ + + @abc.abstractmethod + def iterdir(self): + """ + Yield Traversable objects in self + """ + + def read_bytes(self): + """ + Read contents of self as bytes + """ + with self.open('rb') as strm: + return strm.read() + + def read_text(self, encoding=None): + """ + Read contents of self as text + """ + with self.open(encoding=encoding) as strm: + return strm.read() + + @abc.abstractmethod + def is_dir(self) -> bool: + """ + Return True if self is a directory + """ + + @abc.abstractmethod + def is_file(self) -> bool: + """ + Return True if self is a file + """ + + @abc.abstractmethod + def joinpath(self, child): + """ + Return Traversable child in self + """ + + def __truediv__(self, child): + """ + Return Traversable child in self + """ + return self.joinpath(child) + + @abc.abstractmethod + def open(self, mode='r', *args, **kwargs): + """ + mode may be 'r' or 'rb' to open as text or binary. Return a handle + suitable for reading (same as pathlib.Path.open). + + When opening as text, accepts encoding parameters such as those + accepted by io.TextIOWrapper. + """ + + @abc.abstractproperty + def name(self) -> str: + """ + The base name of this object without any parent references. + """ + + +class TraversableResources(ResourceReader): + """ + The required interface for providing traversable + resources. + """ + + @abc.abstractmethod + def files(self): + """Return a Traversable object for the loaded package.""" + + def open_resource(self, resource): + return self.files().joinpath(resource).open('rb') + + def resource_path(self, resource): + raise FileNotFoundError(resource) + + def is_resource(self, path): + return self.files().joinpath(path).is_file() + + def contents(self): + return (item.name for item in self.files().iterdir()) diff --git a/libs/common/setuptools/_vendor/importlib_resources/readers.py b/libs/common/setuptools/_vendor/importlib_resources/readers.py new file mode 100644 index 00000000..f1190ca4 --- /dev/null +++ b/libs/common/setuptools/_vendor/importlib_resources/readers.py @@ -0,0 +1,122 @@ +import collections +import pathlib +import operator + +from . import abc + +from ._itertools import unique_everseen +from ._compat import ZipPath + + +def remove_duplicates(items): + return iter(collections.OrderedDict.fromkeys(items)) + + +class FileReader(abc.TraversableResources): + def __init__(self, loader): + self.path = pathlib.Path(loader.path).parent + + def resource_path(self, resource): + """ + Return the file system path to prevent + `resources.path()` from creating a temporary + copy. + """ + return str(self.path.joinpath(resource)) + + def files(self): + return self.path + + +class ZipReader(abc.TraversableResources): + def __init__(self, loader, module): + _, _, name = module.rpartition('.') + self.prefix = loader.prefix.replace('\\', '/') + name + '/' + self.archive = loader.archive + + def open_resource(self, resource): + try: + return super().open_resource(resource) + except KeyError as exc: + raise FileNotFoundError(exc.args[0]) + + def is_resource(self, path): + # workaround for `zipfile.Path.is_file` returning true + # for non-existent paths. + target = self.files().joinpath(path) + return target.is_file() and target.exists() + + def files(self): + return ZipPath(self.archive, self.prefix) + + +class MultiplexedPath(abc.Traversable): + """ + Given a series of Traversable objects, implement a merged + version of the interface across all objects. Useful for + namespace packages which may be multihomed at a single + name. + """ + + def __init__(self, *paths): + self._paths = list(map(pathlib.Path, remove_duplicates(paths))) + if not self._paths: + message = 'MultiplexedPath must contain at least one path' + raise FileNotFoundError(message) + if not all(path.is_dir() for path in self._paths): + raise NotADirectoryError('MultiplexedPath only supports directories') + + def iterdir(self): + files = (file for path in self._paths for file in path.iterdir()) + return unique_everseen(files, key=operator.attrgetter('name')) + + def read_bytes(self): + raise FileNotFoundError(f'{self} is not a file') + + def read_text(self, *args, **kwargs): + raise FileNotFoundError(f'{self} is not a file') + + def is_dir(self): + return True + + def is_file(self): + return False + + def joinpath(self, child): + # first try to find child in current paths + for file in self.iterdir(): + if file.name == child: + return file + # if it does not exist, construct it with the first path + return self._paths[0] / child + + __truediv__ = joinpath + + def open(self, *args, **kwargs): + raise FileNotFoundError(f'{self} is not a file') + + @property + def name(self): + return self._paths[0].name + + def __repr__(self): + paths = ', '.join(f"'{path}'" for path in self._paths) + return f'MultiplexedPath({paths})' + + +class NamespaceReader(abc.TraversableResources): + def __init__(self, namespace_path): + if 'NamespacePath' not in str(namespace_path): + raise ValueError('Invalid path') + self.path = MultiplexedPath(*list(namespace_path)) + + def resource_path(self, resource): + """ + Return the file system path to prevent + `resources.path()` from creating a temporary + copy. + """ + return str(self.path.joinpath(resource)) + + def files(self): + return self.path diff --git a/libs/common/setuptools/_vendor/importlib_resources/simple.py b/libs/common/setuptools/_vendor/importlib_resources/simple.py new file mode 100644 index 00000000..da073cbd --- /dev/null +++ b/libs/common/setuptools/_vendor/importlib_resources/simple.py @@ -0,0 +1,116 @@ +""" +Interface adapters for low-level readers. +""" + +import abc +import io +import itertools +from typing import BinaryIO, List + +from .abc import Traversable, TraversableResources + + +class SimpleReader(abc.ABC): + """ + The minimum, low-level interface required from a resource + provider. + """ + + @abc.abstractproperty + def package(self): + # type: () -> str + """ + The name of the package for which this reader loads resources. + """ + + @abc.abstractmethod + def children(self): + # type: () -> List['SimpleReader'] + """ + Obtain an iterable of SimpleReader for available + child containers (e.g. directories). + """ + + @abc.abstractmethod + def resources(self): + # type: () -> List[str] + """ + Obtain available named resources for this virtual package. + """ + + @abc.abstractmethod + def open_binary(self, resource): + # type: (str) -> BinaryIO + """ + Obtain a File-like for a named resource. + """ + + @property + def name(self): + return self.package.split('.')[-1] + + +class ResourceHandle(Traversable): + """ + Handle to a named resource in a ResourceReader. + """ + + def __init__(self, parent, name): + # type: (ResourceContainer, str) -> None + self.parent = parent + self.name = name # type: ignore + + def is_file(self): + return True + + def is_dir(self): + return False + + def open(self, mode='r', *args, **kwargs): + stream = self.parent.reader.open_binary(self.name) + if 'b' not in mode: + stream = io.TextIOWrapper(*args, **kwargs) + return stream + + def joinpath(self, name): + raise RuntimeError("Cannot traverse into a resource") + + +class ResourceContainer(Traversable): + """ + Traversable container for a package's resources via its reader. + """ + + def __init__(self, reader): + # type: (SimpleReader) -> None + self.reader = reader + + def is_dir(self): + return True + + def is_file(self): + return False + + def iterdir(self): + files = (ResourceHandle(self, name) for name in self.reader.resources) + dirs = map(ResourceContainer, self.reader.children()) + return itertools.chain(files, dirs) + + def open(self, *args, **kwargs): + raise IsADirectoryError() + + def joinpath(self, name): + return next( + traversable for traversable in self.iterdir() if traversable.name == name + ) + + +class TraversableReader(TraversableResources, SimpleReader): + """ + A TraversableResources based on SimpleReader. Resource providers + may derive from this class to provide the TraversableResources + interface by supplying the SimpleReader interface. + """ + + def files(self): + return ResourceContainer(self) diff --git a/libs/common/setuptools/_vendor/jaraco/__init__.py b/libs/common/setuptools/_vendor/jaraco/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/libs/common/setuptools/_vendor/jaraco/context.py b/libs/common/setuptools/_vendor/jaraco/context.py new file mode 100644 index 00000000..87a4e3dc --- /dev/null +++ b/libs/common/setuptools/_vendor/jaraco/context.py @@ -0,0 +1,213 @@ +import os +import subprocess +import contextlib +import functools +import tempfile +import shutil +import operator + + +@contextlib.contextmanager +def pushd(dir): + orig = os.getcwd() + os.chdir(dir) + try: + yield dir + finally: + os.chdir(orig) + + +@contextlib.contextmanager +def tarball_context(url, target_dir=None, runner=None, pushd=pushd): + """ + Get a tarball, extract it, change to that directory, yield, then + clean up. + `runner` is the function to invoke commands. + `pushd` is a context manager for changing the directory. + """ + if target_dir is None: + target_dir = os.path.basename(url).replace('.tar.gz', '').replace('.tgz', '') + if runner is None: + runner = functools.partial(subprocess.check_call, shell=True) + # In the tar command, use --strip-components=1 to strip the first path and + # then + # use -C to cause the files to be extracted to {target_dir}. This ensures + # that we always know where the files were extracted. + runner('mkdir {target_dir}'.format(**vars())) + try: + getter = 'wget {url} -O -' + extract = 'tar x{compression} --strip-components=1 -C {target_dir}' + cmd = ' | '.join((getter, extract)) + runner(cmd.format(compression=infer_compression(url), **vars())) + with pushd(target_dir): + yield target_dir + finally: + runner('rm -Rf {target_dir}'.format(**vars())) + + +def infer_compression(url): + """ + Given a URL or filename, infer the compression code for tar. + """ + # cheat and just assume it's the last two characters + compression_indicator = url[-2:] + mapping = dict(gz='z', bz='j', xz='J') + # Assume 'z' (gzip) if no match + return mapping.get(compression_indicator, 'z') + + +@contextlib.contextmanager +def temp_dir(remover=shutil.rmtree): + """ + Create a temporary directory context. Pass a custom remover + to override the removal behavior. + """ + temp_dir = tempfile.mkdtemp() + try: + yield temp_dir + finally: + remover(temp_dir) + + +@contextlib.contextmanager +def repo_context(url, branch=None, quiet=True, dest_ctx=temp_dir): + """ + Check out the repo indicated by url. + + If dest_ctx is supplied, it should be a context manager + to yield the target directory for the check out. + """ + exe = 'git' if 'git' in url else 'hg' + with dest_ctx() as repo_dir: + cmd = [exe, 'clone', url, repo_dir] + if branch: + cmd.extend(['--branch', branch]) + devnull = open(os.path.devnull, 'w') + stdout = devnull if quiet else None + subprocess.check_call(cmd, stdout=stdout) + yield repo_dir + + +@contextlib.contextmanager +def null(): + yield + + +class ExceptionTrap: + """ + A context manager that will catch certain exceptions and provide an + indication they occurred. + + >>> with ExceptionTrap() as trap: + ... raise Exception() + >>> bool(trap) + True + + >>> with ExceptionTrap() as trap: + ... pass + >>> bool(trap) + False + + >>> with ExceptionTrap(ValueError) as trap: + ... raise ValueError("1 + 1 is not 3") + >>> bool(trap) + True + + >>> with ExceptionTrap(ValueError) as trap: + ... raise Exception() + Traceback (most recent call last): + ... + Exception + + >>> bool(trap) + False + """ + + exc_info = None, None, None + + def __init__(self, exceptions=(Exception,)): + self.exceptions = exceptions + + def __enter__(self): + return self + + @property + def type(self): + return self.exc_info[0] + + @property + def value(self): + return self.exc_info[1] + + @property + def tb(self): + return self.exc_info[2] + + def __exit__(self, *exc_info): + type = exc_info[0] + matches = type and issubclass(type, self.exceptions) + if matches: + self.exc_info = exc_info + return matches + + def __bool__(self): + return bool(self.type) + + def raises(self, func, *, _test=bool): + """ + Wrap func and replace the result with the truth + value of the trap (True if an exception occurred). + + First, give the decorator an alias to support Python 3.8 + Syntax. + + >>> raises = ExceptionTrap(ValueError).raises + + Now decorate a function that always fails. + + >>> @raises + ... def fail(): + ... raise ValueError('failed') + >>> fail() + True + """ + + @functools.wraps(func) + def wrapper(*args, **kwargs): + with ExceptionTrap(self.exceptions) as trap: + func(*args, **kwargs) + return _test(trap) + + return wrapper + + def passes(self, func): + """ + Wrap func and replace the result with the truth + value of the trap (True if no exception). + + First, give the decorator an alias to support Python 3.8 + Syntax. + + >>> passes = ExceptionTrap(ValueError).passes + + Now decorate a function that always fails. + + >>> @passes + ... def fail(): + ... raise ValueError('failed') + + >>> fail() + False + """ + return self.raises(func, _test=operator.not_) + + +class suppress(contextlib.suppress, contextlib.ContextDecorator): + """ + A version of contextlib.suppress with decorator support. + + >>> @suppress(KeyError) + ... def key_error(): + ... {}[''] + >>> key_error() + """ diff --git a/libs/common/setuptools/_vendor/jaraco/functools.py b/libs/common/setuptools/_vendor/jaraco/functools.py new file mode 100644 index 00000000..bbd8b29f --- /dev/null +++ b/libs/common/setuptools/_vendor/jaraco/functools.py @@ -0,0 +1,525 @@ +import functools +import time +import inspect +import collections +import types +import itertools + +import setuptools.extern.more_itertools + +from typing import Callable, TypeVar + + +CallableT = TypeVar("CallableT", bound=Callable[..., object]) + + +def compose(*funcs): + """ + Compose any number of unary functions into a single unary function. + + >>> import textwrap + >>> expected = str.strip(textwrap.dedent(compose.__doc__)) + >>> strip_and_dedent = compose(str.strip, textwrap.dedent) + >>> strip_and_dedent(compose.__doc__) == expected + True + + Compose also allows the innermost function to take arbitrary arguments. + + >>> round_three = lambda x: round(x, ndigits=3) + >>> f = compose(round_three, int.__truediv__) + >>> [f(3*x, x+1) for x in range(1,10)] + [1.5, 2.0, 2.25, 2.4, 2.5, 2.571, 2.625, 2.667, 2.7] + """ + + def compose_two(f1, f2): + return lambda *args, **kwargs: f1(f2(*args, **kwargs)) + + return functools.reduce(compose_two, funcs) + + +def method_caller(method_name, *args, **kwargs): + """ + Return a function that will call a named method on the + target object with optional positional and keyword + arguments. + + >>> lower = method_caller('lower') + >>> lower('MyString') + 'mystring' + """ + + def call_method(target): + func = getattr(target, method_name) + return func(*args, **kwargs) + + return call_method + + +def once(func): + """ + Decorate func so it's only ever called the first time. + + This decorator can ensure that an expensive or non-idempotent function + will not be expensive on subsequent calls and is idempotent. + + >>> add_three = once(lambda a: a+3) + >>> add_three(3) + 6 + >>> add_three(9) + 6 + >>> add_three('12') + 6 + + To reset the stored value, simply clear the property ``saved_result``. + + >>> del add_three.saved_result + >>> add_three(9) + 12 + >>> add_three(8) + 12 + + Or invoke 'reset()' on it. + + >>> add_three.reset() + >>> add_three(-3) + 0 + >>> add_three(0) + 0 + """ + + @functools.wraps(func) + def wrapper(*args, **kwargs): + if not hasattr(wrapper, 'saved_result'): + wrapper.saved_result = func(*args, **kwargs) + return wrapper.saved_result + + wrapper.reset = lambda: vars(wrapper).__delitem__('saved_result') + return wrapper + + +def method_cache( + method: CallableT, + cache_wrapper: Callable[ + [CallableT], CallableT + ] = functools.lru_cache(), # type: ignore[assignment] +) -> CallableT: + """ + Wrap lru_cache to support storing the cache data in the object instances. + + Abstracts the common paradigm where the method explicitly saves an + underscore-prefixed protected property on first call and returns that + subsequently. + + >>> class MyClass: + ... calls = 0 + ... + ... @method_cache + ... def method(self, value): + ... self.calls += 1 + ... return value + + >>> a = MyClass() + >>> a.method(3) + 3 + >>> for x in range(75): + ... res = a.method(x) + >>> a.calls + 75 + + Note that the apparent behavior will be exactly like that of lru_cache + except that the cache is stored on each instance, so values in one + instance will not flush values from another, and when an instance is + deleted, so are the cached values for that instance. + + >>> b = MyClass() + >>> for x in range(35): + ... res = b.method(x) + >>> b.calls + 35 + >>> a.method(0) + 0 + >>> a.calls + 75 + + Note that if method had been decorated with ``functools.lru_cache()``, + a.calls would have been 76 (due to the cached value of 0 having been + flushed by the 'b' instance). + + Clear the cache with ``.cache_clear()`` + + >>> a.method.cache_clear() + + Same for a method that hasn't yet been called. + + >>> c = MyClass() + >>> c.method.cache_clear() + + Another cache wrapper may be supplied: + + >>> cache = functools.lru_cache(maxsize=2) + >>> MyClass.method2 = method_cache(lambda self: 3, cache_wrapper=cache) + >>> a = MyClass() + >>> a.method2() + 3 + + Caution - do not subsequently wrap the method with another decorator, such + as ``@property``, which changes the semantics of the function. + + See also + http://code.activestate.com/recipes/577452-a-memoize-decorator-for-instance-methods/ + for another implementation and additional justification. + """ + + def wrapper(self: object, *args: object, **kwargs: object) -> object: + # it's the first call, replace the method with a cached, bound method + bound_method: CallableT = types.MethodType( # type: ignore[assignment] + method, self + ) + cached_method = cache_wrapper(bound_method) + setattr(self, method.__name__, cached_method) + return cached_method(*args, **kwargs) + + # Support cache clear even before cache has been created. + wrapper.cache_clear = lambda: None # type: ignore[attr-defined] + + return ( # type: ignore[return-value] + _special_method_cache(method, cache_wrapper) or wrapper + ) + + +def _special_method_cache(method, cache_wrapper): + """ + Because Python treats special methods differently, it's not + possible to use instance attributes to implement the cached + methods. + + Instead, install the wrapper method under a different name + and return a simple proxy to that wrapper. + + https://github.com/jaraco/jaraco.functools/issues/5 + """ + name = method.__name__ + special_names = '__getattr__', '__getitem__' + if name not in special_names: + return + + wrapper_name = '__cached' + name + + def proxy(self, *args, **kwargs): + if wrapper_name not in vars(self): + bound = types.MethodType(method, self) + cache = cache_wrapper(bound) + setattr(self, wrapper_name, cache) + else: + cache = getattr(self, wrapper_name) + return cache(*args, **kwargs) + + return proxy + + +def apply(transform): + """ + Decorate a function with a transform function that is + invoked on results returned from the decorated function. + + >>> @apply(reversed) + ... def get_numbers(start): + ... "doc for get_numbers" + ... return range(start, start+3) + >>> list(get_numbers(4)) + [6, 5, 4] + >>> get_numbers.__doc__ + 'doc for get_numbers' + """ + + def wrap(func): + return functools.wraps(func)(compose(transform, func)) + + return wrap + + +def result_invoke(action): + r""" + Decorate a function with an action function that is + invoked on the results returned from the decorated + function (for its side-effect), then return the original + result. + + >>> @result_invoke(print) + ... def add_two(a, b): + ... return a + b + >>> x = add_two(2, 3) + 5 + >>> x + 5 + """ + + def wrap(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + result = func(*args, **kwargs) + action(result) + return result + + return wrapper + + return wrap + + +def call_aside(f, *args, **kwargs): + """ + Call a function for its side effect after initialization. + + >>> @call_aside + ... def func(): print("called") + called + >>> func() + called + + Use functools.partial to pass parameters to the initial call + + >>> @functools.partial(call_aside, name='bingo') + ... def func(name): print("called with", name) + called with bingo + """ + f(*args, **kwargs) + return f + + +class Throttler: + """ + Rate-limit a function (or other callable) + """ + + def __init__(self, func, max_rate=float('Inf')): + if isinstance(func, Throttler): + func = func.func + self.func = func + self.max_rate = max_rate + self.reset() + + def reset(self): + self.last_called = 0 + + def __call__(self, *args, **kwargs): + self._wait() + return self.func(*args, **kwargs) + + def _wait(self): + "ensure at least 1/max_rate seconds from last call" + elapsed = time.time() - self.last_called + must_wait = 1 / self.max_rate - elapsed + time.sleep(max(0, must_wait)) + self.last_called = time.time() + + def __get__(self, obj, type=None): + return first_invoke(self._wait, functools.partial(self.func, obj)) + + +def first_invoke(func1, func2): + """ + Return a function that when invoked will invoke func1 without + any parameters (for its side-effect) and then invoke func2 + with whatever parameters were passed, returning its result. + """ + + def wrapper(*args, **kwargs): + func1() + return func2(*args, **kwargs) + + return wrapper + + +def retry_call(func, cleanup=lambda: None, retries=0, trap=()): + """ + Given a callable func, trap the indicated exceptions + for up to 'retries' times, invoking cleanup on the + exception. On the final attempt, allow any exceptions + to propagate. + """ + attempts = itertools.count() if retries == float('inf') else range(retries) + for attempt in attempts: + try: + return func() + except trap: + cleanup() + + return func() + + +def retry(*r_args, **r_kwargs): + """ + Decorator wrapper for retry_call. Accepts arguments to retry_call + except func and then returns a decorator for the decorated function. + + Ex: + + >>> @retry(retries=3) + ... def my_func(a, b): + ... "this is my funk" + ... print(a, b) + >>> my_func.__doc__ + 'this is my funk' + """ + + def decorate(func): + @functools.wraps(func) + def wrapper(*f_args, **f_kwargs): + bound = functools.partial(func, *f_args, **f_kwargs) + return retry_call(bound, *r_args, **r_kwargs) + + return wrapper + + return decorate + + +def print_yielded(func): + """ + Convert a generator into a function that prints all yielded elements + + >>> @print_yielded + ... def x(): + ... yield 3; yield None + >>> x() + 3 + None + """ + print_all = functools.partial(map, print) + print_results = compose(more_itertools.consume, print_all, func) + return functools.wraps(func)(print_results) + + +def pass_none(func): + """ + Wrap func so it's not called if its first param is None + + >>> print_text = pass_none(print) + >>> print_text('text') + text + >>> print_text(None) + """ + + @functools.wraps(func) + def wrapper(param, *args, **kwargs): + if param is not None: + return func(param, *args, **kwargs) + + return wrapper + + +def assign_params(func, namespace): + """ + Assign parameters from namespace where func solicits. + + >>> def func(x, y=3): + ... print(x, y) + >>> assigned = assign_params(func, dict(x=2, z=4)) + >>> assigned() + 2 3 + + The usual errors are raised if a function doesn't receive + its required parameters: + + >>> assigned = assign_params(func, dict(y=3, z=4)) + >>> assigned() + Traceback (most recent call last): + TypeError: func() ...argument... + + It even works on methods: + + >>> class Handler: + ... def meth(self, arg): + ... print(arg) + >>> assign_params(Handler().meth, dict(arg='crystal', foo='clear'))() + crystal + """ + sig = inspect.signature(func) + params = sig.parameters.keys() + call_ns = {k: namespace[k] for k in params if k in namespace} + return functools.partial(func, **call_ns) + + +def save_method_args(method): + """ + Wrap a method such that when it is called, the args and kwargs are + saved on the method. + + >>> class MyClass: + ... @save_method_args + ... def method(self, a, b): + ... print(a, b) + >>> my_ob = MyClass() + >>> my_ob.method(1, 2) + 1 2 + >>> my_ob._saved_method.args + (1, 2) + >>> my_ob._saved_method.kwargs + {} + >>> my_ob.method(a=3, b='foo') + 3 foo + >>> my_ob._saved_method.args + () + >>> my_ob._saved_method.kwargs == dict(a=3, b='foo') + True + + The arguments are stored on the instance, allowing for + different instance to save different args. + + >>> your_ob = MyClass() + >>> your_ob.method({str('x'): 3}, b=[4]) + {'x': 3} [4] + >>> your_ob._saved_method.args + ({'x': 3},) + >>> my_ob._saved_method.args + () + """ + args_and_kwargs = collections.namedtuple('args_and_kwargs', 'args kwargs') + + @functools.wraps(method) + def wrapper(self, *args, **kwargs): + attr_name = '_saved_' + method.__name__ + attr = args_and_kwargs(args, kwargs) + setattr(self, attr_name, attr) + return method(self, *args, **kwargs) + + return wrapper + + +def except_(*exceptions, replace=None, use=None): + """ + Replace the indicated exceptions, if raised, with the indicated + literal replacement or evaluated expression (if present). + + >>> safe_int = except_(ValueError)(int) + >>> safe_int('five') + >>> safe_int('5') + 5 + + Specify a literal replacement with ``replace``. + + >>> safe_int_r = except_(ValueError, replace=0)(int) + >>> safe_int_r('five') + 0 + + Provide an expression to ``use`` to pass through particular parameters. + + >>> safe_int_pt = except_(ValueError, use='args[0]')(int) + >>> safe_int_pt('five') + 'five' + + """ + + def decorate(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + try: + return func(*args, **kwargs) + except exceptions: + try: + return eval(use) + except TypeError: + return replace + + return wrapper + + return decorate diff --git a/libs/common/setuptools/_vendor/jaraco/text/__init__.py b/libs/common/setuptools/_vendor/jaraco/text/__init__.py new file mode 100644 index 00000000..a0306d5f --- /dev/null +++ b/libs/common/setuptools/_vendor/jaraco/text/__init__.py @@ -0,0 +1,599 @@ +import re +import itertools +import textwrap +import functools + +try: + from importlib.resources import files # type: ignore +except ImportError: # pragma: nocover + from setuptools.extern.importlib_resources import files # type: ignore + +from setuptools.extern.jaraco.functools import compose, method_cache +from setuptools.extern.jaraco.context import ExceptionTrap + + +def substitution(old, new): + """ + Return a function that will perform a substitution on a string + """ + return lambda s: s.replace(old, new) + + +def multi_substitution(*substitutions): + """ + Take a sequence of pairs specifying substitutions, and create + a function that performs those substitutions. + + >>> multi_substitution(('foo', 'bar'), ('bar', 'baz'))('foo') + 'baz' + """ + substitutions = itertools.starmap(substitution, substitutions) + # compose function applies last function first, so reverse the + # substitutions to get the expected order. + substitutions = reversed(tuple(substitutions)) + return compose(*substitutions) + + +class FoldedCase(str): + """ + A case insensitive string class; behaves just like str + except compares equal when the only variation is case. + + >>> s = FoldedCase('hello world') + + >>> s == 'Hello World' + True + + >>> 'Hello World' == s + True + + >>> s != 'Hello World' + False + + >>> s.index('O') + 4 + + >>> s.split('O') + ['hell', ' w', 'rld'] + + >>> sorted(map(FoldedCase, ['GAMMA', 'alpha', 'Beta'])) + ['alpha', 'Beta', 'GAMMA'] + + Sequence membership is straightforward. + + >>> "Hello World" in [s] + True + >>> s in ["Hello World"] + True + + You may test for set inclusion, but candidate and elements + must both be folded. + + >>> FoldedCase("Hello World") in {s} + True + >>> s in {FoldedCase("Hello World")} + True + + String inclusion works as long as the FoldedCase object + is on the right. + + >>> "hello" in FoldedCase("Hello World") + True + + But not if the FoldedCase object is on the left: + + >>> FoldedCase('hello') in 'Hello World' + False + + In that case, use ``in_``: + + >>> FoldedCase('hello').in_('Hello World') + True + + >>> FoldedCase('hello') > FoldedCase('Hello') + False + """ + + def __lt__(self, other): + return self.lower() < other.lower() + + def __gt__(self, other): + return self.lower() > other.lower() + + def __eq__(self, other): + return self.lower() == other.lower() + + def __ne__(self, other): + return self.lower() != other.lower() + + def __hash__(self): + return hash(self.lower()) + + def __contains__(self, other): + return super().lower().__contains__(other.lower()) + + def in_(self, other): + "Does self appear in other?" + return self in FoldedCase(other) + + # cache lower since it's likely to be called frequently. + @method_cache + def lower(self): + return super().lower() + + def index(self, sub): + return self.lower().index(sub.lower()) + + def split(self, splitter=' ', maxsplit=0): + pattern = re.compile(re.escape(splitter), re.I) + return pattern.split(self, maxsplit) + + +# Python 3.8 compatibility +_unicode_trap = ExceptionTrap(UnicodeDecodeError) + + +@_unicode_trap.passes +def is_decodable(value): + r""" + Return True if the supplied value is decodable (using the default + encoding). + + >>> is_decodable(b'\xff') + False + >>> is_decodable(b'\x32') + True + """ + value.decode() + + +def is_binary(value): + r""" + Return True if the value appears to be binary (that is, it's a byte + string and isn't decodable). + + >>> is_binary(b'\xff') + True + >>> is_binary('\xff') + False + """ + return isinstance(value, bytes) and not is_decodable(value) + + +def trim(s): + r""" + Trim something like a docstring to remove the whitespace that + is common due to indentation and formatting. + + >>> trim("\n\tfoo = bar\n\t\tbar = baz\n") + 'foo = bar\n\tbar = baz' + """ + return textwrap.dedent(s).strip() + + +def wrap(s): + """ + Wrap lines of text, retaining existing newlines as + paragraph markers. + + >>> print(wrap(lorem_ipsum)) + Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do + eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad + minim veniam, quis nostrud exercitation ullamco laboris nisi ut + aliquip ex ea commodo consequat. Duis aute irure dolor in + reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla + pariatur. Excepteur sint occaecat cupidatat non proident, sunt in + culpa qui officia deserunt mollit anim id est laborum. + + Curabitur pretium tincidunt lacus. Nulla gravida orci a odio. Nullam + varius, turpis et commodo pharetra, est eros bibendum elit, nec luctus + magna felis sollicitudin mauris. Integer in mauris eu nibh euismod + gravida. Duis ac tellus et risus vulputate vehicula. Donec lobortis + risus a elit. Etiam tempor. Ut ullamcorper, ligula eu tempor congue, + eros est euismod turpis, id tincidunt sapien risus a quam. Maecenas + fermentum consequat mi. Donec fermentum. Pellentesque malesuada nulla + a mi. Duis sapien sem, aliquet nec, commodo eget, consequat quis, + neque. Aliquam faucibus, elit ut dictum aliquet, felis nisl adipiscing + sapien, sed malesuada diam lacus eget erat. Cras mollis scelerisque + nunc. Nullam arcu. Aliquam consequat. Curabitur augue lorem, dapibus + quis, laoreet et, pretium ac, nisi. Aenean magna nisl, mollis quis, + molestie eu, feugiat in, orci. In hac habitasse platea dictumst. + """ + paragraphs = s.splitlines() + wrapped = ('\n'.join(textwrap.wrap(para)) for para in paragraphs) + return '\n\n'.join(wrapped) + + +def unwrap(s): + r""" + Given a multi-line string, return an unwrapped version. + + >>> wrapped = wrap(lorem_ipsum) + >>> wrapped.count('\n') + 20 + >>> unwrapped = unwrap(wrapped) + >>> unwrapped.count('\n') + 1 + >>> print(unwrapped) + Lorem ipsum dolor sit amet, consectetur adipiscing ... + Curabitur pretium tincidunt lacus. Nulla gravida orci ... + + """ + paragraphs = re.split(r'\n\n+', s) + cleaned = (para.replace('\n', ' ') for para in paragraphs) + return '\n'.join(cleaned) + + + + +class Splitter(object): + """object that will split a string with the given arguments for each call + + >>> s = Splitter(',') + >>> s('hello, world, this is your, master calling') + ['hello', ' world', ' this is your', ' master calling'] + """ + + def __init__(self, *args): + self.args = args + + def __call__(self, s): + return s.split(*self.args) + + +def indent(string, prefix=' ' * 4): + """ + >>> indent('foo') + ' foo' + """ + return prefix + string + + +class WordSet(tuple): + """ + Given an identifier, return the words that identifier represents, + whether in camel case, underscore-separated, etc. + + >>> WordSet.parse("camelCase") + ('camel', 'Case') + + >>> WordSet.parse("under_sep") + ('under', 'sep') + + Acronyms should be retained + + >>> WordSet.parse("firstSNL") + ('first', 'SNL') + + >>> WordSet.parse("you_and_I") + ('you', 'and', 'I') + + >>> WordSet.parse("A simple test") + ('A', 'simple', 'test') + + Multiple caps should not interfere with the first cap of another word. + + >>> WordSet.parse("myABCClass") + ('my', 'ABC', 'Class') + + The result is a WordSet, so you can get the form you need. + + >>> WordSet.parse("myABCClass").underscore_separated() + 'my_ABC_Class' + + >>> WordSet.parse('a-command').camel_case() + 'ACommand' + + >>> WordSet.parse('someIdentifier').lowered().space_separated() + 'some identifier' + + Slices of the result should return another WordSet. + + >>> WordSet.parse('taken-out-of-context')[1:].underscore_separated() + 'out_of_context' + + >>> WordSet.from_class_name(WordSet()).lowered().space_separated() + 'word set' + + >>> example = WordSet.parse('figured it out') + >>> example.headless_camel_case() + 'figuredItOut' + >>> example.dash_separated() + 'figured-it-out' + + """ + + _pattern = re.compile('([A-Z]?[a-z]+)|([A-Z]+(?![a-z]))') + + def capitalized(self): + return WordSet(word.capitalize() for word in self) + + def lowered(self): + return WordSet(word.lower() for word in self) + + def camel_case(self): + return ''.join(self.capitalized()) + + def headless_camel_case(self): + words = iter(self) + first = next(words).lower() + new_words = itertools.chain((first,), WordSet(words).camel_case()) + return ''.join(new_words) + + def underscore_separated(self): + return '_'.join(self) + + def dash_separated(self): + return '-'.join(self) + + def space_separated(self): + return ' '.join(self) + + def trim_right(self, item): + """ + Remove the item from the end of the set. + + >>> WordSet.parse('foo bar').trim_right('foo') + ('foo', 'bar') + >>> WordSet.parse('foo bar').trim_right('bar') + ('foo',) + >>> WordSet.parse('').trim_right('bar') + () + """ + return self[:-1] if self and self[-1] == item else self + + def trim_left(self, item): + """ + Remove the item from the beginning of the set. + + >>> WordSet.parse('foo bar').trim_left('foo') + ('bar',) + >>> WordSet.parse('foo bar').trim_left('bar') + ('foo', 'bar') + >>> WordSet.parse('').trim_left('bar') + () + """ + return self[1:] if self and self[0] == item else self + + def trim(self, item): + """ + >>> WordSet.parse('foo bar').trim('foo') + ('bar',) + """ + return self.trim_left(item).trim_right(item) + + def __getitem__(self, item): + result = super(WordSet, self).__getitem__(item) + if isinstance(item, slice): + result = WordSet(result) + return result + + @classmethod + def parse(cls, identifier): + matches = cls._pattern.finditer(identifier) + return WordSet(match.group(0) for match in matches) + + @classmethod + def from_class_name(cls, subject): + return cls.parse(subject.__class__.__name__) + + +# for backward compatibility +words = WordSet.parse + + +def simple_html_strip(s): + r""" + Remove HTML from the string `s`. + + >>> str(simple_html_strip('')) + '' + + >>> print(simple_html_strip('A stormy day in paradise')) + A stormy day in paradise + + >>> print(simple_html_strip('Somebody tell the truth.')) + Somebody tell the truth. + + >>> print(simple_html_strip('What about
\nmultiple lines?')) + What about + multiple lines? + """ + html_stripper = re.compile('()|(<[^>]*>)|([^<]+)', re.DOTALL) + texts = (match.group(3) or '' for match in html_stripper.finditer(s)) + return ''.join(texts) + + +class SeparatedValues(str): + """ + A string separated by a separator. Overrides __iter__ for getting + the values. + + >>> list(SeparatedValues('a,b,c')) + ['a', 'b', 'c'] + + Whitespace is stripped and empty values are discarded. + + >>> list(SeparatedValues(' a, b , c, ')) + ['a', 'b', 'c'] + """ + + separator = ',' + + def __iter__(self): + parts = self.split(self.separator) + return filter(None, (part.strip() for part in parts)) + + +class Stripper: + r""" + Given a series of lines, find the common prefix and strip it from them. + + >>> lines = [ + ... 'abcdefg\n', + ... 'abc\n', + ... 'abcde\n', + ... ] + >>> res = Stripper.strip_prefix(lines) + >>> res.prefix + 'abc' + >>> list(res.lines) + ['defg\n', '\n', 'de\n'] + + If no prefix is common, nothing should be stripped. + + >>> lines = [ + ... 'abcd\n', + ... '1234\n', + ... ] + >>> res = Stripper.strip_prefix(lines) + >>> res.prefix = '' + >>> list(res.lines) + ['abcd\n', '1234\n'] + """ + + def __init__(self, prefix, lines): + self.prefix = prefix + self.lines = map(self, lines) + + @classmethod + def strip_prefix(cls, lines): + prefix_lines, lines = itertools.tee(lines) + prefix = functools.reduce(cls.common_prefix, prefix_lines) + return cls(prefix, lines) + + def __call__(self, line): + if not self.prefix: + return line + null, prefix, rest = line.partition(self.prefix) + return rest + + @staticmethod + def common_prefix(s1, s2): + """ + Return the common prefix of two lines. + """ + index = min(len(s1), len(s2)) + while s1[:index] != s2[:index]: + index -= 1 + return s1[:index] + + +def remove_prefix(text, prefix): + """ + Remove the prefix from the text if it exists. + + >>> remove_prefix('underwhelming performance', 'underwhelming ') + 'performance' + + >>> remove_prefix('something special', 'sample') + 'something special' + """ + null, prefix, rest = text.rpartition(prefix) + return rest + + +def remove_suffix(text, suffix): + """ + Remove the suffix from the text if it exists. + + >>> remove_suffix('name.git', '.git') + 'name' + + >>> remove_suffix('something special', 'sample') + 'something special' + """ + rest, suffix, null = text.partition(suffix) + return rest + + +def normalize_newlines(text): + r""" + Replace alternate newlines with the canonical newline. + + >>> normalize_newlines('Lorem Ipsum\u2029') + 'Lorem Ipsum\n' + >>> normalize_newlines('Lorem Ipsum\r\n') + 'Lorem Ipsum\n' + >>> normalize_newlines('Lorem Ipsum\x85') + 'Lorem Ipsum\n' + """ + newlines = ['\r\n', '\r', '\n', '\u0085', '\u2028', '\u2029'] + pattern = '|'.join(newlines) + return re.sub(pattern, '\n', text) + + +def _nonblank(str): + return str and not str.startswith('#') + + +@functools.singledispatch +def yield_lines(iterable): + r""" + Yield valid lines of a string or iterable. + + >>> list(yield_lines('')) + [] + >>> list(yield_lines(['foo', 'bar'])) + ['foo', 'bar'] + >>> list(yield_lines('foo\nbar')) + ['foo', 'bar'] + >>> list(yield_lines('\nfoo\n#bar\nbaz #comment')) + ['foo', 'baz #comment'] + >>> list(yield_lines(['foo\nbar', 'baz', 'bing\n\n\n'])) + ['foo', 'bar', 'baz', 'bing'] + """ + return itertools.chain.from_iterable(map(yield_lines, iterable)) + + +@yield_lines.register(str) +def _(text): + return filter(_nonblank, map(str.strip, text.splitlines())) + + +def drop_comment(line): + """ + Drop comments. + + >>> drop_comment('foo # bar') + 'foo' + + A hash without a space may be in a URL. + + >>> drop_comment('http://example.com/foo#bar') + 'http://example.com/foo#bar' + """ + return line.partition(' #')[0] + + +def join_continuation(lines): + r""" + Join lines continued by a trailing backslash. + + >>> list(join_continuation(['foo \\', 'bar', 'baz'])) + ['foobar', 'baz'] + >>> list(join_continuation(['foo \\', 'bar', 'baz'])) + ['foobar', 'baz'] + >>> list(join_continuation(['foo \\', 'bar \\', 'baz'])) + ['foobarbaz'] + + Not sure why, but... + The character preceeding the backslash is also elided. + + >>> list(join_continuation(['goo\\', 'dly'])) + ['godly'] + + A terrible idea, but... + If no line is available to continue, suppress the lines. + + >>> list(join_continuation(['foo', 'bar\\', 'baz\\'])) + ['foo'] + """ + lines = iter(lines) + for item in lines: + while item.endswith('\\'): + try: + item = item[:-2].strip() + next(lines) + except StopIteration: + return + yield item diff --git a/libs/common/setuptools/_vendor/more_itertools/__init__.py b/libs/common/setuptools/_vendor/more_itertools/__init__.py new file mode 100644 index 00000000..19a169fc --- /dev/null +++ b/libs/common/setuptools/_vendor/more_itertools/__init__.py @@ -0,0 +1,4 @@ +from .more import * # noqa +from .recipes import * # noqa + +__version__ = '8.8.0' diff --git a/libs/common/more_itertools/more.py b/libs/common/setuptools/_vendor/more_itertools/more.py similarity index 54% rename from libs/common/more_itertools/more.py rename to libs/common/setuptools/_vendor/more_itertools/more.py index bd32a261..e6fca4d4 100644 --- a/libs/common/more_itertools/more.py +++ b/libs/common/setuptools/_vendor/more_itertools/more.py @@ -1,8 +1,9 @@ -from __future__ import print_function +import warnings -from collections import Counter, defaultdict, deque -from functools import partial, wraps -from heapq import merge +from collections import Counter, defaultdict, deque, abc +from collections.abc import Sequence +from functools import partial, reduce, wraps +from heapq import merge, heapify, heapreplace, heappop from itertools import ( chain, compress, @@ -14,37 +15,48 @@ from itertools import ( repeat, starmap, takewhile, - tee + tee, + zip_longest, ) -from operator import itemgetter, lt, gt, sub -from sys import maxsize, version_info -try: - from collections.abc import Sequence -except ImportError: - from collections import Sequence +from math import exp, factorial, floor, log +from queue import Empty, Queue +from random import random, randrange, uniform +from operator import itemgetter, mul, sub, gt, lt +from sys import hexversion, maxsize +from time import monotonic -from six import binary_type, string_types, text_type -from six.moves import filter, map, range, zip, zip_longest - -from .recipes import consume, flatten, take +from .recipes import ( + consume, + flatten, + pairwise, + powerset, + take, + unique_everseen, +) __all__ = [ + 'AbortThread', 'adjacent', 'always_iterable', 'always_reversible', 'bucket', + 'callback_iter', 'chunked', 'circular_shifts', 'collapse', 'collate', 'consecutive_groups', 'consumer', + 'countable', 'count_cycle', + 'mark_ends', 'difference', + 'distinct_combinations', 'distinct_permutations', 'distribute', 'divide', 'exactly_n', + 'filter_except', 'first', 'groupby_transform', 'ilen', @@ -53,19 +65,30 @@ __all__ = [ 'intersperse', 'islice_extended', 'iterate', + 'ichunked', + 'is_sorted', 'last', 'locate', 'lstrip', 'make_decorator', + 'map_except', 'map_reduce', + 'nth_or_last', + 'nth_permutation', + 'nth_product', 'numeric_range', 'one', + 'only', 'padded', + 'partitions', + 'set_partitions', 'peekable', + 'repeat_last', 'replace', 'rlocate', 'rstrip', 'run_length', + 'sample', 'seekable', 'SequenceView', 'side_effect', @@ -74,43 +97,63 @@ __all__ = [ 'split_at', 'split_after', 'split_before', + 'split_when', 'split_into', 'spy', 'stagger', 'strip', 'substrings', + 'substrings_indexes', + 'time_limited', 'unique_to_each', 'unzip', 'windowed', 'with_iter', + 'UnequalIterablesError', + 'zip_equal', 'zip_offset', + 'windowed_complete', + 'all_unique', + 'value_chain', + 'product_index', + 'combination_index', + 'permutation_index', ] _marker = object() -def chunked(iterable, n): +def chunked(iterable, n, strict=False): """Break *iterable* into lists of length *n*: >>> list(chunked([1, 2, 3, 4, 5, 6], 3)) [[1, 2, 3], [4, 5, 6]] - If the length of *iterable* is not evenly divisible by *n*, the last - returned list will be shorter: + By the default, the last yielded list will have fewer than *n* elements + if the length of *iterable* is not divisible by *n*: >>> list(chunked([1, 2, 3, 4, 5, 6, 7, 8], 3)) [[1, 2, 3], [4, 5, 6], [7, 8]] To use a fill-in value instead, see the :func:`grouper` recipe. - :func:`chunked` is useful for splitting up a computation on a large number - of keys into batches, to be pickled and sent off to worker processes. One - example is operations on rows in MySQL, which does not implement - server-side cursors properly and would otherwise load the entire dataset - into RAM on the client. + If the length of *iterable* is not divisible by *n* and *strict* is + ``True``, then ``ValueError`` will be raised before the last + list is yielded. """ - return iter(partial(take, n, iter(iterable)), []) + iterator = iter(partial(take, n, iter(iterable)), []) + if strict: + + def ret(): + for chunk in iterator: + if len(chunk) != n: + raise ValueError('iterable is not divisible by n.') + yield chunk + + return iter(ret()) + else: + return iterator def first(iterable, default=_marker): @@ -132,14 +175,12 @@ def first(iterable, default=_marker): """ try: return next(iter(iterable)) - except StopIteration: - # I'm on the edge about raising ValueError instead of StopIteration. At - # the moment, ValueError wins, because the caller could conceivably - # want to do something different with flow control when I raise the - # exception, and it's weird to explicitly catch StopIteration. + except StopIteration as e: if default is _marker: - raise ValueError('first() was called on an empty iterable, and no ' - 'default value was provided.') + raise ValueError( + 'first() was called on an empty iterable, and no ' + 'default value was provided.' + ) from e return default @@ -156,20 +197,40 @@ def last(iterable, default=_marker): raise ``ValueError``. """ try: - try: - # Try to access the last item directly + if isinstance(iterable, Sequence): return iterable[-1] - except (TypeError, AttributeError, KeyError): - # If not slice-able, iterate entirely using length-1 deque - return deque(iterable, maxlen=1)[0] - except IndexError: # If the iterable was empty + # Work around https://bugs.python.org/issue38525 + elif hasattr(iterable, '__reversed__') and (hexversion != 0x030800F0): + return next(reversed(iterable)) + else: + return deque(iterable, maxlen=1)[-1] + except (IndexError, TypeError, StopIteration): if default is _marker: - raise ValueError('last() was called on an empty iterable, and no ' - 'default value was provided.') + raise ValueError( + 'last() was called on an empty iterable, and no default was ' + 'provided.' + ) return default -class peekable(object): +def nth_or_last(iterable, n, default=_marker): + """Return the nth or the last item of *iterable*, + or *default* if *iterable* is empty. + + >>> nth_or_last([0, 1, 2, 3], 2) + 2 + >>> nth_or_last([0, 1], 2) + 1 + >>> nth_or_last([], 0, 'some default') + 'some default' + + If *default* is not provided and there are no items in the iterable, + raise ``ValueError``. + """ + return last(islice(iterable, n + 1), default=default) + + +class peekable: """Wrap an iterator to allow lookahead and prepending elements. Call :meth:`peek` on the result to get the value that will be returned @@ -222,11 +283,12 @@ class peekable(object): >>> if p: # peekable has items ... list(p) ['a', 'b'] - >>> if not p: # peekable is exhaused + >>> if not p: # peekable is exhausted ... list(p) [] """ + def __init__(self, iterable): self._it = iter(iterable) self._cache = deque() @@ -241,10 +303,6 @@ class peekable(object): return False return True - def __nonzero__(self): - # For Python 2 compatibility - return self.__bool__() - def peek(self, default=_marker): """Return the item that will be next returned from ``next()``. @@ -298,8 +356,6 @@ class peekable(object): return next(self._it) - next = __next__ # For Python 2 compatibility - def _get_slice(self, index): # Normalize the slice's arguments step = 1 if (index.step is None) else index.step @@ -339,23 +395,6 @@ class peekable(object): return self._cache[index] -def _collate(*iterables, **kwargs): - """Helper for ``collate()``, called when the user is using the ``reverse`` - or ``key`` keyword arguments on Python versions below 3.5. - - """ - key = kwargs.pop('key', lambda a: a) - reverse = kwargs.pop('reverse', False) - - min_or_max = partial(max if reverse else min, key=itemgetter(0)) - peekables = [peekable(it) for it in iterables] - peekables = [p for p in peekables if p] # Kill empties. - while peekables: - _, p = min_or_max((key(p.peek()), p) for p in peekables) - yield next(p) - peekables = [x for x in peekables if x] - - def collate(*iterables, **kwargs): """Return a sorted merge of the items from each of several already-sorted *iterables*. @@ -384,23 +423,14 @@ def collate(*iterables, **kwargs): If the elements of the passed-in iterables are out of order, you might get unexpected results. - On Python 2.7, this function delegates to :func:`heapq.merge` if neither - of the keyword arguments are specified. On Python 3.5+, this function - is an alias for :func:`heapq.merge`. + On Python 3.5+, this function is an alias for :func:`heapq.merge`. """ - if not kwargs: - return merge(*iterables) - - return _collate(*iterables, **kwargs) - - -# If using Python version 3.5 or greater, heapq.merge() will be faster than -# collate - use that instead. -if version_info >= (3, 5, 0): - _collate_docstring = collate.__doc__ - collate = partial(merge) - collate.__doc__ = _collate_docstring + warnings.warn( + "collate is no longer part of more_itertools, use heapq.merge", + DeprecationWarning, + ) + return merge(*iterables, **kwargs) def consumer(func): @@ -425,11 +455,13 @@ def consumer(func): ``t.send()`` could be used. """ + @wraps(func) def wrapper(*args, **kwargs): gen = func(*args, **kwargs) next(gen) return gen + return wrapper @@ -453,9 +485,9 @@ def ilen(iterable): def iterate(func, start): """Return ``start``, ``func(start)``, ``func(func(start))``, ... - >>> from itertools import islice - >>> list(islice(iterate(lambda x: 2*x, 1), 10)) - [1, 2, 4, 8, 16, 32, 64, 128, 256, 512] + >>> from itertools import islice + >>> list(islice(iterate(lambda x: 2*x, 1), 10)) + [1, 2, 4, 8, 16, 32, 64, 128, 256, 512] """ while True: @@ -475,8 +507,7 @@ def with_iter(context_manager): """ with context_manager as iterable: - for item in iterable: - yield item + yield from iterable def one(iterable, too_short=None, too_long=None): @@ -510,7 +541,8 @@ def one(iterable, too_short=None, too_long=None): >>> one(it) # doctest: +IGNORE_EXCEPTION_DETAIL Traceback (most recent call last): ... - ValueError: too many items in iterable (expected 1)' + ValueError: Expected exactly one item in iterable, but got 'too', + 'many', and perhaps more. >>> too_long = RuntimeError >>> one(it, too_long=too_long) # doctest: +IGNORE_EXCEPTION_DETAIL Traceback (most recent call last): @@ -518,29 +550,34 @@ def one(iterable, too_short=None, too_long=None): RuntimeError Note that :func:`one` attempts to advance *iterable* twice to ensure there - is only one item. If there is more than one, both items will be discarded. - See :func:`spy` or :func:`peekable` to check iterable contents less - destructively. + is only one item. See :func:`spy` or :func:`peekable` to check iterable + contents less destructively. """ it = iter(iterable) try: - value = next(it) - except StopIteration: - raise too_short or ValueError('too few items in iterable (expected 1)') + first_value = next(it) + except StopIteration as e: + raise ( + too_short or ValueError('too few items in iterable (expected 1)') + ) from e try: - next(it) + second_value = next(it) except StopIteration: pass else: - raise too_long or ValueError('too many items in iterable (expected 1)') + msg = ( + 'Expected exactly one item in iterable, but got {!r}, {!r}, ' + 'and perhaps more.'.format(first_value, second_value) + ) + raise too_long or ValueError(msg) - return value + return first_value -def distinct_permutations(iterable): +def distinct_permutations(iterable, r=None): """Yield successive distinct permutations of the elements in *iterable*. >>> sorted(distinct_permutations([1, 0, 1])) @@ -556,34 +593,88 @@ def distinct_permutations(iterable): items input, and each `x_i` is the count of a distinct item in the input sequence. + If *r* is given, only the *r*-length permutations are yielded. + + >>> sorted(distinct_permutations([1, 0, 1], r=2)) + [(0, 1), (1, 0), (1, 1)] + >>> sorted(distinct_permutations(range(3), r=2)) + [(0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)] + """ - def perm_unique_helper(item_counts, perm, i): - """Internal helper function + # Algorithm: https://w.wiki/Qai + def _full(A): + while True: + # Yield the permutation we have + yield tuple(A) - :arg item_counts: Stores the unique items in ``iterable`` and how many - times they are repeated - :arg perm: The permutation that is being built for output - :arg i: The index of the permutation being modified + # Find the largest index i such that A[i] < A[i + 1] + for i in range(size - 2, -1, -1): + if A[i] < A[i + 1]: + break + # If no such index exists, this permutation is the last one + else: + return - The output permutations are built up recursively; the distinct items - are placed until their repetitions are exhausted. - """ - if i < 0: - yield tuple(perm) - else: - for item in item_counts: - if item_counts[item] <= 0: - continue - perm[i] = item - item_counts[item] -= 1 - for x in perm_unique_helper(item_counts, perm, i - 1): - yield x - item_counts[item] += 1 + # Find the largest index j greater than j such that A[i] < A[j] + for j in range(size - 1, i, -1): + if A[i] < A[j]: + break - item_counts = Counter(iterable) - length = sum(item_counts.values()) + # Swap the value of A[i] with that of A[j], then reverse the + # sequence from A[i + 1] to form the new permutation + A[i], A[j] = A[j], A[i] + A[i + 1 :] = A[: i - size : -1] # A[i + 1:][::-1] - return perm_unique_helper(item_counts, [None] * length, length - 1) + # Algorithm: modified from the above + def _partial(A, r): + # Split A into the first r items and the last r items + head, tail = A[:r], A[r:] + right_head_indexes = range(r - 1, -1, -1) + left_tail_indexes = range(len(tail)) + + while True: + # Yield the permutation we have + yield tuple(head) + + # Starting from the right, find the first index of the head with + # value smaller than the maximum value of the tail - call it i. + pivot = tail[-1] + for i in right_head_indexes: + if head[i] < pivot: + break + pivot = head[i] + else: + return + + # Starting from the left, find the first value of the tail + # with a value greater than head[i] and swap. + for j in left_tail_indexes: + if tail[j] > head[i]: + head[i], tail[j] = tail[j], head[i] + break + # If we didn't find one, start from the right and find the first + # index of the head with a value greater than head[i] and swap. + else: + for j in right_head_indexes: + if head[j] > head[i]: + head[i], head[j] = head[j], head[i] + break + + # Reverse head[i + 1:] and swap it with tail[:r - (i + 1)] + tail += head[: i - r : -1] # head[i + 1:][::-1] + i += 1 + head[i:], tail[:] = tail[: r - i], tail[r - i :] + + items = sorted(iterable) + + size = len(items) + if r is None: + r = size + + if 0 < r <= size: + return _full(items) if (r == size) else _partial(items, r) + + return iter(() if r else ((),)) def intersperse(e, iterable, n=1): @@ -653,7 +744,7 @@ def windowed(seq, n, fillvalue=None, step=1): [(1, 2, 3), (2, 3, 4), (3, 4, 5)] When the window is larger than the iterable, *fillvalue* is used in place - of missing values:: + of missing values: >>> list(windowed([1, 2, 3], 4)) [(1, 2, 3, None)] @@ -663,6 +754,14 @@ def windowed(seq, n, fillvalue=None, step=1): >>> list(windowed([1, 2, 3, 4, 5, 6], 3, fillvalue='!', step=2)) [(1, 2, 3), (3, 4, 5), (5, 6, '!')] + To slide into the iterable's items, use :func:`chain` to add filler items + to the left: + + >>> iterable = [1, 2, 3, 4] + >>> n = 3 + >>> padding = [None] * (n - 1) + >>> list(windowed(chain(padding, iterable), 3)) + [(None, None, 1), (None, 1, 2), (1, 2, 3), (2, 3, 4)] """ if n < 0: raise ValueError('n must be >= 0') @@ -672,32 +771,23 @@ def windowed(seq, n, fillvalue=None, step=1): if step < 1: raise ValueError('step must be >= 1') - it = iter(seq) - window = deque([], n) - append = window.append - - # Initial deque fill - for _ in range(n): - append(next(it, fillvalue)) - yield tuple(window) - - # Appending new items to the right causes old items to fall off the left - i = 0 - for item in it: - append(item) - i = (i + 1) % step - if i % step == 0: + window = deque(maxlen=n) + i = n + for _ in map(window.append, seq): + i -= 1 + if not i: + i = step yield tuple(window) - # If there are items from the iterable in the window, pad with the given - # value and emit them. - if (i % step) and (step - i < n): - for _ in range(step - i): - append(fillvalue) + size = len(window) + if size < n: + yield tuple(chain(window, repeat(fillvalue, n - size))) + elif 0 < i < min(step, n): + window += (fillvalue,) * i yield tuple(window) -def substrings(iterable, join_func=None): +def substrings(iterable): """Yield all of the substrings of *iterable*. >>> [''.join(s) for s in substrings('more')] @@ -720,15 +810,51 @@ def substrings(iterable, join_func=None): # And the rest for n in range(2, item_count + 1): for i in range(item_count - n + 1): - yield seq[i:i + n] + yield seq[i : i + n] -class bucket(object): +def substrings_indexes(seq, reverse=False): + """Yield all substrings and their positions in *seq* + + The items yielded will be a tuple of the form ``(substr, i, j)``, where + ``substr == seq[i:j]``. + + This function only works for iterables that support slicing, such as + ``str`` objects. + + >>> for item in substrings_indexes('more'): + ... print(item) + ('m', 0, 1) + ('o', 1, 2) + ('r', 2, 3) + ('e', 3, 4) + ('mo', 0, 2) + ('or', 1, 3) + ('re', 2, 4) + ('mor', 0, 3) + ('ore', 1, 4) + ('more', 0, 4) + + Set *reverse* to ``True`` to yield the same items in the opposite order. + + + """ + r = range(1, len(seq) + 1) + if reverse: + r = reversed(r) + return ( + (seq[i : i + L], i, i + L) for L in r for i in range(len(seq) - L + 1) + ) + + +class bucket: """Wrap *iterable* and return an object that buckets it iterable into child iterables based on a *key* function. >>> iterable = ['a1', 'b1', 'c1', 'a2', 'b2', 'c2', 'b3'] - >>> s = bucket(iterable, key=lambda x: x[0]) + >>> s = bucket(iterable, key=lambda x: x[0]) # Bucket by 1st character + >>> sorted(list(s)) # Get the keys + ['a', 'b', 'c'] >>> a_iterable = s['a'] >>> next(a_iterable) 'a1' @@ -756,6 +882,7 @@ class bucket(object): [] """ + def __init__(self, iterable, key, validator=None): self._it = iter(iterable) self._key = key @@ -801,6 +928,14 @@ class bucket(object): elif self._validator(item_value): self._cache[item_value].append(item) + def __iter__(self): + for item in self._it: + item_value = self._key(item) + if self._validator(item_value): + self._cache[item_value].append(item) + + yield from self._cache.keys() + def __getitem__(self, value): if not self._validator(value): return iter(()) @@ -848,7 +983,7 @@ def spy(iterable, n=1): it = iter(iterable) head = take(n, it) - return head, chain(head, it) + return head.copy(), chain(head, it) def interleave(*iterables): @@ -889,7 +1024,9 @@ def collapse(iterable, base_type=None, levels=None): >>> list(collapse(iterable)) [1, 2, 3, 4, 5, 6] - String types are not considered iterable and will not be collapsed. + Binary and text strings are not considered iterable and + will not be collapsed. + To avoid collapsing other types, specify *base_type*: >>> iterable = ['ab', ('cd', 'ef'), ['gh', 'ij']] @@ -905,11 +1042,12 @@ def collapse(iterable, base_type=None, levels=None): ['a', ['b'], 'c', ['d']] """ + def walk(node, level): if ( - ((levels is not None) and (level > levels)) or - isinstance(node, string_types) or - ((base_type is not None) and isinstance(node, base_type)) + ((levels is not None) and (level > levels)) + or isinstance(node, (str, bytes)) + or ((base_type is not None) and isinstance(node, base_type)) ): yield node return @@ -921,11 +1059,9 @@ def collapse(iterable, base_type=None, levels=None): return else: for child in tree: - for x in walk(child, level + 1): - yield x + yield from walk(child, level + 1) - for x in walk(iterable, 0): - yield x + yield from walk(iterable, 0) def side_effect(func, iterable, chunk_size=None, before=None, after=None): @@ -983,56 +1119,93 @@ def side_effect(func, iterable, chunk_size=None, before=None, after=None): else: for chunk in chunked(iterable, chunk_size): func(chunk) - for item in chunk: - yield item + yield from chunk finally: if after is not None: after() -def sliced(seq, n): +def sliced(seq, n, strict=False): """Yield slices of length *n* from the sequence *seq*. - >>> list(sliced((1, 2, 3, 4, 5, 6), 3)) - [(1, 2, 3), (4, 5, 6)] + >>> list(sliced((1, 2, 3, 4, 5, 6), 3)) + [(1, 2, 3), (4, 5, 6)] - If the length of the sequence is not divisible by the requested slice - length, the last slice will be shorter. + By the default, the last yielded slice will have fewer than *n* elements + if the length of *seq* is not divisible by *n*: - >>> list(sliced((1, 2, 3, 4, 5, 6, 7, 8), 3)) - [(1, 2, 3), (4, 5, 6), (7, 8)] + >>> list(sliced((1, 2, 3, 4, 5, 6, 7, 8), 3)) + [(1, 2, 3), (4, 5, 6), (7, 8)] + + If the length of *seq* is not divisible by *n* and *strict* is + ``True``, then ``ValueError`` will be raised before the last + slice is yielded. This function will only work for iterables that support slicing. For non-sliceable iterables, see :func:`chunked`. """ - return takewhile(bool, (seq[i: i + n] for i in count(0, n))) + iterator = takewhile(len, (seq[i : i + n] for i in count(0, n))) + if strict: + + def ret(): + for _slice in iterator: + if len(_slice) != n: + raise ValueError("seq is not divisible by n.") + yield _slice + + return iter(ret()) + else: + return iterator -def split_at(iterable, pred): +def split_at(iterable, pred, maxsplit=-1, keep_separator=False): """Yield lists of items from *iterable*, where each list is delimited by - an item where callable *pred* returns ``True``. The lists do not include - the delimiting items. + an item where callable *pred* returns ``True``. >>> list(split_at('abcdcba', lambda x: x == 'b')) [['a'], ['c', 'd', 'c'], ['a']] >>> list(split_at(range(10), lambda n: n % 2 == 1)) [[0], [2], [4], [6], [8], []] + + At most *maxsplit* splits are done. If *maxsplit* is not specified or -1, + then there is no limit on the number of splits: + + >>> list(split_at(range(10), lambda n: n % 2 == 1, maxsplit=2)) + [[0], [2], [4, 5, 6, 7, 8, 9]] + + By default, the delimiting items are not included in the output. + The include them, set *keep_separator* to ``True``. + + >>> list(split_at('abcdcba', lambda x: x == 'b', keep_separator=True)) + [['a'], ['b'], ['c', 'd', 'c'], ['b'], ['a']] + """ + if maxsplit == 0: + yield list(iterable) + return + buf = [] - for item in iterable: + it = iter(iterable) + for item in it: if pred(item): yield buf + if keep_separator: + yield [item] + if maxsplit == 1: + yield list(it) + return buf = [] + maxsplit -= 1 else: buf.append(item) yield buf -def split_before(iterable, pred): - """Yield lists of items from *iterable*, where each list starts with an - item where callable *pred* returns ``True``: +def split_before(iterable, pred, maxsplit=-1): + """Yield lists of items from *iterable*, where each list ends just before + an item for which callable *pred* returns ``True``: >>> list(split_before('OneTwo', lambda s: s.isupper())) [['O', 'n', 'e'], ['T', 'w', 'o']] @@ -1040,17 +1213,32 @@ def split_before(iterable, pred): >>> list(split_before(range(10), lambda n: n % 3 == 0)) [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]] + At most *maxsplit* splits are done. If *maxsplit* is not specified or -1, + then there is no limit on the number of splits: + + >>> list(split_before(range(10), lambda n: n % 3 == 0, maxsplit=2)) + [[0, 1, 2], [3, 4, 5], [6, 7, 8, 9]] """ + if maxsplit == 0: + yield list(iterable) + return + buf = [] - for item in iterable: + it = iter(iterable) + for item in it: if pred(item) and buf: yield buf + if maxsplit == 1: + yield [item] + list(it) + return buf = [] + maxsplit -= 1 buf.append(item) - yield buf + if buf: + yield buf -def split_after(iterable, pred): +def split_after(iterable, pred, maxsplit=-1): """Yield lists of items from *iterable*, where each list ends with an item where callable *pred* returns ``True``: @@ -1060,17 +1248,77 @@ def split_after(iterable, pred): >>> list(split_after(range(10), lambda n: n % 3 == 0)) [[0], [1, 2, 3], [4, 5, 6], [7, 8, 9]] + At most *maxsplit* splits are done. If *maxsplit* is not specified or -1, + then there is no limit on the number of splits: + + >>> list(split_after(range(10), lambda n: n % 3 == 0, maxsplit=2)) + [[0], [1, 2, 3], [4, 5, 6, 7, 8, 9]] + """ + if maxsplit == 0: + yield list(iterable) + return + buf = [] - for item in iterable: + it = iter(iterable) + for item in it: buf.append(item) if pred(item) and buf: yield buf + if maxsplit == 1: + yield list(it) + return buf = [] + maxsplit -= 1 if buf: yield buf +def split_when(iterable, pred, maxsplit=-1): + """Split *iterable* into pieces based on the output of *pred*. + *pred* should be a function that takes successive pairs of items and + returns ``True`` if the iterable should be split in between them. + + For example, to find runs of increasing numbers, split the iterable when + element ``i`` is larger than element ``i + 1``: + + >>> list(split_when([1, 2, 3, 3, 2, 5, 2, 4, 2], lambda x, y: x > y)) + [[1, 2, 3, 3], [2, 5], [2, 4], [2]] + + At most *maxsplit* splits are done. If *maxsplit* is not specified or -1, + then there is no limit on the number of splits: + + >>> list(split_when([1, 2, 3, 3, 2, 5, 2, 4, 2], + ... lambda x, y: x > y, maxsplit=2)) + [[1, 2, 3, 3], [2, 5], [2, 4, 2]] + + """ + if maxsplit == 0: + yield list(iterable) + return + + it = iter(iterable) + try: + cur_item = next(it) + except StopIteration: + return + + buf = [cur_item] + for next_item in it: + if pred(cur_item, next_item): + yield buf + if maxsplit == 1: + yield [next_item] + list(it) + return + buf = [] + maxsplit -= 1 + + buf.append(next_item) + cur_item = next_item + + yield buf + + def split_into(iterable, sizes): """Yield a list of sequential items from *iterable* of length 'n' for each integer 'n' in *sizes*. @@ -1134,8 +1382,7 @@ def padded(iterable, fillvalue=None, n=None, next_multiple=False): """ it = iter(iterable) if n is None: - for item in chain(it, repeat(fillvalue)): - yield item + yield from chain(it, repeat(fillvalue)) elif n < 1: raise ValueError('n must be at least 1') else: @@ -1149,6 +1396,25 @@ def padded(iterable, fillvalue=None, n=None, next_multiple=False): yield fillvalue +def repeat_last(iterable, default=None): + """After the *iterable* is exhausted, keep yielding its last element. + + >>> list(islice(repeat_last(range(3)), 5)) + [0, 1, 2, 2, 2] + + If the iterable is empty, yield *default* forever:: + + >>> list(islice(repeat_last(range(0), 42), 5)) + [42, 42, 42, 42, 42] + + """ + item = _marker + for item in iterable: + yield item + final = default if item is _marker else item + yield from repeat(final) + + def distribute(n, iterable): """Distribute the items from *iterable* among *n* smaller iterables. @@ -1212,7 +1478,72 @@ def stagger(iterable, offsets=(-1, 0, 1), longest=False, fillvalue=None): ) -def zip_offset(*iterables, **kwargs): +class UnequalIterablesError(ValueError): + def __init__(self, details=None): + msg = 'Iterables have different lengths' + if details is not None: + msg += (': index 0 has length {}; index {} has length {}').format( + *details + ) + + super().__init__(msg) + + +def _zip_equal_generator(iterables): + for combo in zip_longest(*iterables, fillvalue=_marker): + for val in combo: + if val is _marker: + raise UnequalIterablesError() + yield combo + + +def zip_equal(*iterables): + """``zip`` the input *iterables* together, but raise + ``UnequalIterablesError`` if they aren't all the same length. + + >>> it_1 = range(3) + >>> it_2 = iter('abc') + >>> list(zip_equal(it_1, it_2)) + [(0, 'a'), (1, 'b'), (2, 'c')] + + >>> it_1 = range(3) + >>> it_2 = iter('abcd') + >>> list(zip_equal(it_1, it_2)) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + more_itertools.more.UnequalIterablesError: Iterables have different + lengths + + """ + if hexversion >= 0x30A00A6: + warnings.warn( + ( + 'zip_equal will be removed in a future version of ' + 'more-itertools. Use the builtin zip function with ' + 'strict=True instead.' + ), + DeprecationWarning, + ) + # Check whether the iterables are all the same size. + try: + first_size = len(iterables[0]) + for i, it in enumerate(iterables[1:], 1): + size = len(it) + if size != first_size: + break + else: + # If we didn't break out, we can use the built-in zip. + return zip(*iterables) + + # If we did break out, there was a mismatch. + raise UnequalIterablesError(details=(first_size, i, size)) + # If any one of the iterables didn't have a length, start reading + # them until one runs out. + except TypeError: + return _zip_equal_generator(iterables) + + +def zip_offset(*iterables, offsets, longest=False, fillvalue=None): """``zip`` the input *iterables* together, but offset the `i`-th iterable by the `i`-th item in *offsets*. @@ -1233,10 +1564,6 @@ def zip_offset(*iterables, **kwargs): sequence. Specify *fillvalue* to use some other value. """ - offsets = kwargs['offsets'] - longest = kwargs.get('longest', False) - fillvalue = kwargs.get('fillvalue', None) - if len(iterables) != len(offsets): raise ValueError("Number of iterables and offsets didn't match") @@ -1255,7 +1582,7 @@ def zip_offset(*iterables, **kwargs): return zip(*staggered) -def sort_together(iterables, key_list=(0,), reverse=False): +def sort_together(iterables, key_list=(0,), key=None, reverse=False): """Return the input iterables sorted together, with *key_list* as the priority for sorting. All iterables are trimmed to the length of the shortest one. @@ -1277,15 +1604,48 @@ def sort_together(iterables, key_list=(0,), reverse=False): >>> sort_together(iterables, key_list=(1, 2)) [(2, 3, 1), (0, 0, 1), ('a', 'c', 'b')] + To sort by a function of the elements of the iterable, pass a *key* + function. Its arguments are the elements of the iterables corresponding to + the key list:: + + >>> names = ('a', 'b', 'c') + >>> lengths = (1, 2, 3) + >>> widths = (5, 2, 1) + >>> def area(length, width): + ... return length * width + >>> sort_together([names, lengths, widths], key_list=(1, 2), key=area) + [('c', 'b', 'a'), (3, 2, 1), (1, 2, 5)] + Set *reverse* to ``True`` to sort in descending order. >>> sort_together([(1, 2, 3), ('c', 'b', 'a')], reverse=True) [(3, 2, 1), ('a', 'b', 'c')] """ - return list(zip(*sorted(zip(*iterables), - key=itemgetter(*key_list), - reverse=reverse))) + if key is None: + # if there is no key function, the key argument to sorted is an + # itemgetter + key_argument = itemgetter(*key_list) + else: + # if there is a key function, call it with the items at the offsets + # specified by the key function as arguments + key_list = list(key_list) + if len(key_list) == 1: + # if key_list contains a single item, pass the item at that offset + # as the only argument to the key function + key_offset = key_list[0] + key_argument = lambda zipped_items: key(zipped_items[key_offset]) + else: + # if key_list contains multiple items, use itemgetter to return a + # tuple of items, which we pass as *args to the key function + get_key_items = itemgetter(*key_list) + key_argument = lambda zipped_items: key( + *get_key_items(zipped_items) + ) + + return list( + zip(*sorted(zip(*iterables), key=key_argument, reverse=reverse)) + ) def unzip(iterable): @@ -1331,6 +1691,7 @@ def unzip(iterable): # which just stops the unzipped iterables # at first length mismatch raise StopIteration + return getter return tuple(map(itemgetter(i), it) for i, it in enumerate(iterables)) @@ -1368,19 +1729,26 @@ def divide(n, iterable): if n < 1: raise ValueError('n must be at least 1') - seq = tuple(iterable) + try: + iterable[:0] + except TypeError: + seq = tuple(iterable) + else: + seq = iterable + q, r = divmod(len(seq), n) ret = [] - for i in range(n): - start = (i * q) + (i if i < r else r) - stop = ((i + 1) * q) + (i + 1 if i + 1 < r else r) + stop = 0 + for i in range(1, n + 1): + start = stop + stop += q + 1 if i <= r else q ret.append(iter(seq[start:stop])) return ret -def always_iterable(obj, base_type=(text_type, binary_type)): +def always_iterable(obj, base_type=(str, bytes)): """If *obj* is iterable, return an iterator over its items:: >>> obj = (1, 2, 3) @@ -1472,21 +1840,23 @@ def adjacent(predicate, iterable, distance=1): return zip(adjacent_to_selected, i2) -def groupby_transform(iterable, keyfunc=None, valuefunc=None): - """An extension of :func:`itertools.groupby` that transforms the values of - *iterable* after grouping them. - *keyfunc* is a function used to compute a grouping key for each item. - *valuefunc* is a function for transforming the items after grouping. +def groupby_transform(iterable, keyfunc=None, valuefunc=None, reducefunc=None): + """An extension of :func:`itertools.groupby` that can apply transformations + to the grouped data. - >>> iterable = 'AaaABbBCcA' - >>> keyfunc = lambda x: x.upper() - >>> valuefunc = lambda x: x.lower() - >>> grouper = groupby_transform(iterable, keyfunc, valuefunc) - >>> [(k, ''.join(g)) for k, g in grouper] - [('A', 'aaaa'), ('B', 'bbb'), ('C', 'cc'), ('A', 'a')] + * *keyfunc* is a function computing a key value for each item in *iterable* + * *valuefunc* is a function that transforms the individual items from + *iterable* after grouping + * *reducefunc* is a function that transforms each group of items - *keyfunc* and *valuefunc* default to identity functions if they are not - specified. + >>> iterable = 'aAAbBBcCC' + >>> keyfunc = lambda k: k.upper() + >>> valuefunc = lambda v: v.lower() + >>> reducefunc = lambda g: ''.join(g) + >>> list(groupby_transform(iterable, keyfunc, valuefunc, reducefunc)) + [('A', 'aaa'), ('B', 'bbb'), ('C', 'ccc')] + + Each optional argument defaults to an identity function if not specified. :func:`groupby_transform` is useful when grouping elements of an iterable using a separate iterable as the key. To do this, :func:`zip` the iterables @@ -1506,11 +1876,16 @@ def groupby_transform(iterable, keyfunc=None, valuefunc=None): duplicate groups, you should sort the iterable by the key function. """ - valuefunc = (lambda x: x) if valuefunc is None else valuefunc - return ((k, map(valuefunc, g)) for k, g in groupby(iterable, keyfunc)) + ret = groupby(iterable, keyfunc) + if valuefunc: + ret = ((k, map(valuefunc, g)) for k, g in ret) + if reducefunc: + ret = ((k, reducefunc(g)) for k, g in ret) + + return ret -def numeric_range(*args): +class numeric_range(abc.Sequence, abc.Hashable): """An extension of the built-in ``range()`` function whose arguments can be any orderable numeric type. @@ -1547,28 +1922,184 @@ def numeric_range(*args): Be aware of the limitations of floating point numbers; the representation of the yielded numbers may be surprising. - """ - argc = len(args) - if argc == 1: - stop, = args - start = type(stop)(0) - step = 1 - elif argc == 2: - start, stop = args - step = 1 - elif argc == 3: - start, stop, step = args - else: - err_msg = 'numeric_range takes at most 3 arguments, got {}' - raise TypeError(err_msg.format(argc)) + ``datetime.datetime`` objects can be used for *start* and *stop*, if *step* + is a ``datetime.timedelta`` object: - values = (start + (step * n) for n in count()) - if step > 0: - return takewhile(partial(gt, stop), values) - elif step < 0: - return takewhile(partial(lt, stop), values) - else: - raise ValueError('numeric_range arg 3 must not be zero') + >>> import datetime + >>> start = datetime.datetime(2019, 1, 1) + >>> stop = datetime.datetime(2019, 1, 3) + >>> step = datetime.timedelta(days=1) + >>> items = iter(numeric_range(start, stop, step)) + >>> next(items) + datetime.datetime(2019, 1, 1, 0, 0) + >>> next(items) + datetime.datetime(2019, 1, 2, 0, 0) + + """ + + _EMPTY_HASH = hash(range(0, 0)) + + def __init__(self, *args): + argc = len(args) + if argc == 1: + (self._stop,) = args + self._start = type(self._stop)(0) + self._step = type(self._stop - self._start)(1) + elif argc == 2: + self._start, self._stop = args + self._step = type(self._stop - self._start)(1) + elif argc == 3: + self._start, self._stop, self._step = args + elif argc == 0: + raise TypeError( + 'numeric_range expected at least ' + '1 argument, got {}'.format(argc) + ) + else: + raise TypeError( + 'numeric_range expected at most ' + '3 arguments, got {}'.format(argc) + ) + + self._zero = type(self._step)(0) + if self._step == self._zero: + raise ValueError('numeric_range() arg 3 must not be zero') + self._growing = self._step > self._zero + self._init_len() + + def __bool__(self): + if self._growing: + return self._start < self._stop + else: + return self._start > self._stop + + def __contains__(self, elem): + if self._growing: + if self._start <= elem < self._stop: + return (elem - self._start) % self._step == self._zero + else: + if self._start >= elem > self._stop: + return (self._start - elem) % (-self._step) == self._zero + + return False + + def __eq__(self, other): + if isinstance(other, numeric_range): + empty_self = not bool(self) + empty_other = not bool(other) + if empty_self or empty_other: + return empty_self and empty_other # True if both empty + else: + return ( + self._start == other._start + and self._step == other._step + and self._get_by_index(-1) == other._get_by_index(-1) + ) + else: + return False + + def __getitem__(self, key): + if isinstance(key, int): + return self._get_by_index(key) + elif isinstance(key, slice): + step = self._step if key.step is None else key.step * self._step + + if key.start is None or key.start <= -self._len: + start = self._start + elif key.start >= self._len: + start = self._stop + else: # -self._len < key.start < self._len + start = self._get_by_index(key.start) + + if key.stop is None or key.stop >= self._len: + stop = self._stop + elif key.stop <= -self._len: + stop = self._start + else: # -self._len < key.stop < self._len + stop = self._get_by_index(key.stop) + + return numeric_range(start, stop, step) + else: + raise TypeError( + 'numeric range indices must be ' + 'integers or slices, not {}'.format(type(key).__name__) + ) + + def __hash__(self): + if self: + return hash((self._start, self._get_by_index(-1), self._step)) + else: + return self._EMPTY_HASH + + def __iter__(self): + values = (self._start + (n * self._step) for n in count()) + if self._growing: + return takewhile(partial(gt, self._stop), values) + else: + return takewhile(partial(lt, self._stop), values) + + def __len__(self): + return self._len + + def _init_len(self): + if self._growing: + start = self._start + stop = self._stop + step = self._step + else: + start = self._stop + stop = self._start + step = -self._step + distance = stop - start + if distance <= self._zero: + self._len = 0 + else: # distance > 0 and step > 0: regular euclidean division + q, r = divmod(distance, step) + self._len = int(q) + int(r != self._zero) + + def __reduce__(self): + return numeric_range, (self._start, self._stop, self._step) + + def __repr__(self): + if self._step == 1: + return "numeric_range({}, {})".format( + repr(self._start), repr(self._stop) + ) + else: + return "numeric_range({}, {}, {})".format( + repr(self._start), repr(self._stop), repr(self._step) + ) + + def __reversed__(self): + return iter( + numeric_range( + self._get_by_index(-1), self._start - self._step, -self._step + ) + ) + + def count(self, value): + return int(value in self) + + def index(self, value): + if self._growing: + if self._start <= value < self._stop: + q, r = divmod(value - self._start, self._step) + if r == self._zero: + return int(q) + else: + if self._start >= value > self._stop: + q, r = divmod(self._start - value, -self._step) + if r == self._zero: + return int(q) + + raise ValueError("{} is not in numeric range".format(value)) + + def _get_by_index(self, i): + if i < 0: + i += self._len + if i < 0 or i >= self._len: + raise IndexError("numeric range object index out of range") + return self._start + i * self._step def count_cycle(iterable, n=None): @@ -1587,6 +2118,43 @@ def count_cycle(iterable, n=None): return ((i, item) for i in counter for item in iterable) +def mark_ends(iterable): + """Yield 3-tuples of the form ``(is_first, is_last, item)``. + + >>> list(mark_ends('ABC')) + [(True, False, 'A'), (False, False, 'B'), (False, True, 'C')] + + Use this when looping over an iterable to take special action on its first + and/or last items: + + >>> iterable = ['Header', 100, 200, 'Footer'] + >>> total = 0 + >>> for is_first, is_last, item in mark_ends(iterable): + ... if is_first: + ... continue # Skip the header + ... if is_last: + ... continue # Skip the footer + ... total += item + >>> print(total) + 300 + """ + it = iter(iterable) + + try: + b = next(it) + except StopIteration: + return + + try: + for i in count(): + a = b + b = next(it) + yield i == 0, False, a + + except StopIteration: + yield i == 0, True, a + + def locate(iterable, pred=bool, window_size=None): """Yield the index of each item in *iterable* for which *pred* returns ``True``. @@ -1669,13 +2237,13 @@ def rstrip(iterable, pred): """ cache = [] cache_append = cache.append + cache_clear = cache.clear for x in iterable: if pred(x): cache_append(x) else: - for y in cache: - yield y - del cache[:] + yield from cache + cache_clear() yield x @@ -1696,7 +2264,7 @@ def strip(iterable, pred): return rstrip(lstrip(iterable, pred), pred) -def islice_extended(iterable, *args): +class islice_extended: """An extension of :func:`itertools.islice` that supports negative values for *stop*, *start*, and *step*. @@ -1713,20 +2281,46 @@ def islice_extended(iterable, *args): >>> list(islice_extended(count(), 110, 99, -2)) [110, 108, 106, 104, 102, 100] + You can also use slice notation directly: + + >>> iterable = map(str, count()) + >>> it = islice_extended(iterable)[10:20:2] + >>> list(it) + ['10', '12', '14', '16', '18'] + """ - s = slice(*args) + + def __init__(self, iterable, *args): + it = iter(iterable) + if args: + self._iterable = _islice_helper(it, slice(*args)) + else: + self._iterable = it + + def __iter__(self): + return self + + def __next__(self): + return next(self._iterable) + + def __getitem__(self, key): + if isinstance(key, slice): + return islice_extended(_islice_helper(self._iterable, key)) + + raise TypeError('islice_extended.__getitem__ argument must be a slice') + + +def _islice_helper(it, s): start = s.start stop = s.stop if s.step == 0: raise ValueError('step argument must be a non-zero integer or None.') step = s.step or 1 - it = iter(iterable) - if step > 0: start = 0 if (start is None) else start - if (start < 0): + if start < 0: # Consume all but the last -start items cache = deque(enumerate(it, 1), maxlen=-start) len_iter = cache[-1][0] if cache else 0 @@ -1764,8 +2358,7 @@ def islice_extended(iterable, *args): cache.append(item) else: # When both start and stop are positive we have the normal case - for item in islice(it, start, stop, step): - yield item + yield from islice(it, start, stop, step) else: start = -1 if (start is None) else start @@ -1810,8 +2403,7 @@ def islice_extended(iterable, *args): cache = list(islice(it, n)) - for item in cache[i::step]: - yield item + yield from cache[i::step] def always_reversible(iterable): @@ -1862,6 +2454,17 @@ def consecutive_groups(iterable, ordering=lambda x: x): ['i'] ['l', 'm', 'n', 'o', 'p'] + Each group of consecutive items is an iterator that shares it source with + *iterable*. When an an output group is advanced, the previous group is + no longer available unless its elements are copied (e.g., into a ``list``). + + >>> iterable = [1, 2, 11, 12, 21, 22] + >>> saved_groups = [] + >>> for group in consecutive_groups(iterable): + ... saved_groups.append(list(group)) # Copy group elements + >>> saved_groups + [[1, 2], [11, 12], [21, 22]] + """ for k, g in groupby( enumerate(iterable), key=lambda x: x[0] - ordering(x[1]) @@ -1869,42 +2472,46 @@ def consecutive_groups(iterable, ordering=lambda x: x): yield map(itemgetter(1), g) -def difference(iterable, func=sub): - """By default, compute the first difference of *iterable* using - :func:`operator.sub`. +def difference(iterable, func=sub, *, initial=None): + """This function is the inverse of :func:`itertools.accumulate`. By default + it will compute the first difference of *iterable* using + :func:`operator.sub`: - >>> iterable = [0, 1, 3, 6, 10] + >>> from itertools import accumulate + >>> iterable = accumulate([0, 1, 2, 3, 4]) # produces 0, 1, 3, 6, 10 >>> list(difference(iterable)) [0, 1, 2, 3, 4] - This is the opposite of :func:`accumulate`'s default behavior: - - >>> from more_itertools import accumulate - >>> iterable = [0, 1, 2, 3, 4] - >>> list(accumulate(iterable)) - [0, 1, 3, 6, 10] - >>> list(difference(accumulate(iterable))) - [0, 1, 2, 3, 4] - - By default *func* is :func:`operator.sub`, but other functions can be + *func* defaults to :func:`operator.sub`, but other functions can be specified. They will be applied as follows:: A, B, C, D, ... --> A, func(B, A), func(C, B), func(D, C), ... For example, to do progressive division: - >>> iterable = [1, 2, 6, 24, 120] # Factorial sequence + >>> iterable = [1, 2, 6, 24, 120] >>> func = lambda x, y: x // y >>> list(difference(iterable, func)) [1, 2, 3, 4, 5] + If the *initial* keyword is set, the first element will be skipped when + computing successive differences. + + >>> it = [10, 11, 13, 16] # from accumulate([1, 2, 3], initial=10) + >>> list(difference(it, initial=10)) + [1, 2, 3] + """ a, b = tee(iterable) try: - item = next(b) + first = [next(b)] except StopIteration: return iter([]) - return chain([item], map(lambda x: func(x[1], x[0]), zip(a, b))) + + if initial is not None: + first = [] + + return chain(first, starmap(func, zip(b, a))) class SequenceView(Sequence): @@ -1936,6 +2543,7 @@ class SequenceView(Sequence): require (much) extra storage. """ + def __init__(self, target): if not isinstance(target, Sequence): raise TypeError @@ -1951,7 +2559,7 @@ class SequenceView(Sequence): return '{}({})'.format(self.__class__.__name__, repr(self._target)) -class seekable(object): +class seekable: """Wrap an iterator to allow for seeking backward and forward. This progressively caches the items in the source iterable so they can be re-visited. @@ -1984,8 +2592,26 @@ class seekable(object): >>> next(it), next(it), next(it) ('0', '1', '2') - The cache grows as the source iterable progresses, so beware of wrapping - very large or infinite iterables. + Call :meth:`peek` to look ahead one item without advancing the iterator: + + >>> it = seekable('1234') + >>> it.peek() + '1' + >>> list(it) + ['1', '2', '3', '4'] + >>> it.peek(default='empty') + 'empty' + + Before the iterator is at its end, calling :func:`bool` on it will return + ``True``. After it will return ``False``: + + >>> it = seekable('5678') + >>> bool(it) + True + >>> list(it) + ['5', '6', '7', '8'] + >>> bool(it) + False You may view the contents of the cache with the :meth:`elements` method. That returns a :class:`SequenceView`, a view that updates automatically: @@ -2001,11 +2627,30 @@ class seekable(object): >>> elements SequenceView(['0', '1', '2', '3']) + By default, the cache grows as the source iterable progresses, so beware of + wrapping very large or infinite iterables. Supply *maxlen* to limit the + size of the cache (this of course limits how far back you can seek). + + >>> from itertools import count + >>> it = seekable((str(n) for n in count()), maxlen=2) + >>> next(it), next(it), next(it), next(it) + ('0', '1', '2', '3') + >>> list(it.elements()) + ['2', '3'] + >>> it.seek(0) + >>> next(it), next(it), next(it), next(it) + ('2', '3', '4', '5') + >>> next(it) + '6' + """ - def __init__(self, iterable): + def __init__(self, iterable, maxlen=None): self._source = iter(iterable) - self._cache = [] + if maxlen is None: + self._cache = [] + else: + self._cache = deque([], maxlen) self._index = None def __iter__(self): @@ -2025,7 +2670,24 @@ class seekable(object): self._cache.append(item) return item - next = __next__ + def __bool__(self): + try: + self.peek() + except StopIteration: + return False + return True + + def peek(self, default=_marker): + try: + peeked = next(self) + except StopIteration: + if default is _marker: + raise + return default + if self._index is None: + self._index = len(self._cache) + self._index -= 1 + return peeked def elements(self): return SequenceView(self._cache) @@ -2037,7 +2699,7 @@ class seekable(object): consume(self, remainder) -class run_length(object): +class run_length: """ :func:`run_length.encode` compresses an iterable with run-length encoding. It yields groups of repeated items with the count of how many times they @@ -2087,8 +2749,8 @@ def exactly_n(iterable, n, predicate=bool): def circular_shifts(iterable): """Return a list of circular shifts of *iterable*. - >>> circular_shifts(range(4)) - [(0, 1, 2, 3), (1, 2, 3, 0), (2, 3, 0, 1), (3, 0, 1, 2)] + >>> circular_shifts(range(4)) + [(0, 1, 2, 3), (1, 2, 3, 0), (2, 3, 0, 1), (3, 0, 1, 2)] """ lst = list(iterable) return take(len(lst), windowed(cycle(lst), len(lst))) @@ -2262,9 +2924,7 @@ def rlocate(iterable, pred=bool, window_size=None): if window_size is None: try: len_iter = len(iterable) - return ( - len_iter - i - 1 for i in locate(reversed(iterable), pred) - ) + return (len_iter - i - 1 for i in locate(reversed(iterable), pred)) except TypeError: pass @@ -2322,8 +2982,7 @@ def replace(iterable, pred, substitutes, count=None, window_size=1): if pred(*w): if (count is None) or (n < count): n += 1 - for s in substitutes: - yield s + yield from substitutes consume(windows, window_size - 1) continue @@ -2331,3 +2990,835 @@ def replace(iterable, pred, substitutes, count=None, window_size=1): # yield the first item from the window. if w and (w[0] is not _marker): yield w[0] + + +def partitions(iterable): + """Yield all possible order-preserving partitions of *iterable*. + + >>> iterable = 'abc' + >>> for part in partitions(iterable): + ... print([''.join(p) for p in part]) + ['abc'] + ['a', 'bc'] + ['ab', 'c'] + ['a', 'b', 'c'] + + This is unrelated to :func:`partition`. + + """ + sequence = list(iterable) + n = len(sequence) + for i in powerset(range(1, n)): + yield [sequence[i:j] for i, j in zip((0,) + i, i + (n,))] + + +def set_partitions(iterable, k=None): + """ + Yield the set partitions of *iterable* into *k* parts. Set partitions are + not order-preserving. + + >>> iterable = 'abc' + >>> for part in set_partitions(iterable, 2): + ... print([''.join(p) for p in part]) + ['a', 'bc'] + ['ab', 'c'] + ['b', 'ac'] + + + If *k* is not given, every set partition is generated. + + >>> iterable = 'abc' + >>> for part in set_partitions(iterable): + ... print([''.join(p) for p in part]) + ['abc'] + ['a', 'bc'] + ['ab', 'c'] + ['b', 'ac'] + ['a', 'b', 'c'] + + """ + L = list(iterable) + n = len(L) + if k is not None: + if k < 1: + raise ValueError( + "Can't partition in a negative or zero number of groups" + ) + elif k > n: + return + + def set_partitions_helper(L, k): + n = len(L) + if k == 1: + yield [L] + elif n == k: + yield [[s] for s in L] + else: + e, *M = L + for p in set_partitions_helper(M, k - 1): + yield [[e], *p] + for p in set_partitions_helper(M, k): + for i in range(len(p)): + yield p[:i] + [[e] + p[i]] + p[i + 1 :] + + if k is None: + for k in range(1, n + 1): + yield from set_partitions_helper(L, k) + else: + yield from set_partitions_helper(L, k) + + +class time_limited: + """ + Yield items from *iterable* until *limit_seconds* have passed. + If the time limit expires before all items have been yielded, the + ``timed_out`` parameter will be set to ``True``. + + >>> from time import sleep + >>> def generator(): + ... yield 1 + ... yield 2 + ... sleep(0.2) + ... yield 3 + >>> iterable = time_limited(0.1, generator()) + >>> list(iterable) + [1, 2] + >>> iterable.timed_out + True + + Note that the time is checked before each item is yielded, and iteration + stops if the time elapsed is greater than *limit_seconds*. If your time + limit is 1 second, but it takes 2 seconds to generate the first item from + the iterable, the function will run for 2 seconds and not yield anything. + + """ + + def __init__(self, limit_seconds, iterable): + if limit_seconds < 0: + raise ValueError('limit_seconds must be positive') + self.limit_seconds = limit_seconds + self._iterable = iter(iterable) + self._start_time = monotonic() + self.timed_out = False + + def __iter__(self): + return self + + def __next__(self): + item = next(self._iterable) + if monotonic() - self._start_time > self.limit_seconds: + self.timed_out = True + raise StopIteration + + return item + + +def only(iterable, default=None, too_long=None): + """If *iterable* has only one item, return it. + If it has zero items, return *default*. + If it has more than one item, raise the exception given by *too_long*, + which is ``ValueError`` by default. + + >>> only([], default='missing') + 'missing' + >>> only([1]) + 1 + >>> only([1, 2]) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + ValueError: Expected exactly one item in iterable, but got 1, 2, + and perhaps more.' + >>> only([1, 2], too_long=TypeError) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + TypeError + + Note that :func:`only` attempts to advance *iterable* twice to ensure there + is only one item. See :func:`spy` or :func:`peekable` to check + iterable contents less destructively. + """ + it = iter(iterable) + first_value = next(it, default) + + try: + second_value = next(it) + except StopIteration: + pass + else: + msg = ( + 'Expected exactly one item in iterable, but got {!r}, {!r}, ' + 'and perhaps more.'.format(first_value, second_value) + ) + raise too_long or ValueError(msg) + + return first_value + + +def ichunked(iterable, n): + """Break *iterable* into sub-iterables with *n* elements each. + :func:`ichunked` is like :func:`chunked`, but it yields iterables + instead of lists. + + If the sub-iterables are read in order, the elements of *iterable* + won't be stored in memory. + If they are read out of order, :func:`itertools.tee` is used to cache + elements as necessary. + + >>> from itertools import count + >>> all_chunks = ichunked(count(), 4) + >>> c_1, c_2, c_3 = next(all_chunks), next(all_chunks), next(all_chunks) + >>> list(c_2) # c_1's elements have been cached; c_3's haven't been + [4, 5, 6, 7] + >>> list(c_1) + [0, 1, 2, 3] + >>> list(c_3) + [8, 9, 10, 11] + + """ + source = iter(iterable) + + while True: + # Check to see whether we're at the end of the source iterable + item = next(source, _marker) + if item is _marker: + return + + # Clone the source and yield an n-length slice + source, it = tee(chain([item], source)) + yield islice(it, n) + + # Advance the source iterable + consume(source, n) + + +def distinct_combinations(iterable, r): + """Yield the distinct combinations of *r* items taken from *iterable*. + + >>> list(distinct_combinations([0, 0, 1], 2)) + [(0, 0), (0, 1)] + + Equivalent to ``set(combinations(iterable))``, except duplicates are not + generated and thrown away. For larger input sequences this is much more + efficient. + + """ + if r < 0: + raise ValueError('r must be non-negative') + elif r == 0: + yield () + return + pool = tuple(iterable) + generators = [unique_everseen(enumerate(pool), key=itemgetter(1))] + current_combo = [None] * r + level = 0 + while generators: + try: + cur_idx, p = next(generators[-1]) + except StopIteration: + generators.pop() + level -= 1 + continue + current_combo[level] = p + if level + 1 == r: + yield tuple(current_combo) + else: + generators.append( + unique_everseen( + enumerate(pool[cur_idx + 1 :], cur_idx + 1), + key=itemgetter(1), + ) + ) + level += 1 + + +def filter_except(validator, iterable, *exceptions): + """Yield the items from *iterable* for which the *validator* function does + not raise one of the specified *exceptions*. + + *validator* is called for each item in *iterable*. + It should be a function that accepts one argument and raises an exception + if that item is not valid. + + >>> iterable = ['1', '2', 'three', '4', None] + >>> list(filter_except(int, iterable, ValueError, TypeError)) + ['1', '2', '4'] + + If an exception other than one given by *exceptions* is raised by + *validator*, it is raised like normal. + """ + for item in iterable: + try: + validator(item) + except exceptions: + pass + else: + yield item + + +def map_except(function, iterable, *exceptions): + """Transform each item from *iterable* with *function* and yield the + result, unless *function* raises one of the specified *exceptions*. + + *function* is called to transform each item in *iterable*. + It should be a accept one argument. + + >>> iterable = ['1', '2', 'three', '4', None] + >>> list(map_except(int, iterable, ValueError, TypeError)) + [1, 2, 4] + + If an exception other than one given by *exceptions* is raised by + *function*, it is raised like normal. + """ + for item in iterable: + try: + yield function(item) + except exceptions: + pass + + +def _sample_unweighted(iterable, k): + # Implementation of "Algorithm L" from the 1994 paper by Kim-Hung Li: + # "Reservoir-Sampling Algorithms of Time Complexity O(n(1+log(N/n)))". + + # Fill up the reservoir (collection of samples) with the first `k` samples + reservoir = take(k, iterable) + + # Generate random number that's the largest in a sample of k U(0,1) numbers + # Largest order statistic: https://en.wikipedia.org/wiki/Order_statistic + W = exp(log(random()) / k) + + # The number of elements to skip before changing the reservoir is a random + # number with a geometric distribution. Sample it using random() and logs. + next_index = k + floor(log(random()) / log(1 - W)) + + for index, element in enumerate(iterable, k): + + if index == next_index: + reservoir[randrange(k)] = element + # The new W is the largest in a sample of k U(0, `old_W`) numbers + W *= exp(log(random()) / k) + next_index += floor(log(random()) / log(1 - W)) + 1 + + return reservoir + + +def _sample_weighted(iterable, k, weights): + # Implementation of "A-ExpJ" from the 2006 paper by Efraimidis et al. : + # "Weighted random sampling with a reservoir". + + # Log-transform for numerical stability for weights that are small/large + weight_keys = (log(random()) / weight for weight in weights) + + # Fill up the reservoir (collection of samples) with the first `k` + # weight-keys and elements, then heapify the list. + reservoir = take(k, zip(weight_keys, iterable)) + heapify(reservoir) + + # The number of jumps before changing the reservoir is a random variable + # with an exponential distribution. Sample it using random() and logs. + smallest_weight_key, _ = reservoir[0] + weights_to_skip = log(random()) / smallest_weight_key + + for weight, element in zip(weights, iterable): + if weight >= weights_to_skip: + # The notation here is consistent with the paper, but we store + # the weight-keys in log-space for better numerical stability. + smallest_weight_key, _ = reservoir[0] + t_w = exp(weight * smallest_weight_key) + r_2 = uniform(t_w, 1) # generate U(t_w, 1) + weight_key = log(r_2) / weight + heapreplace(reservoir, (weight_key, element)) + smallest_weight_key, _ = reservoir[0] + weights_to_skip = log(random()) / smallest_weight_key + else: + weights_to_skip -= weight + + # Equivalent to [element for weight_key, element in sorted(reservoir)] + return [heappop(reservoir)[1] for _ in range(k)] + + +def sample(iterable, k, weights=None): + """Return a *k*-length list of elements chosen (without replacement) + from the *iterable*. Like :func:`random.sample`, but works on iterables + of unknown length. + + >>> iterable = range(100) + >>> sample(iterable, 5) # doctest: +SKIP + [81, 60, 96, 16, 4] + + An iterable with *weights* may also be given: + + >>> iterable = range(100) + >>> weights = (i * i + 1 for i in range(100)) + >>> sampled = sample(iterable, 5, weights=weights) # doctest: +SKIP + [79, 67, 74, 66, 78] + + The algorithm can also be used to generate weighted random permutations. + The relative weight of each item determines the probability that it + appears late in the permutation. + + >>> data = "abcdefgh" + >>> weights = range(1, len(data) + 1) + >>> sample(data, k=len(data), weights=weights) # doctest: +SKIP + ['c', 'a', 'b', 'e', 'g', 'd', 'h', 'f'] + """ + if k == 0: + return [] + + iterable = iter(iterable) + if weights is None: + return _sample_unweighted(iterable, k) + else: + weights = iter(weights) + return _sample_weighted(iterable, k, weights) + + +def is_sorted(iterable, key=None, reverse=False): + """Returns ``True`` if the items of iterable are in sorted order, and + ``False`` otherwise. *key* and *reverse* have the same meaning that they do + in the built-in :func:`sorted` function. + + >>> is_sorted(['1', '2', '3', '4', '5'], key=int) + True + >>> is_sorted([5, 4, 3, 1, 2], reverse=True) + False + + The function returns ``False`` after encountering the first out-of-order + item. If there are no out-of-order items, the iterable is exhausted. + """ + + compare = lt if reverse else gt + it = iterable if (key is None) else map(key, iterable) + return not any(starmap(compare, pairwise(it))) + + +class AbortThread(BaseException): + pass + + +class callback_iter: + """Convert a function that uses callbacks to an iterator. + + Let *func* be a function that takes a `callback` keyword argument. + For example: + + >>> def func(callback=None): + ... for i, c in [(1, 'a'), (2, 'b'), (3, 'c')]: + ... if callback: + ... callback(i, c) + ... return 4 + + + Use ``with callback_iter(func)`` to get an iterator over the parameters + that are delivered to the callback. + + >>> with callback_iter(func) as it: + ... for args, kwargs in it: + ... print(args) + (1, 'a') + (2, 'b') + (3, 'c') + + The function will be called in a background thread. The ``done`` property + indicates whether it has completed execution. + + >>> it.done + True + + If it completes successfully, its return value will be available + in the ``result`` property. + + >>> it.result + 4 + + Notes: + + * If the function uses some keyword argument besides ``callback``, supply + *callback_kwd*. + * If it finished executing, but raised an exception, accessing the + ``result`` property will raise the same exception. + * If it hasn't finished executing, accessing the ``result`` + property from within the ``with`` block will raise ``RuntimeError``. + * If it hasn't finished executing, accessing the ``result`` property from + outside the ``with`` block will raise a + ``more_itertools.AbortThread`` exception. + * Provide *wait_seconds* to adjust how frequently the it is polled for + output. + + """ + + def __init__(self, func, callback_kwd='callback', wait_seconds=0.1): + self._func = func + self._callback_kwd = callback_kwd + self._aborted = False + self._future = None + self._wait_seconds = wait_seconds + self._executor = __import__("concurrent.futures").futures.ThreadPoolExecutor(max_workers=1) + self._iterator = self._reader() + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self._aborted = True + self._executor.shutdown() + + def __iter__(self): + return self + + def __next__(self): + return next(self._iterator) + + @property + def done(self): + if self._future is None: + return False + return self._future.done() + + @property + def result(self): + if not self.done: + raise RuntimeError('Function has not yet completed') + + return self._future.result() + + def _reader(self): + q = Queue() + + def callback(*args, **kwargs): + if self._aborted: + raise AbortThread('canceled by user') + + q.put((args, kwargs)) + + self._future = self._executor.submit( + self._func, **{self._callback_kwd: callback} + ) + + while True: + try: + item = q.get(timeout=self._wait_seconds) + except Empty: + pass + else: + q.task_done() + yield item + + if self._future.done(): + break + + remaining = [] + while True: + try: + item = q.get_nowait() + except Empty: + break + else: + q.task_done() + remaining.append(item) + q.join() + yield from remaining + + +def windowed_complete(iterable, n): + """ + Yield ``(beginning, middle, end)`` tuples, where: + + * Each ``middle`` has *n* items from *iterable* + * Each ``beginning`` has the items before the ones in ``middle`` + * Each ``end`` has the items after the ones in ``middle`` + + >>> iterable = range(7) + >>> n = 3 + >>> for beginning, middle, end in windowed_complete(iterable, n): + ... print(beginning, middle, end) + () (0, 1, 2) (3, 4, 5, 6) + (0,) (1, 2, 3) (4, 5, 6) + (0, 1) (2, 3, 4) (5, 6) + (0, 1, 2) (3, 4, 5) (6,) + (0, 1, 2, 3) (4, 5, 6) () + + Note that *n* must be at least 0 and most equal to the length of + *iterable*. + + This function will exhaust the iterable and may require significant + storage. + """ + if n < 0: + raise ValueError('n must be >= 0') + + seq = tuple(iterable) + size = len(seq) + + if n > size: + raise ValueError('n must be <= len(seq)') + + for i in range(size - n + 1): + beginning = seq[:i] + middle = seq[i : i + n] + end = seq[i + n :] + yield beginning, middle, end + + +def all_unique(iterable, key=None): + """ + Returns ``True`` if all the elements of *iterable* are unique (no two + elements are equal). + + >>> all_unique('ABCB') + False + + If a *key* function is specified, it will be used to make comparisons. + + >>> all_unique('ABCb') + True + >>> all_unique('ABCb', str.lower) + False + + The function returns as soon as the first non-unique element is + encountered. Iterables with a mix of hashable and unhashable items can + be used, but the function will be slower for unhashable items. + """ + seenset = set() + seenset_add = seenset.add + seenlist = [] + seenlist_add = seenlist.append + for element in map(key, iterable) if key else iterable: + try: + if element in seenset: + return False + seenset_add(element) + except TypeError: + if element in seenlist: + return False + seenlist_add(element) + return True + + +def nth_product(index, *args): + """Equivalent to ``list(product(*args))[index]``. + + The products of *args* can be ordered lexicographically. + :func:`nth_product` computes the product at sort position *index* without + computing the previous products. + + >>> nth_product(8, range(2), range(2), range(2), range(2)) + (1, 0, 0, 0) + + ``IndexError`` will be raised if the given *index* is invalid. + """ + pools = list(map(tuple, reversed(args))) + ns = list(map(len, pools)) + + c = reduce(mul, ns) + + if index < 0: + index += c + + if not 0 <= index < c: + raise IndexError + + result = [] + for pool, n in zip(pools, ns): + result.append(pool[index % n]) + index //= n + + return tuple(reversed(result)) + + +def nth_permutation(iterable, r, index): + """Equivalent to ``list(permutations(iterable, r))[index]``` + + The subsequences of *iterable* that are of length *r* where order is + important can be ordered lexicographically. :func:`nth_permutation` + computes the subsequence at sort position *index* directly, without + computing the previous subsequences. + + >>> nth_permutation('ghijk', 2, 5) + ('h', 'i') + + ``ValueError`` will be raised If *r* is negative or greater than the length + of *iterable*. + ``IndexError`` will be raised if the given *index* is invalid. + """ + pool = list(iterable) + n = len(pool) + + if r is None or r == n: + r, c = n, factorial(n) + elif not 0 <= r < n: + raise ValueError + else: + c = factorial(n) // factorial(n - r) + + if index < 0: + index += c + + if not 0 <= index < c: + raise IndexError + + if c == 0: + return tuple() + + result = [0] * r + q = index * factorial(n) // c if r < n else index + for d in range(1, n + 1): + q, i = divmod(q, d) + if 0 <= n - d < r: + result[n - d] = i + if q == 0: + break + + return tuple(map(pool.pop, result)) + + +def value_chain(*args): + """Yield all arguments passed to the function in the same order in which + they were passed. If an argument itself is iterable then iterate over its + values. + + >>> list(value_chain(1, 2, 3, [4, 5, 6])) + [1, 2, 3, 4, 5, 6] + + Binary and text strings are not considered iterable and are emitted + as-is: + + >>> list(value_chain('12', '34', ['56', '78'])) + ['12', '34', '56', '78'] + + + Multiple levels of nesting are not flattened. + + """ + for value in args: + if isinstance(value, (str, bytes)): + yield value + continue + try: + yield from value + except TypeError: + yield value + + +def product_index(element, *args): + """Equivalent to ``list(product(*args)).index(element)`` + + The products of *args* can be ordered lexicographically. + :func:`product_index` computes the first index of *element* without + computing the previous products. + + >>> product_index([8, 2], range(10), range(5)) + 42 + + ``ValueError`` will be raised if the given *element* isn't in the product + of *args*. + """ + index = 0 + + for x, pool in zip_longest(element, args, fillvalue=_marker): + if x is _marker or pool is _marker: + raise ValueError('element is not a product of args') + + pool = tuple(pool) + index = index * len(pool) + pool.index(x) + + return index + + +def combination_index(element, iterable): + """Equivalent to ``list(combinations(iterable, r)).index(element)`` + + The subsequences of *iterable* that are of length *r* can be ordered + lexicographically. :func:`combination_index` computes the index of the + first *element*, without computing the previous combinations. + + >>> combination_index('adf', 'abcdefg') + 10 + + ``ValueError`` will be raised if the given *element* isn't one of the + combinations of *iterable*. + """ + element = enumerate(element) + k, y = next(element, (None, None)) + if k is None: + return 0 + + indexes = [] + pool = enumerate(iterable) + for n, x in pool: + if x == y: + indexes.append(n) + tmp, y = next(element, (None, None)) + if tmp is None: + break + else: + k = tmp + else: + raise ValueError('element is not a combination of iterable') + + n, _ = last(pool, default=(n, None)) + + # Python versiosn below 3.8 don't have math.comb + index = 1 + for i, j in enumerate(reversed(indexes), start=1): + j = n - j + if i <= j: + index += factorial(j) // (factorial(i) * factorial(j - i)) + + return factorial(n + 1) // (factorial(k + 1) * factorial(n - k)) - index + + +def permutation_index(element, iterable): + """Equivalent to ``list(permutations(iterable, r)).index(element)``` + + The subsequences of *iterable* that are of length *r* where order is + important can be ordered lexicographically. :func:`permutation_index` + computes the index of the first *element* directly, without computing + the previous permutations. + + >>> permutation_index([1, 3, 2], range(5)) + 19 + + ``ValueError`` will be raised if the given *element* isn't one of the + permutations of *iterable*. + """ + index = 0 + pool = list(iterable) + for i, x in zip(range(len(pool), -1, -1), element): + r = pool.index(x) + index = index * i + r + del pool[r] + + return index + + +class countable: + """Wrap *iterable* and keep a count of how many items have been consumed. + + The ``items_seen`` attribute starts at ``0`` and increments as the iterable + is consumed: + + >>> iterable = map(str, range(10)) + >>> it = countable(iterable) + >>> it.items_seen + 0 + >>> next(it), next(it) + ('0', '1') + >>> list(it) + ['2', '3', '4', '5', '6', '7', '8', '9'] + >>> it.items_seen + 10 + """ + + def __init__(self, iterable): + self._it = iter(iterable) + self.items_seen = 0 + + def __iter__(self): + return self + + def __next__(self): + item = next(self._it) + self.items_seen += 1 + + return item diff --git a/libs/common/more_itertools/recipes.py b/libs/common/setuptools/_vendor/more_itertools/recipes.py similarity index 75% rename from libs/common/more_itertools/recipes.py rename to libs/common/setuptools/_vendor/more_itertools/recipes.py index 3b455d4e..521abd7c 100644 --- a/libs/common/more_itertools/recipes.py +++ b/libs/common/setuptools/_vendor/more_itertools/recipes.py @@ -7,20 +7,27 @@ Some backward-compatible usability improvements have been made. .. [1] http://docs.python.org/library/itertools.html#recipes """ +import warnings from collections import deque from itertools import ( - chain, combinations, count, cycle, groupby, islice, repeat, starmap, tee + chain, + combinations, + count, + cycle, + groupby, + islice, + repeat, + starmap, + tee, + zip_longest, ) import operator from random import randrange, sample, choice -from six import PY2 -from six.moves import filter, filterfalse, map, range, zip, zip_longest - __all__ = [ - 'accumulate', 'all_equal', 'consume', + 'convolve', 'dotproduct', 'first_true', 'flatten', @@ -30,6 +37,7 @@ __all__ = [ 'nth', 'nth_combination', 'padnone', + 'pad_none', 'pairwise', 'partition', 'powerset', @@ -49,46 +57,17 @@ __all__ = [ ] -def accumulate(iterable, func=operator.add): - """ - Return an iterator whose items are the accumulated results of a function - (specified by the optional *func* argument) that takes two arguments. - By default, returns accumulated sums with :func:`operator.add`. - - >>> list(accumulate([1, 2, 3, 4, 5])) # Running sum - [1, 3, 6, 10, 15] - >>> list(accumulate([1, 2, 3], func=operator.mul)) # Running product - [1, 2, 6] - >>> list(accumulate([0, 1, -1, 2, 3, 2], func=max)) # Running maximum - [0, 1, 1, 2, 3, 3] - - This function is available in the ``itertools`` module for Python 3.2 and - greater. - - """ - it = iter(iterable) - try: - total = next(it) - except StopIteration: - return - else: - yield total - - for element in it: - total = func(total, element) - yield total - - def take(n, iterable): """Return first *n* items of the iterable as a list. >>> take(3, range(10)) [0, 1, 2] - >>> take(5, range(3)) - [0, 1, 2] - Effectively a short replacement for ``next`` based iterator consumption - when you want more than one item, but less than the whole iterator. + If there are fewer than *n* items in the iterable, all of them are + returned. + + >>> take(10, range(3)) + [0, 1, 2] """ return list(islice(iterable, n)) @@ -115,9 +94,9 @@ def tabulate(function, start=0): def tail(n, iterable): """Return an iterator over the last *n* items of *iterable*. - >>> t = tail(3, 'ABCDEFG') - >>> list(t) - ['E', 'F', 'G'] + >>> t = tail(3, 'ABCDEFG') + >>> list(t) + ['E', 'F', 'G'] """ return iter(deque(iterable, maxlen=n)) @@ -166,11 +145,11 @@ def consume(iterator, n=None): def nth(iterable, n, default=None): """Returns the nth item or a default value. - >>> l = range(10) - >>> nth(l, 3) - 3 - >>> nth(l, 20, "zebra") - 'zebra' + >>> l = range(10) + >>> nth(l, 3) + 3 + >>> nth(l, 20, "zebra") + 'zebra' """ return next(islice(iterable, n, None), default) @@ -193,17 +172,17 @@ def all_equal(iterable): def quantify(iterable, pred=bool): """Return the how many times the predicate is true. - >>> quantify([True, False, True]) - 2 + >>> quantify([True, False, True]) + 2 """ return sum(map(pred, iterable)) -def padnone(iterable): +def pad_none(iterable): """Returns the sequence of elements and then returns ``None`` indefinitely. - >>> take(5, padnone(range(3))) + >>> take(5, pad_none(range(3))) [0, 1, 2, None, None] Useful for emulating the behavior of the built-in :func:`map` function. @@ -214,11 +193,14 @@ def padnone(iterable): return chain(iterable, repeat(None)) +padnone = pad_none + + def ncycles(iterable, n): """Returns the sequence elements *n* times - >>> list(ncycles(["a", "b"], 3)) - ['a', 'b', 'a', 'b', 'a', 'b'] + >>> list(ncycles(["a", "b"], 3)) + ['a', 'b', 'a', 'b', 'a', 'b'] """ return chain.from_iterable(repeat(tuple(iterable), n)) @@ -227,8 +209,8 @@ def ncycles(iterable, n): def dotproduct(vec1, vec2): """Returns the dot product of the two iterables. - >>> dotproduct([10, 10], [20, 20]) - 400 + >>> dotproduct([10, 10], [20, 20]) + 400 """ return sum(map(operator.mul, vec1, vec2)) @@ -273,25 +255,44 @@ def repeatfunc(func, times=None, *args): return starmap(func, repeat(args, times)) -def pairwise(iterable): +def _pairwise(iterable): """Returns an iterator of paired items, overlapping, from the original - >>> take(4, pairwise(count())) - [(0, 1), (1, 2), (2, 3), (3, 4)] + >>> take(4, pairwise(count())) + [(0, 1), (1, 2), (2, 3), (3, 4)] + + On Python 3.10 and above, this is an alias for :func:`itertools.pairwise`. """ a, b = tee(iterable) next(b, None) - return zip(a, b) + yield from zip(a, b) -def grouper(n, iterable, fillvalue=None): +try: + from itertools import pairwise as itertools_pairwise +except ImportError: + pairwise = _pairwise +else: + + def pairwise(iterable): + yield from itertools_pairwise(iterable) + + pairwise.__doc__ = _pairwise.__doc__ + + +def grouper(iterable, n, fillvalue=None): """Collect data into fixed-length chunks or blocks. - >>> list(grouper(3, 'ABCDEFG', 'x')) - [('A', 'B', 'C'), ('D', 'E', 'F'), ('G', 'x', 'x')] + >>> list(grouper('ABCDEFG', 3, 'x')) + [('A', 'B', 'C'), ('D', 'E', 'F'), ('G', 'x', 'x')] """ + if isinstance(iterable, int): + warnings.warn( + "grouper expects iterable as first parameter", DeprecationWarning + ) + n, iterable = iterable, n args = [iter(iterable)] * n return zip_longest(fillvalue=fillvalue, *args) @@ -309,10 +310,7 @@ def roundrobin(*iterables): """ # Recipe credited to George Sakkis pending = len(iterables) - if PY2: - nexts = cycle(iter(it).next for it in iterables) - else: - nexts = cycle(iter(it).__next__ for it in iterables) + nexts = cycle(iter(it).__next__ for it in iterables) while pending: try: for next in nexts: @@ -334,10 +332,23 @@ def partition(pred, iterable): >>> list(even_items), list(odd_items) ([0, 2, 4, 6, 8], [1, 3, 5, 7, 9]) + If *pred* is None, :func:`bool` is used. + + >>> iterable = [0, 1, False, True, '', ' '] + >>> false_items, true_items = partition(None, iterable) + >>> list(false_items), list(true_items) + ([0, False, ''], [1, True, ' ']) + """ - # partition(is_odd, range(10)) --> 0 2 4 6 8 and 1 3 5 7 9 - t1, t2 = tee(iterable) - return filterfalse(pred, t1), filter(pred, t2) + if pred is None: + pred = bool + + evaluations = ((pred(x), x) for x in iterable) + t1, t2 = tee(evaluations) + return ( + (x for (cond, x) in t1 if not cond), + (x for (cond, x) in t2 if cond), + ) def powerset(iterable): @@ -375,41 +386,46 @@ def unique_everseen(iterable, key=None): Sequences with a mix of hashable and unhashable items can be used. The function will be slower (i.e., `O(n^2)`) for unhashable items. + Remember that ``list`` objects are unhashable - you can use the *key* + parameter to transform the list to a tuple (which is hashable) to + avoid a slowdown. + + >>> iterable = ([1, 2], [2, 3], [1, 2]) + >>> list(unique_everseen(iterable)) # Slow + [[1, 2], [2, 3]] + >>> list(unique_everseen(iterable, key=tuple)) # Faster + [[1, 2], [2, 3]] + + Similary, you may want to convert unhashable ``set`` objects with + ``key=frozenset``. For ``dict`` objects, + ``key=lambda x: frozenset(x.items())`` can be used. + """ seenset = set() seenset_add = seenset.add seenlist = [] seenlist_add = seenlist.append - if key is None: - for element in iterable: - try: - if element not in seenset: - seenset_add(element) - yield element - except TypeError: - if element not in seenlist: - seenlist_add(element) - yield element - else: - for element in iterable: - k = key(element) - try: - if k not in seenset: - seenset_add(k) - yield element - except TypeError: - if k not in seenlist: - seenlist_add(k) - yield element + use_key = key is not None + + for element in iterable: + k = key(element) if use_key else element + try: + if k not in seenset: + seenset_add(k) + yield element + except TypeError: + if k not in seenlist: + seenlist_add(k) + yield element def unique_justseen(iterable, key=None): """Yields elements in order, ignoring serial duplicates - >>> list(unique_justseen('AAAABBBCCDAABBB')) - ['A', 'B', 'C', 'D', 'A', 'B'] - >>> list(unique_justseen('ABBCcAD', str.lower)) - ['A', 'B', 'C', 'A', 'D'] + >>> list(unique_justseen('AAAABBBCCDAABBB')) + ['A', 'B', 'C', 'D', 'A', 'B'] + >>> list(unique_justseen('ABBCcAD', str.lower)) + ['A', 'B', 'C', 'A', 'D'] """ return map(next, map(operator.itemgetter(1), groupby(iterable, key))) @@ -456,7 +472,7 @@ def first_true(iterable, default=None, pred=None): return next(filter(pred, iterable), default) -def random_product(*args, **kwds): +def random_product(*args, repeat=1): """Draw an item at random from each of the input iterables. >>> random_product('abc', range(4), 'XYZ') # doctest:+SKIP @@ -472,7 +488,7 @@ def random_product(*args, **kwds): ``itertools.product(*args, **kwarg)``. """ - pools = [tuple(pool) for pool in args] * kwds.get('repeat', 1) + pools = [tuple(pool) for pool in args] * repeat return tuple(choice(pool) for pool in pools) @@ -535,6 +551,12 @@ def nth_combination(iterable, r, index): sort position *index* directly, without computing the previous subsequences. + >>> nth_combination(range(5), 3, 5) + (0, 3, 4) + + ``ValueError`` will be raised If *r* is negative or greater than the length + of *iterable*. + ``IndexError`` will be raised if the given *index* is invalid. """ pool = tuple(iterable) n = len(pool) @@ -571,7 +593,28 @@ def prepend(value, iterator): >>> list(prepend(value, iterator)) ['0', '1', '2', '3'] - To prepend multiple values, see :func:`itertools.chain`. + To prepend multiple values, see :func:`itertools.chain` + or :func:`value_chain`. """ return chain([value], iterator) + + +def convolve(signal, kernel): + """Convolve the iterable *signal* with the iterable *kernel*. + + >>> signal = (1, 2, 3, 4, 5) + >>> kernel = [3, 2, 1] + >>> list(convolve(signal, kernel)) + [3, 8, 14, 20, 26, 14, 5] + + Note: the input arguments are not interchangeable, as the *kernel* + is immediately consumed and stored. + + """ + kernel = tuple(kernel)[::-1] + n = len(kernel) + window = deque([0], maxlen=n) * n + for x in chain(signal, repeat(0, n - 1)): + window.append(x) + yield sum(map(operator.mul, kernel, window)) diff --git a/libs/common/setuptools/_vendor/packaging/__about__.py b/libs/common/setuptools/_vendor/packaging/__about__.py index dc95138d..3551bc2d 100644 --- a/libs/common/setuptools/_vendor/packaging/__about__.py +++ b/libs/common/setuptools/_vendor/packaging/__about__.py @@ -1,7 +1,6 @@ # This file is dual licensed under the terms of the Apache License, Version # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. -from __future__ import absolute_import, division, print_function __all__ = [ "__title__", @@ -18,10 +17,10 @@ __title__ = "packaging" __summary__ = "Core utilities for Python packages" __uri__ = "https://github.com/pypa/packaging" -__version__ = "19.2" +__version__ = "21.3" __author__ = "Donald Stufft and individual contributors" __email__ = "donald@stufft.io" -__license__ = "BSD or Apache License, Version 2.0" -__copyright__ = "Copyright 2014-2019 %s" % __author__ +__license__ = "BSD-2-Clause or Apache-2.0" +__copyright__ = "2014-2019 %s" % __author__ diff --git a/libs/common/setuptools/_vendor/packaging/__init__.py b/libs/common/setuptools/_vendor/packaging/__init__.py index a0cf67df..3c50c5dc 100644 --- a/libs/common/setuptools/_vendor/packaging/__init__.py +++ b/libs/common/setuptools/_vendor/packaging/__init__.py @@ -1,7 +1,6 @@ # This file is dual licensed under the terms of the Apache License, Version # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. -from __future__ import absolute_import, division, print_function from .__about__ import ( __author__, diff --git a/libs/common/setuptools/_vendor/packaging/_compat.py b/libs/common/setuptools/_vendor/packaging/_compat.py deleted file mode 100644 index 25da473c..00000000 --- a/libs/common/setuptools/_vendor/packaging/_compat.py +++ /dev/null @@ -1,31 +0,0 @@ -# This file is dual licensed under the terms of the Apache License, Version -# 2.0, and the BSD License. See the LICENSE file in the root of this repository -# for complete details. -from __future__ import absolute_import, division, print_function - -import sys - - -PY2 = sys.version_info[0] == 2 -PY3 = sys.version_info[0] == 3 - -# flake8: noqa - -if PY3: - string_types = (str,) -else: - string_types = (basestring,) - - -def with_metaclass(meta, *bases): - """ - Create a base class with a metaclass. - """ - # This requires a bit of explanation: the basic idea is to make a dummy - # metaclass for one level of class instantiation that replaces itself with - # the actual metaclass. - class metaclass(meta): - def __new__(cls, name, this_bases, d): - return meta(name, bases, d) - - return type.__new__(metaclass, "temporary_class", (), {}) diff --git a/libs/common/setuptools/_vendor/packaging/_manylinux.py b/libs/common/setuptools/_vendor/packaging/_manylinux.py new file mode 100644 index 00000000..4c379aa6 --- /dev/null +++ b/libs/common/setuptools/_vendor/packaging/_manylinux.py @@ -0,0 +1,301 @@ +import collections +import functools +import os +import re +import struct +import sys +import warnings +from typing import IO, Dict, Iterator, NamedTuple, Optional, Tuple + + +# Python does not provide platform information at sufficient granularity to +# identify the architecture of the running executable in some cases, so we +# determine it dynamically by reading the information from the running +# process. This only applies on Linux, which uses the ELF format. +class _ELFFileHeader: + # https://en.wikipedia.org/wiki/Executable_and_Linkable_Format#File_header + class _InvalidELFFileHeader(ValueError): + """ + An invalid ELF file header was found. + """ + + ELF_MAGIC_NUMBER = 0x7F454C46 + ELFCLASS32 = 1 + ELFCLASS64 = 2 + ELFDATA2LSB = 1 + ELFDATA2MSB = 2 + EM_386 = 3 + EM_S390 = 22 + EM_ARM = 40 + EM_X86_64 = 62 + EF_ARM_ABIMASK = 0xFF000000 + EF_ARM_ABI_VER5 = 0x05000000 + EF_ARM_ABI_FLOAT_HARD = 0x00000400 + + def __init__(self, file: IO[bytes]) -> None: + def unpack(fmt: str) -> int: + try: + data = file.read(struct.calcsize(fmt)) + result: Tuple[int, ...] = struct.unpack(fmt, data) + except struct.error: + raise _ELFFileHeader._InvalidELFFileHeader() + return result[0] + + self.e_ident_magic = unpack(">I") + if self.e_ident_magic != self.ELF_MAGIC_NUMBER: + raise _ELFFileHeader._InvalidELFFileHeader() + self.e_ident_class = unpack("B") + if self.e_ident_class not in {self.ELFCLASS32, self.ELFCLASS64}: + raise _ELFFileHeader._InvalidELFFileHeader() + self.e_ident_data = unpack("B") + if self.e_ident_data not in {self.ELFDATA2LSB, self.ELFDATA2MSB}: + raise _ELFFileHeader._InvalidELFFileHeader() + self.e_ident_version = unpack("B") + self.e_ident_osabi = unpack("B") + self.e_ident_abiversion = unpack("B") + self.e_ident_pad = file.read(7) + format_h = "H" + format_i = "I" + format_q = "Q" + format_p = format_i if self.e_ident_class == self.ELFCLASS32 else format_q + self.e_type = unpack(format_h) + self.e_machine = unpack(format_h) + self.e_version = unpack(format_i) + self.e_entry = unpack(format_p) + self.e_phoff = unpack(format_p) + self.e_shoff = unpack(format_p) + self.e_flags = unpack(format_i) + self.e_ehsize = unpack(format_h) + self.e_phentsize = unpack(format_h) + self.e_phnum = unpack(format_h) + self.e_shentsize = unpack(format_h) + self.e_shnum = unpack(format_h) + self.e_shstrndx = unpack(format_h) + + +def _get_elf_header() -> Optional[_ELFFileHeader]: + try: + with open(sys.executable, "rb") as f: + elf_header = _ELFFileHeader(f) + except (OSError, TypeError, _ELFFileHeader._InvalidELFFileHeader): + return None + return elf_header + + +def _is_linux_armhf() -> bool: + # hard-float ABI can be detected from the ELF header of the running + # process + # https://static.docs.arm.com/ihi0044/g/aaelf32.pdf + elf_header = _get_elf_header() + if elf_header is None: + return False + result = elf_header.e_ident_class == elf_header.ELFCLASS32 + result &= elf_header.e_ident_data == elf_header.ELFDATA2LSB + result &= elf_header.e_machine == elf_header.EM_ARM + result &= ( + elf_header.e_flags & elf_header.EF_ARM_ABIMASK + ) == elf_header.EF_ARM_ABI_VER5 + result &= ( + elf_header.e_flags & elf_header.EF_ARM_ABI_FLOAT_HARD + ) == elf_header.EF_ARM_ABI_FLOAT_HARD + return result + + +def _is_linux_i686() -> bool: + elf_header = _get_elf_header() + if elf_header is None: + return False + result = elf_header.e_ident_class == elf_header.ELFCLASS32 + result &= elf_header.e_ident_data == elf_header.ELFDATA2LSB + result &= elf_header.e_machine == elf_header.EM_386 + return result + + +def _have_compatible_abi(arch: str) -> bool: + if arch == "armv7l": + return _is_linux_armhf() + if arch == "i686": + return _is_linux_i686() + return arch in {"x86_64", "aarch64", "ppc64", "ppc64le", "s390x"} + + +# If glibc ever changes its major version, we need to know what the last +# minor version was, so we can build the complete list of all versions. +# For now, guess what the highest minor version might be, assume it will +# be 50 for testing. Once this actually happens, update the dictionary +# with the actual value. +_LAST_GLIBC_MINOR: Dict[int, int] = collections.defaultdict(lambda: 50) + + +class _GLibCVersion(NamedTuple): + major: int + minor: int + + +def _glibc_version_string_confstr() -> Optional[str]: + """ + Primary implementation of glibc_version_string using os.confstr. + """ + # os.confstr is quite a bit faster than ctypes.DLL. It's also less likely + # to be broken or missing. This strategy is used in the standard library + # platform module. + # https://github.com/python/cpython/blob/fcf1d003bf4f0100c/Lib/platform.py#L175-L183 + try: + # os.confstr("CS_GNU_LIBC_VERSION") returns a string like "glibc 2.17". + version_string = os.confstr("CS_GNU_LIBC_VERSION") + assert version_string is not None + _, version = version_string.split() + except (AssertionError, AttributeError, OSError, ValueError): + # os.confstr() or CS_GNU_LIBC_VERSION not available (or a bad value)... + return None + return version + + +def _glibc_version_string_ctypes() -> Optional[str]: + """ + Fallback implementation of glibc_version_string using ctypes. + """ + try: + import ctypes + except ImportError: + return None + + # ctypes.CDLL(None) internally calls dlopen(NULL), and as the dlopen + # manpage says, "If filename is NULL, then the returned handle is for the + # main program". This way we can let the linker do the work to figure out + # which libc our process is actually using. + # + # We must also handle the special case where the executable is not a + # dynamically linked executable. This can occur when using musl libc, + # for example. In this situation, dlopen() will error, leading to an + # OSError. Interestingly, at least in the case of musl, there is no + # errno set on the OSError. The single string argument used to construct + # OSError comes from libc itself and is therefore not portable to + # hard code here. In any case, failure to call dlopen() means we + # can proceed, so we bail on our attempt. + try: + process_namespace = ctypes.CDLL(None) + except OSError: + return None + + try: + gnu_get_libc_version = process_namespace.gnu_get_libc_version + except AttributeError: + # Symbol doesn't exist -> therefore, we are not linked to + # glibc. + return None + + # Call gnu_get_libc_version, which returns a string like "2.5" + gnu_get_libc_version.restype = ctypes.c_char_p + version_str: str = gnu_get_libc_version() + # py2 / py3 compatibility: + if not isinstance(version_str, str): + version_str = version_str.decode("ascii") + + return version_str + + +def _glibc_version_string() -> Optional[str]: + """Returns glibc version string, or None if not using glibc.""" + return _glibc_version_string_confstr() or _glibc_version_string_ctypes() + + +def _parse_glibc_version(version_str: str) -> Tuple[int, int]: + """Parse glibc version. + + We use a regexp instead of str.split because we want to discard any + random junk that might come after the minor version -- this might happen + in patched/forked versions of glibc (e.g. Linaro's version of glibc + uses version strings like "2.20-2014.11"). See gh-3588. + """ + m = re.match(r"(?P[0-9]+)\.(?P[0-9]+)", version_str) + if not m: + warnings.warn( + "Expected glibc version with 2 components major.minor," + " got: %s" % version_str, + RuntimeWarning, + ) + return -1, -1 + return int(m.group("major")), int(m.group("minor")) + + +@functools.lru_cache() +def _get_glibc_version() -> Tuple[int, int]: + version_str = _glibc_version_string() + if version_str is None: + return (-1, -1) + return _parse_glibc_version(version_str) + + +# From PEP 513, PEP 600 +def _is_compatible(name: str, arch: str, version: _GLibCVersion) -> bool: + sys_glibc = _get_glibc_version() + if sys_glibc < version: + return False + # Check for presence of _manylinux module. + try: + import _manylinux # noqa + except ImportError: + return True + if hasattr(_manylinux, "manylinux_compatible"): + result = _manylinux.manylinux_compatible(version[0], version[1], arch) + if result is not None: + return bool(result) + return True + if version == _GLibCVersion(2, 5): + if hasattr(_manylinux, "manylinux1_compatible"): + return bool(_manylinux.manylinux1_compatible) + if version == _GLibCVersion(2, 12): + if hasattr(_manylinux, "manylinux2010_compatible"): + return bool(_manylinux.manylinux2010_compatible) + if version == _GLibCVersion(2, 17): + if hasattr(_manylinux, "manylinux2014_compatible"): + return bool(_manylinux.manylinux2014_compatible) + return True + + +_LEGACY_MANYLINUX_MAP = { + # CentOS 7 w/ glibc 2.17 (PEP 599) + (2, 17): "manylinux2014", + # CentOS 6 w/ glibc 2.12 (PEP 571) + (2, 12): "manylinux2010", + # CentOS 5 w/ glibc 2.5 (PEP 513) + (2, 5): "manylinux1", +} + + +def platform_tags(linux: str, arch: str) -> Iterator[str]: + if not _have_compatible_abi(arch): + return + # Oldest glibc to be supported regardless of architecture is (2, 17). + too_old_glibc2 = _GLibCVersion(2, 16) + if arch in {"x86_64", "i686"}: + # On x86/i686 also oldest glibc to be supported is (2, 5). + too_old_glibc2 = _GLibCVersion(2, 4) + current_glibc = _GLibCVersion(*_get_glibc_version()) + glibc_max_list = [current_glibc] + # We can assume compatibility across glibc major versions. + # https://sourceware.org/bugzilla/show_bug.cgi?id=24636 + # + # Build a list of maximum glibc versions so that we can + # output the canonical list of all glibc from current_glibc + # down to too_old_glibc2, including all intermediary versions. + for glibc_major in range(current_glibc.major - 1, 1, -1): + glibc_minor = _LAST_GLIBC_MINOR[glibc_major] + glibc_max_list.append(_GLibCVersion(glibc_major, glibc_minor)) + for glibc_max in glibc_max_list: + if glibc_max.major == too_old_glibc2.major: + min_minor = too_old_glibc2.minor + else: + # For other glibc major versions oldest supported is (x, 0). + min_minor = -1 + for glibc_minor in range(glibc_max.minor, min_minor, -1): + glibc_version = _GLibCVersion(glibc_max.major, glibc_minor) + tag = "manylinux_{}_{}".format(*glibc_version) + if _is_compatible(tag, arch, glibc_version): + yield linux.replace("linux", tag) + # Handle the legacy manylinux1, manylinux2010, manylinux2014 tags. + if glibc_version in _LEGACY_MANYLINUX_MAP: + legacy_tag = _LEGACY_MANYLINUX_MAP[glibc_version] + if _is_compatible(legacy_tag, arch, glibc_version): + yield linux.replace("linux", legacy_tag) diff --git a/libs/common/setuptools/_vendor/packaging/_musllinux.py b/libs/common/setuptools/_vendor/packaging/_musllinux.py new file mode 100644 index 00000000..8ac3059b --- /dev/null +++ b/libs/common/setuptools/_vendor/packaging/_musllinux.py @@ -0,0 +1,136 @@ +"""PEP 656 support. + +This module implements logic to detect if the currently running Python is +linked against musl, and what musl version is used. +""" + +import contextlib +import functools +import operator +import os +import re +import struct +import subprocess +import sys +from typing import IO, Iterator, NamedTuple, Optional, Tuple + + +def _read_unpacked(f: IO[bytes], fmt: str) -> Tuple[int, ...]: + return struct.unpack(fmt, f.read(struct.calcsize(fmt))) + + +def _parse_ld_musl_from_elf(f: IO[bytes]) -> Optional[str]: + """Detect musl libc location by parsing the Python executable. + + Based on: https://gist.github.com/lyssdod/f51579ae8d93c8657a5564aefc2ffbca + ELF header: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html + """ + f.seek(0) + try: + ident = _read_unpacked(f, "16B") + except struct.error: + return None + if ident[:4] != tuple(b"\x7fELF"): # Invalid magic, not ELF. + return None + f.seek(struct.calcsize("HHI"), 1) # Skip file type, machine, and version. + + try: + # e_fmt: Format for program header. + # p_fmt: Format for section header. + # p_idx: Indexes to find p_type, p_offset, and p_filesz. + e_fmt, p_fmt, p_idx = { + 1: ("IIIIHHH", "IIIIIIII", (0, 1, 4)), # 32-bit. + 2: ("QQQIHHH", "IIQQQQQQ", (0, 2, 5)), # 64-bit. + }[ident[4]] + except KeyError: + return None + else: + p_get = operator.itemgetter(*p_idx) + + # Find the interpreter section and return its content. + try: + _, e_phoff, _, _, _, e_phentsize, e_phnum = _read_unpacked(f, e_fmt) + except struct.error: + return None + for i in range(e_phnum + 1): + f.seek(e_phoff + e_phentsize * i) + try: + p_type, p_offset, p_filesz = p_get(_read_unpacked(f, p_fmt)) + except struct.error: + return None + if p_type != 3: # Not PT_INTERP. + continue + f.seek(p_offset) + interpreter = os.fsdecode(f.read(p_filesz)).strip("\0") + if "musl" not in interpreter: + return None + return interpreter + return None + + +class _MuslVersion(NamedTuple): + major: int + minor: int + + +def _parse_musl_version(output: str) -> Optional[_MuslVersion]: + lines = [n for n in (n.strip() for n in output.splitlines()) if n] + if len(lines) < 2 or lines[0][:4] != "musl": + return None + m = re.match(r"Version (\d+)\.(\d+)", lines[1]) + if not m: + return None + return _MuslVersion(major=int(m.group(1)), minor=int(m.group(2))) + + +@functools.lru_cache() +def _get_musl_version(executable: str) -> Optional[_MuslVersion]: + """Detect currently-running musl runtime version. + + This is done by checking the specified executable's dynamic linking + information, and invoking the loader to parse its output for a version + string. If the loader is musl, the output would be something like:: + + musl libc (x86_64) + Version 1.2.2 + Dynamic Program Loader + """ + with contextlib.ExitStack() as stack: + try: + f = stack.enter_context(open(executable, "rb")) + except OSError: + return None + ld = _parse_ld_musl_from_elf(f) + if not ld: + return None + proc = subprocess.run([ld], stderr=subprocess.PIPE, universal_newlines=True) + return _parse_musl_version(proc.stderr) + + +def platform_tags(arch: str) -> Iterator[str]: + """Generate musllinux tags compatible to the current platform. + + :param arch: Should be the part of platform tag after the ``linux_`` + prefix, e.g. ``x86_64``. The ``linux_`` prefix is assumed as a + prerequisite for the current platform to be musllinux-compatible. + + :returns: An iterator of compatible musllinux tags. + """ + sys_musl = _get_musl_version(sys.executable) + if sys_musl is None: # Python not dynamically linked against musl. + return + for minor in range(sys_musl.minor, -1, -1): + yield f"musllinux_{sys_musl.major}_{minor}_{arch}" + + +if __name__ == "__main__": # pragma: no cover + import sysconfig + + plat = sysconfig.get_platform() + assert plat.startswith("linux-"), "not linux" + + print("plat:", plat) + print("musl:", _get_musl_version(sys.executable)) + print("tags:", end=" ") + for t in platform_tags(re.sub(r"[.-]", "_", plat.split("-", 1)[-1])): + print(t, end="\n ") diff --git a/libs/common/setuptools/_vendor/packaging/_structures.py b/libs/common/setuptools/_vendor/packaging/_structures.py index 68dcca63..90a6465f 100644 --- a/libs/common/setuptools/_vendor/packaging/_structures.py +++ b/libs/common/setuptools/_vendor/packaging/_structures.py @@ -1,68 +1,61 @@ # This file is dual licensed under the terms of the Apache License, Version # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. -from __future__ import absolute_import, division, print_function -class Infinity(object): - def __repr__(self): +class InfinityType: + def __repr__(self) -> str: return "Infinity" - def __hash__(self): + def __hash__(self) -> int: return hash(repr(self)) - def __lt__(self, other): + def __lt__(self, other: object) -> bool: return False - def __le__(self, other): + def __le__(self, other: object) -> bool: return False - def __eq__(self, other): + def __eq__(self, other: object) -> bool: return isinstance(other, self.__class__) - def __ne__(self, other): - return not isinstance(other, self.__class__) - - def __gt__(self, other): + def __gt__(self, other: object) -> bool: return True - def __ge__(self, other): + def __ge__(self, other: object) -> bool: return True - def __neg__(self): + def __neg__(self: object) -> "NegativeInfinityType": return NegativeInfinity -Infinity = Infinity() +Infinity = InfinityType() -class NegativeInfinity(object): - def __repr__(self): +class NegativeInfinityType: + def __repr__(self) -> str: return "-Infinity" - def __hash__(self): + def __hash__(self) -> int: return hash(repr(self)) - def __lt__(self, other): + def __lt__(self, other: object) -> bool: return True - def __le__(self, other): + def __le__(self, other: object) -> bool: return True - def __eq__(self, other): + def __eq__(self, other: object) -> bool: return isinstance(other, self.__class__) - def __ne__(self, other): - return not isinstance(other, self.__class__) - - def __gt__(self, other): + def __gt__(self, other: object) -> bool: return False - def __ge__(self, other): + def __ge__(self, other: object) -> bool: return False - def __neg__(self): + def __neg__(self: object) -> InfinityType: return Infinity -NegativeInfinity = NegativeInfinity() +NegativeInfinity = NegativeInfinityType() diff --git a/libs/common/setuptools/_vendor/packaging/markers.py b/libs/common/setuptools/_vendor/packaging/markers.py index 4bdfdb24..eb0541b8 100644 --- a/libs/common/setuptools/_vendor/packaging/markers.py +++ b/libs/common/setuptools/_vendor/packaging/markers.py @@ -1,20 +1,26 @@ # This file is dual licensed under the terms of the Apache License, Version # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. -from __future__ import absolute_import, division, print_function import operator import os import platform import sys +from typing import Any, Callable, Dict, List, Optional, Tuple, Union -from setuptools.extern.pyparsing import ParseException, ParseResults, stringStart, stringEnd -from setuptools.extern.pyparsing import ZeroOrMore, Group, Forward, QuotedString -from setuptools.extern.pyparsing import Literal as L # noqa - -from ._compat import string_types -from .specifiers import Specifier, InvalidSpecifier +from setuptools.extern.pyparsing import ( # noqa: N817 + Forward, + Group, + Literal as L, + ParseException, + ParseResults, + QuotedString, + ZeroOrMore, + stringEnd, + stringStart, +) +from .specifiers import InvalidSpecifier, Specifier __all__ = [ "InvalidMarker", @@ -24,6 +30,8 @@ __all__ = [ "default_environment", ] +Operator = Callable[[str, str], bool] + class InvalidMarker(ValueError): """ @@ -44,32 +52,32 @@ class UndefinedEnvironmentName(ValueError): """ -class Node(object): - def __init__(self, value): +class Node: + def __init__(self, value: Any) -> None: self.value = value - def __str__(self): + def __str__(self) -> str: return str(self.value) - def __repr__(self): - return "<{0}({1!r})>".format(self.__class__.__name__, str(self)) + def __repr__(self) -> str: + return f"<{self.__class__.__name__}('{self}')>" - def serialize(self): + def serialize(self) -> str: raise NotImplementedError class Variable(Node): - def serialize(self): + def serialize(self) -> str: return str(self) class Value(Node): - def serialize(self): - return '"{0}"'.format(self) + def serialize(self) -> str: + return f'"{self}"' class Op(Node): - def serialize(self): + def serialize(self) -> str: return str(self) @@ -85,13 +93,13 @@ VARIABLE = ( | L("python_version") | L("sys_platform") | L("os_name") - | L("os.name") + | L("os.name") # PEP-345 | L("sys.platform") # PEP-345 | L("platform.version") # PEP-345 | L("platform.machine") # PEP-345 | L("platform.python_implementation") # PEP-345 - | L("python_implementation") # PEP-345 - | L("extra") # undocumented setuptools legacy + | L("python_implementation") # undocumented setuptools legacy + | L("extra") # PEP-508 ) ALIASES = { "os.name": "os_name", @@ -130,15 +138,18 @@ MARKER_EXPR << MARKER_ATOM + ZeroOrMore(BOOLOP + MARKER_EXPR) MARKER = stringStart + MARKER_EXPR + stringEnd -def _coerce_parse_result(results): +def _coerce_parse_result(results: Union[ParseResults, List[Any]]) -> List[Any]: if isinstance(results, ParseResults): return [_coerce_parse_result(i) for i in results] else: return results -def _format_marker(marker, first=True): - assert isinstance(marker, (list, tuple, string_types)) +def _format_marker( + marker: Union[List[str], Tuple[Node, ...], str], first: Optional[bool] = True +) -> str: + + assert isinstance(marker, (list, tuple, str)) # Sometimes we have a structure like [[...]] which is a single item list # where the single item is itself it's own list. In that case we want skip @@ -163,7 +174,7 @@ def _format_marker(marker, first=True): return marker -_operators = { +_operators: Dict[str, Operator] = { "in": lambda lhs, rhs: lhs in rhs, "not in": lambda lhs, rhs: lhs not in rhs, "<": operator.lt, @@ -175,7 +186,7 @@ _operators = { } -def _eval_op(lhs, op, rhs): +def _eval_op(lhs: str, op: Op, rhs: str) -> bool: try: spec = Specifier("".join([op.serialize(), rhs])) except InvalidSpecifier: @@ -183,34 +194,36 @@ def _eval_op(lhs, op, rhs): else: return spec.contains(lhs) - oper = _operators.get(op.serialize()) + oper: Optional[Operator] = _operators.get(op.serialize()) if oper is None: - raise UndefinedComparison( - "Undefined {0!r} on {1!r} and {2!r}.".format(op, lhs, rhs) - ) + raise UndefinedComparison(f"Undefined {op!r} on {lhs!r} and {rhs!r}.") return oper(lhs, rhs) -_undefined = object() +class Undefined: + pass -def _get_env(environment, name): - value = environment.get(name, _undefined) +_undefined = Undefined() - if value is _undefined: + +def _get_env(environment: Dict[str, str], name: str) -> str: + value: Union[str, Undefined] = environment.get(name, _undefined) + + if isinstance(value, Undefined): raise UndefinedEnvironmentName( - "{0!r} does not exist in evaluation environment.".format(name) + f"{name!r} does not exist in evaluation environment." ) return value -def _evaluate_markers(markers, environment): - groups = [[]] +def _evaluate_markers(markers: List[Any], environment: Dict[str, str]) -> bool: + groups: List[List[bool]] = [[]] for marker in markers: - assert isinstance(marker, (list, tuple, string_types)) + assert isinstance(marker, (list, tuple, str)) if isinstance(marker, list): groups[-1].append(_evaluate_markers(marker, environment)) @@ -233,7 +246,7 @@ def _evaluate_markers(markers, environment): return any(all(item) for item in groups) -def format_full_version(info): +def format_full_version(info: "sys._version_info") -> str: version = "{0.major}.{0.minor}.{0.micro}".format(info) kind = info.releaselevel if kind != "final": @@ -241,14 +254,9 @@ def format_full_version(info): return version -def default_environment(): - if hasattr(sys, "implementation"): - iver = format_full_version(sys.implementation.version) - implementation_name = sys.implementation.name - else: - iver = "0" - implementation_name = "" - +def default_environment() -> Dict[str, str]: + iver = format_full_version(sys.implementation.version) + implementation_name = sys.implementation.name return { "implementation_name": implementation_name, "implementation_version": iver, @@ -264,23 +272,23 @@ def default_environment(): } -class Marker(object): - def __init__(self, marker): +class Marker: + def __init__(self, marker: str) -> None: try: self._markers = _coerce_parse_result(MARKER.parseString(marker)) except ParseException as e: - err_str = "Invalid marker: {0!r}, parse error at {1!r}".format( - marker, marker[e.loc : e.loc + 8] + raise InvalidMarker( + f"Invalid marker: {marker!r}, parse error at " + f"{marker[e.loc : e.loc + 8]!r}" ) - raise InvalidMarker(err_str) - def __str__(self): + def __str__(self) -> str: return _format_marker(self._markers) - def __repr__(self): - return "".format(str(self)) + def __repr__(self) -> str: + return f"" - def evaluate(self, environment=None): + def evaluate(self, environment: Optional[Dict[str, str]] = None) -> bool: """Evaluate a marker. Return the boolean from evaluating the given marker against the diff --git a/libs/common/setuptools/_vendor/packaging/requirements.py b/libs/common/setuptools/_vendor/packaging/requirements.py index 8a0c2cb9..0d93231b 100644 --- a/libs/common/setuptools/_vendor/packaging/requirements.py +++ b/libs/common/setuptools/_vendor/packaging/requirements.py @@ -1,15 +1,24 @@ # This file is dual licensed under the terms of the Apache License, Version # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. -from __future__ import absolute_import, division, print_function -import string import re +import string +import urllib.parse +from typing import List, Optional as TOptional, Set -from setuptools.extern.pyparsing import stringStart, stringEnd, originalTextFor, ParseException -from setuptools.extern.pyparsing import ZeroOrMore, Word, Optional, Regex, Combine -from setuptools.extern.pyparsing import Literal as L # noqa -from setuptools.extern.six.moves.urllib import parse as urlparse +from setuptools.extern.pyparsing import ( # noqa + Combine, + Literal as L, + Optional, + ParseException, + Regex, + Word, + ZeroOrMore, + originalTextFor, + stringEnd, + stringStart, +) from .markers import MARKER_EXPR, Marker from .specifiers import LegacySpecifier, Specifier, SpecifierSet @@ -51,7 +60,7 @@ VERSION_ONE = VERSION_PEP440 ^ VERSION_LEGACY VERSION_MANY = Combine( VERSION_ONE + ZeroOrMore(COMMA + VERSION_ONE), joinString=",", adjacent=False )("_raw_spec") -_VERSION_SPEC = Optional(((LPAREN + VERSION_MANY + RPAREN) | VERSION_MANY)) +_VERSION_SPEC = Optional((LPAREN + VERSION_MANY + RPAREN) | VERSION_MANY) _VERSION_SPEC.setParseAction(lambda s, l, t: t._raw_spec or "") VERSION_SPEC = originalTextFor(_VERSION_SPEC)("specifier") @@ -75,7 +84,7 @@ REQUIREMENT = stringStart + NAMED_REQUIREMENT + stringEnd REQUIREMENT.parseString("x[]") -class Requirement(object): +class Requirement: """Parse a requirement. Parse a given requirement string into its parts, such as name, specifier, @@ -88,51 +97,50 @@ class Requirement(object): # the thing as well as the version? What about the markers? # TODO: Can we normalize the name and extra name? - def __init__(self, requirement_string): + def __init__(self, requirement_string: str) -> None: try: req = REQUIREMENT.parseString(requirement_string) except ParseException as e: raise InvalidRequirement( - 'Parse error at "{0!r}": {1}'.format( - requirement_string[e.loc : e.loc + 8], e.msg - ) + f'Parse error at "{ requirement_string[e.loc : e.loc + 8]!r}": {e.msg}' ) - self.name = req.name + self.name: str = req.name if req.url: - parsed_url = urlparse.urlparse(req.url) + parsed_url = urllib.parse.urlparse(req.url) if parsed_url.scheme == "file": - if urlparse.urlunparse(parsed_url) != req.url: + if urllib.parse.urlunparse(parsed_url) != req.url: raise InvalidRequirement("Invalid URL given") elif not (parsed_url.scheme and parsed_url.netloc) or ( not parsed_url.scheme and not parsed_url.netloc ): - raise InvalidRequirement("Invalid URL: {0}".format(req.url)) - self.url = req.url + raise InvalidRequirement(f"Invalid URL: {req.url}") + self.url: TOptional[str] = req.url else: self.url = None - self.extras = set(req.extras.asList() if req.extras else []) - self.specifier = SpecifierSet(req.specifier) - self.marker = req.marker if req.marker else None + self.extras: Set[str] = set(req.extras.asList() if req.extras else []) + self.specifier: SpecifierSet = SpecifierSet(req.specifier) + self.marker: TOptional[Marker] = req.marker if req.marker else None - def __str__(self): - parts = [self.name] + def __str__(self) -> str: + parts: List[str] = [self.name] if self.extras: - parts.append("[{0}]".format(",".join(sorted(self.extras)))) + formatted_extras = ",".join(sorted(self.extras)) + parts.append(f"[{formatted_extras}]") if self.specifier: parts.append(str(self.specifier)) if self.url: - parts.append("@ {0}".format(self.url)) + parts.append(f"@ {self.url}") if self.marker: parts.append(" ") if self.marker: - parts.append("; {0}".format(self.marker)) + parts.append(f"; {self.marker}") return "".join(parts) - def __repr__(self): - return "".format(str(self)) + def __repr__(self) -> str: + return f"" diff --git a/libs/common/setuptools/_vendor/packaging/specifiers.py b/libs/common/setuptools/_vendor/packaging/specifiers.py index 743576a0..0e218a6f 100644 --- a/libs/common/setuptools/_vendor/packaging/specifiers.py +++ b/libs/common/setuptools/_vendor/packaging/specifiers.py @@ -1,15 +1,33 @@ # This file is dual licensed under the terms of the Apache License, Version # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. -from __future__ import absolute_import, division, print_function import abc import functools import itertools import re +import warnings +from typing import ( + Callable, + Dict, + Iterable, + Iterator, + List, + Optional, + Pattern, + Set, + Tuple, + TypeVar, + Union, +) -from ._compat import string_types, with_metaclass -from .version import Version, LegacyVersion, parse +from .utils import canonicalize_version +from .version import LegacyVersion, Version, parse + +ParsedVersion = Union[Version, LegacyVersion] +UnparsedVersion = Union[Version, LegacyVersion, str] +VersionTypeVar = TypeVar("VersionTypeVar", bound=UnparsedVersion) +CallableOperator = Callable[[ParsedVersion, str], bool] class InvalidSpecifier(ValueError): @@ -18,56 +36,51 @@ class InvalidSpecifier(ValueError): """ -class BaseSpecifier(with_metaclass(abc.ABCMeta, object)): +class BaseSpecifier(metaclass=abc.ABCMeta): @abc.abstractmethod - def __str__(self): + def __str__(self) -> str: """ Returns the str representation of this Specifier like object. This should be representative of the Specifier itself. """ @abc.abstractmethod - def __hash__(self): + def __hash__(self) -> int: """ Returns a hash value for this Specifier like object. """ @abc.abstractmethod - def __eq__(self, other): + def __eq__(self, other: object) -> bool: """ Returns a boolean representing whether or not the two Specifier like objects are equal. """ - @abc.abstractmethod - def __ne__(self, other): - """ - Returns a boolean representing whether or not the two Specifier like - objects are not equal. - """ - @abc.abstractproperty - def prereleases(self): + def prereleases(self) -> Optional[bool]: """ Returns whether or not pre-releases as a whole are allowed by this specifier. """ @prereleases.setter - def prereleases(self, value): + def prereleases(self, value: bool) -> None: """ Sets whether or not pre-releases as a whole are allowed by this specifier. """ @abc.abstractmethod - def contains(self, item, prereleases=None): + def contains(self, item: str, prereleases: Optional[bool] = None) -> bool: """ Determines if the given item is contained within this specifier. """ @abc.abstractmethod - def filter(self, iterable, prereleases=None): + def filter( + self, iterable: Iterable[VersionTypeVar], prereleases: Optional[bool] = None + ) -> Iterable[VersionTypeVar]: """ Takes an iterable of items and filters them so that only items which are contained within this specifier are allowed in it. @@ -76,102 +89,109 @@ class BaseSpecifier(with_metaclass(abc.ABCMeta, object)): class _IndividualSpecifier(BaseSpecifier): - _operators = {} + _operators: Dict[str, str] = {} + _regex: Pattern[str] - def __init__(self, spec="", prereleases=None): + def __init__(self, spec: str = "", prereleases: Optional[bool] = None) -> None: match = self._regex.search(spec) if not match: - raise InvalidSpecifier("Invalid specifier: '{0}'".format(spec)) + raise InvalidSpecifier(f"Invalid specifier: '{spec}'") - self._spec = (match.group("operator").strip(), match.group("version").strip()) + self._spec: Tuple[str, str] = ( + match.group("operator").strip(), + match.group("version").strip(), + ) # Store whether or not this Specifier should accept prereleases self._prereleases = prereleases - def __repr__(self): + def __repr__(self) -> str: pre = ( - ", prereleases={0!r}".format(self.prereleases) + f", prereleases={self.prereleases!r}" if self._prereleases is not None else "" ) - return "<{0}({1!r}{2})>".format(self.__class__.__name__, str(self), pre) + return f"<{self.__class__.__name__}({str(self)!r}{pre})>" - def __str__(self): - return "{0}{1}".format(*self._spec) + def __str__(self) -> str: + return "{}{}".format(*self._spec) - def __hash__(self): - return hash(self._spec) + @property + def _canonical_spec(self) -> Tuple[str, str]: + return self._spec[0], canonicalize_version(self._spec[1]) - def __eq__(self, other): - if isinstance(other, string_types): + def __hash__(self) -> int: + return hash(self._canonical_spec) + + def __eq__(self, other: object) -> bool: + if isinstance(other, str): try: - other = self.__class__(other) + other = self.__class__(str(other)) except InvalidSpecifier: return NotImplemented elif not isinstance(other, self.__class__): return NotImplemented - return self._spec == other._spec + return self._canonical_spec == other._canonical_spec - def __ne__(self, other): - if isinstance(other, string_types): - try: - other = self.__class__(other) - except InvalidSpecifier: - return NotImplemented - elif not isinstance(other, self.__class__): - return NotImplemented + def _get_operator(self, op: str) -> CallableOperator: + operator_callable: CallableOperator = getattr( + self, f"_compare_{self._operators[op]}" + ) + return operator_callable - return self._spec != other._spec - - def _get_operator(self, op): - return getattr(self, "_compare_{0}".format(self._operators[op])) - - def _coerce_version(self, version): + def _coerce_version(self, version: UnparsedVersion) -> ParsedVersion: if not isinstance(version, (LegacyVersion, Version)): version = parse(version) return version @property - def operator(self): + def operator(self) -> str: return self._spec[0] @property - def version(self): + def version(self) -> str: return self._spec[1] @property - def prereleases(self): + def prereleases(self) -> Optional[bool]: return self._prereleases @prereleases.setter - def prereleases(self, value): + def prereleases(self, value: bool) -> None: self._prereleases = value - def __contains__(self, item): + def __contains__(self, item: str) -> bool: return self.contains(item) - def contains(self, item, prereleases=None): + def contains( + self, item: UnparsedVersion, prereleases: Optional[bool] = None + ) -> bool: + # Determine if prereleases are to be allowed or not. if prereleases is None: prereleases = self.prereleases # Normalize item to a Version or LegacyVersion, this allows us to have # a shortcut for ``"2.0" in Specifier(">=2") - item = self._coerce_version(item) + normalized_item = self._coerce_version(item) # Determine if we should be supporting prereleases in this specifier # or not, if we do not support prereleases than we can short circuit # logic if this version is a prereleases. - if item.is_prerelease and not prereleases: + if normalized_item.is_prerelease and not prereleases: return False # Actually do the comparison to determine if this item is contained # within this Specifier or not. - return self._get_operator(self.operator)(item, self.version) + operator_callable: CallableOperator = self._get_operator(self.operator) + return operator_callable(normalized_item, self.version) + + def filter( + self, iterable: Iterable[VersionTypeVar], prereleases: Optional[bool] = None + ) -> Iterable[VersionTypeVar]: - def filter(self, iterable, prereleases=None): yielded = False found_prereleases = [] @@ -184,7 +204,7 @@ class _IndividualSpecifier(BaseSpecifier): if self.contains(parsed_version, **kw): # If our version is a prerelease, and we were not set to allow - # prereleases, then we'll store it for later incase nothing + # prereleases, then we'll store it for later in case nothing # else matches this specifier. if parsed_version.is_prerelease and not ( prereleases or self.prereleases @@ -229,33 +249,46 @@ class LegacySpecifier(_IndividualSpecifier): ">": "greater_than", } - def _coerce_version(self, version): + def __init__(self, spec: str = "", prereleases: Optional[bool] = None) -> None: + super().__init__(spec, prereleases) + + warnings.warn( + "Creating a LegacyVersion has been deprecated and will be " + "removed in the next major release", + DeprecationWarning, + ) + + def _coerce_version(self, version: UnparsedVersion) -> LegacyVersion: if not isinstance(version, LegacyVersion): version = LegacyVersion(str(version)) return version - def _compare_equal(self, prospective, spec): + def _compare_equal(self, prospective: LegacyVersion, spec: str) -> bool: return prospective == self._coerce_version(spec) - def _compare_not_equal(self, prospective, spec): + def _compare_not_equal(self, prospective: LegacyVersion, spec: str) -> bool: return prospective != self._coerce_version(spec) - def _compare_less_than_equal(self, prospective, spec): + def _compare_less_than_equal(self, prospective: LegacyVersion, spec: str) -> bool: return prospective <= self._coerce_version(spec) - def _compare_greater_than_equal(self, prospective, spec): + def _compare_greater_than_equal( + self, prospective: LegacyVersion, spec: str + ) -> bool: return prospective >= self._coerce_version(spec) - def _compare_less_than(self, prospective, spec): + def _compare_less_than(self, prospective: LegacyVersion, spec: str) -> bool: return prospective < self._coerce_version(spec) - def _compare_greater_than(self, prospective, spec): + def _compare_greater_than(self, prospective: LegacyVersion, spec: str) -> bool: return prospective > self._coerce_version(spec) -def _require_version_compare(fn): +def _require_version_compare( + fn: Callable[["Specifier", ParsedVersion, str], bool] +) -> Callable[["Specifier", ParsedVersion, str], bool]: @functools.wraps(fn) - def wrapped(self, prospective, spec): + def wrapped(self: "Specifier", prospective: ParsedVersion, spec: str) -> bool: if not isinstance(prospective, Version): return False return fn(self, prospective, spec) @@ -372,7 +405,8 @@ class Specifier(_IndividualSpecifier): } @_require_version_compare - def _compare_compatible(self, prospective, spec): + def _compare_compatible(self, prospective: ParsedVersion, spec: str) -> bool: + # Compatible releases have an equivalent combination of >= and ==. That # is that ~=2.2 is equivalent to >=2.2,==2.*. This allows us to # implement this in terms of the other specifiers instead of @@ -380,15 +414,9 @@ class Specifier(_IndividualSpecifier): # the other specifiers. # We want everything but the last item in the version, but we want to - # ignore post and dev releases and we want to treat the pre-release as - # it's own separate segment. + # ignore suffix segments. prefix = ".".join( - list( - itertools.takewhile( - lambda x: (not x.startswith("post") and not x.startswith("dev")), - _version_split(spec), - ) - )[:-1] + list(itertools.takewhile(_is_not_suffix, _version_split(spec)))[:-1] ) # Add the prefix notation to the end of our string @@ -399,57 +427,73 @@ class Specifier(_IndividualSpecifier): ) @_require_version_compare - def _compare_equal(self, prospective, spec): + def _compare_equal(self, prospective: ParsedVersion, spec: str) -> bool: + # We need special logic to handle prefix matching if spec.endswith(".*"): # In the case of prefix matching we want to ignore local segment. prospective = Version(prospective.public) # Split the spec out by dots, and pretend that there is an implicit # dot in between a release segment and a pre-release segment. - spec = _version_split(spec[:-2]) # Remove the trailing .* + split_spec = _version_split(spec[:-2]) # Remove the trailing .* # Split the prospective version out by dots, and pretend that there # is an implicit dot in between a release segment and a pre-release # segment. - prospective = _version_split(str(prospective)) + split_prospective = _version_split(str(prospective)) # Shorten the prospective version to be the same length as the spec # so that we can determine if the specifier is a prefix of the # prospective version or not. - prospective = prospective[: len(spec)] + shortened_prospective = split_prospective[: len(split_spec)] # Pad out our two sides with zeros so that they both equal the same # length. - spec, prospective = _pad_version(spec, prospective) + padded_spec, padded_prospective = _pad_version( + split_spec, shortened_prospective + ) + + return padded_prospective == padded_spec else: # Convert our spec string into a Version - spec = Version(spec) + spec_version = Version(spec) # If the specifier does not have a local segment, then we want to # act as if the prospective version also does not have a local # segment. - if not spec.local: + if not spec_version.local: prospective = Version(prospective.public) - return prospective == spec + return prospective == spec_version @_require_version_compare - def _compare_not_equal(self, prospective, spec): + def _compare_not_equal(self, prospective: ParsedVersion, spec: str) -> bool: return not self._compare_equal(prospective, spec) @_require_version_compare - def _compare_less_than_equal(self, prospective, spec): - return prospective <= Version(spec) + def _compare_less_than_equal(self, prospective: ParsedVersion, spec: str) -> bool: + + # NB: Local version identifiers are NOT permitted in the version + # specifier, so local version labels can be universally removed from + # the prospective version. + return Version(prospective.public) <= Version(spec) @_require_version_compare - def _compare_greater_than_equal(self, prospective, spec): - return prospective >= Version(spec) + def _compare_greater_than_equal( + self, prospective: ParsedVersion, spec: str + ) -> bool: + + # NB: Local version identifiers are NOT permitted in the version + # specifier, so local version labels can be universally removed from + # the prospective version. + return Version(prospective.public) >= Version(spec) @_require_version_compare - def _compare_less_than(self, prospective, spec): + def _compare_less_than(self, prospective: ParsedVersion, spec_str: str) -> bool: + # Convert our spec to a Version instance, since we'll want to work with # it as a version. - spec = Version(spec) + spec = Version(spec_str) # Check to see if the prospective version is less than the spec # version. If it's not we can short circuit and just return False now @@ -471,10 +515,11 @@ class Specifier(_IndividualSpecifier): return True @_require_version_compare - def _compare_greater_than(self, prospective, spec): + def _compare_greater_than(self, prospective: ParsedVersion, spec_str: str) -> bool: + # Convert our spec to a Version instance, since we'll want to work with # it as a version. - spec = Version(spec) + spec = Version(spec_str) # Check to see if the prospective version is greater than the spec # version. If it's not we can short circuit and just return False now @@ -501,11 +546,12 @@ class Specifier(_IndividualSpecifier): # same version in the spec. return True - def _compare_arbitrary(self, prospective, spec): + def _compare_arbitrary(self, prospective: Version, spec: str) -> bool: return str(prospective).lower() == str(spec).lower() @property - def prereleases(self): + def prereleases(self) -> bool: + # If there is an explicit prereleases set for this, then we'll just # blindly use that. if self._prereleases is not None: @@ -529,15 +575,15 @@ class Specifier(_IndividualSpecifier): return False @prereleases.setter - def prereleases(self, value): + def prereleases(self, value: bool) -> None: self._prereleases = value _prefix_regex = re.compile(r"^([0-9]+)((?:a|b|c|rc)[0-9]+)$") -def _version_split(version): - result = [] +def _version_split(version: str) -> List[str]: + result: List[str] = [] for item in version.split("."): match = _prefix_regex.search(item) if match: @@ -547,7 +593,13 @@ def _version_split(version): return result -def _pad_version(left, right): +def _is_not_suffix(segment: str) -> bool: + return not any( + segment.startswith(prefix) for prefix in ("dev", "a", "b", "rc", "post") + ) + + +def _pad_version(left: List[str], right: List[str]) -> Tuple[List[str], List[str]]: left_split, right_split = [], [] # Get the release segment of our versions @@ -566,15 +618,18 @@ def _pad_version(left, right): class SpecifierSet(BaseSpecifier): - def __init__(self, specifiers="", prereleases=None): - # Split on , to break each indidivual specifier into it's own item, and + def __init__( + self, specifiers: str = "", prereleases: Optional[bool] = None + ) -> None: + + # Split on , to break each individual specifier into it's own item, and # strip each item to remove leading/trailing whitespace. - specifiers = [s.strip() for s in specifiers.split(",") if s.strip()] + split_specifiers = [s.strip() for s in specifiers.split(",") if s.strip()] # Parsed each individual specifier, attempting first to make it a # Specifier and falling back to a LegacySpecifier. - parsed = set() - for specifier in specifiers: + parsed: Set[_IndividualSpecifier] = set() + for specifier in split_specifiers: try: parsed.add(Specifier(specifier)) except InvalidSpecifier: @@ -587,23 +642,23 @@ class SpecifierSet(BaseSpecifier): # we accept prereleases or not. self._prereleases = prereleases - def __repr__(self): + def __repr__(self) -> str: pre = ( - ", prereleases={0!r}".format(self.prereleases) + f", prereleases={self.prereleases!r}" if self._prereleases is not None else "" ) - return "".format(str(self), pre) + return f"" - def __str__(self): + def __str__(self) -> str: return ",".join(sorted(str(s) for s in self._specs)) - def __hash__(self): + def __hash__(self) -> int: return hash(self._specs) - def __and__(self, other): - if isinstance(other, string_types): + def __and__(self, other: Union["SpecifierSet", str]) -> "SpecifierSet": + if isinstance(other, str): other = SpecifierSet(other) elif not isinstance(other, SpecifierSet): return NotImplemented @@ -625,34 +680,23 @@ class SpecifierSet(BaseSpecifier): return specifier - def __eq__(self, other): - if isinstance(other, string_types): - other = SpecifierSet(other) - elif isinstance(other, _IndividualSpecifier): + def __eq__(self, other: object) -> bool: + if isinstance(other, (str, _IndividualSpecifier)): other = SpecifierSet(str(other)) elif not isinstance(other, SpecifierSet): return NotImplemented return self._specs == other._specs - def __ne__(self, other): - if isinstance(other, string_types): - other = SpecifierSet(other) - elif isinstance(other, _IndividualSpecifier): - other = SpecifierSet(str(other)) - elif not isinstance(other, SpecifierSet): - return NotImplemented - - return self._specs != other._specs - - def __len__(self): + def __len__(self) -> int: return len(self._specs) - def __iter__(self): + def __iter__(self) -> Iterator[_IndividualSpecifier]: return iter(self._specs) @property - def prereleases(self): + def prereleases(self) -> Optional[bool]: + # If we have been given an explicit prerelease modifier, then we'll # pass that through here. if self._prereleases is not None: @@ -669,13 +713,16 @@ class SpecifierSet(BaseSpecifier): return any(s.prereleases for s in self._specs) @prereleases.setter - def prereleases(self, value): + def prereleases(self, value: bool) -> None: self._prereleases = value - def __contains__(self, item): + def __contains__(self, item: UnparsedVersion) -> bool: return self.contains(item) - def contains(self, item, prereleases=None): + def contains( + self, item: UnparsedVersion, prereleases: Optional[bool] = None + ) -> bool: + # Ensure that our item is a Version or LegacyVersion instance. if not isinstance(item, (LegacyVersion, Version)): item = parse(item) @@ -701,7 +748,10 @@ class SpecifierSet(BaseSpecifier): # will always return True, this is an explicit design decision. return all(s.contains(item, prereleases=prereleases) for s in self._specs) - def filter(self, iterable, prereleases=None): + def filter( + self, iterable: Iterable[VersionTypeVar], prereleases: Optional[bool] = None + ) -> Iterable[VersionTypeVar]: + # Determine if we're forcing a prerelease or not, if we're not forcing # one for this particular filter call, then we'll use whatever the # SpecifierSet thinks for whether or not we should support prereleases. @@ -719,8 +769,11 @@ class SpecifierSet(BaseSpecifier): # which will filter out any pre-releases, unless there are no final # releases, and which will filter out LegacyVersion in general. else: - filtered = [] - found_prereleases = [] + filtered: List[VersionTypeVar] = [] + found_prereleases: List[VersionTypeVar] = [] + + item: UnparsedVersion + parsed_version: Union[Version, LegacyVersion] for item in iterable: # Ensure that we some kind of Version class for this item. diff --git a/libs/common/setuptools/_vendor/packaging/tags.py b/libs/common/setuptools/_vendor/packaging/tags.py index ec9942f0..9a3d25a7 100644 --- a/libs/common/setuptools/_vendor/packaging/tags.py +++ b/libs/common/setuptools/_vendor/packaging/tags.py @@ -2,25 +2,32 @@ # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. -from __future__ import absolute_import - -import distutils.util - -try: - from importlib.machinery import EXTENSION_SUFFIXES -except ImportError: # pragma: no cover - import imp - - EXTENSION_SUFFIXES = [x[0] for x in imp.get_suffixes()] - del imp +import logging import platform -import re import sys import sysconfig -import warnings +from importlib.machinery import EXTENSION_SUFFIXES +from typing import ( + Dict, + FrozenSet, + Iterable, + Iterator, + List, + Optional, + Sequence, + Tuple, + Union, + cast, +) +from . import _manylinux, _musllinux -INTERPRETER_SHORT_NAMES = { +logger = logging.getLogger(__name__) + +PythonVersion = Sequence[int] +MacVersion = Tuple[int, int] + +INTERPRETER_SHORT_NAMES: Dict[str, str] = { "python": "py", # Generic. "cpython": "cp", "pypy": "pp", @@ -32,45 +39,67 @@ INTERPRETER_SHORT_NAMES = { _32_BIT_INTERPRETER = sys.maxsize <= 2 ** 32 -class Tag(object): +class Tag: + """ + A representation of the tag triple for a wheel. - __slots__ = ["_interpreter", "_abi", "_platform"] + Instances are considered immutable and thus are hashable. Equality checking + is also supported. + """ - def __init__(self, interpreter, abi, platform): + __slots__ = ["_interpreter", "_abi", "_platform", "_hash"] + + def __init__(self, interpreter: str, abi: str, platform: str) -> None: self._interpreter = interpreter.lower() self._abi = abi.lower() self._platform = platform.lower() + # The __hash__ of every single element in a Set[Tag] will be evaluated each time + # that a set calls its `.disjoint()` method, which may be called hundreds of + # times when scanning a page of links for packages with tags matching that + # Set[Tag]. Pre-computing the value here produces significant speedups for + # downstream consumers. + self._hash = hash((self._interpreter, self._abi, self._platform)) @property - def interpreter(self): + def interpreter(self) -> str: return self._interpreter @property - def abi(self): + def abi(self) -> str: return self._abi @property - def platform(self): + def platform(self) -> str: return self._platform - def __eq__(self, other): + def __eq__(self, other: object) -> bool: + if not isinstance(other, Tag): + return NotImplemented + return ( - (self.platform == other.platform) - and (self.abi == other.abi) - and (self.interpreter == other.interpreter) + (self._hash == other._hash) # Short-circuit ASAP for perf reasons. + and (self._platform == other._platform) + and (self._abi == other._abi) + and (self._interpreter == other._interpreter) ) - def __hash__(self): - return hash((self._interpreter, self._abi, self._platform)) + def __hash__(self) -> int: + return self._hash - def __str__(self): - return "{}-{}-{}".format(self._interpreter, self._abi, self._platform) + def __str__(self) -> str: + return f"{self._interpreter}-{self._abi}-{self._platform}" - def __repr__(self): - return "<{self} @ {self_id}>".format(self=self, self_id=id(self)) + def __repr__(self) -> str: + return f"<{self} @ {id(self)}>" -def parse_tag(tag): +def parse_tag(tag: str) -> FrozenSet[Tag]: + """ + Parses the provided tag (e.g. `py3-none-any`) into a frozenset of Tag instances. + + Returning a set is required due to the possibility that the tag is a + compressed tag set. + """ tags = set() interpreters, abis, platforms = tag.split("-") for interpreter in interpreters.split("."): @@ -80,20 +109,34 @@ def parse_tag(tag): return frozenset(tags) -def _normalize_string(string): +def _get_config_var(name: str, warn: bool = False) -> Union[int, str, None]: + value = sysconfig.get_config_var(name) + if value is None and warn: + logger.debug( + "Config variable '%s' is unset, Python ABI tag may be incorrect", name + ) + return value + + +def _normalize_string(string: str) -> str: return string.replace(".", "_").replace("-", "_") -def _cpython_interpreter(py_version): - # TODO: Is using py_version_nodot for interpreter version critical? - return "cp{major}{minor}".format(major=py_version[0], minor=py_version[1]) +def _abi3_applies(python_version: PythonVersion) -> bool: + """ + Determine if the Python version supports abi3. + + PEP 384 was first implemented in Python 3.2. + """ + return len(python_version) > 1 and tuple(python_version) >= (3, 2) -def _cpython_abis(py_version): +def _cpython_abis(py_version: PythonVersion, warn: bool = False) -> List[str]: + py_version = tuple(py_version) # To allow for version comparison. abis = [] - version = "{}{}".format(*py_version[:2]) + version = _version_nodot(py_version[:2]) debug = pymalloc = ucs4 = "" - with_debug = sysconfig.get_config_var("Py_DEBUG") + with_debug = _get_config_var("Py_DEBUG", warn) has_refcount = hasattr(sys, "gettotalrefcount") # Windows doesn't set Py_DEBUG, so checking for support of debug-compiled # extension modules is the best option. @@ -102,11 +145,11 @@ def _cpython_abis(py_version): if with_debug or (with_debug is None and (has_refcount or has_ext)): debug = "d" if py_version < (3, 8): - with_pymalloc = sysconfig.get_config_var("WITH_PYMALLOC") + with_pymalloc = _get_config_var("WITH_PYMALLOC", warn) if with_pymalloc or with_pymalloc is None: pymalloc = "m" if py_version < (3, 3): - unicode_size = sysconfig.get_config_var("Py_UNICODE_SIZE") + unicode_size = _get_config_var("Py_UNICODE_SIZE", warn) if unicode_size == 4 or ( unicode_size is None and sys.maxunicode == 0x10FFFF ): @@ -114,7 +157,7 @@ def _cpython_abis(py_version): elif debug: # Debug builds can also load "normal" extension modules. # We can also assume no UCS-4 or pymalloc requirement. - abis.append("cp{version}".format(version=version)) + abis.append(f"cp{version}") abis.insert( 0, "cp{version}{debug}{pymalloc}{ucs4}".format( @@ -124,86 +167,140 @@ def _cpython_abis(py_version): return abis -def _cpython_tags(py_version, interpreter, abis, platforms): +def cpython_tags( + python_version: Optional[PythonVersion] = None, + abis: Optional[Iterable[str]] = None, + platforms: Optional[Iterable[str]] = None, + *, + warn: bool = False, +) -> Iterator[Tag]: + """ + Yields the tags for a CPython interpreter. + + The tags consist of: + - cp-- + - cp-abi3- + - cp-none- + - cp-abi3- # Older Python versions down to 3.2. + + If python_version only specifies a major version then user-provided ABIs and + the 'none' ABItag will be used. + + If 'abi3' or 'none' are specified in 'abis' then they will be yielded at + their normal position and not at the beginning. + """ + if not python_version: + python_version = sys.version_info[:2] + + interpreter = f"cp{_version_nodot(python_version[:2])}" + + if abis is None: + if len(python_version) > 1: + abis = _cpython_abis(python_version, warn) + else: + abis = [] + abis = list(abis) + # 'abi3' and 'none' are explicitly handled later. + for explicit_abi in ("abi3", "none"): + try: + abis.remove(explicit_abi) + except ValueError: + pass + + platforms = list(platforms or platform_tags()) for abi in abis: for platform_ in platforms: yield Tag(interpreter, abi, platform_) - for tag in (Tag(interpreter, "abi3", platform_) for platform_ in platforms): - yield tag - for tag in (Tag(interpreter, "none", platform_) for platform_ in platforms): - yield tag - # PEP 384 was first implemented in Python 3.2. - for minor_version in range(py_version[1] - 1, 1, -1): - for platform_ in platforms: - interpreter = "cp{major}{minor}".format( - major=py_version[0], minor=minor_version - ) - yield Tag(interpreter, "abi3", platform_) + if _abi3_applies(python_version): + yield from (Tag(interpreter, "abi3", platform_) for platform_ in platforms) + yield from (Tag(interpreter, "none", platform_) for platform_ in platforms) + + if _abi3_applies(python_version): + for minor_version in range(python_version[1] - 1, 1, -1): + for platform_ in platforms: + interpreter = "cp{version}".format( + version=_version_nodot((python_version[0], minor_version)) + ) + yield Tag(interpreter, "abi3", platform_) -def _pypy_interpreter(): - return "pp{py_major}{pypy_major}{pypy_minor}".format( - py_major=sys.version_info[0], - pypy_major=sys.pypy_version_info.major, - pypy_minor=sys.pypy_version_info.minor, - ) - - -def _generic_abi(): +def _generic_abi() -> Iterator[str]: abi = sysconfig.get_config_var("SOABI") if abi: - return _normalize_string(abi) - else: - return "none" + yield _normalize_string(abi) -def _pypy_tags(py_version, interpreter, abi, platforms): - for tag in (Tag(interpreter, abi, platform) for platform in platforms): - yield tag - for tag in (Tag(interpreter, "none", platform) for platform in platforms): - yield tag - - -def _generic_tags(interpreter, py_version, abi, platforms): - for tag in (Tag(interpreter, abi, platform) for platform in platforms): - yield tag - if abi != "none": - tags = (Tag(interpreter, "none", platform_) for platform_ in platforms) - for tag in tags: - yield tag - - -def _py_interpreter_range(py_version): +def generic_tags( + interpreter: Optional[str] = None, + abis: Optional[Iterable[str]] = None, + platforms: Optional[Iterable[str]] = None, + *, + warn: bool = False, +) -> Iterator[Tag]: """ - Yield Python versions in descending order. + Yields the tags for a generic interpreter. + + The tags consist of: + - -- + + The "none" ABI will be added if it was not explicitly provided. + """ + if not interpreter: + interp_name = interpreter_name() + interp_version = interpreter_version(warn=warn) + interpreter = "".join([interp_name, interp_version]) + if abis is None: + abis = _generic_abi() + platforms = list(platforms or platform_tags()) + abis = list(abis) + if "none" not in abis: + abis.append("none") + for abi in abis: + for platform_ in platforms: + yield Tag(interpreter, abi, platform_) + + +def _py_interpreter_range(py_version: PythonVersion) -> Iterator[str]: + """ + Yields Python versions in descending order. After the latest version, the major-only version will be yielded, and then - all following versions up to 'end'. + all previous versions of that major version. """ - yield "py{major}{minor}".format(major=py_version[0], minor=py_version[1]) - yield "py{major}".format(major=py_version[0]) - for minor in range(py_version[1] - 1, -1, -1): - yield "py{major}{minor}".format(major=py_version[0], minor=minor) + if len(py_version) > 1: + yield f"py{_version_nodot(py_version[:2])}" + yield f"py{py_version[0]}" + if len(py_version) > 1: + for minor in range(py_version[1] - 1, -1, -1): + yield f"py{_version_nodot((py_version[0], minor))}" -def _independent_tags(interpreter, py_version, platforms): +def compatible_tags( + python_version: Optional[PythonVersion] = None, + interpreter: Optional[str] = None, + platforms: Optional[Iterable[str]] = None, +) -> Iterator[Tag]: """ - Return the sequence of tags that are consistent across implementations. + Yields the sequence of tags that are compatible with a specific version of Python. The tags consist of: - py*-none- - - -none-any + - -none-any # ... if `interpreter` is provided. - py*-none-any """ - for version in _py_interpreter_range(py_version): + if not python_version: + python_version = sys.version_info[:2] + platforms = list(platforms or platform_tags()) + for version in _py_interpreter_range(python_version): for platform_ in platforms: yield Tag(version, "none", platform_) - yield Tag(interpreter, "none", "any") - for version in _py_interpreter_range(py_version): + if interpreter: + yield Tag(interpreter, "none", "any") + for version in _py_interpreter_range(python_version): yield Tag(version, "none", "any") -def _mac_arch(arch, is_32bit=_32_BIT_INTERPRETER): +def _mac_arch(arch: str, is_32bit: bool = _32_BIT_INTERPRETER) -> str: if not is_32bit: return arch @@ -213,7 +310,7 @@ def _mac_arch(arch, is_32bit=_32_BIT_INTERPRETER): return "i386" -def _mac_binary_formats(version, cpu_arch): +def _mac_binary_formats(version: MacVersion, cpu_arch: str) -> List[str]: formats = [cpu_arch] if cpu_arch == "x86_64": if version < (10, 4): @@ -236,169 +333,155 @@ def _mac_binary_formats(version, cpu_arch): return [] formats.extend(["fat32", "fat"]) - formats.append("universal") + if cpu_arch in {"arm64", "x86_64"}: + formats.append("universal2") + + if cpu_arch in {"x86_64", "i386", "ppc64", "ppc", "intel"}: + formats.append("universal") + return formats -def _mac_platforms(version=None, arch=None): +def mac_platforms( + version: Optional[MacVersion] = None, arch: Optional[str] = None +) -> Iterator[str]: + """ + Yields the platform tags for a macOS system. + + The `version` parameter is a two-item tuple specifying the macOS version to + generate platform tags for. The `arch` parameter is the CPU architecture to + generate platform tags for. Both parameters default to the appropriate value + for the current system. + """ version_str, _, cpu_arch = platform.mac_ver() if version is None: - version = tuple(map(int, version_str.split(".")[:2])) + version = cast("MacVersion", tuple(map(int, version_str.split(".")[:2]))) + else: + version = version if arch is None: arch = _mac_arch(cpu_arch) - platforms = [] - for minor_version in range(version[1], -1, -1): - compat_version = version[0], minor_version - binary_formats = _mac_binary_formats(compat_version, arch) - for binary_format in binary_formats: - platforms.append( - "macosx_{major}_{minor}_{binary_format}".format( + else: + arch = arch + + if (10, 0) <= version and version < (11, 0): + # Prior to Mac OS 11, each yearly release of Mac OS bumped the + # "minor" version number. The major version was always 10. + for minor_version in range(version[1], -1, -1): + compat_version = 10, minor_version + binary_formats = _mac_binary_formats(compat_version, arch) + for binary_format in binary_formats: + yield "macosx_{major}_{minor}_{binary_format}".format( + major=10, minor=minor_version, binary_format=binary_format + ) + + if version >= (11, 0): + # Starting with Mac OS 11, each yearly release bumps the major version + # number. The minor versions are now the midyear updates. + for major_version in range(version[0], 10, -1): + compat_version = major_version, 0 + binary_formats = _mac_binary_formats(compat_version, arch) + for binary_format in binary_formats: + yield "macosx_{major}_{minor}_{binary_format}".format( + major=major_version, minor=0, binary_format=binary_format + ) + + if version >= (11, 0): + # Mac OS 11 on x86_64 is compatible with binaries from previous releases. + # Arm64 support was introduced in 11.0, so no Arm binaries from previous + # releases exist. + # + # However, the "universal2" binary format can have a + # macOS version earlier than 11.0 when the x86_64 part of the binary supports + # that version of macOS. + if arch == "x86_64": + for minor_version in range(16, 3, -1): + compat_version = 10, minor_version + binary_formats = _mac_binary_formats(compat_version, arch) + for binary_format in binary_formats: + yield "macosx_{major}_{minor}_{binary_format}".format( + major=compat_version[0], + minor=compat_version[1], + binary_format=binary_format, + ) + else: + for minor_version in range(16, 3, -1): + compat_version = 10, minor_version + binary_format = "universal2" + yield "macosx_{major}_{minor}_{binary_format}".format( major=compat_version[0], minor=compat_version[1], binary_format=binary_format, ) - ) - return platforms -# From PEP 513. -def _is_manylinux_compatible(name, glibc_version): - # Check for presence of _manylinux module. - try: - import _manylinux - - return bool(getattr(_manylinux, name + "_compatible")) - except (ImportError, AttributeError): - # Fall through to heuristic check below. - pass - - return _have_compatible_glibc(*glibc_version) +def _linux_platforms(is_32bit: bool = _32_BIT_INTERPRETER) -> Iterator[str]: + linux = _normalize_string(sysconfig.get_platform()) + if is_32bit: + if linux == "linux_x86_64": + linux = "linux_i686" + elif linux == "linux_aarch64": + linux = "linux_armv7l" + _, arch = linux.split("_", 1) + yield from _manylinux.platform_tags(linux, arch) + yield from _musllinux.platform_tags(arch) + yield linux -def _glibc_version_string(): - # Returns glibc version string, or None if not using glibc. - import ctypes - - # ctypes.CDLL(None) internally calls dlopen(NULL), and as the dlopen - # manpage says, "If filename is NULL, then the returned handle is for the - # main program". This way we can let the linker do the work to figure out - # which libc our process is actually using. - process_namespace = ctypes.CDLL(None) - try: - gnu_get_libc_version = process_namespace.gnu_get_libc_version - except AttributeError: - # Symbol doesn't exist -> therefore, we are not linked to - # glibc. - return None - - # Call gnu_get_libc_version, which returns a string like "2.5" - gnu_get_libc_version.restype = ctypes.c_char_p - version_str = gnu_get_libc_version() - # py2 / py3 compatibility: - if not isinstance(version_str, str): - version_str = version_str.decode("ascii") - - return version_str +def _generic_platforms() -> Iterator[str]: + yield _normalize_string(sysconfig.get_platform()) -# Separated out from have_compatible_glibc for easier unit testing. -def _check_glibc_version(version_str, required_major, minimum_minor): - # Parse string and check against requested version. - # - # We use a regexp instead of str.split because we want to discard any - # random junk that might come after the minor version -- this might happen - # in patched/forked versions of glibc (e.g. Linaro's version of glibc - # uses version strings like "2.20-2014.11"). See gh-3588. - m = re.match(r"(?P[0-9]+)\.(?P[0-9]+)", version_str) - if not m: - warnings.warn( - "Expected glibc version with 2 components major.minor," - " got: %s" % version_str, - RuntimeWarning, - ) - return False - return ( - int(m.group("major")) == required_major - and int(m.group("minor")) >= minimum_minor - ) - - -def _have_compatible_glibc(required_major, minimum_minor): - version_str = _glibc_version_string() - if version_str is None: - return False - return _check_glibc_version(version_str, required_major, minimum_minor) - - -def _linux_platforms(is_32bit=_32_BIT_INTERPRETER): - linux = _normalize_string(distutils.util.get_platform()) - if linux == "linux_x86_64" and is_32bit: - linux = "linux_i686" - manylinux_support = ( - ("manylinux2014", (2, 17)), # CentOS 7 w/ glibc 2.17 (PEP 599) - ("manylinux2010", (2, 12)), # CentOS 6 w/ glibc 2.12 (PEP 571) - ("manylinux1", (2, 5)), # CentOS 5 w/ glibc 2.5 (PEP 513) - ) - manylinux_support_iter = iter(manylinux_support) - for name, glibc_version in manylinux_support_iter: - if _is_manylinux_compatible(name, glibc_version): - platforms = [linux.replace("linux", name)] - break +def platform_tags() -> Iterator[str]: + """ + Provides the platform tags for this installation. + """ + if platform.system() == "Darwin": + return mac_platforms() + elif platform.system() == "Linux": + return _linux_platforms() else: - platforms = [] - # Support for a later manylinux implies support for an earlier version. - platforms += [linux.replace("linux", name) for name, _ in manylinux_support_iter] - platforms.append(linux) - return platforms + return _generic_platforms() -def _generic_platforms(): - platform = _normalize_string(distutils.util.get_platform()) - return [platform] - - -def _interpreter_name(): - name = platform.python_implementation().lower() +def interpreter_name() -> str: + """ + Returns the name of the running interpreter. + """ + name = sys.implementation.name return INTERPRETER_SHORT_NAMES.get(name) or name -def _generic_interpreter(name, py_version): - version = sysconfig.get_config_var("py_version_nodot") - if not version: - version = "".join(map(str, py_version[:2])) - return "{name}{version}".format(name=name, version=version) +def interpreter_version(*, warn: bool = False) -> str: + """ + Returns the version of the running interpreter. + """ + version = _get_config_var("py_version_nodot", warn=warn) + if version: + version = str(version) + else: + version = _version_nodot(sys.version_info[:2]) + return version -def sys_tags(): +def _version_nodot(version: PythonVersion) -> str: + return "".join(map(str, version)) + + +def sys_tags(*, warn: bool = False) -> Iterator[Tag]: """ Returns the sequence of tag triples for the running interpreter. The order of the sequence corresponds to priority order for the interpreter, from most to least important. """ - py_version = sys.version_info[:2] - interpreter_name = _interpreter_name() - if platform.system() == "Darwin": - platforms = _mac_platforms() - elif platform.system() == "Linux": - platforms = _linux_platforms() - else: - platforms = _generic_platforms() - if interpreter_name == "cp": - interpreter = _cpython_interpreter(py_version) - abis = _cpython_abis(py_version) - for tag in _cpython_tags(py_version, interpreter, abis, platforms): - yield tag - elif interpreter_name == "pp": - interpreter = _pypy_interpreter() - abi = _generic_abi() - for tag in _pypy_tags(py_version, interpreter, abi, platforms): - yield tag + interp_name = interpreter_name() + if interp_name == "cp": + yield from cpython_tags(warn=warn) else: - interpreter = _generic_interpreter(interpreter_name, py_version) - abi = _generic_abi() - for tag in _generic_tags(interpreter, py_version, abi, platforms): - yield tag - for tag in _independent_tags(interpreter, py_version, platforms): - yield tag + yield from generic_tags() + + if interp_name == "pp": + yield from compatible_tags(interpreter="pp3") + else: + yield from compatible_tags() diff --git a/libs/common/setuptools/_vendor/packaging/utils.py b/libs/common/setuptools/_vendor/packaging/utils.py index 88418786..bab11b80 100644 --- a/libs/common/setuptools/_vendor/packaging/utils.py +++ b/libs/common/setuptools/_vendor/packaging/utils.py @@ -1,57 +1,136 @@ # This file is dual licensed under the terms of the Apache License, Version # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. -from __future__ import absolute_import, division, print_function import re +from typing import FrozenSet, NewType, Tuple, Union, cast +from .tags import Tag, parse_tag from .version import InvalidVersion, Version +BuildTag = Union[Tuple[()], Tuple[int, str]] +NormalizedName = NewType("NormalizedName", str) + + +class InvalidWheelFilename(ValueError): + """ + An invalid wheel filename was found, users should refer to PEP 427. + """ + + +class InvalidSdistFilename(ValueError): + """ + An invalid sdist filename was found, users should refer to the packaging user guide. + """ + _canonicalize_regex = re.compile(r"[-_.]+") +# PEP 427: The build number must start with a digit. +_build_tag_regex = re.compile(r"(\d+)(.*)") -def canonicalize_name(name): +def canonicalize_name(name: str) -> NormalizedName: # This is taken from PEP 503. - return _canonicalize_regex.sub("-", name).lower() + value = _canonicalize_regex.sub("-", name).lower() + return cast(NormalizedName, value) -def canonicalize_version(version): +def canonicalize_version(version: Union[Version, str]) -> str: """ - This is very similar to Version.__str__, but has one subtle differences + This is very similar to Version.__str__, but has one subtle difference with the way it handles the release segment. """ - - try: - version = Version(version) - except InvalidVersion: - # Legacy versions cannot be normalized - return version + if isinstance(version, str): + try: + parsed = Version(version) + except InvalidVersion: + # Legacy versions cannot be normalized + return version + else: + parsed = version parts = [] # Epoch - if version.epoch != 0: - parts.append("{0}!".format(version.epoch)) + if parsed.epoch != 0: + parts.append(f"{parsed.epoch}!") # Release segment # NB: This strips trailing '.0's to normalize - parts.append(re.sub(r"(\.0)+$", "", ".".join(str(x) for x in version.release))) + parts.append(re.sub(r"(\.0)+$", "", ".".join(str(x) for x in parsed.release))) # Pre-release - if version.pre is not None: - parts.append("".join(str(x) for x in version.pre)) + if parsed.pre is not None: + parts.append("".join(str(x) for x in parsed.pre)) # Post-release - if version.post is not None: - parts.append(".post{0}".format(version.post)) + if parsed.post is not None: + parts.append(f".post{parsed.post}") # Development release - if version.dev is not None: - parts.append(".dev{0}".format(version.dev)) + if parsed.dev is not None: + parts.append(f".dev{parsed.dev}") # Local version segment - if version.local is not None: - parts.append("+{0}".format(version.local)) + if parsed.local is not None: + parts.append(f"+{parsed.local}") return "".join(parts) + + +def parse_wheel_filename( + filename: str, +) -> Tuple[NormalizedName, Version, BuildTag, FrozenSet[Tag]]: + if not filename.endswith(".whl"): + raise InvalidWheelFilename( + f"Invalid wheel filename (extension must be '.whl'): {filename}" + ) + + filename = filename[:-4] + dashes = filename.count("-") + if dashes not in (4, 5): + raise InvalidWheelFilename( + f"Invalid wheel filename (wrong number of parts): {filename}" + ) + + parts = filename.split("-", dashes - 2) + name_part = parts[0] + # See PEP 427 for the rules on escaping the project name + if "__" in name_part or re.match(r"^[\w\d._]*$", name_part, re.UNICODE) is None: + raise InvalidWheelFilename(f"Invalid project name: {filename}") + name = canonicalize_name(name_part) + version = Version(parts[1]) + if dashes == 5: + build_part = parts[2] + build_match = _build_tag_regex.match(build_part) + if build_match is None: + raise InvalidWheelFilename( + f"Invalid build number: {build_part} in '{filename}'" + ) + build = cast(BuildTag, (int(build_match.group(1)), build_match.group(2))) + else: + build = () + tags = parse_tag(parts[-1]) + return (name, version, build, tags) + + +def parse_sdist_filename(filename: str) -> Tuple[NormalizedName, Version]: + if filename.endswith(".tar.gz"): + file_stem = filename[: -len(".tar.gz")] + elif filename.endswith(".zip"): + file_stem = filename[: -len(".zip")] + else: + raise InvalidSdistFilename( + f"Invalid sdist filename (extension must be '.tar.gz' or '.zip'):" + f" {filename}" + ) + + # We are requiring a PEP 440 version, which cannot contain dashes, + # so we split on the last dash. + name_part, sep, version_part = file_stem.rpartition("-") + if not sep: + raise InvalidSdistFilename(f"Invalid sdist filename: {filename}") + + name = canonicalize_name(name_part) + version = Version(version_part) + return (name, version) diff --git a/libs/common/setuptools/_vendor/packaging/version.py b/libs/common/setuptools/_vendor/packaging/version.py index 95157a1f..de9a09a4 100644 --- a/libs/common/setuptools/_vendor/packaging/version.py +++ b/libs/common/setuptools/_vendor/packaging/version.py @@ -1,24 +1,45 @@ # This file is dual licensed under the terms of the Apache License, Version # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. -from __future__ import absolute_import, division, print_function import collections import itertools import re +import warnings +from typing import Callable, Iterator, List, Optional, SupportsInt, Tuple, Union -from ._structures import Infinity - +from ._structures import Infinity, InfinityType, NegativeInfinity, NegativeInfinityType __all__ = ["parse", "Version", "LegacyVersion", "InvalidVersion", "VERSION_PATTERN"] +InfiniteTypes = Union[InfinityType, NegativeInfinityType] +PrePostDevType = Union[InfiniteTypes, Tuple[str, int]] +SubLocalType = Union[InfiniteTypes, int, str] +LocalType = Union[ + NegativeInfinityType, + Tuple[ + Union[ + SubLocalType, + Tuple[SubLocalType, str], + Tuple[NegativeInfinityType, SubLocalType], + ], + ..., + ], +] +CmpKey = Tuple[ + int, Tuple[int, ...], PrePostDevType, PrePostDevType, PrePostDevType, LocalType +] +LegacyCmpKey = Tuple[int, Tuple[str, ...]] +VersionComparisonMethod = Callable[ + [Union[CmpKey, LegacyCmpKey], Union[CmpKey, LegacyCmpKey]], bool +] _Version = collections.namedtuple( "_Version", ["epoch", "release", "dev", "pre", "post", "local"] ) -def parse(version): +def parse(version: str) -> Union["LegacyVersion", "Version"]: """ Parse the given version string and return either a :class:`Version` object or a :class:`LegacyVersion` object depending on if the given version is @@ -36,88 +57,111 @@ class InvalidVersion(ValueError): """ -class _BaseVersion(object): - def __hash__(self): +class _BaseVersion: + _key: Union[CmpKey, LegacyCmpKey] + + def __hash__(self) -> int: return hash(self._key) - def __lt__(self, other): - return self._compare(other, lambda s, o: s < o) - - def __le__(self, other): - return self._compare(other, lambda s, o: s <= o) - - def __eq__(self, other): - return self._compare(other, lambda s, o: s == o) - - def __ge__(self, other): - return self._compare(other, lambda s, o: s >= o) - - def __gt__(self, other): - return self._compare(other, lambda s, o: s > o) - - def __ne__(self, other): - return self._compare(other, lambda s, o: s != o) - - def _compare(self, other, method): + # Please keep the duplicated `isinstance` check + # in the six comparisons hereunder + # unless you find a way to avoid adding overhead function calls. + def __lt__(self, other: "_BaseVersion") -> bool: if not isinstance(other, _BaseVersion): return NotImplemented - return method(self._key, other._key) + return self._key < other._key + + def __le__(self, other: "_BaseVersion") -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key <= other._key + + def __eq__(self, other: object) -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key == other._key + + def __ge__(self, other: "_BaseVersion") -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key >= other._key + + def __gt__(self, other: "_BaseVersion") -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key > other._key + + def __ne__(self, other: object) -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key != other._key class LegacyVersion(_BaseVersion): - def __init__(self, version): + def __init__(self, version: str) -> None: self._version = str(version) self._key = _legacy_cmpkey(self._version) - def __str__(self): + warnings.warn( + "Creating a LegacyVersion has been deprecated and will be " + "removed in the next major release", + DeprecationWarning, + ) + + def __str__(self) -> str: return self._version - def __repr__(self): - return "".format(repr(str(self))) + def __repr__(self) -> str: + return f"" @property - def public(self): + def public(self) -> str: return self._version @property - def base_version(self): + def base_version(self) -> str: return self._version @property - def epoch(self): + def epoch(self) -> int: return -1 @property - def release(self): + def release(self) -> None: return None @property - def pre(self): + def pre(self) -> None: return None @property - def post(self): + def post(self) -> None: return None @property - def dev(self): + def dev(self) -> None: return None @property - def local(self): + def local(self) -> None: return None @property - def is_prerelease(self): + def is_prerelease(self) -> bool: return False @property - def is_postrelease(self): + def is_postrelease(self) -> bool: return False @property - def is_devrelease(self): + def is_devrelease(self) -> bool: return False @@ -132,7 +176,7 @@ _legacy_version_replacement_map = { } -def _parse_version_parts(s): +def _parse_version_parts(s: str) -> Iterator[str]: for part in _legacy_version_component_re.split(s): part = _legacy_version_replacement_map.get(part, part) @@ -149,7 +193,8 @@ def _parse_version_parts(s): yield "*final" -def _legacy_cmpkey(version): +def _legacy_cmpkey(version: str) -> LegacyCmpKey: + # We hardcode an epoch of -1 here. A PEP 440 version can only have a epoch # greater than or equal to 0. This will effectively put the LegacyVersion, # which uses the defacto standard originally implemented by setuptools, @@ -158,7 +203,7 @@ def _legacy_cmpkey(version): # This scheme is taken from pkg_resources.parse_version setuptools prior to # it's adoption of the packaging library. - parts = [] + parts: List[str] = [] for part in _parse_version_parts(version.lower()): if part.startswith("*"): # remove "-" before a prerelease tag @@ -171,9 +216,8 @@ def _legacy_cmpkey(version): parts.pop() parts.append(part) - parts = tuple(parts) - return epoch, parts + return epoch, tuple(parts) # Deliberately not anchored to the start and end of the string, to make it @@ -214,11 +258,12 @@ class Version(_BaseVersion): _regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE) - def __init__(self, version): + def __init__(self, version: str) -> None: + # Validate the version and parse it into pieces match = self._regex.search(version) if not match: - raise InvalidVersion("Invalid version: '{0}'".format(version)) + raise InvalidVersion(f"Invalid version: '{version}'") # Store the parsed out pieces of the version self._version = _Version( @@ -242,15 +287,15 @@ class Version(_BaseVersion): self._version.local, ) - def __repr__(self): - return "".format(repr(str(self))) + def __repr__(self) -> str: + return f"" - def __str__(self): + def __str__(self) -> str: parts = [] # Epoch if self.epoch != 0: - parts.append("{0}!".format(self.epoch)) + parts.append(f"{self.epoch}!") # Release segment parts.append(".".join(str(x) for x in self.release)) @@ -261,56 +306,59 @@ class Version(_BaseVersion): # Post-release if self.post is not None: - parts.append(".post{0}".format(self.post)) + parts.append(f".post{self.post}") # Development release if self.dev is not None: - parts.append(".dev{0}".format(self.dev)) + parts.append(f".dev{self.dev}") # Local version segment if self.local is not None: - parts.append("+{0}".format(self.local)) + parts.append(f"+{self.local}") return "".join(parts) @property - def epoch(self): - return self._version.epoch + def epoch(self) -> int: + _epoch: int = self._version.epoch + return _epoch @property - def release(self): - return self._version.release + def release(self) -> Tuple[int, ...]: + _release: Tuple[int, ...] = self._version.release + return _release @property - def pre(self): - return self._version.pre + def pre(self) -> Optional[Tuple[str, int]]: + _pre: Optional[Tuple[str, int]] = self._version.pre + return _pre @property - def post(self): + def post(self) -> Optional[int]: return self._version.post[1] if self._version.post else None @property - def dev(self): + def dev(self) -> Optional[int]: return self._version.dev[1] if self._version.dev else None @property - def local(self): + def local(self) -> Optional[str]: if self._version.local: return ".".join(str(x) for x in self._version.local) else: return None @property - def public(self): + def public(self) -> str: return str(self).split("+", 1)[0] @property - def base_version(self): + def base_version(self) -> str: parts = [] # Epoch if self.epoch != 0: - parts.append("{0}!".format(self.epoch)) + parts.append(f"{self.epoch}!") # Release segment parts.append(".".join(str(x) for x in self.release)) @@ -318,19 +366,34 @@ class Version(_BaseVersion): return "".join(parts) @property - def is_prerelease(self): + def is_prerelease(self) -> bool: return self.dev is not None or self.pre is not None @property - def is_postrelease(self): + def is_postrelease(self) -> bool: return self.post is not None @property - def is_devrelease(self): + def is_devrelease(self) -> bool: return self.dev is not None + @property + def major(self) -> int: + return self.release[0] if len(self.release) >= 1 else 0 + + @property + def minor(self) -> int: + return self.release[1] if len(self.release) >= 2 else 0 + + @property + def micro(self) -> int: + return self.release[2] if len(self.release) >= 3 else 0 + + +def _parse_letter_version( + letter: str, number: Union[str, bytes, SupportsInt] +) -> Optional[Tuple[str, int]]: -def _parse_letter_version(letter, number): if letter: # We consider there to be an implicit 0 in a pre-release if there is # not a numeral associated with it. @@ -360,11 +423,13 @@ def _parse_letter_version(letter, number): return letter, int(number) + return None + _local_version_separators = re.compile(r"[\._-]") -def _parse_local_version(local): +def _parse_local_version(local: str) -> Optional[LocalType]: """ Takes a string like abc.1.twelve and turns it into ("abc", 1, "twelve"). """ @@ -373,15 +438,24 @@ def _parse_local_version(local): part.lower() if not part.isdigit() else int(part) for part in _local_version_separators.split(local) ) + return None -def _cmpkey(epoch, release, pre, post, dev, local): +def _cmpkey( + epoch: int, + release: Tuple[int, ...], + pre: Optional[Tuple[str, int]], + post: Optional[Tuple[str, int]], + dev: Optional[Tuple[str, int]], + local: Optional[Tuple[SubLocalType]], +) -> CmpKey: + # When we compare a release version, we want to compare it with all of the # trailing zeros removed. So we'll use a reverse the list, drop all the now # leading zeros until we come to something non zero, then take the rest # re-reverse it back into the correct order and make it a tuple and use # that for our sorting key. - release = tuple( + _release = tuple( reversed(list(itertools.dropwhile(lambda x: x == 0, reversed(release)))) ) @@ -390,23 +464,31 @@ def _cmpkey(epoch, release, pre, post, dev, local): # if there is not a pre or a post segment. If we have one of those then # the normal sorting rules will handle this case correctly. if pre is None and post is None and dev is not None: - pre = -Infinity + _pre: PrePostDevType = NegativeInfinity # Versions without a pre-release (except as noted above) should sort after # those with one. elif pre is None: - pre = Infinity + _pre = Infinity + else: + _pre = pre # Versions without a post segment should sort before those with one. if post is None: - post = -Infinity + _post: PrePostDevType = NegativeInfinity + + else: + _post = post # Versions without a development segment should sort after those with one. if dev is None: - dev = Infinity + _dev: PrePostDevType = Infinity + + else: + _dev = dev if local is None: # Versions without a local segment should sort before those with one. - local = -Infinity + _local: LocalType = NegativeInfinity else: # Versions with a local segment need that segment parsed to implement # the sorting rules in PEP440. @@ -415,6 +497,8 @@ def _cmpkey(epoch, release, pre, post, dev, local): # - Numeric segments sort numerically # - Shorter versions sort before longer versions when the prefixes # match exactly - local = tuple((i, "") if isinstance(i, int) else (-Infinity, i) for i in local) + _local = tuple( + (i, "") if isinstance(i, int) else (NegativeInfinity, i) for i in local + ) - return epoch, release, pre, post, dev, local + return epoch, _release, _pre, _post, _dev, _local diff --git a/libs/common/setuptools/_vendor/pyparsing.py b/libs/common/setuptools/_vendor/pyparsing.py deleted file mode 100644 index 4aa30ee6..00000000 --- a/libs/common/setuptools/_vendor/pyparsing.py +++ /dev/null @@ -1,5742 +0,0 @@ -# module pyparsing.py -# -# Copyright (c) 2003-2018 Paul T. McGuire -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# - -__doc__ = \ -""" -pyparsing module - Classes and methods to define and execute parsing grammars -============================================================================= - -The pyparsing module is an alternative approach to creating and executing simple grammars, -vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you -don't need to learn a new syntax for defining grammars or matching expressions - the parsing module -provides a library of classes that you use to construct the grammar directly in Python. - -Here is a program to parse "Hello, World!" (or any greeting of the form -C{", !"}), built up using L{Word}, L{Literal}, and L{And} elements -(L{'+'} operator gives L{And} expressions, strings are auto-converted to -L{Literal} expressions):: - - from pyparsing import Word, alphas - - # define grammar of a greeting - greet = Word(alphas) + "," + Word(alphas) + "!" - - hello = "Hello, World!" - print (hello, "->", greet.parseString(hello)) - -The program outputs the following:: - - Hello, World! -> ['Hello', ',', 'World', '!'] - -The Python representation of the grammar is quite readable, owing to the self-explanatory -class names, and the use of '+', '|' and '^' operators. - -The L{ParseResults} object returned from L{ParserElement.parseString} can be accessed as a nested list, a dictionary, or an -object with named attributes. - -The pyparsing module handles some of the problems that are typically vexing when writing text parsers: - - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.) - - quoted strings - - embedded comments - - -Getting Started - ------------------ -Visit the classes L{ParserElement} and L{ParseResults} to see the base classes that most other pyparsing -classes inherit from. Use the docstrings for examples of how to: - - construct literal match expressions from L{Literal} and L{CaselessLiteral} classes - - construct character word-group expressions using the L{Word} class - - see how to create repetitive expressions using L{ZeroOrMore} and L{OneOrMore} classes - - use L{'+'}, L{'|'}, L{'^'}, and L{'&'} operators to combine simple expressions into more complex ones - - associate names with your parsed results using L{ParserElement.setResultsName} - - find some helpful expression short-cuts like L{delimitedList} and L{oneOf} - - find more useful common expressions in the L{pyparsing_common} namespace class -""" - -__version__ = "2.2.1" -__versionTime__ = "18 Sep 2018 00:49 UTC" -__author__ = "Paul McGuire " - -import string -from weakref import ref as wkref -import copy -import sys -import warnings -import re -import sre_constants -import collections -import pprint -import traceback -import types -from datetime import datetime - -try: - from _thread import RLock -except ImportError: - from threading import RLock - -try: - # Python 3 - from collections.abc import Iterable - from collections.abc import MutableMapping -except ImportError: - # Python 2.7 - from collections import Iterable - from collections import MutableMapping - -try: - from collections import OrderedDict as _OrderedDict -except ImportError: - try: - from ordereddict import OrderedDict as _OrderedDict - except ImportError: - _OrderedDict = None - -#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) - -__all__ = [ -'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', -'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', -'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', -'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', -'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', -'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', -'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', -'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', -'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', -'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums', -'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno', -'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', -'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', -'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', -'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', -'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', -'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass', -'CloseMatch', 'tokenMap', 'pyparsing_common', -] - -system_version = tuple(sys.version_info)[:3] -PY_3 = system_version[0] == 3 -if PY_3: - _MAX_INT = sys.maxsize - basestring = str - unichr = chr - _ustr = str - - # build list of single arg builtins, that can be used as parse actions - singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max] - -else: - _MAX_INT = sys.maxint - range = xrange - - def _ustr(obj): - """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries - str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It - then < returns the unicode object | encodes it with the default encoding | ... >. - """ - if isinstance(obj,unicode): - return obj - - try: - # If this works, then _ustr(obj) has the same behaviour as str(obj), so - # it won't break any existing code. - return str(obj) - - except UnicodeEncodeError: - # Else encode it - ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace') - xmlcharref = Regex(r'&#\d+;') - xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:]) - return xmlcharref.transformString(ret) - - # build list of single arg builtins, tolerant of Python version, that can be used as parse actions - singleArgBuiltins = [] - import __builtin__ - for fname in "sum len sorted reversed list tuple set any all min max".split(): - try: - singleArgBuiltins.append(getattr(__builtin__,fname)) - except AttributeError: - continue - -_generatorType = type((y for y in range(1))) - -def _xml_escape(data): - """Escape &, <, >, ", ', etc. in a string of data.""" - - # ampersand must be replaced first - from_symbols = '&><"\'' - to_symbols = ('&'+s+';' for s in "amp gt lt quot apos".split()) - for from_,to_ in zip(from_symbols, to_symbols): - data = data.replace(from_, to_) - return data - -class _Constants(object): - pass - -alphas = string.ascii_uppercase + string.ascii_lowercase -nums = "0123456789" -hexnums = nums + "ABCDEFabcdef" -alphanums = alphas + nums -_bslash = chr(92) -printables = "".join(c for c in string.printable if c not in string.whitespace) - -class ParseBaseException(Exception): - """base exception class for all parsing runtime exceptions""" - # Performance tuning: we construct a *lot* of these, so keep this - # constructor as small and fast as possible - def __init__( self, pstr, loc=0, msg=None, elem=None ): - self.loc = loc - if msg is None: - self.msg = pstr - self.pstr = "" - else: - self.msg = msg - self.pstr = pstr - self.parserElement = elem - self.args = (pstr, loc, msg) - - @classmethod - def _from_exception(cls, pe): - """ - internal factory method to simplify creating one type of ParseException - from another - avoids having __init__ signature conflicts among subclasses - """ - return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement) - - def __getattr__( self, aname ): - """supported attributes by name are: - - lineno - returns the line number of the exception text - - col - returns the column number of the exception text - - line - returns the line containing the exception text - """ - if( aname == "lineno" ): - return lineno( self.loc, self.pstr ) - elif( aname in ("col", "column") ): - return col( self.loc, self.pstr ) - elif( aname == "line" ): - return line( self.loc, self.pstr ) - else: - raise AttributeError(aname) - - def __str__( self ): - return "%s (at char %d), (line:%d, col:%d)" % \ - ( self.msg, self.loc, self.lineno, self.column ) - def __repr__( self ): - return _ustr(self) - def markInputline( self, markerString = ">!<" ): - """Extracts the exception line from the input string, and marks - the location of the exception with a special symbol. - """ - line_str = self.line - line_column = self.column - 1 - if markerString: - line_str = "".join((line_str[:line_column], - markerString, line_str[line_column:])) - return line_str.strip() - def __dir__(self): - return "lineno col line".split() + dir(type(self)) - -class ParseException(ParseBaseException): - """ - Exception thrown when parse expressions don't match class; - supported attributes by name are: - - lineno - returns the line number of the exception text - - col - returns the column number of the exception text - - line - returns the line containing the exception text - - Example:: - try: - Word(nums).setName("integer").parseString("ABC") - except ParseException as pe: - print(pe) - print("column: {}".format(pe.col)) - - prints:: - Expected integer (at char 0), (line:1, col:1) - column: 1 - """ - pass - -class ParseFatalException(ParseBaseException): - """user-throwable exception thrown when inconsistent parse content - is found; stops all parsing immediately""" - pass - -class ParseSyntaxException(ParseFatalException): - """just like L{ParseFatalException}, but thrown internally when an - L{ErrorStop} ('-' operator) indicates that parsing is to stop - immediately because an unbacktrackable syntax error has been found""" - pass - -#~ class ReparseException(ParseBaseException): - #~ """Experimental class - parse actions can raise this exception to cause - #~ pyparsing to reparse the input string: - #~ - with a modified input string, and/or - #~ - with a modified start location - #~ Set the values of the ReparseException in the constructor, and raise the - #~ exception in a parse action to cause pyparsing to use the new string/location. - #~ Setting the values as None causes no change to be made. - #~ """ - #~ def __init_( self, newstring, restartLoc ): - #~ self.newParseText = newstring - #~ self.reparseLoc = restartLoc - -class RecursiveGrammarException(Exception): - """exception thrown by L{ParserElement.validate} if the grammar could be improperly recursive""" - def __init__( self, parseElementList ): - self.parseElementTrace = parseElementList - - def __str__( self ): - return "RecursiveGrammarException: %s" % self.parseElementTrace - -class _ParseResultsWithOffset(object): - def __init__(self,p1,p2): - self.tup = (p1,p2) - def __getitem__(self,i): - return self.tup[i] - def __repr__(self): - return repr(self.tup[0]) - def setOffset(self,i): - self.tup = (self.tup[0],i) - -class ParseResults(object): - """ - Structured parse results, to provide multiple means of access to the parsed data: - - as a list (C{len(results)}) - - by list index (C{results[0], results[1]}, etc.) - - by attribute (C{results.} - see L{ParserElement.setResultsName}) - - Example:: - integer = Word(nums) - date_str = (integer.setResultsName("year") + '/' - + integer.setResultsName("month") + '/' - + integer.setResultsName("day")) - # equivalent form: - # date_str = integer("year") + '/' + integer("month") + '/' + integer("day") - - # parseString returns a ParseResults object - result = date_str.parseString("1999/12/31") - - def test(s, fn=repr): - print("%s -> %s" % (s, fn(eval(s)))) - test("list(result)") - test("result[0]") - test("result['month']") - test("result.day") - test("'month' in result") - test("'minutes' in result") - test("result.dump()", str) - prints:: - list(result) -> ['1999', '/', '12', '/', '31'] - result[0] -> '1999' - result['month'] -> '12' - result.day -> '31' - 'month' in result -> True - 'minutes' in result -> False - result.dump() -> ['1999', '/', '12', '/', '31'] - - day: 31 - - month: 12 - - year: 1999 - """ - def __new__(cls, toklist=None, name=None, asList=True, modal=True ): - if isinstance(toklist, cls): - return toklist - retobj = object.__new__(cls) - retobj.__doinit = True - return retobj - - # Performance tuning: we construct a *lot* of these, so keep this - # constructor as small and fast as possible - def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ): - if self.__doinit: - self.__doinit = False - self.__name = None - self.__parent = None - self.__accumNames = {} - self.__asList = asList - self.__modal = modal - if toklist is None: - toklist = [] - if isinstance(toklist, list): - self.__toklist = toklist[:] - elif isinstance(toklist, _generatorType): - self.__toklist = list(toklist) - else: - self.__toklist = [toklist] - self.__tokdict = dict() - - if name is not None and name: - if not modal: - self.__accumNames[name] = 0 - if isinstance(name,int): - name = _ustr(name) # will always return a str, but use _ustr for consistency - self.__name = name - if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])): - if isinstance(toklist,basestring): - toklist = [ toklist ] - if asList: - if isinstance(toklist,ParseResults): - self[name] = _ParseResultsWithOffset(toklist.copy(),0) - else: - self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0) - self[name].__name = name - else: - try: - self[name] = toklist[0] - except (KeyError,TypeError,IndexError): - self[name] = toklist - - def __getitem__( self, i ): - if isinstance( i, (int,slice) ): - return self.__toklist[i] - else: - if i not in self.__accumNames: - return self.__tokdict[i][-1][0] - else: - return ParseResults([ v[0] for v in self.__tokdict[i] ]) - - def __setitem__( self, k, v, isinstance=isinstance ): - if isinstance(v,_ParseResultsWithOffset): - self.__tokdict[k] = self.__tokdict.get(k,list()) + [v] - sub = v[0] - elif isinstance(k,(int,slice)): - self.__toklist[k] = v - sub = v - else: - self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)] - sub = v - if isinstance(sub,ParseResults): - sub.__parent = wkref(self) - - def __delitem__( self, i ): - if isinstance(i,(int,slice)): - mylen = len( self.__toklist ) - del self.__toklist[i] - - # convert int to slice - if isinstance(i, int): - if i < 0: - i += mylen - i = slice(i, i+1) - # get removed indices - removed = list(range(*i.indices(mylen))) - removed.reverse() - # fixup indices in token dictionary - for name,occurrences in self.__tokdict.items(): - for j in removed: - for k, (value, position) in enumerate(occurrences): - occurrences[k] = _ParseResultsWithOffset(value, position - (position > j)) - else: - del self.__tokdict[i] - - def __contains__( self, k ): - return k in self.__tokdict - - def __len__( self ): return len( self.__toklist ) - def __bool__(self): return ( not not self.__toklist ) - __nonzero__ = __bool__ - def __iter__( self ): return iter( self.__toklist ) - def __reversed__( self ): return iter( self.__toklist[::-1] ) - def _iterkeys( self ): - if hasattr(self.__tokdict, "iterkeys"): - return self.__tokdict.iterkeys() - else: - return iter(self.__tokdict) - - def _itervalues( self ): - return (self[k] for k in self._iterkeys()) - - def _iteritems( self ): - return ((k, self[k]) for k in self._iterkeys()) - - if PY_3: - keys = _iterkeys - """Returns an iterator of all named result keys (Python 3.x only).""" - - values = _itervalues - """Returns an iterator of all named result values (Python 3.x only).""" - - items = _iteritems - """Returns an iterator of all named result key-value tuples (Python 3.x only).""" - - else: - iterkeys = _iterkeys - """Returns an iterator of all named result keys (Python 2.x only).""" - - itervalues = _itervalues - """Returns an iterator of all named result values (Python 2.x only).""" - - iteritems = _iteritems - """Returns an iterator of all named result key-value tuples (Python 2.x only).""" - - def keys( self ): - """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x).""" - return list(self.iterkeys()) - - def values( self ): - """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x).""" - return list(self.itervalues()) - - def items( self ): - """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x).""" - return list(self.iteritems()) - - def haskeys( self ): - """Since keys() returns an iterator, this method is helpful in bypassing - code that looks for the existence of any defined results names.""" - return bool(self.__tokdict) - - def pop( self, *args, **kwargs): - """ - Removes and returns item at specified index (default=C{last}). - Supports both C{list} and C{dict} semantics for C{pop()}. If passed no - argument or an integer argument, it will use C{list} semantics - and pop tokens from the list of parsed tokens. If passed a - non-integer argument (most likely a string), it will use C{dict} - semantics and pop the corresponding value from any defined - results names. A second default return value argument is - supported, just as in C{dict.pop()}. - - Example:: - def remove_first(tokens): - tokens.pop(0) - print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] - print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321'] - - label = Word(alphas) - patt = label("LABEL") + OneOrMore(Word(nums)) - print(patt.parseString("AAB 123 321").dump()) - - # Use pop() in a parse action to remove named result (note that corresponding value is not - # removed from list form of results) - def remove_LABEL(tokens): - tokens.pop("LABEL") - return tokens - patt.addParseAction(remove_LABEL) - print(patt.parseString("AAB 123 321").dump()) - prints:: - ['AAB', '123', '321'] - - LABEL: AAB - - ['AAB', '123', '321'] - """ - if not args: - args = [-1] - for k,v in kwargs.items(): - if k == 'default': - args = (args[0], v) - else: - raise TypeError("pop() got an unexpected keyword argument '%s'" % k) - if (isinstance(args[0], int) or - len(args) == 1 or - args[0] in self): - index = args[0] - ret = self[index] - del self[index] - return ret - else: - defaultvalue = args[1] - return defaultvalue - - def get(self, key, defaultValue=None): - """ - Returns named result matching the given key, or if there is no - such name, then returns the given C{defaultValue} or C{None} if no - C{defaultValue} is specified. - - Similar to C{dict.get()}. - - Example:: - integer = Word(nums) - date_str = integer("year") + '/' + integer("month") + '/' + integer("day") - - result = date_str.parseString("1999/12/31") - print(result.get("year")) # -> '1999' - print(result.get("hour", "not specified")) # -> 'not specified' - print(result.get("hour")) # -> None - """ - if key in self: - return self[key] - else: - return defaultValue - - def insert( self, index, insStr ): - """ - Inserts new element at location index in the list of parsed tokens. - - Similar to C{list.insert()}. - - Example:: - print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] - - # use a parse action to insert the parse location in the front of the parsed results - def insert_locn(locn, tokens): - tokens.insert(0, locn) - print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321'] - """ - self.__toklist.insert(index, insStr) - # fixup indices in token dictionary - for name,occurrences in self.__tokdict.items(): - for k, (value, position) in enumerate(occurrences): - occurrences[k] = _ParseResultsWithOffset(value, position + (position > index)) - - def append( self, item ): - """ - Add single element to end of ParseResults list of elements. - - Example:: - print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] - - # use a parse action to compute the sum of the parsed integers, and add it to the end - def append_sum(tokens): - tokens.append(sum(map(int, tokens))) - print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444] - """ - self.__toklist.append(item) - - def extend( self, itemseq ): - """ - Add sequence of elements to end of ParseResults list of elements. - - Example:: - patt = OneOrMore(Word(alphas)) - - # use a parse action to append the reverse of the matched strings, to make a palindrome - def make_palindrome(tokens): - tokens.extend(reversed([t[::-1] for t in tokens])) - return ''.join(tokens) - print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl' - """ - if isinstance(itemseq, ParseResults): - self += itemseq - else: - self.__toklist.extend(itemseq) - - def clear( self ): - """ - Clear all elements and results names. - """ - del self.__toklist[:] - self.__tokdict.clear() - - def __getattr__( self, name ): - try: - return self[name] - except KeyError: - return "" - - if name in self.__tokdict: - if name not in self.__accumNames: - return self.__tokdict[name][-1][0] - else: - return ParseResults([ v[0] for v in self.__tokdict[name] ]) - else: - return "" - - def __add__( self, other ): - ret = self.copy() - ret += other - return ret - - def __iadd__( self, other ): - if other.__tokdict: - offset = len(self.__toklist) - addoffset = lambda a: offset if a<0 else a+offset - otheritems = other.__tokdict.items() - otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) ) - for (k,vlist) in otheritems for v in vlist] - for k,v in otherdictitems: - self[k] = v - if isinstance(v[0],ParseResults): - v[0].__parent = wkref(self) - - self.__toklist += other.__toklist - self.__accumNames.update( other.__accumNames ) - return self - - def __radd__(self, other): - if isinstance(other,int) and other == 0: - # useful for merging many ParseResults using sum() builtin - return self.copy() - else: - # this may raise a TypeError - so be it - return other + self - - def __repr__( self ): - return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) ) - - def __str__( self ): - return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']' - - def _asStringList( self, sep='' ): - out = [] - for item in self.__toklist: - if out and sep: - out.append(sep) - if isinstance( item, ParseResults ): - out += item._asStringList() - else: - out.append( _ustr(item) ) - return out - - def asList( self ): - """ - Returns the parse results as a nested list of matching tokens, all converted to strings. - - Example:: - patt = OneOrMore(Word(alphas)) - result = patt.parseString("sldkj lsdkj sldkj") - # even though the result prints in string-like form, it is actually a pyparsing ParseResults - print(type(result), result) # -> ['sldkj', 'lsdkj', 'sldkj'] - - # Use asList() to create an actual list - result_list = result.asList() - print(type(result_list), result_list) # -> ['sldkj', 'lsdkj', 'sldkj'] - """ - return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist] - - def asDict( self ): - """ - Returns the named parse results as a nested dictionary. - - Example:: - integer = Word(nums) - date_str = integer("year") + '/' + integer("month") + '/' + integer("day") - - result = date_str.parseString('12/31/1999') - print(type(result), repr(result)) # -> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]}) - - result_dict = result.asDict() - print(type(result_dict), repr(result_dict)) # -> {'day': '1999', 'year': '12', 'month': '31'} - - # even though a ParseResults supports dict-like access, sometime you just need to have a dict - import json - print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable - print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"} - """ - if PY_3: - item_fn = self.items - else: - item_fn = self.iteritems - - def toItem(obj): - if isinstance(obj, ParseResults): - if obj.haskeys(): - return obj.asDict() - else: - return [toItem(v) for v in obj] - else: - return obj - - return dict((k,toItem(v)) for k,v in item_fn()) - - def copy( self ): - """ - Returns a new copy of a C{ParseResults} object. - """ - ret = ParseResults( self.__toklist ) - ret.__tokdict = self.__tokdict.copy() - ret.__parent = self.__parent - ret.__accumNames.update( self.__accumNames ) - ret.__name = self.__name - return ret - - def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ): - """ - (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names. - """ - nl = "\n" - out = [] - namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items() - for v in vlist) - nextLevelIndent = indent + " " - - # collapse out indents if formatting is not desired - if not formatted: - indent = "" - nextLevelIndent = "" - nl = "" - - selfTag = None - if doctag is not None: - selfTag = doctag - else: - if self.__name: - selfTag = self.__name - - if not selfTag: - if namedItemsOnly: - return "" - else: - selfTag = "ITEM" - - out += [ nl, indent, "<", selfTag, ">" ] - - for i,res in enumerate(self.__toklist): - if isinstance(res,ParseResults): - if i in namedItems: - out += [ res.asXML(namedItems[i], - namedItemsOnly and doctag is None, - nextLevelIndent, - formatted)] - else: - out += [ res.asXML(None, - namedItemsOnly and doctag is None, - nextLevelIndent, - formatted)] - else: - # individual token, see if there is a name for it - resTag = None - if i in namedItems: - resTag = namedItems[i] - if not resTag: - if namedItemsOnly: - continue - else: - resTag = "ITEM" - xmlBodyText = _xml_escape(_ustr(res)) - out += [ nl, nextLevelIndent, "<", resTag, ">", - xmlBodyText, - "" ] - - out += [ nl, indent, "" ] - return "".join(out) - - def __lookup(self,sub): - for k,vlist in self.__tokdict.items(): - for v,loc in vlist: - if sub is v: - return k - return None - - def getName(self): - r""" - Returns the results name for this token expression. Useful when several - different expressions might match at a particular location. - - Example:: - integer = Word(nums) - ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d") - house_number_expr = Suppress('#') + Word(nums, alphanums) - user_data = (Group(house_number_expr)("house_number") - | Group(ssn_expr)("ssn") - | Group(integer)("age")) - user_info = OneOrMore(user_data) - - result = user_info.parseString("22 111-22-3333 #221B") - for item in result: - print(item.getName(), ':', item[0]) - prints:: - age : 22 - ssn : 111-22-3333 - house_number : 221B - """ - if self.__name: - return self.__name - elif self.__parent: - par = self.__parent() - if par: - return par.__lookup(self) - else: - return None - elif (len(self) == 1 and - len(self.__tokdict) == 1 and - next(iter(self.__tokdict.values()))[0][1] in (0,-1)): - return next(iter(self.__tokdict.keys())) - else: - return None - - def dump(self, indent='', depth=0, full=True): - """ - Diagnostic method for listing out the contents of a C{ParseResults}. - Accepts an optional C{indent} argument so that this string can be embedded - in a nested display of other data. - - Example:: - integer = Word(nums) - date_str = integer("year") + '/' + integer("month") + '/' + integer("day") - - result = date_str.parseString('12/31/1999') - print(result.dump()) - prints:: - ['12', '/', '31', '/', '1999'] - - day: 1999 - - month: 31 - - year: 12 - """ - out = [] - NL = '\n' - out.append( indent+_ustr(self.asList()) ) - if full: - if self.haskeys(): - items = sorted((str(k), v) for k,v in self.items()) - for k,v in items: - if out: - out.append(NL) - out.append( "%s%s- %s: " % (indent,(' '*depth), k) ) - if isinstance(v,ParseResults): - if v: - out.append( v.dump(indent,depth+1) ) - else: - out.append(_ustr(v)) - else: - out.append(repr(v)) - elif any(isinstance(vv,ParseResults) for vv in self): - v = self - for i,vv in enumerate(v): - if isinstance(vv,ParseResults): - out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),vv.dump(indent,depth+1) )) - else: - out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),_ustr(vv))) - - return "".join(out) - - def pprint(self, *args, **kwargs): - """ - Pretty-printer for parsed results as a list, using the C{pprint} module. - Accepts additional positional or keyword args as defined for the - C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint}) - - Example:: - ident = Word(alphas, alphanums) - num = Word(nums) - func = Forward() - term = ident | num | Group('(' + func + ')') - func <<= ident + Group(Optional(delimitedList(term))) - result = func.parseString("fna a,b,(fnb c,d,200),100") - result.pprint(width=40) - prints:: - ['fna', - ['a', - 'b', - ['(', 'fnb', ['c', 'd', '200'], ')'], - '100']] - """ - pprint.pprint(self.asList(), *args, **kwargs) - - # add support for pickle protocol - def __getstate__(self): - return ( self.__toklist, - ( self.__tokdict.copy(), - self.__parent is not None and self.__parent() or None, - self.__accumNames, - self.__name ) ) - - def __setstate__(self,state): - self.__toklist = state[0] - (self.__tokdict, - par, - inAccumNames, - self.__name) = state[1] - self.__accumNames = {} - self.__accumNames.update(inAccumNames) - if par is not None: - self.__parent = wkref(par) - else: - self.__parent = None - - def __getnewargs__(self): - return self.__toklist, self.__name, self.__asList, self.__modal - - def __dir__(self): - return (dir(type(self)) + list(self.keys())) - -MutableMapping.register(ParseResults) - -def col (loc,strg): - """Returns current column within a string, counting newlines as line separators. - The first column is number 1. - - Note: the default parsing behavior is to expand tabs in the input string - before starting the parsing process. See L{I{ParserElement.parseString}} for more information - on parsing strings containing C{}s, and suggested methods to maintain a - consistent view of the parsed string, the parse location, and line and column - positions within the parsed string. - """ - s = strg - return 1 if 0} for more information - on parsing strings containing C{}s, and suggested methods to maintain a - consistent view of the parsed string, the parse location, and line and column - positions within the parsed string. - """ - return strg.count("\n",0,loc) + 1 - -def line( loc, strg ): - """Returns the line of text containing loc within a string, counting newlines as line separators. - """ - lastCR = strg.rfind("\n", 0, loc) - nextCR = strg.find("\n", loc) - if nextCR >= 0: - return strg[lastCR+1:nextCR] - else: - return strg[lastCR+1:] - -def _defaultStartDebugAction( instring, loc, expr ): - print (("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))) - -def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ): - print ("Matched " + _ustr(expr) + " -> " + str(toks.asList())) - -def _defaultExceptionDebugAction( instring, loc, expr, exc ): - print ("Exception raised:" + _ustr(exc)) - -def nullDebugAction(*args): - """'Do-nothing' debug action, to suppress debugging output during parsing.""" - pass - -# Only works on Python 3.x - nonlocal is toxic to Python 2 installs -#~ 'decorator to trim function calls to match the arity of the target' -#~ def _trim_arity(func, maxargs=3): - #~ if func in singleArgBuiltins: - #~ return lambda s,l,t: func(t) - #~ limit = 0 - #~ foundArity = False - #~ def wrapper(*args): - #~ nonlocal limit,foundArity - #~ while 1: - #~ try: - #~ ret = func(*args[limit:]) - #~ foundArity = True - #~ return ret - #~ except TypeError: - #~ if limit == maxargs or foundArity: - #~ raise - #~ limit += 1 - #~ continue - #~ return wrapper - -# this version is Python 2.x-3.x cross-compatible -'decorator to trim function calls to match the arity of the target' -def _trim_arity(func, maxargs=2): - if func in singleArgBuiltins: - return lambda s,l,t: func(t) - limit = [0] - foundArity = [False] - - # traceback return data structure changed in Py3.5 - normalize back to plain tuples - if system_version[:2] >= (3,5): - def extract_stack(limit=0): - # special handling for Python 3.5.0 - extra deep call stack by 1 - offset = -3 if system_version == (3,5,0) else -2 - frame_summary = traceback.extract_stack(limit=-offset+limit-1)[offset] - return [frame_summary[:2]] - def extract_tb(tb, limit=0): - frames = traceback.extract_tb(tb, limit=limit) - frame_summary = frames[-1] - return [frame_summary[:2]] - else: - extract_stack = traceback.extract_stack - extract_tb = traceback.extract_tb - - # synthesize what would be returned by traceback.extract_stack at the call to - # user's parse action 'func', so that we don't incur call penalty at parse time - - LINE_DIFF = 6 - # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND - # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!! - this_line = extract_stack(limit=2)[-1] - pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF) - - def wrapper(*args): - while 1: - try: - ret = func(*args[limit[0]:]) - foundArity[0] = True - return ret - except TypeError: - # re-raise TypeErrors if they did not come from our arity testing - if foundArity[0]: - raise - else: - try: - tb = sys.exc_info()[-1] - if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth: - raise - finally: - del tb - - if limit[0] <= maxargs: - limit[0] += 1 - continue - raise - - # copy func name to wrapper for sensible debug output - func_name = "" - try: - func_name = getattr(func, '__name__', - getattr(func, '__class__').__name__) - except Exception: - func_name = str(func) - wrapper.__name__ = func_name - - return wrapper - -class ParserElement(object): - """Abstract base level parser element class.""" - DEFAULT_WHITE_CHARS = " \n\t\r" - verbose_stacktrace = False - - @staticmethod - def setDefaultWhitespaceChars( chars ): - r""" - Overrides the default whitespace chars - - Example:: - # default whitespace chars are space, and newline - OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def', 'ghi', 'jkl'] - - # change to just treat newline as significant - ParserElement.setDefaultWhitespaceChars(" \t") - OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def'] - """ - ParserElement.DEFAULT_WHITE_CHARS = chars - - @staticmethod - def inlineLiteralsUsing(cls): - """ - Set class to be used for inclusion of string literals into a parser. - - Example:: - # default literal class used is Literal - integer = Word(nums) - date_str = integer("year") + '/' + integer("month") + '/' + integer("day") - - date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31'] - - - # change to Suppress - ParserElement.inlineLiteralsUsing(Suppress) - date_str = integer("year") + '/' + integer("month") + '/' + integer("day") - - date_str.parseString("1999/12/31") # -> ['1999', '12', '31'] - """ - ParserElement._literalStringClass = cls - - def __init__( self, savelist=False ): - self.parseAction = list() - self.failAction = None - #~ self.name = "" # don't define self.name, let subclasses try/except upcall - self.strRepr = None - self.resultsName = None - self.saveAsList = savelist - self.skipWhitespace = True - self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS - self.copyDefaultWhiteChars = True - self.mayReturnEmpty = False # used when checking for left-recursion - self.keepTabs = False - self.ignoreExprs = list() - self.debug = False - self.streamlined = False - self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index - self.errmsg = "" - self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all) - self.debugActions = ( None, None, None ) #custom debug actions - self.re = None - self.callPreparse = True # used to avoid redundant calls to preParse - self.callDuringTry = False - - def copy( self ): - """ - Make a copy of this C{ParserElement}. Useful for defining different parse actions - for the same parsing pattern, using copies of the original parse element. - - Example:: - integer = Word(nums).setParseAction(lambda toks: int(toks[0])) - integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K") - integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M") - - print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M")) - prints:: - [5120, 100, 655360, 268435456] - Equivalent form of C{expr.copy()} is just C{expr()}:: - integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M") - """ - cpy = copy.copy( self ) - cpy.parseAction = self.parseAction[:] - cpy.ignoreExprs = self.ignoreExprs[:] - if self.copyDefaultWhiteChars: - cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS - return cpy - - def setName( self, name ): - """ - Define name for this expression, makes debugging and exception messages clearer. - - Example:: - Word(nums).parseString("ABC") # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1) - Word(nums).setName("integer").parseString("ABC") # -> Exception: Expected integer (at char 0), (line:1, col:1) - """ - self.name = name - self.errmsg = "Expected " + self.name - if hasattr(self,"exception"): - self.exception.msg = self.errmsg - return self - - def setResultsName( self, name, listAllMatches=False ): - """ - Define name for referencing matching tokens as a nested attribute - of the returned parse results. - NOTE: this returns a *copy* of the original C{ParserElement} object; - this is so that the client can define a basic element, such as an - integer, and reference it in multiple places with different names. - - You can also set results names using the abbreviated syntax, - C{expr("name")} in place of C{expr.setResultsName("name")} - - see L{I{__call__}<__call__>}. - - Example:: - date_str = (integer.setResultsName("year") + '/' - + integer.setResultsName("month") + '/' - + integer.setResultsName("day")) - - # equivalent form: - date_str = integer("year") + '/' + integer("month") + '/' + integer("day") - """ - newself = self.copy() - if name.endswith("*"): - name = name[:-1] - listAllMatches=True - newself.resultsName = name - newself.modalResults = not listAllMatches - return newself - - def setBreak(self,breakFlag = True): - """Method to invoke the Python pdb debugger when this element is - about to be parsed. Set C{breakFlag} to True to enable, False to - disable. - """ - if breakFlag: - _parseMethod = self._parse - def breaker(instring, loc, doActions=True, callPreParse=True): - import pdb - pdb.set_trace() - return _parseMethod( instring, loc, doActions, callPreParse ) - breaker._originalParseMethod = _parseMethod - self._parse = breaker - else: - if hasattr(self._parse,"_originalParseMethod"): - self._parse = self._parse._originalParseMethod - return self - - def setParseAction( self, *fns, **kwargs ): - """ - Define one or more actions to perform when successfully matching parse element definition. - Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)}, - C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where: - - s = the original string being parsed (see note below) - - loc = the location of the matching substring - - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object - If the functions in fns modify the tokens, they can return them as the return - value from fn, and the modified list of tokens will replace the original. - Otherwise, fn does not need to return any value. - - Optional keyword arguments: - - callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing - - Note: the default parsing behavior is to expand tabs in the input string - before starting the parsing process. See L{I{parseString}} for more information - on parsing strings containing C{}s, and suggested methods to maintain a - consistent view of the parsed string, the parse location, and line and column - positions within the parsed string. - - Example:: - integer = Word(nums) - date_str = integer + '/' + integer + '/' + integer - - date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31'] - - # use parse action to convert to ints at parse time - integer = Word(nums).setParseAction(lambda toks: int(toks[0])) - date_str = integer + '/' + integer + '/' + integer - - # note that integer fields are now ints, not strings - date_str.parseString("1999/12/31") # -> [1999, '/', 12, '/', 31] - """ - self.parseAction = list(map(_trim_arity, list(fns))) - self.callDuringTry = kwargs.get("callDuringTry", False) - return self - - def addParseAction( self, *fns, **kwargs ): - """ - Add one or more parse actions to expression's list of parse actions. See L{I{setParseAction}}. - - See examples in L{I{copy}}. - """ - self.parseAction += list(map(_trim_arity, list(fns))) - self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False) - return self - - def addCondition(self, *fns, **kwargs): - """Add a boolean predicate function to expression's list of parse actions. See - L{I{setParseAction}} for function call signatures. Unlike C{setParseAction}, - functions passed to C{addCondition} need to return boolean success/fail of the condition. - - Optional keyword arguments: - - message = define a custom message to be used in the raised exception - - fatal = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException - - Example:: - integer = Word(nums).setParseAction(lambda toks: int(toks[0])) - year_int = integer.copy() - year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later") - date_str = year_int + '/' + integer + '/' + integer - - result = date_str.parseString("1999/12/31") # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1) - """ - msg = kwargs.get("message", "failed user-defined condition") - exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException - for fn in fns: - def pa(s,l,t): - if not bool(_trim_arity(fn)(s,l,t)): - raise exc_type(s,l,msg) - self.parseAction.append(pa) - self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False) - return self - - def setFailAction( self, fn ): - """Define action to perform if parsing fails at this expression. - Fail acton fn is a callable function that takes the arguments - C{fn(s,loc,expr,err)} where: - - s = string being parsed - - loc = location where expression match was attempted and failed - - expr = the parse expression that failed - - err = the exception thrown - The function returns no value. It may throw C{L{ParseFatalException}} - if it is desired to stop parsing immediately.""" - self.failAction = fn - return self - - def _skipIgnorables( self, instring, loc ): - exprsFound = True - while exprsFound: - exprsFound = False - for e in self.ignoreExprs: - try: - while 1: - loc,dummy = e._parse( instring, loc ) - exprsFound = True - except ParseException: - pass - return loc - - def preParse( self, instring, loc ): - if self.ignoreExprs: - loc = self._skipIgnorables( instring, loc ) - - if self.skipWhitespace: - wt = self.whiteChars - instrlen = len(instring) - while loc < instrlen and instring[loc] in wt: - loc += 1 - - return loc - - def parseImpl( self, instring, loc, doActions=True ): - return loc, [] - - def postParse( self, instring, loc, tokenlist ): - return tokenlist - - #~ @profile - def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ): - debugging = ( self.debug ) #and doActions ) - - if debugging or self.failAction: - #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )) - if (self.debugActions[0] ): - self.debugActions[0]( instring, loc, self ) - if callPreParse and self.callPreparse: - preloc = self.preParse( instring, loc ) - else: - preloc = loc - tokensStart = preloc - try: - try: - loc,tokens = self.parseImpl( instring, preloc, doActions ) - except IndexError: - raise ParseException( instring, len(instring), self.errmsg, self ) - except ParseBaseException as err: - #~ print ("Exception raised:", err) - if self.debugActions[2]: - self.debugActions[2]( instring, tokensStart, self, err ) - if self.failAction: - self.failAction( instring, tokensStart, self, err ) - raise - else: - if callPreParse and self.callPreparse: - preloc = self.preParse( instring, loc ) - else: - preloc = loc - tokensStart = preloc - if self.mayIndexError or preloc >= len(instring): - try: - loc,tokens = self.parseImpl( instring, preloc, doActions ) - except IndexError: - raise ParseException( instring, len(instring), self.errmsg, self ) - else: - loc,tokens = self.parseImpl( instring, preloc, doActions ) - - tokens = self.postParse( instring, loc, tokens ) - - retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults ) - if self.parseAction and (doActions or self.callDuringTry): - if debugging: - try: - for fn in self.parseAction: - tokens = fn( instring, tokensStart, retTokens ) - if tokens is not None: - retTokens = ParseResults( tokens, - self.resultsName, - asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), - modal=self.modalResults ) - except ParseBaseException as err: - #~ print "Exception raised in user parse action:", err - if (self.debugActions[2] ): - self.debugActions[2]( instring, tokensStart, self, err ) - raise - else: - for fn in self.parseAction: - tokens = fn( instring, tokensStart, retTokens ) - if tokens is not None: - retTokens = ParseResults( tokens, - self.resultsName, - asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), - modal=self.modalResults ) - if debugging: - #~ print ("Matched",self,"->",retTokens.asList()) - if (self.debugActions[1] ): - self.debugActions[1]( instring, tokensStart, loc, self, retTokens ) - - return loc, retTokens - - def tryParse( self, instring, loc ): - try: - return self._parse( instring, loc, doActions=False )[0] - except ParseFatalException: - raise ParseException( instring, loc, self.errmsg, self) - - def canParseNext(self, instring, loc): - try: - self.tryParse(instring, loc) - except (ParseException, IndexError): - return False - else: - return True - - class _UnboundedCache(object): - def __init__(self): - cache = {} - self.not_in_cache = not_in_cache = object() - - def get(self, key): - return cache.get(key, not_in_cache) - - def set(self, key, value): - cache[key] = value - - def clear(self): - cache.clear() - - def cache_len(self): - return len(cache) - - self.get = types.MethodType(get, self) - self.set = types.MethodType(set, self) - self.clear = types.MethodType(clear, self) - self.__len__ = types.MethodType(cache_len, self) - - if _OrderedDict is not None: - class _FifoCache(object): - def __init__(self, size): - self.not_in_cache = not_in_cache = object() - - cache = _OrderedDict() - - def get(self, key): - return cache.get(key, not_in_cache) - - def set(self, key, value): - cache[key] = value - while len(cache) > size: - try: - cache.popitem(False) - except KeyError: - pass - - def clear(self): - cache.clear() - - def cache_len(self): - return len(cache) - - self.get = types.MethodType(get, self) - self.set = types.MethodType(set, self) - self.clear = types.MethodType(clear, self) - self.__len__ = types.MethodType(cache_len, self) - - else: - class _FifoCache(object): - def __init__(self, size): - self.not_in_cache = not_in_cache = object() - - cache = {} - key_fifo = collections.deque([], size) - - def get(self, key): - return cache.get(key, not_in_cache) - - def set(self, key, value): - cache[key] = value - while len(key_fifo) > size: - cache.pop(key_fifo.popleft(), None) - key_fifo.append(key) - - def clear(self): - cache.clear() - key_fifo.clear() - - def cache_len(self): - return len(cache) - - self.get = types.MethodType(get, self) - self.set = types.MethodType(set, self) - self.clear = types.MethodType(clear, self) - self.__len__ = types.MethodType(cache_len, self) - - # argument cache for optimizing repeated calls when backtracking through recursive expressions - packrat_cache = {} # this is set later by enabledPackrat(); this is here so that resetCache() doesn't fail - packrat_cache_lock = RLock() - packrat_cache_stats = [0, 0] - - # this method gets repeatedly called during backtracking with the same arguments - - # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression - def _parseCache( self, instring, loc, doActions=True, callPreParse=True ): - HIT, MISS = 0, 1 - lookup = (self, instring, loc, callPreParse, doActions) - with ParserElement.packrat_cache_lock: - cache = ParserElement.packrat_cache - value = cache.get(lookup) - if value is cache.not_in_cache: - ParserElement.packrat_cache_stats[MISS] += 1 - try: - value = self._parseNoCache(instring, loc, doActions, callPreParse) - except ParseBaseException as pe: - # cache a copy of the exception, without the traceback - cache.set(lookup, pe.__class__(*pe.args)) - raise - else: - cache.set(lookup, (value[0], value[1].copy())) - return value - else: - ParserElement.packrat_cache_stats[HIT] += 1 - if isinstance(value, Exception): - raise value - return (value[0], value[1].copy()) - - _parse = _parseNoCache - - @staticmethod - def resetCache(): - ParserElement.packrat_cache.clear() - ParserElement.packrat_cache_stats[:] = [0] * len(ParserElement.packrat_cache_stats) - - _packratEnabled = False - @staticmethod - def enablePackrat(cache_size_limit=128): - """Enables "packrat" parsing, which adds memoizing to the parsing logic. - Repeated parse attempts at the same string location (which happens - often in many complex grammars) can immediately return a cached value, - instead of re-executing parsing/validating code. Memoizing is done of - both valid results and parsing exceptions. - - Parameters: - - cache_size_limit - (default=C{128}) - if an integer value is provided - will limit the size of the packrat cache; if None is passed, then - the cache size will be unbounded; if 0 is passed, the cache will - be effectively disabled. - - This speedup may break existing programs that use parse actions that - have side-effects. For this reason, packrat parsing is disabled when - you first import pyparsing. To activate the packrat feature, your - program must call the class method C{ParserElement.enablePackrat()}. If - your program uses C{psyco} to "compile as you go", you must call - C{enablePackrat} before calling C{psyco.full()}. If you do not do this, - Python will crash. For best results, call C{enablePackrat()} immediately - after importing pyparsing. - - Example:: - import pyparsing - pyparsing.ParserElement.enablePackrat() - """ - if not ParserElement._packratEnabled: - ParserElement._packratEnabled = True - if cache_size_limit is None: - ParserElement.packrat_cache = ParserElement._UnboundedCache() - else: - ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit) - ParserElement._parse = ParserElement._parseCache - - def parseString( self, instring, parseAll=False ): - """ - Execute the parse expression with the given string. - This is the main interface to the client code, once the complete - expression has been built. - - If you want the grammar to require that the entire input string be - successfully parsed, then set C{parseAll} to True (equivalent to ending - the grammar with C{L{StringEnd()}}). - - Note: C{parseString} implicitly calls C{expandtabs()} on the input string, - in order to report proper column numbers in parse actions. - If the input string contains tabs and - the grammar uses parse actions that use the C{loc} argument to index into the - string being parsed, you can ensure you have a consistent view of the input - string by: - - calling C{parseWithTabs} on your grammar before calling C{parseString} - (see L{I{parseWithTabs}}) - - define your parse action using the full C{(s,loc,toks)} signature, and - reference the input string using the parse action's C{s} argument - - explictly expand the tabs in your input string before calling - C{parseString} - - Example:: - Word('a').parseString('aaaaabaaa') # -> ['aaaaa'] - Word('a').parseString('aaaaabaaa', parseAll=True) # -> Exception: Expected end of text - """ - ParserElement.resetCache() - if not self.streamlined: - self.streamline() - #~ self.saveAsList = True - for e in self.ignoreExprs: - e.streamline() - if not self.keepTabs: - instring = instring.expandtabs() - try: - loc, tokens = self._parse( instring, 0 ) - if parseAll: - loc = self.preParse( instring, loc ) - se = Empty() + StringEnd() - se._parse( instring, loc ) - except ParseBaseException as exc: - if ParserElement.verbose_stacktrace: - raise - else: - # catch and re-raise exception from here, clears out pyparsing internal stack trace - raise exc - else: - return tokens - - def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ): - """ - Scan the input string for expression matches. Each match will return the - matching tokens, start location, and end location. May be called with optional - C{maxMatches} argument, to clip scanning after 'n' matches are found. If - C{overlap} is specified, then overlapping matches will be reported. - - Note that the start and end locations are reported relative to the string - being parsed. See L{I{parseString}} for more information on parsing - strings with embedded tabs. - - Example:: - source = "sldjf123lsdjjkf345sldkjf879lkjsfd987" - print(source) - for tokens,start,end in Word(alphas).scanString(source): - print(' '*start + '^'*(end-start)) - print(' '*start + tokens[0]) - - prints:: - - sldjf123lsdjjkf345sldkjf879lkjsfd987 - ^^^^^ - sldjf - ^^^^^^^ - lsdjjkf - ^^^^^^ - sldkjf - ^^^^^^ - lkjsfd - """ - if not self.streamlined: - self.streamline() - for e in self.ignoreExprs: - e.streamline() - - if not self.keepTabs: - instring = _ustr(instring).expandtabs() - instrlen = len(instring) - loc = 0 - preparseFn = self.preParse - parseFn = self._parse - ParserElement.resetCache() - matches = 0 - try: - while loc <= instrlen and matches < maxMatches: - try: - preloc = preparseFn( instring, loc ) - nextLoc,tokens = parseFn( instring, preloc, callPreParse=False ) - except ParseException: - loc = preloc+1 - else: - if nextLoc > loc: - matches += 1 - yield tokens, preloc, nextLoc - if overlap: - nextloc = preparseFn( instring, loc ) - if nextloc > loc: - loc = nextLoc - else: - loc += 1 - else: - loc = nextLoc - else: - loc = preloc+1 - except ParseBaseException as exc: - if ParserElement.verbose_stacktrace: - raise - else: - # catch and re-raise exception from here, clears out pyparsing internal stack trace - raise exc - - def transformString( self, instring ): - """ - Extension to C{L{scanString}}, to modify matching text with modified tokens that may - be returned from a parse action. To use C{transformString}, define a grammar and - attach a parse action to it that modifies the returned token list. - Invoking C{transformString()} on a target string will then scan for matches, - and replace the matched text patterns according to the logic in the parse - action. C{transformString()} returns the resulting transformed string. - - Example:: - wd = Word(alphas) - wd.setParseAction(lambda toks: toks[0].title()) - - print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york.")) - Prints:: - Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York. - """ - out = [] - lastE = 0 - # force preservation of s, to minimize unwanted transformation of string, and to - # keep string locs straight between transformString and scanString - self.keepTabs = True - try: - for t,s,e in self.scanString( instring ): - out.append( instring[lastE:s] ) - if t: - if isinstance(t,ParseResults): - out += t.asList() - elif isinstance(t,list): - out += t - else: - out.append(t) - lastE = e - out.append(instring[lastE:]) - out = [o for o in out if o] - return "".join(map(_ustr,_flatten(out))) - except ParseBaseException as exc: - if ParserElement.verbose_stacktrace: - raise - else: - # catch and re-raise exception from here, clears out pyparsing internal stack trace - raise exc - - def searchString( self, instring, maxMatches=_MAX_INT ): - """ - Another extension to C{L{scanString}}, simplifying the access to the tokens found - to match the given parse expression. May be called with optional - C{maxMatches} argument, to clip searching after 'n' matches are found. - - Example:: - # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters - cap_word = Word(alphas.upper(), alphas.lower()) - - print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")) - - # the sum() builtin can be used to merge results into a single ParseResults object - print(sum(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))) - prints:: - [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']] - ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity'] - """ - try: - return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ]) - except ParseBaseException as exc: - if ParserElement.verbose_stacktrace: - raise - else: - # catch and re-raise exception from here, clears out pyparsing internal stack trace - raise exc - - def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False): - """ - Generator method to split a string using the given expression as a separator. - May be called with optional C{maxsplit} argument, to limit the number of splits; - and the optional C{includeSeparators} argument (default=C{False}), if the separating - matching text should be included in the split results. - - Example:: - punc = oneOf(list(".,;:/-!?")) - print(list(punc.split("This, this?, this sentence, is badly punctuated!"))) - prints:: - ['This', ' this', '', ' this sentence', ' is badly punctuated', ''] - """ - splits = 0 - last = 0 - for t,s,e in self.scanString(instring, maxMatches=maxsplit): - yield instring[last:s] - if includeSeparators: - yield t[0] - last = e - yield instring[last:] - - def __add__(self, other ): - """ - Implementation of + operator - returns C{L{And}}. Adding strings to a ParserElement - converts them to L{Literal}s by default. - - Example:: - greet = Word(alphas) + "," + Word(alphas) + "!" - hello = "Hello, World!" - print (hello, "->", greet.parseString(hello)) - Prints:: - Hello, World! -> ['Hello', ',', 'World', '!'] - """ - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return And( [ self, other ] ) - - def __radd__(self, other ): - """ - Implementation of + operator when left operand is not a C{L{ParserElement}} - """ - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return other + self - - def __sub__(self, other): - """ - Implementation of - operator, returns C{L{And}} with error stop - """ - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return self + And._ErrorStop() + other - - def __rsub__(self, other ): - """ - Implementation of - operator when left operand is not a C{L{ParserElement}} - """ - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return other - self - - def __mul__(self,other): - """ - Implementation of * operator, allows use of C{expr * 3} in place of - C{expr + expr + expr}. Expressions may also me multiplied by a 2-integer - tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples - may also include C{None} as in: - - C{expr*(n,None)} or C{expr*(n,)} is equivalent - to C{expr*n + L{ZeroOrMore}(expr)} - (read as "at least n instances of C{expr}") - - C{expr*(None,n)} is equivalent to C{expr*(0,n)} - (read as "0 to n instances of C{expr}") - - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)} - - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)} - - Note that C{expr*(None,n)} does not raise an exception if - more than n exprs exist in the input stream; that is, - C{expr*(None,n)} does not enforce a maximum number of expr - occurrences. If this behavior is desired, then write - C{expr*(None,n) + ~expr} - """ - if isinstance(other,int): - minElements, optElements = other,0 - elif isinstance(other,tuple): - other = (other + (None, None))[:2] - if other[0] is None: - other = (0, other[1]) - if isinstance(other[0],int) and other[1] is None: - if other[0] == 0: - return ZeroOrMore(self) - if other[0] == 1: - return OneOrMore(self) - else: - return self*other[0] + ZeroOrMore(self) - elif isinstance(other[0],int) and isinstance(other[1],int): - minElements, optElements = other - optElements -= minElements - else: - raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1])) - else: - raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other)) - - if minElements < 0: - raise ValueError("cannot multiply ParserElement by negative value") - if optElements < 0: - raise ValueError("second tuple value must be greater or equal to first tuple value") - if minElements == optElements == 0: - raise ValueError("cannot multiply ParserElement by 0 or (0,0)") - - if (optElements): - def makeOptionalList(n): - if n>1: - return Optional(self + makeOptionalList(n-1)) - else: - return Optional(self) - if minElements: - if minElements == 1: - ret = self + makeOptionalList(optElements) - else: - ret = And([self]*minElements) + makeOptionalList(optElements) - else: - ret = makeOptionalList(optElements) - else: - if minElements == 1: - ret = self - else: - ret = And([self]*minElements) - return ret - - def __rmul__(self, other): - return self.__mul__(other) - - def __or__(self, other ): - """ - Implementation of | operator - returns C{L{MatchFirst}} - """ - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return MatchFirst( [ self, other ] ) - - def __ror__(self, other ): - """ - Implementation of | operator when left operand is not a C{L{ParserElement}} - """ - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return other | self - - def __xor__(self, other ): - """ - Implementation of ^ operator - returns C{L{Or}} - """ - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return Or( [ self, other ] ) - - def __rxor__(self, other ): - """ - Implementation of ^ operator when left operand is not a C{L{ParserElement}} - """ - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return other ^ self - - def __and__(self, other ): - """ - Implementation of & operator - returns C{L{Each}} - """ - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return Each( [ self, other ] ) - - def __rand__(self, other ): - """ - Implementation of & operator when left operand is not a C{L{ParserElement}} - """ - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return other & self - - def __invert__( self ): - """ - Implementation of ~ operator - returns C{L{NotAny}} - """ - return NotAny( self ) - - def __call__(self, name=None): - """ - Shortcut for C{L{setResultsName}}, with C{listAllMatches=False}. - - If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be - passed as C{True}. - - If C{name} is omitted, same as calling C{L{copy}}. - - Example:: - # these are equivalent - userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno") - userdata = Word(alphas)("name") + Word(nums+"-")("socsecno") - """ - if name is not None: - return self.setResultsName(name) - else: - return self.copy() - - def suppress( self ): - """ - Suppresses the output of this C{ParserElement}; useful to keep punctuation from - cluttering up returned output. - """ - return Suppress( self ) - - def leaveWhitespace( self ): - """ - Disables the skipping of whitespace before matching the characters in the - C{ParserElement}'s defined pattern. This is normally only used internally by - the pyparsing module, but may be needed in some whitespace-sensitive grammars. - """ - self.skipWhitespace = False - return self - - def setWhitespaceChars( self, chars ): - """ - Overrides the default whitespace chars - """ - self.skipWhitespace = True - self.whiteChars = chars - self.copyDefaultWhiteChars = False - return self - - def parseWithTabs( self ): - """ - Overrides default behavior to expand C{}s to spaces before parsing the input string. - Must be called before C{parseString} when the input grammar contains elements that - match C{} characters. - """ - self.keepTabs = True - return self - - def ignore( self, other ): - """ - Define expression to be ignored (e.g., comments) while doing pattern - matching; may be called repeatedly, to define multiple comment or other - ignorable patterns. - - Example:: - patt = OneOrMore(Word(alphas)) - patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj'] - - patt.ignore(cStyleComment) - patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd'] - """ - if isinstance(other, basestring): - other = Suppress(other) - - if isinstance( other, Suppress ): - if other not in self.ignoreExprs: - self.ignoreExprs.append(other) - else: - self.ignoreExprs.append( Suppress( other.copy() ) ) - return self - - def setDebugActions( self, startAction, successAction, exceptionAction ): - """ - Enable display of debugging messages while doing pattern matching. - """ - self.debugActions = (startAction or _defaultStartDebugAction, - successAction or _defaultSuccessDebugAction, - exceptionAction or _defaultExceptionDebugAction) - self.debug = True - return self - - def setDebug( self, flag=True ): - """ - Enable display of debugging messages while doing pattern matching. - Set C{flag} to True to enable, False to disable. - - Example:: - wd = Word(alphas).setName("alphaword") - integer = Word(nums).setName("numword") - term = wd | integer - - # turn on debugging for wd - wd.setDebug() - - OneOrMore(term).parseString("abc 123 xyz 890") - - prints:: - Match alphaword at loc 0(1,1) - Matched alphaword -> ['abc'] - Match alphaword at loc 3(1,4) - Exception raised:Expected alphaword (at char 4), (line:1, col:5) - Match alphaword at loc 7(1,8) - Matched alphaword -> ['xyz'] - Match alphaword at loc 11(1,12) - Exception raised:Expected alphaword (at char 12), (line:1, col:13) - Match alphaword at loc 15(1,16) - Exception raised:Expected alphaword (at char 15), (line:1, col:16) - - The output shown is that produced by the default debug actions - custom debug actions can be - specified using L{setDebugActions}. Prior to attempting - to match the C{wd} expression, the debugging message C{"Match at loc (,)"} - is shown. Then if the parse succeeds, a C{"Matched"} message is shown, or an C{"Exception raised"} - message is shown. Also note the use of L{setName} to assign a human-readable name to the expression, - which makes debugging and exception messages easier to understand - for instance, the default - name created for the C{Word} expression without calling C{setName} is C{"W:(ABCD...)"}. - """ - if flag: - self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction ) - else: - self.debug = False - return self - - def __str__( self ): - return self.name - - def __repr__( self ): - return _ustr(self) - - def streamline( self ): - self.streamlined = True - self.strRepr = None - return self - - def checkRecursion( self, parseElementList ): - pass - - def validate( self, validateTrace=[] ): - """ - Check defined expressions for valid structure, check for infinite recursive definitions. - """ - self.checkRecursion( [] ) - - def parseFile( self, file_or_filename, parseAll=False ): - """ - Execute the parse expression on the given file or filename. - If a filename is specified (instead of a file object), - the entire file is opened, read, and closed before parsing. - """ - try: - file_contents = file_or_filename.read() - except AttributeError: - with open(file_or_filename, "r") as f: - file_contents = f.read() - try: - return self.parseString(file_contents, parseAll) - except ParseBaseException as exc: - if ParserElement.verbose_stacktrace: - raise - else: - # catch and re-raise exception from here, clears out pyparsing internal stack trace - raise exc - - def __eq__(self,other): - if isinstance(other, ParserElement): - return self is other or vars(self) == vars(other) - elif isinstance(other, basestring): - return self.matches(other) - else: - return super(ParserElement,self)==other - - def __ne__(self,other): - return not (self == other) - - def __hash__(self): - return hash(id(self)) - - def __req__(self,other): - return self == other - - def __rne__(self,other): - return not (self == other) - - def matches(self, testString, parseAll=True): - """ - Method for quick testing of a parser against a test string. Good for simple - inline microtests of sub expressions while building up larger parser. - - Parameters: - - testString - to test against this expression for a match - - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests - - Example:: - expr = Word(nums) - assert expr.matches("100") - """ - try: - self.parseString(_ustr(testString), parseAll=parseAll) - return True - except ParseBaseException: - return False - - def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False): - """ - Execute the parse expression on a series of test strings, showing each - test, the parsed results or where the parse failed. Quick and easy way to - run a parse expression against a list of sample strings. - - Parameters: - - tests - a list of separate test strings, or a multiline string of test strings - - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests - - comment - (default=C{'#'}) - expression for indicating embedded comments in the test - string; pass None to disable comment filtering - - fullDump - (default=C{True}) - dump results as list followed by results names in nested outline; - if False, only dump nested list - - printResults - (default=C{True}) prints test output to stdout - - failureTests - (default=C{False}) indicates if these tests are expected to fail parsing - - Returns: a (success, results) tuple, where success indicates that all tests succeeded - (or failed if C{failureTests} is True), and the results contain a list of lines of each - test's output - - Example:: - number_expr = pyparsing_common.number.copy() - - result = number_expr.runTests(''' - # unsigned integer - 100 - # negative integer - -100 - # float with scientific notation - 6.02e23 - # integer with scientific notation - 1e-12 - ''') - print("Success" if result[0] else "Failed!") - - result = number_expr.runTests(''' - # stray character - 100Z - # missing leading digit before '.' - -.100 - # too many '.' - 3.14.159 - ''', failureTests=True) - print("Success" if result[0] else "Failed!") - prints:: - # unsigned integer - 100 - [100] - - # negative integer - -100 - [-100] - - # float with scientific notation - 6.02e23 - [6.02e+23] - - # integer with scientific notation - 1e-12 - [1e-12] - - Success - - # stray character - 100Z - ^ - FAIL: Expected end of text (at char 3), (line:1, col:4) - - # missing leading digit before '.' - -.100 - ^ - FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1) - - # too many '.' - 3.14.159 - ^ - FAIL: Expected end of text (at char 4), (line:1, col:5) - - Success - - Each test string must be on a single line. If you want to test a string that spans multiple - lines, create a test like this:: - - expr.runTest(r"this is a test\\n of strings that spans \\n 3 lines") - - (Note that this is a raw string literal, you must include the leading 'r'.) - """ - if isinstance(tests, basestring): - tests = list(map(str.strip, tests.rstrip().splitlines())) - if isinstance(comment, basestring): - comment = Literal(comment) - allResults = [] - comments = [] - success = True - for t in tests: - if comment is not None and comment.matches(t, False) or comments and not t: - comments.append(t) - continue - if not t: - continue - out = ['\n'.join(comments), t] - comments = [] - try: - t = t.replace(r'\n','\n') - result = self.parseString(t, parseAll=parseAll) - out.append(result.dump(full=fullDump)) - success = success and not failureTests - except ParseBaseException as pe: - fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else "" - if '\n' in t: - out.append(line(pe.loc, t)) - out.append(' '*(col(pe.loc,t)-1) + '^' + fatal) - else: - out.append(' '*pe.loc + '^' + fatal) - out.append("FAIL: " + str(pe)) - success = success and failureTests - result = pe - except Exception as exc: - out.append("FAIL-EXCEPTION: " + str(exc)) - success = success and failureTests - result = exc - - if printResults: - if fullDump: - out.append('') - print('\n'.join(out)) - - allResults.append((t, result)) - - return success, allResults - - -class Token(ParserElement): - """ - Abstract C{ParserElement} subclass, for defining atomic matching patterns. - """ - def __init__( self ): - super(Token,self).__init__( savelist=False ) - - -class Empty(Token): - """ - An empty token, will always match. - """ - def __init__( self ): - super(Empty,self).__init__() - self.name = "Empty" - self.mayReturnEmpty = True - self.mayIndexError = False - - -class NoMatch(Token): - """ - A token that will never match. - """ - def __init__( self ): - super(NoMatch,self).__init__() - self.name = "NoMatch" - self.mayReturnEmpty = True - self.mayIndexError = False - self.errmsg = "Unmatchable token" - - def parseImpl( self, instring, loc, doActions=True ): - raise ParseException(instring, loc, self.errmsg, self) - - -class Literal(Token): - """ - Token to exactly match a specified string. - - Example:: - Literal('blah').parseString('blah') # -> ['blah'] - Literal('blah').parseString('blahfooblah') # -> ['blah'] - Literal('blah').parseString('bla') # -> Exception: Expected "blah" - - For case-insensitive matching, use L{CaselessLiteral}. - - For keyword matching (force word break before and after the matched string), - use L{Keyword} or L{CaselessKeyword}. - """ - def __init__( self, matchString ): - super(Literal,self).__init__() - self.match = matchString - self.matchLen = len(matchString) - try: - self.firstMatchChar = matchString[0] - except IndexError: - warnings.warn("null string passed to Literal; use Empty() instead", - SyntaxWarning, stacklevel=2) - self.__class__ = Empty - self.name = '"%s"' % _ustr(self.match) - self.errmsg = "Expected " + self.name - self.mayReturnEmpty = False - self.mayIndexError = False - - # Performance tuning: this routine gets called a *lot* - # if this is a single character match string and the first character matches, - # short-circuit as quickly as possible, and avoid calling startswith - #~ @profile - def parseImpl( self, instring, loc, doActions=True ): - if (instring[loc] == self.firstMatchChar and - (self.matchLen==1 or instring.startswith(self.match,loc)) ): - return loc+self.matchLen, self.match - raise ParseException(instring, loc, self.errmsg, self) -_L = Literal -ParserElement._literalStringClass = Literal - -class Keyword(Token): - """ - Token to exactly match a specified string as a keyword, that is, it must be - immediately followed by a non-keyword character. Compare with C{L{Literal}}: - - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}. - - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'} - Accepts two optional constructor arguments in addition to the keyword string: - - C{identChars} is a string of characters that would be valid identifier characters, - defaulting to all alphanumerics + "_" and "$" - - C{caseless} allows case-insensitive matching, default is C{False}. - - Example:: - Keyword("start").parseString("start") # -> ['start'] - Keyword("start").parseString("starting") # -> Exception - - For case-insensitive matching, use L{CaselessKeyword}. - """ - DEFAULT_KEYWORD_CHARS = alphanums+"_$" - - def __init__( self, matchString, identChars=None, caseless=False ): - super(Keyword,self).__init__() - if identChars is None: - identChars = Keyword.DEFAULT_KEYWORD_CHARS - self.match = matchString - self.matchLen = len(matchString) - try: - self.firstMatchChar = matchString[0] - except IndexError: - warnings.warn("null string passed to Keyword; use Empty() instead", - SyntaxWarning, stacklevel=2) - self.name = '"%s"' % self.match - self.errmsg = "Expected " + self.name - self.mayReturnEmpty = False - self.mayIndexError = False - self.caseless = caseless - if caseless: - self.caselessmatch = matchString.upper() - identChars = identChars.upper() - self.identChars = set(identChars) - - def parseImpl( self, instring, loc, doActions=True ): - if self.caseless: - if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and - (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and - (loc == 0 or instring[loc-1].upper() not in self.identChars) ): - return loc+self.matchLen, self.match - else: - if (instring[loc] == self.firstMatchChar and - (self.matchLen==1 or instring.startswith(self.match,loc)) and - (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and - (loc == 0 or instring[loc-1] not in self.identChars) ): - return loc+self.matchLen, self.match - raise ParseException(instring, loc, self.errmsg, self) - - def copy(self): - c = super(Keyword,self).copy() - c.identChars = Keyword.DEFAULT_KEYWORD_CHARS - return c - - @staticmethod - def setDefaultKeywordChars( chars ): - """Overrides the default Keyword chars - """ - Keyword.DEFAULT_KEYWORD_CHARS = chars - -class CaselessLiteral(Literal): - """ - Token to match a specified string, ignoring case of letters. - Note: the matched results will always be in the case of the given - match string, NOT the case of the input text. - - Example:: - OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD'] - - (Contrast with example for L{CaselessKeyword}.) - """ - def __init__( self, matchString ): - super(CaselessLiteral,self).__init__( matchString.upper() ) - # Preserve the defining literal. - self.returnString = matchString - self.name = "'%s'" % self.returnString - self.errmsg = "Expected " + self.name - - def parseImpl( self, instring, loc, doActions=True ): - if instring[ loc:loc+self.matchLen ].upper() == self.match: - return loc+self.matchLen, self.returnString - raise ParseException(instring, loc, self.errmsg, self) - -class CaselessKeyword(Keyword): - """ - Caseless version of L{Keyword}. - - Example:: - OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD'] - - (Contrast with example for L{CaselessLiteral}.) - """ - def __init__( self, matchString, identChars=None ): - super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True ) - - def parseImpl( self, instring, loc, doActions=True ): - if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and - (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) ): - return loc+self.matchLen, self.match - raise ParseException(instring, loc, self.errmsg, self) - -class CloseMatch(Token): - """ - A variation on L{Literal} which matches "close" matches, that is, - strings with at most 'n' mismatching characters. C{CloseMatch} takes parameters: - - C{match_string} - string to be matched - - C{maxMismatches} - (C{default=1}) maximum number of mismatches allowed to count as a match - - The results from a successful parse will contain the matched text from the input string and the following named results: - - C{mismatches} - a list of the positions within the match_string where mismatches were found - - C{original} - the original match_string used to compare against the input string - - If C{mismatches} is an empty list, then the match was an exact match. - - Example:: - patt = CloseMatch("ATCATCGAATGGA") - patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']}) - patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1) - - # exact match - patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']}) - - # close match allowing up to 2 mismatches - patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2) - patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']}) - """ - def __init__(self, match_string, maxMismatches=1): - super(CloseMatch,self).__init__() - self.name = match_string - self.match_string = match_string - self.maxMismatches = maxMismatches - self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches) - self.mayIndexError = False - self.mayReturnEmpty = False - - def parseImpl( self, instring, loc, doActions=True ): - start = loc - instrlen = len(instring) - maxloc = start + len(self.match_string) - - if maxloc <= instrlen: - match_string = self.match_string - match_stringloc = 0 - mismatches = [] - maxMismatches = self.maxMismatches - - for match_stringloc,s_m in enumerate(zip(instring[loc:maxloc], self.match_string)): - src,mat = s_m - if src != mat: - mismatches.append(match_stringloc) - if len(mismatches) > maxMismatches: - break - else: - loc = match_stringloc + 1 - results = ParseResults([instring[start:loc]]) - results['original'] = self.match_string - results['mismatches'] = mismatches - return loc, results - - raise ParseException(instring, loc, self.errmsg, self) - - -class Word(Token): - """ - Token for matching words composed of allowed character sets. - Defined with string containing all allowed initial characters, - an optional string containing allowed body characters (if omitted, - defaults to the initial character set), and an optional minimum, - maximum, and/or exact length. The default value for C{min} is 1 (a - minimum value < 1 is not valid); the default values for C{max} and C{exact} - are 0, meaning no maximum or exact length restriction. An optional - C{excludeChars} parameter can list characters that might be found in - the input C{bodyChars} string; useful to define a word of all printables - except for one or two characters, for instance. - - L{srange} is useful for defining custom character set strings for defining - C{Word} expressions, using range notation from regular expression character sets. - - A common mistake is to use C{Word} to match a specific literal string, as in - C{Word("Address")}. Remember that C{Word} uses the string argument to define - I{sets} of matchable characters. This expression would match "Add", "AAA", - "dAred", or any other word made up of the characters 'A', 'd', 'r', 'e', and 's'. - To match an exact literal string, use L{Literal} or L{Keyword}. - - pyparsing includes helper strings for building Words: - - L{alphas} - - L{nums} - - L{alphanums} - - L{hexnums} - - L{alphas8bit} (alphabetic characters in ASCII range 128-255 - accented, tilded, umlauted, etc.) - - L{punc8bit} (non-alphabetic characters in ASCII range 128-255 - currency, symbols, superscripts, diacriticals, etc.) - - L{printables} (any non-whitespace character) - - Example:: - # a word composed of digits - integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9")) - - # a word with a leading capital, and zero or more lowercase - capital_word = Word(alphas.upper(), alphas.lower()) - - # hostnames are alphanumeric, with leading alpha, and '-' - hostname = Word(alphas, alphanums+'-') - - # roman numeral (not a strict parser, accepts invalid mix of characters) - roman = Word("IVXLCDM") - - # any string of non-whitespace characters, except for ',' - csv_value = Word(printables, excludeChars=",") - """ - def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ): - super(Word,self).__init__() - if excludeChars: - initChars = ''.join(c for c in initChars if c not in excludeChars) - if bodyChars: - bodyChars = ''.join(c for c in bodyChars if c not in excludeChars) - self.initCharsOrig = initChars - self.initChars = set(initChars) - if bodyChars : - self.bodyCharsOrig = bodyChars - self.bodyChars = set(bodyChars) - else: - self.bodyCharsOrig = initChars - self.bodyChars = set(initChars) - - self.maxSpecified = max > 0 - - if min < 1: - raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted") - - self.minLen = min - - if max > 0: - self.maxLen = max - else: - self.maxLen = _MAX_INT - - if exact > 0: - self.maxLen = exact - self.minLen = exact - - self.name = _ustr(self) - self.errmsg = "Expected " + self.name - self.mayIndexError = False - self.asKeyword = asKeyword - - if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0): - if self.bodyCharsOrig == self.initCharsOrig: - self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig) - elif len(self.initCharsOrig) == 1: - self.reString = "%s[%s]*" % \ - (re.escape(self.initCharsOrig), - _escapeRegexRangeChars(self.bodyCharsOrig),) - else: - self.reString = "[%s][%s]*" % \ - (_escapeRegexRangeChars(self.initCharsOrig), - _escapeRegexRangeChars(self.bodyCharsOrig),) - if self.asKeyword: - self.reString = r"\b"+self.reString+r"\b" - try: - self.re = re.compile( self.reString ) - except Exception: - self.re = None - - def parseImpl( self, instring, loc, doActions=True ): - if self.re: - result = self.re.match(instring,loc) - if not result: - raise ParseException(instring, loc, self.errmsg, self) - - loc = result.end() - return loc, result.group() - - if not(instring[ loc ] in self.initChars): - raise ParseException(instring, loc, self.errmsg, self) - - start = loc - loc += 1 - instrlen = len(instring) - bodychars = self.bodyChars - maxloc = start + self.maxLen - maxloc = min( maxloc, instrlen ) - while loc < maxloc and instring[loc] in bodychars: - loc += 1 - - throwException = False - if loc - start < self.minLen: - throwException = True - if self.maxSpecified and loc < instrlen and instring[loc] in bodychars: - throwException = True - if self.asKeyword: - if (start>0 and instring[start-1] in bodychars) or (loc4: - return s[:4]+"..." - else: - return s - - if ( self.initCharsOrig != self.bodyCharsOrig ): - self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) ) - else: - self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig) - - return self.strRepr - - -class Regex(Token): - r""" - Token for matching strings that match a given regular expression. - Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module. - If the given regex contains named groups (defined using C{(?P...)}), these will be preserved as - named parse results. - - Example:: - realnum = Regex(r"[+-]?\d+\.\d*") - date = Regex(r'(?P\d{4})-(?P\d\d?)-(?P\d\d?)') - # ref: http://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression - roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})") - """ - compiledREtype = type(re.compile("[A-Z]")) - def __init__( self, pattern, flags=0): - """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.""" - super(Regex,self).__init__() - - if isinstance(pattern, basestring): - if not pattern: - warnings.warn("null string passed to Regex; use Empty() instead", - SyntaxWarning, stacklevel=2) - - self.pattern = pattern - self.flags = flags - - try: - self.re = re.compile(self.pattern, self.flags) - self.reString = self.pattern - except sre_constants.error: - warnings.warn("invalid pattern (%s) passed to Regex" % pattern, - SyntaxWarning, stacklevel=2) - raise - - elif isinstance(pattern, Regex.compiledREtype): - self.re = pattern - self.pattern = \ - self.reString = str(pattern) - self.flags = flags - - else: - raise ValueError("Regex may only be constructed with a string or a compiled RE object") - - self.name = _ustr(self) - self.errmsg = "Expected " + self.name - self.mayIndexError = False - self.mayReturnEmpty = True - - def parseImpl( self, instring, loc, doActions=True ): - result = self.re.match(instring,loc) - if not result: - raise ParseException(instring, loc, self.errmsg, self) - - loc = result.end() - d = result.groupdict() - ret = ParseResults(result.group()) - if d: - for k in d: - ret[k] = d[k] - return loc,ret - - def __str__( self ): - try: - return super(Regex,self).__str__() - except Exception: - pass - - if self.strRepr is None: - self.strRepr = "Re:(%s)" % repr(self.pattern) - - return self.strRepr - - -class QuotedString(Token): - r""" - Token for matching strings that are delimited by quoting characters. - - Defined with the following parameters: - - quoteChar - string of one or more characters defining the quote delimiting string - - escChar - character to escape quotes, typically backslash (default=C{None}) - - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=C{None}) - - multiline - boolean indicating whether quotes can span multiple lines (default=C{False}) - - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True}) - - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar) - - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True}) - - Example:: - qs = QuotedString('"') - print(qs.searchString('lsjdf "This is the quote" sldjf')) - complex_qs = QuotedString('{{', endQuoteChar='}}') - print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf')) - sql_qs = QuotedString('"', escQuote='""') - print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf')) - prints:: - [['This is the quote']] - [['This is the "quote"']] - [['This is the quote with "embedded" quotes']] - """ - def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True): - super(QuotedString,self).__init__() - - # remove white space from quote chars - wont work anyway - quoteChar = quoteChar.strip() - if not quoteChar: - warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) - raise SyntaxError() - - if endQuoteChar is None: - endQuoteChar = quoteChar - else: - endQuoteChar = endQuoteChar.strip() - if not endQuoteChar: - warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) - raise SyntaxError() - - self.quoteChar = quoteChar - self.quoteCharLen = len(quoteChar) - self.firstQuoteChar = quoteChar[0] - self.endQuoteChar = endQuoteChar - self.endQuoteCharLen = len(endQuoteChar) - self.escChar = escChar - self.escQuote = escQuote - self.unquoteResults = unquoteResults - self.convertWhitespaceEscapes = convertWhitespaceEscapes - - if multiline: - self.flags = re.MULTILINE | re.DOTALL - self.pattern = r'%s(?:[^%s%s]' % \ - ( re.escape(self.quoteChar), - _escapeRegexRangeChars(self.endQuoteChar[0]), - (escChar is not None and _escapeRegexRangeChars(escChar) or '') ) - else: - self.flags = 0 - self.pattern = r'%s(?:[^%s\n\r%s]' % \ - ( re.escape(self.quoteChar), - _escapeRegexRangeChars(self.endQuoteChar[0]), - (escChar is not None and _escapeRegexRangeChars(escChar) or '') ) - if len(self.endQuoteChar) > 1: - self.pattern += ( - '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]), - _escapeRegexRangeChars(self.endQuoteChar[i])) - for i in range(len(self.endQuoteChar)-1,0,-1)) + ')' - ) - if escQuote: - self.pattern += (r'|(?:%s)' % re.escape(escQuote)) - if escChar: - self.pattern += (r'|(?:%s.)' % re.escape(escChar)) - self.escCharReplacePattern = re.escape(self.escChar)+"(.)" - self.pattern += (r')*%s' % re.escape(self.endQuoteChar)) - - try: - self.re = re.compile(self.pattern, self.flags) - self.reString = self.pattern - except sre_constants.error: - warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern, - SyntaxWarning, stacklevel=2) - raise - - self.name = _ustr(self) - self.errmsg = "Expected " + self.name - self.mayIndexError = False - self.mayReturnEmpty = True - - def parseImpl( self, instring, loc, doActions=True ): - result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None - if not result: - raise ParseException(instring, loc, self.errmsg, self) - - loc = result.end() - ret = result.group() - - if self.unquoteResults: - - # strip off quotes - ret = ret[self.quoteCharLen:-self.endQuoteCharLen] - - if isinstance(ret,basestring): - # replace escaped whitespace - if '\\' in ret and self.convertWhitespaceEscapes: - ws_map = { - r'\t' : '\t', - r'\n' : '\n', - r'\f' : '\f', - r'\r' : '\r', - } - for wslit,wschar in ws_map.items(): - ret = ret.replace(wslit, wschar) - - # replace escaped characters - if self.escChar: - ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret) - - # replace escaped quotes - if self.escQuote: - ret = ret.replace(self.escQuote, self.endQuoteChar) - - return loc, ret - - def __str__( self ): - try: - return super(QuotedString,self).__str__() - except Exception: - pass - - if self.strRepr is None: - self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar) - - return self.strRepr - - -class CharsNotIn(Token): - """ - Token for matching words composed of characters I{not} in a given set (will - include whitespace in matched characters if not listed in the provided exclusion set - see example). - Defined with string containing all disallowed characters, and an optional - minimum, maximum, and/or exact length. The default value for C{min} is 1 (a - minimum value < 1 is not valid); the default values for C{max} and C{exact} - are 0, meaning no maximum or exact length restriction. - - Example:: - # define a comma-separated-value as anything that is not a ',' - csv_value = CharsNotIn(',') - print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213")) - prints:: - ['dkls', 'lsdkjf', 's12 34', '@!#', '213'] - """ - def __init__( self, notChars, min=1, max=0, exact=0 ): - super(CharsNotIn,self).__init__() - self.skipWhitespace = False - self.notChars = notChars - - if min < 1: - raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted") - - self.minLen = min - - if max > 0: - self.maxLen = max - else: - self.maxLen = _MAX_INT - - if exact > 0: - self.maxLen = exact - self.minLen = exact - - self.name = _ustr(self) - self.errmsg = "Expected " + self.name - self.mayReturnEmpty = ( self.minLen == 0 ) - self.mayIndexError = False - - def parseImpl( self, instring, loc, doActions=True ): - if instring[loc] in self.notChars: - raise ParseException(instring, loc, self.errmsg, self) - - start = loc - loc += 1 - notchars = self.notChars - maxlen = min( start+self.maxLen, len(instring) ) - while loc < maxlen and \ - (instring[loc] not in notchars): - loc += 1 - - if loc - start < self.minLen: - raise ParseException(instring, loc, self.errmsg, self) - - return loc, instring[start:loc] - - def __str__( self ): - try: - return super(CharsNotIn, self).__str__() - except Exception: - pass - - if self.strRepr is None: - if len(self.notChars) > 4: - self.strRepr = "!W:(%s...)" % self.notChars[:4] - else: - self.strRepr = "!W:(%s)" % self.notChars - - return self.strRepr - -class White(Token): - """ - Special matching class for matching whitespace. Normally, whitespace is ignored - by pyparsing grammars. This class is included when some whitespace structures - are significant. Define with a string containing the whitespace characters to be - matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments, - as defined for the C{L{Word}} class. - """ - whiteStrs = { - " " : "", - "\t": "", - "\n": "", - "\r": "", - "\f": "", - } - def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0): - super(White,self).__init__() - self.matchWhite = ws - self.setWhitespaceChars( "".join(c for c in self.whiteChars if c not in self.matchWhite) ) - #~ self.leaveWhitespace() - self.name = ("".join(White.whiteStrs[c] for c in self.matchWhite)) - self.mayReturnEmpty = True - self.errmsg = "Expected " + self.name - - self.minLen = min - - if max > 0: - self.maxLen = max - else: - self.maxLen = _MAX_INT - - if exact > 0: - self.maxLen = exact - self.minLen = exact - - def parseImpl( self, instring, loc, doActions=True ): - if not(instring[ loc ] in self.matchWhite): - raise ParseException(instring, loc, self.errmsg, self) - start = loc - loc += 1 - maxloc = start + self.maxLen - maxloc = min( maxloc, len(instring) ) - while loc < maxloc and instring[loc] in self.matchWhite: - loc += 1 - - if loc - start < self.minLen: - raise ParseException(instring, loc, self.errmsg, self) - - return loc, instring[start:loc] - - -class _PositionToken(Token): - def __init__( self ): - super(_PositionToken,self).__init__() - self.name=self.__class__.__name__ - self.mayReturnEmpty = True - self.mayIndexError = False - -class GoToColumn(_PositionToken): - """ - Token to advance to a specific column of input text; useful for tabular report scraping. - """ - def __init__( self, colno ): - super(GoToColumn,self).__init__() - self.col = colno - - def preParse( self, instring, loc ): - if col(loc,instring) != self.col: - instrlen = len(instring) - if self.ignoreExprs: - loc = self._skipIgnorables( instring, loc ) - while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col : - loc += 1 - return loc - - def parseImpl( self, instring, loc, doActions=True ): - thiscol = col( loc, instring ) - if thiscol > self.col: - raise ParseException( instring, loc, "Text not in expected column", self ) - newloc = loc + self.col - thiscol - ret = instring[ loc: newloc ] - return newloc, ret - - -class LineStart(_PositionToken): - """ - Matches if current position is at the beginning of a line within the parse string - - Example:: - - test = '''\ - AAA this line - AAA and this line - AAA but not this one - B AAA and definitely not this one - ''' - - for t in (LineStart() + 'AAA' + restOfLine).searchString(test): - print(t) - - Prints:: - ['AAA', ' this line'] - ['AAA', ' and this line'] - - """ - def __init__( self ): - super(LineStart,self).__init__() - self.errmsg = "Expected start of line" - - def parseImpl( self, instring, loc, doActions=True ): - if col(loc, instring) == 1: - return loc, [] - raise ParseException(instring, loc, self.errmsg, self) - -class LineEnd(_PositionToken): - """ - Matches if current position is at the end of a line within the parse string - """ - def __init__( self ): - super(LineEnd,self).__init__() - self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") ) - self.errmsg = "Expected end of line" - - def parseImpl( self, instring, loc, doActions=True ): - if loc len(instring): - return loc, [] - else: - raise ParseException(instring, loc, self.errmsg, self) - -class WordStart(_PositionToken): - """ - Matches if the current position is at the beginning of a Word, and - is not preceded by any character in a given set of C{wordChars} - (default=C{printables}). To emulate the C{\b} behavior of regular expressions, - use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of - the string being parsed, or at the beginning of a line. - """ - def __init__(self, wordChars = printables): - super(WordStart,self).__init__() - self.wordChars = set(wordChars) - self.errmsg = "Not at the start of a word" - - def parseImpl(self, instring, loc, doActions=True ): - if loc != 0: - if (instring[loc-1] in self.wordChars or - instring[loc] not in self.wordChars): - raise ParseException(instring, loc, self.errmsg, self) - return loc, [] - -class WordEnd(_PositionToken): - """ - Matches if the current position is at the end of a Word, and - is not followed by any character in a given set of C{wordChars} - (default=C{printables}). To emulate the C{\b} behavior of regular expressions, - use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of - the string being parsed, or at the end of a line. - """ - def __init__(self, wordChars = printables): - super(WordEnd,self).__init__() - self.wordChars = set(wordChars) - self.skipWhitespace = False - self.errmsg = "Not at the end of a word" - - def parseImpl(self, instring, loc, doActions=True ): - instrlen = len(instring) - if instrlen>0 and loc maxExcLoc: - maxException = err - maxExcLoc = err.loc - except IndexError: - if len(instring) > maxExcLoc: - maxException = ParseException(instring,len(instring),e.errmsg,self) - maxExcLoc = len(instring) - else: - # save match among all matches, to retry longest to shortest - matches.append((loc2, e)) - - if matches: - matches.sort(key=lambda x: -x[0]) - for _,e in matches: - try: - return e._parse( instring, loc, doActions ) - except ParseException as err: - err.__traceback__ = None - if err.loc > maxExcLoc: - maxException = err - maxExcLoc = err.loc - - if maxException is not None: - maxException.msg = self.errmsg - raise maxException - else: - raise ParseException(instring, loc, "no defined alternatives to match", self) - - - def __ixor__(self, other ): - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - return self.append( other ) #Or( [ self, other ] ) - - def __str__( self ): - if hasattr(self,"name"): - return self.name - - if self.strRepr is None: - self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}" - - return self.strRepr - - def checkRecursion( self, parseElementList ): - subRecCheckList = parseElementList[:] + [ self ] - for e in self.exprs: - e.checkRecursion( subRecCheckList ) - - -class MatchFirst(ParseExpression): - """ - Requires that at least one C{ParseExpression} is found. - If two expressions match, the first one listed is the one that will match. - May be constructed using the C{'|'} operator. - - Example:: - # construct MatchFirst using '|' operator - - # watch the order of expressions to match - number = Word(nums) | Combine(Word(nums) + '.' + Word(nums)) - print(number.searchString("123 3.1416 789")) # Fail! -> [['123'], ['3'], ['1416'], ['789']] - - # put more selective expression first - number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums) - print(number.searchString("123 3.1416 789")) # Better -> [['123'], ['3.1416'], ['789']] - """ - def __init__( self, exprs, savelist = False ): - super(MatchFirst,self).__init__(exprs, savelist) - if self.exprs: - self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) - else: - self.mayReturnEmpty = True - - def parseImpl( self, instring, loc, doActions=True ): - maxExcLoc = -1 - maxException = None - for e in self.exprs: - try: - ret = e._parse( instring, loc, doActions ) - return ret - except ParseException as err: - if err.loc > maxExcLoc: - maxException = err - maxExcLoc = err.loc - except IndexError: - if len(instring) > maxExcLoc: - maxException = ParseException(instring,len(instring),e.errmsg,self) - maxExcLoc = len(instring) - - # only got here if no expression matched, raise exception for match that made it the furthest - else: - if maxException is not None: - maxException.msg = self.errmsg - raise maxException - else: - raise ParseException(instring, loc, "no defined alternatives to match", self) - - def __ior__(self, other ): - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - return self.append( other ) #MatchFirst( [ self, other ] ) - - def __str__( self ): - if hasattr(self,"name"): - return self.name - - if self.strRepr is None: - self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}" - - return self.strRepr - - def checkRecursion( self, parseElementList ): - subRecCheckList = parseElementList[:] + [ self ] - for e in self.exprs: - e.checkRecursion( subRecCheckList ) - - -class Each(ParseExpression): - """ - Requires all given C{ParseExpression}s to be found, but in any order. - Expressions may be separated by whitespace. - May be constructed using the C{'&'} operator. - - Example:: - color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN") - shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON") - integer = Word(nums) - shape_attr = "shape:" + shape_type("shape") - posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn") - color_attr = "color:" + color("color") - size_attr = "size:" + integer("size") - - # use Each (using operator '&') to accept attributes in any order - # (shape and posn are required, color and size are optional) - shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr) - - shape_spec.runTests(''' - shape: SQUARE color: BLACK posn: 100, 120 - shape: CIRCLE size: 50 color: BLUE posn: 50,80 - color:GREEN size:20 shape:TRIANGLE posn:20,40 - ''' - ) - prints:: - shape: SQUARE color: BLACK posn: 100, 120 - ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']] - - color: BLACK - - posn: ['100', ',', '120'] - - x: 100 - - y: 120 - - shape: SQUARE - - - shape: CIRCLE size: 50 color: BLUE posn: 50,80 - ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']] - - color: BLUE - - posn: ['50', ',', '80'] - - x: 50 - - y: 80 - - shape: CIRCLE - - size: 50 - - - color: GREEN size: 20 shape: TRIANGLE posn: 20,40 - ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']] - - color: GREEN - - posn: ['20', ',', '40'] - - x: 20 - - y: 40 - - shape: TRIANGLE - - size: 20 - """ - def __init__( self, exprs, savelist = True ): - super(Each,self).__init__(exprs, savelist) - self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) - self.skipWhitespace = True - self.initExprGroups = True - - def parseImpl( self, instring, loc, doActions=True ): - if self.initExprGroups: - self.opt1map = dict((id(e.expr),e) for e in self.exprs if isinstance(e,Optional)) - opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ] - opt2 = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional)] - self.optionals = opt1 + opt2 - self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ] - self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ] - self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ] - self.required += self.multirequired - self.initExprGroups = False - tmpLoc = loc - tmpReqd = self.required[:] - tmpOpt = self.optionals[:] - matchOrder = [] - - keepMatching = True - while keepMatching: - tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired - failed = [] - for e in tmpExprs: - try: - tmpLoc = e.tryParse( instring, tmpLoc ) - except ParseException: - failed.append(e) - else: - matchOrder.append(self.opt1map.get(id(e),e)) - if e in tmpReqd: - tmpReqd.remove(e) - elif e in tmpOpt: - tmpOpt.remove(e) - if len(failed) == len(tmpExprs): - keepMatching = False - - if tmpReqd: - missing = ", ".join(_ustr(e) for e in tmpReqd) - raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing ) - - # add any unmatched Optionals, in case they have default values defined - matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt] - - resultlist = [] - for e in matchOrder: - loc,results = e._parse(instring,loc,doActions) - resultlist.append(results) - - finalResults = sum(resultlist, ParseResults([])) - return loc, finalResults - - def __str__( self ): - if hasattr(self,"name"): - return self.name - - if self.strRepr is None: - self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}" - - return self.strRepr - - def checkRecursion( self, parseElementList ): - subRecCheckList = parseElementList[:] + [ self ] - for e in self.exprs: - e.checkRecursion( subRecCheckList ) - - -class ParseElementEnhance(ParserElement): - """ - Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens. - """ - def __init__( self, expr, savelist=False ): - super(ParseElementEnhance,self).__init__(savelist) - if isinstance( expr, basestring ): - if issubclass(ParserElement._literalStringClass, Token): - expr = ParserElement._literalStringClass(expr) - else: - expr = ParserElement._literalStringClass(Literal(expr)) - self.expr = expr - self.strRepr = None - if expr is not None: - self.mayIndexError = expr.mayIndexError - self.mayReturnEmpty = expr.mayReturnEmpty - self.setWhitespaceChars( expr.whiteChars ) - self.skipWhitespace = expr.skipWhitespace - self.saveAsList = expr.saveAsList - self.callPreparse = expr.callPreparse - self.ignoreExprs.extend(expr.ignoreExprs) - - def parseImpl( self, instring, loc, doActions=True ): - if self.expr is not None: - return self.expr._parse( instring, loc, doActions, callPreParse=False ) - else: - raise ParseException("",loc,self.errmsg,self) - - def leaveWhitespace( self ): - self.skipWhitespace = False - self.expr = self.expr.copy() - if self.expr is not None: - self.expr.leaveWhitespace() - return self - - def ignore( self, other ): - if isinstance( other, Suppress ): - if other not in self.ignoreExprs: - super( ParseElementEnhance, self).ignore( other ) - if self.expr is not None: - self.expr.ignore( self.ignoreExprs[-1] ) - else: - super( ParseElementEnhance, self).ignore( other ) - if self.expr is not None: - self.expr.ignore( self.ignoreExprs[-1] ) - return self - - def streamline( self ): - super(ParseElementEnhance,self).streamline() - if self.expr is not None: - self.expr.streamline() - return self - - def checkRecursion( self, parseElementList ): - if self in parseElementList: - raise RecursiveGrammarException( parseElementList+[self] ) - subRecCheckList = parseElementList[:] + [ self ] - if self.expr is not None: - self.expr.checkRecursion( subRecCheckList ) - - def validate( self, validateTrace=[] ): - tmp = validateTrace[:]+[self] - if self.expr is not None: - self.expr.validate(tmp) - self.checkRecursion( [] ) - - def __str__( self ): - try: - return super(ParseElementEnhance,self).__str__() - except Exception: - pass - - if self.strRepr is None and self.expr is not None: - self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) ) - return self.strRepr - - -class FollowedBy(ParseElementEnhance): - """ - Lookahead matching of the given parse expression. C{FollowedBy} - does I{not} advance the parsing position within the input string, it only - verifies that the specified parse expression matches at the current - position. C{FollowedBy} always returns a null token list. - - Example:: - # use FollowedBy to match a label only if it is followed by a ':' - data_word = Word(alphas) - label = data_word + FollowedBy(':') - attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) - - OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint() - prints:: - [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']] - """ - def __init__( self, expr ): - super(FollowedBy,self).__init__(expr) - self.mayReturnEmpty = True - - def parseImpl( self, instring, loc, doActions=True ): - self.expr.tryParse( instring, loc ) - return loc, [] - - -class NotAny(ParseElementEnhance): - """ - Lookahead to disallow matching with the given parse expression. C{NotAny} - does I{not} advance the parsing position within the input string, it only - verifies that the specified parse expression does I{not} match at the current - position. Also, C{NotAny} does I{not} skip over leading whitespace. C{NotAny} - always returns a null token list. May be constructed using the '~' operator. - - Example:: - - """ - def __init__( self, expr ): - super(NotAny,self).__init__(expr) - #~ self.leaveWhitespace() - self.skipWhitespace = False # do NOT use self.leaveWhitespace(), don't want to propagate to exprs - self.mayReturnEmpty = True - self.errmsg = "Found unwanted token, "+_ustr(self.expr) - - def parseImpl( self, instring, loc, doActions=True ): - if self.expr.canParseNext(instring, loc): - raise ParseException(instring, loc, self.errmsg, self) - return loc, [] - - def __str__( self ): - if hasattr(self,"name"): - return self.name - - if self.strRepr is None: - self.strRepr = "~{" + _ustr(self.expr) + "}" - - return self.strRepr - -class _MultipleMatch(ParseElementEnhance): - def __init__( self, expr, stopOn=None): - super(_MultipleMatch, self).__init__(expr) - self.saveAsList = True - ender = stopOn - if isinstance(ender, basestring): - ender = ParserElement._literalStringClass(ender) - self.not_ender = ~ender if ender is not None else None - - def parseImpl( self, instring, loc, doActions=True ): - self_expr_parse = self.expr._parse - self_skip_ignorables = self._skipIgnorables - check_ender = self.not_ender is not None - if check_ender: - try_not_ender = self.not_ender.tryParse - - # must be at least one (but first see if we are the stopOn sentinel; - # if so, fail) - if check_ender: - try_not_ender(instring, loc) - loc, tokens = self_expr_parse( instring, loc, doActions, callPreParse=False ) - try: - hasIgnoreExprs = (not not self.ignoreExprs) - while 1: - if check_ender: - try_not_ender(instring, loc) - if hasIgnoreExprs: - preloc = self_skip_ignorables( instring, loc ) - else: - preloc = loc - loc, tmptokens = self_expr_parse( instring, preloc, doActions ) - if tmptokens or tmptokens.haskeys(): - tokens += tmptokens - except (ParseException,IndexError): - pass - - return loc, tokens - -class OneOrMore(_MultipleMatch): - """ - Repetition of one or more of the given expression. - - Parameters: - - expr - expression that must match one or more times - - stopOn - (default=C{None}) - expression for a terminating sentinel - (only required if the sentinel would ordinarily match the repetition - expression) - - Example:: - data_word = Word(alphas) - label = data_word + FollowedBy(':') - attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join)) - - text = "shape: SQUARE posn: upper left color: BLACK" - OneOrMore(attr_expr).parseString(text).pprint() # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']] - - # use stopOn attribute for OneOrMore to avoid reading label string as part of the data - attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) - OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']] - - # could also be written as - (attr_expr * (1,)).parseString(text).pprint() - """ - - def __str__( self ): - if hasattr(self,"name"): - return self.name - - if self.strRepr is None: - self.strRepr = "{" + _ustr(self.expr) + "}..." - - return self.strRepr - -class ZeroOrMore(_MultipleMatch): - """ - Optional repetition of zero or more of the given expression. - - Parameters: - - expr - expression that must match zero or more times - - stopOn - (default=C{None}) - expression for a terminating sentinel - (only required if the sentinel would ordinarily match the repetition - expression) - - Example: similar to L{OneOrMore} - """ - def __init__( self, expr, stopOn=None): - super(ZeroOrMore,self).__init__(expr, stopOn=stopOn) - self.mayReturnEmpty = True - - def parseImpl( self, instring, loc, doActions=True ): - try: - return super(ZeroOrMore, self).parseImpl(instring, loc, doActions) - except (ParseException,IndexError): - return loc, [] - - def __str__( self ): - if hasattr(self,"name"): - return self.name - - if self.strRepr is None: - self.strRepr = "[" + _ustr(self.expr) + "]..." - - return self.strRepr - -class _NullToken(object): - def __bool__(self): - return False - __nonzero__ = __bool__ - def __str__(self): - return "" - -_optionalNotMatched = _NullToken() -class Optional(ParseElementEnhance): - """ - Optional matching of the given expression. - - Parameters: - - expr - expression that must match zero or more times - - default (optional) - value to be returned if the optional expression is not found. - - Example:: - # US postal code can be a 5-digit zip, plus optional 4-digit qualifier - zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4))) - zip.runTests(''' - # traditional ZIP code - 12345 - - # ZIP+4 form - 12101-0001 - - # invalid ZIP - 98765- - ''') - prints:: - # traditional ZIP code - 12345 - ['12345'] - - # ZIP+4 form - 12101-0001 - ['12101-0001'] - - # invalid ZIP - 98765- - ^ - FAIL: Expected end of text (at char 5), (line:1, col:6) - """ - def __init__( self, expr, default=_optionalNotMatched ): - super(Optional,self).__init__( expr, savelist=False ) - self.saveAsList = self.expr.saveAsList - self.defaultValue = default - self.mayReturnEmpty = True - - def parseImpl( self, instring, loc, doActions=True ): - try: - loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False ) - except (ParseException,IndexError): - if self.defaultValue is not _optionalNotMatched: - if self.expr.resultsName: - tokens = ParseResults([ self.defaultValue ]) - tokens[self.expr.resultsName] = self.defaultValue - else: - tokens = [ self.defaultValue ] - else: - tokens = [] - return loc, tokens - - def __str__( self ): - if hasattr(self,"name"): - return self.name - - if self.strRepr is None: - self.strRepr = "[" + _ustr(self.expr) + "]" - - return self.strRepr - -class SkipTo(ParseElementEnhance): - """ - Token for skipping over all undefined text until the matched expression is found. - - Parameters: - - expr - target expression marking the end of the data to be skipped - - include - (default=C{False}) if True, the target expression is also parsed - (the skipped text and target expression are returned as a 2-element list). - - ignore - (default=C{None}) used to define grammars (typically quoted strings and - comments) that might contain false matches to the target expression - - failOn - (default=C{None}) define expressions that are not allowed to be - included in the skipped test; if found before the target expression is found, - the SkipTo is not a match - - Example:: - report = ''' - Outstanding Issues Report - 1 Jan 2000 - - # | Severity | Description | Days Open - -----+----------+-------------------------------------------+----------- - 101 | Critical | Intermittent system crash | 6 - 94 | Cosmetic | Spelling error on Login ('log|n') | 14 - 79 | Minor | System slow when running too many reports | 47 - ''' - integer = Word(nums) - SEP = Suppress('|') - # use SkipTo to simply match everything up until the next SEP - # - ignore quoted strings, so that a '|' character inside a quoted string does not match - # - parse action will call token.strip() for each matched token, i.e., the description body - string_data = SkipTo(SEP, ignore=quotedString) - string_data.setParseAction(tokenMap(str.strip)) - ticket_expr = (integer("issue_num") + SEP - + string_data("sev") + SEP - + string_data("desc") + SEP - + integer("days_open")) - - for tkt in ticket_expr.searchString(report): - print tkt.dump() - prints:: - ['101', 'Critical', 'Intermittent system crash', '6'] - - days_open: 6 - - desc: Intermittent system crash - - issue_num: 101 - - sev: Critical - ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14'] - - days_open: 14 - - desc: Spelling error on Login ('log|n') - - issue_num: 94 - - sev: Cosmetic - ['79', 'Minor', 'System slow when running too many reports', '47'] - - days_open: 47 - - desc: System slow when running too many reports - - issue_num: 79 - - sev: Minor - """ - def __init__( self, other, include=False, ignore=None, failOn=None ): - super( SkipTo, self ).__init__( other ) - self.ignoreExpr = ignore - self.mayReturnEmpty = True - self.mayIndexError = False - self.includeMatch = include - self.asList = False - if isinstance(failOn, basestring): - self.failOn = ParserElement._literalStringClass(failOn) - else: - self.failOn = failOn - self.errmsg = "No match found for "+_ustr(self.expr) - - def parseImpl( self, instring, loc, doActions=True ): - startloc = loc - instrlen = len(instring) - expr = self.expr - expr_parse = self.expr._parse - self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is not None else None - self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None - - tmploc = loc - while tmploc <= instrlen: - if self_failOn_canParseNext is not None: - # break if failOn expression matches - if self_failOn_canParseNext(instring, tmploc): - break - - if self_ignoreExpr_tryParse is not None: - # advance past ignore expressions - while 1: - try: - tmploc = self_ignoreExpr_tryParse(instring, tmploc) - except ParseBaseException: - break - - try: - expr_parse(instring, tmploc, doActions=False, callPreParse=False) - except (ParseException, IndexError): - # no match, advance loc in string - tmploc += 1 - else: - # matched skipto expr, done - break - - else: - # ran off the end of the input string without matching skipto expr, fail - raise ParseException(instring, loc, self.errmsg, self) - - # build up return values - loc = tmploc - skiptext = instring[startloc:loc] - skipresult = ParseResults(skiptext) - - if self.includeMatch: - loc, mat = expr_parse(instring,loc,doActions,callPreParse=False) - skipresult += mat - - return loc, skipresult - -class Forward(ParseElementEnhance): - """ - Forward declaration of an expression to be defined later - - used for recursive grammars, such as algebraic infix notation. - When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator. - - Note: take care when assigning to C{Forward} not to overlook precedence of operators. - Specifically, '|' has a lower precedence than '<<', so that:: - fwdExpr << a | b | c - will actually be evaluated as:: - (fwdExpr << a) | b | c - thereby leaving b and c out as parseable alternatives. It is recommended that you - explicitly group the values inserted into the C{Forward}:: - fwdExpr << (a | b | c) - Converting to use the '<<=' operator instead will avoid this problem. - - See L{ParseResults.pprint} for an example of a recursive parser created using - C{Forward}. - """ - def __init__( self, other=None ): - super(Forward,self).__init__( other, savelist=False ) - - def __lshift__( self, other ): - if isinstance( other, basestring ): - other = ParserElement._literalStringClass(other) - self.expr = other - self.strRepr = None - self.mayIndexError = self.expr.mayIndexError - self.mayReturnEmpty = self.expr.mayReturnEmpty - self.setWhitespaceChars( self.expr.whiteChars ) - self.skipWhitespace = self.expr.skipWhitespace - self.saveAsList = self.expr.saveAsList - self.ignoreExprs.extend(self.expr.ignoreExprs) - return self - - def __ilshift__(self, other): - return self << other - - def leaveWhitespace( self ): - self.skipWhitespace = False - return self - - def streamline( self ): - if not self.streamlined: - self.streamlined = True - if self.expr is not None: - self.expr.streamline() - return self - - def validate( self, validateTrace=[] ): - if self not in validateTrace: - tmp = validateTrace[:]+[self] - if self.expr is not None: - self.expr.validate(tmp) - self.checkRecursion([]) - - def __str__( self ): - if hasattr(self,"name"): - return self.name - return self.__class__.__name__ + ": ..." - - # stubbed out for now - creates awful memory and perf issues - self._revertClass = self.__class__ - self.__class__ = _ForwardNoRecurse - try: - if self.expr is not None: - retString = _ustr(self.expr) - else: - retString = "None" - finally: - self.__class__ = self._revertClass - return self.__class__.__name__ + ": " + retString - - def copy(self): - if self.expr is not None: - return super(Forward,self).copy() - else: - ret = Forward() - ret <<= self - return ret - -class _ForwardNoRecurse(Forward): - def __str__( self ): - return "..." - -class TokenConverter(ParseElementEnhance): - """ - Abstract subclass of C{ParseExpression}, for converting parsed results. - """ - def __init__( self, expr, savelist=False ): - super(TokenConverter,self).__init__( expr )#, savelist ) - self.saveAsList = False - -class Combine(TokenConverter): - """ - Converter to concatenate all matching tokens to a single string. - By default, the matching patterns must also be contiguous in the input string; - this can be disabled by specifying C{'adjacent=False'} in the constructor. - - Example:: - real = Word(nums) + '.' + Word(nums) - print(real.parseString('3.1416')) # -> ['3', '.', '1416'] - # will also erroneously match the following - print(real.parseString('3. 1416')) # -> ['3', '.', '1416'] - - real = Combine(Word(nums) + '.' + Word(nums)) - print(real.parseString('3.1416')) # -> ['3.1416'] - # no match when there are internal spaces - print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...) - """ - def __init__( self, expr, joinString="", adjacent=True ): - super(Combine,self).__init__( expr ) - # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself - if adjacent: - self.leaveWhitespace() - self.adjacent = adjacent - self.skipWhitespace = True - self.joinString = joinString - self.callPreparse = True - - def ignore( self, other ): - if self.adjacent: - ParserElement.ignore(self, other) - else: - super( Combine, self).ignore( other ) - return self - - def postParse( self, instring, loc, tokenlist ): - retToks = tokenlist.copy() - del retToks[:] - retToks += ParseResults([ "".join(tokenlist._asStringList(self.joinString)) ], modal=self.modalResults) - - if self.resultsName and retToks.haskeys(): - return [ retToks ] - else: - return retToks - -class Group(TokenConverter): - """ - Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions. - - Example:: - ident = Word(alphas) - num = Word(nums) - term = ident | num - func = ident + Optional(delimitedList(term)) - print(func.parseString("fn a,b,100")) # -> ['fn', 'a', 'b', '100'] - - func = ident + Group(Optional(delimitedList(term))) - print(func.parseString("fn a,b,100")) # -> ['fn', ['a', 'b', '100']] - """ - def __init__( self, expr ): - super(Group,self).__init__( expr ) - self.saveAsList = True - - def postParse( self, instring, loc, tokenlist ): - return [ tokenlist ] - -class Dict(TokenConverter): - """ - Converter to return a repetitive expression as a list, but also as a dictionary. - Each element can also be referenced using the first token in the expression as its key. - Useful for tabular report scraping when the first column can be used as a item key. - - Example:: - data_word = Word(alphas) - label = data_word + FollowedBy(':') - attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join)) - - text = "shape: SQUARE posn: upper left color: light blue texture: burlap" - attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) - - # print attributes as plain groups - print(OneOrMore(attr_expr).parseString(text).dump()) - - # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names - result = Dict(OneOrMore(Group(attr_expr))).parseString(text) - print(result.dump()) - - # access named fields as dict entries, or output as dict - print(result['shape']) - print(result.asDict()) - prints:: - ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap'] - - [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']] - - color: light blue - - posn: upper left - - shape: SQUARE - - texture: burlap - SQUARE - {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'} - See more examples at L{ParseResults} of accessing fields by results name. - """ - def __init__( self, expr ): - super(Dict,self).__init__( expr ) - self.saveAsList = True - - def postParse( self, instring, loc, tokenlist ): - for i,tok in enumerate(tokenlist): - if len(tok) == 0: - continue - ikey = tok[0] - if isinstance(ikey,int): - ikey = _ustr(tok[0]).strip() - if len(tok)==1: - tokenlist[ikey] = _ParseResultsWithOffset("",i) - elif len(tok)==2 and not isinstance(tok[1],ParseResults): - tokenlist[ikey] = _ParseResultsWithOffset(tok[1],i) - else: - dictvalue = tok.copy() #ParseResults(i) - del dictvalue[0] - if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.haskeys()): - tokenlist[ikey] = _ParseResultsWithOffset(dictvalue,i) - else: - tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0],i) - - if self.resultsName: - return [ tokenlist ] - else: - return tokenlist - - -class Suppress(TokenConverter): - """ - Converter for ignoring the results of a parsed expression. - - Example:: - source = "a, b, c,d" - wd = Word(alphas) - wd_list1 = wd + ZeroOrMore(',' + wd) - print(wd_list1.parseString(source)) - - # often, delimiters that are useful during parsing are just in the - # way afterward - use Suppress to keep them out of the parsed output - wd_list2 = wd + ZeroOrMore(Suppress(',') + wd) - print(wd_list2.parseString(source)) - prints:: - ['a', ',', 'b', ',', 'c', ',', 'd'] - ['a', 'b', 'c', 'd'] - (See also L{delimitedList}.) - """ - def postParse( self, instring, loc, tokenlist ): - return [] - - def suppress( self ): - return self - - -class OnlyOnce(object): - """ - Wrapper for parse actions, to ensure they are only called once. - """ - def __init__(self, methodCall): - self.callable = _trim_arity(methodCall) - self.called = False - def __call__(self,s,l,t): - if not self.called: - results = self.callable(s,l,t) - self.called = True - return results - raise ParseException(s,l,"") - def reset(self): - self.called = False - -def traceParseAction(f): - """ - Decorator for debugging parse actions. - - When the parse action is called, this decorator will print C{">> entering I{method-name}(line:I{current_source_line}, I{parse_location}, I{matched_tokens})".} - When the parse action completes, the decorator will print C{"<<"} followed by the returned value, or any exception that the parse action raised. - - Example:: - wd = Word(alphas) - - @traceParseAction - def remove_duplicate_chars(tokens): - return ''.join(sorted(set(''.join(tokens)))) - - wds = OneOrMore(wd).setParseAction(remove_duplicate_chars) - print(wds.parseString("slkdjs sld sldd sdlf sdljf")) - prints:: - >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {})) - <3: - thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc - sys.stderr.write( ">>entering %s(line: '%s', %d, %r)\n" % (thisFunc,line(l,s),l,t) ) - try: - ret = f(*paArgs) - except Exception as exc: - sys.stderr.write( "< ['aa', 'bb', 'cc'] - delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE'] - """ - dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..." - if combine: - return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName) - else: - return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName) - -def countedArray( expr, intExpr=None ): - """ - Helper to define a counted list of expressions. - This helper defines a pattern of the form:: - integer expr expr expr... - where the leading integer tells how many expr expressions follow. - The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed. - - If C{intExpr} is specified, it should be a pyparsing expression that produces an integer value. - - Example:: - countedArray(Word(alphas)).parseString('2 ab cd ef') # -> ['ab', 'cd'] - - # in this parser, the leading integer value is given in binary, - # '10' indicating that 2 values are in the array - binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2)) - countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef') # -> ['ab', 'cd'] - """ - arrayExpr = Forward() - def countFieldParseAction(s,l,t): - n = t[0] - arrayExpr << (n and Group(And([expr]*n)) or Group(empty)) - return [] - if intExpr is None: - intExpr = Word(nums).setParseAction(lambda t:int(t[0])) - else: - intExpr = intExpr.copy() - intExpr.setName("arrayLen") - intExpr.addParseAction(countFieldParseAction, callDuringTry=True) - return ( intExpr + arrayExpr ).setName('(len) ' + _ustr(expr) + '...') - -def _flatten(L): - ret = [] - for i in L: - if isinstance(i,list): - ret.extend(_flatten(i)) - else: - ret.append(i) - return ret - -def matchPreviousLiteral(expr): - """ - Helper to define an expression that is indirectly defined from - the tokens matched in a previous expression, that is, it looks - for a 'repeat' of a previous expression. For example:: - first = Word(nums) - second = matchPreviousLiteral(first) - matchExpr = first + ":" + second - will match C{"1:1"}, but not C{"1:2"}. Because this matches a - previous literal, will also match the leading C{"1:1"} in C{"1:10"}. - If this is not desired, use C{matchPreviousExpr}. - Do I{not} use with packrat parsing enabled. - """ - rep = Forward() - def copyTokenToRepeater(s,l,t): - if t: - if len(t) == 1: - rep << t[0] - else: - # flatten t tokens - tflat = _flatten(t.asList()) - rep << And(Literal(tt) for tt in tflat) - else: - rep << Empty() - expr.addParseAction(copyTokenToRepeater, callDuringTry=True) - rep.setName('(prev) ' + _ustr(expr)) - return rep - -def matchPreviousExpr(expr): - """ - Helper to define an expression that is indirectly defined from - the tokens matched in a previous expression, that is, it looks - for a 'repeat' of a previous expression. For example:: - first = Word(nums) - second = matchPreviousExpr(first) - matchExpr = first + ":" + second - will match C{"1:1"}, but not C{"1:2"}. Because this matches by - expressions, will I{not} match the leading C{"1:1"} in C{"1:10"}; - the expressions are evaluated first, and then compared, so - C{"1"} is compared with C{"10"}. - Do I{not} use with packrat parsing enabled. - """ - rep = Forward() - e2 = expr.copy() - rep <<= e2 - def copyTokenToRepeater(s,l,t): - matchTokens = _flatten(t.asList()) - def mustMatchTheseTokens(s,l,t): - theseTokens = _flatten(t.asList()) - if theseTokens != matchTokens: - raise ParseException("",0,"") - rep.setParseAction( mustMatchTheseTokens, callDuringTry=True ) - expr.addParseAction(copyTokenToRepeater, callDuringTry=True) - rep.setName('(prev) ' + _ustr(expr)) - return rep - -def _escapeRegexRangeChars(s): - #~ escape these chars: ^-] - for c in r"\^-]": - s = s.replace(c,_bslash+c) - s = s.replace("\n",r"\n") - s = s.replace("\t",r"\t") - return _ustr(s) - -def oneOf( strs, caseless=False, useRegex=True ): - """ - Helper to quickly define a set of alternative Literals, and makes sure to do - longest-first testing when there is a conflict, regardless of the input order, - but returns a C{L{MatchFirst}} for best performance. - - Parameters: - - strs - a string of space-delimited literals, or a collection of string literals - - caseless - (default=C{False}) - treat all literals as caseless - - useRegex - (default=C{True}) - as an optimization, will generate a Regex - object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or - if creating a C{Regex} raises an exception) - - Example:: - comp_oper = oneOf("< = > <= >= !=") - var = Word(alphas) - number = Word(nums) - term = var | number - comparison_expr = term + comp_oper + term - print(comparison_expr.searchString("B = 12 AA=23 B<=AA AA>12")) - prints:: - [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']] - """ - if caseless: - isequal = ( lambda a,b: a.upper() == b.upper() ) - masks = ( lambda a,b: b.upper().startswith(a.upper()) ) - parseElementClass = CaselessLiteral - else: - isequal = ( lambda a,b: a == b ) - masks = ( lambda a,b: b.startswith(a) ) - parseElementClass = Literal - - symbols = [] - if isinstance(strs,basestring): - symbols = strs.split() - elif isinstance(strs, Iterable): - symbols = list(strs) - else: - warnings.warn("Invalid argument to oneOf, expected string or iterable", - SyntaxWarning, stacklevel=2) - if not symbols: - return NoMatch() - - i = 0 - while i < len(symbols)-1: - cur = symbols[i] - for j,other in enumerate(symbols[i+1:]): - if ( isequal(other, cur) ): - del symbols[i+j+1] - break - elif ( masks(cur, other) ): - del symbols[i+j+1] - symbols.insert(i,other) - cur = other - break - else: - i += 1 - - if not caseless and useRegex: - #~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] )) - try: - if len(symbols)==len("".join(symbols)): - return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols)) - else: - return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols)) - except Exception: - warnings.warn("Exception creating Regex for oneOf, building MatchFirst", - SyntaxWarning, stacklevel=2) - - - # last resort, just use MatchFirst - return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols)) - -def dictOf( key, value ): - """ - Helper to easily and clearly define a dictionary by specifying the respective patterns - for the key and value. Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens - in the proper order. The key pattern can include delimiting markers or punctuation, - as long as they are suppressed, thereby leaving the significant key text. The value - pattern can include named results, so that the C{Dict} results can include named token - fields. - - Example:: - text = "shape: SQUARE posn: upper left color: light blue texture: burlap" - attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) - print(OneOrMore(attr_expr).parseString(text).dump()) - - attr_label = label - attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join) - - # similar to Dict, but simpler call format - result = dictOf(attr_label, attr_value).parseString(text) - print(result.dump()) - print(result['shape']) - print(result.shape) # object attribute access works too - print(result.asDict()) - prints:: - [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']] - - color: light blue - - posn: upper left - - shape: SQUARE - - texture: burlap - SQUARE - SQUARE - {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'} - """ - return Dict( ZeroOrMore( Group ( key + value ) ) ) - -def originalTextFor(expr, asString=True): - """ - Helper to return the original, untokenized text for a given expression. Useful to - restore the parsed fields of an HTML start tag into the raw tag text itself, or to - revert separate tokens with intervening whitespace back to the original matching - input text. By default, returns astring containing the original parsed text. - - If the optional C{asString} argument is passed as C{False}, then the return value is a - C{L{ParseResults}} containing any results names that were originally matched, and a - single token containing the original matched text from the input string. So if - the expression passed to C{L{originalTextFor}} contains expressions with defined - results names, you must set C{asString} to C{False} if you want to preserve those - results name values. - - Example:: - src = "this is test bold text normal text " - for tag in ("b","i"): - opener,closer = makeHTMLTags(tag) - patt = originalTextFor(opener + SkipTo(closer) + closer) - print(patt.searchString(src)[0]) - prints:: - [' bold text '] - ['text'] - """ - locMarker = Empty().setParseAction(lambda s,loc,t: loc) - endlocMarker = locMarker.copy() - endlocMarker.callPreparse = False - matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end") - if asString: - extractText = lambda s,l,t: s[t._original_start:t._original_end] - else: - def extractText(s,l,t): - t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]] - matchExpr.setParseAction(extractText) - matchExpr.ignoreExprs = expr.ignoreExprs - return matchExpr - -def ungroup(expr): - """ - Helper to undo pyparsing's default grouping of And expressions, even - if all but one are non-empty. - """ - return TokenConverter(expr).setParseAction(lambda t:t[0]) - -def locatedExpr(expr): - """ - Helper to decorate a returned token with its starting and ending locations in the input string. - This helper adds the following results names: - - locn_start = location where matched expression begins - - locn_end = location where matched expression ends - - value = the actual parsed results - - Be careful if the input text contains C{} characters, you may want to call - C{L{ParserElement.parseWithTabs}} - - Example:: - wd = Word(alphas) - for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"): - print(match) - prints:: - [[0, 'ljsdf', 5]] - [[8, 'lksdjjf', 15]] - [[18, 'lkkjj', 23]] - """ - locator = Empty().setParseAction(lambda s,l,t: l) - return Group(locator("locn_start") + expr("value") + locator.copy().leaveWhitespace()("locn_end")) - - -# convenience constants for positional expressions -empty = Empty().setName("empty") -lineStart = LineStart().setName("lineStart") -lineEnd = LineEnd().setName("lineEnd") -stringStart = StringStart().setName("stringStart") -stringEnd = StringEnd().setName("stringEnd") - -_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1]) -_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lstrip(r'\0x'),16))) -_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8))) -_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r'\]', exact=1) -_charRange = Group(_singleChar + Suppress("-") + _singleChar) -_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]" - -def srange(s): - r""" - Helper to easily define string ranges for use in Word construction. Borrows - syntax from regexp '[]' string range definitions:: - srange("[0-9]") -> "0123456789" - srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz" - srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_" - The input string must be enclosed in []'s, and the returned string is the expanded - character set joined into a single string. - The values enclosed in the []'s may be: - - a single character - - an escaped character with a leading backslash (such as C{\-} or C{\]}) - - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character) - (C{\0x##} is also supported for backwards compatibility) - - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character) - - a range of any of the above, separated by a dash (C{'a-z'}, etc.) - - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.) - """ - _expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1)) - try: - return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body) - except Exception: - return "" - -def matchOnlyAtCol(n): - """ - Helper method for defining parse actions that require matching at a specific - column in the input text. - """ - def verifyCol(strg,locn,toks): - if col(locn,strg) != n: - raise ParseException(strg,locn,"matched token not at column %d" % n) - return verifyCol - -def replaceWith(replStr): - """ - Helper method for common parse actions that simply return a literal value. Especially - useful when used with C{L{transformString}()}. - - Example:: - num = Word(nums).setParseAction(lambda toks: int(toks[0])) - na = oneOf("N/A NA").setParseAction(replaceWith(math.nan)) - term = na | num - - OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234] - """ - return lambda s,l,t: [replStr] - -def removeQuotes(s,l,t): - """ - Helper parse action for removing quotation marks from parsed quoted strings. - - Example:: - # by default, quotation marks are included in parsed results - quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"] - - # use removeQuotes to strip quotation marks from parsed results - quotedString.setParseAction(removeQuotes) - quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"] - """ - return t[0][1:-1] - -def tokenMap(func, *args): - """ - Helper to define a parse action by mapping a function to all elements of a ParseResults list.If any additional - args are passed, they are forwarded to the given function as additional arguments after - the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which will convert the - parsed data to an integer using base 16. - - Example (compare the last to example in L{ParserElement.transformString}:: - hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16)) - hex_ints.runTests(''' - 00 11 22 aa FF 0a 0d 1a - ''') - - upperword = Word(alphas).setParseAction(tokenMap(str.upper)) - OneOrMore(upperword).runTests(''' - my kingdom for a horse - ''') - - wd = Word(alphas).setParseAction(tokenMap(str.title)) - OneOrMore(wd).setParseAction(' '.join).runTests(''' - now is the winter of our discontent made glorious summer by this sun of york - ''') - prints:: - 00 11 22 aa FF 0a 0d 1a - [0, 17, 34, 170, 255, 10, 13, 26] - - my kingdom for a horse - ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE'] - - now is the winter of our discontent made glorious summer by this sun of york - ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York'] - """ - def pa(s,l,t): - return [func(tokn, *args) for tokn in t] - - try: - func_name = getattr(func, '__name__', - getattr(func, '__class__').__name__) - except Exception: - func_name = str(func) - pa.__name__ = func_name - - return pa - -upcaseTokens = tokenMap(lambda t: _ustr(t).upper()) -"""(Deprecated) Helper parse action to convert tokens to upper case. Deprecated in favor of L{pyparsing_common.upcaseTokens}""" - -downcaseTokens = tokenMap(lambda t: _ustr(t).lower()) -"""(Deprecated) Helper parse action to convert tokens to lower case. Deprecated in favor of L{pyparsing_common.downcaseTokens}""" - -def _makeTags(tagStr, xml): - """Internal helper to construct opening and closing tag expressions, given a tag name""" - if isinstance(tagStr,basestring): - resname = tagStr - tagStr = Keyword(tagStr, caseless=not xml) - else: - resname = tagStr.name - - tagAttrName = Word(alphas,alphanums+"_-:") - if (xml): - tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes ) - openTag = Suppress("<") + tagStr("tag") + \ - Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \ - Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">") - else: - printablesLessRAbrack = "".join(c for c in printables if c not in ">") - tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack) - openTag = Suppress("<") + tagStr("tag") + \ - Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \ - Optional( Suppress("=") + tagAttrValue ) ))) + \ - Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">") - closeTag = Combine(_L("") - - openTag = openTag.setResultsName("start"+"".join(resname.replace(":"," ").title().split())).setName("<%s>" % resname) - closeTag = closeTag.setResultsName("end"+"".join(resname.replace(":"," ").title().split())).setName("" % resname) - openTag.tag = resname - closeTag.tag = resname - return openTag, closeTag - -def makeHTMLTags(tagStr): - """ - Helper to construct opening and closing tag expressions for HTML, given a tag name. Matches - tags in either upper or lower case, attributes with namespaces and with quoted or unquoted values. - - Example:: - text = 'More info at the
pyparsing wiki page' - # makeHTMLTags returns pyparsing expressions for the opening and closing tags as a 2-tuple - a,a_end = makeHTMLTags("A") - link_expr = a + SkipTo(a_end)("link_text") + a_end - - for link in link_expr.searchString(text): - # attributes in the tag (like "href" shown here) are also accessible as named results - print(link.link_text, '->', link.href) - prints:: - pyparsing -> http://pyparsing.wikispaces.com - """ - return _makeTags( tagStr, False ) - -def makeXMLTags(tagStr): - """ - Helper to construct opening and closing tag expressions for XML, given a tag name. Matches - tags only in the given upper/lower case. - - Example: similar to L{makeHTMLTags} - """ - return _makeTags( tagStr, True ) - -def withAttribute(*args,**attrDict): - """ - Helper to create a validating parse action to be used with start tags created - with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag - with a required attribute value, to avoid false matches on common tags such as - C{} or C{
}. - - Call C{withAttribute} with a series of attribute names and values. Specify the list - of filter attributes names and values as: - - keyword arguments, as in C{(align="right")}, or - - as an explicit dict with C{**} operator, when an attribute name is also a Python - reserved word, as in C{**{"class":"Customer", "align":"right"}} - - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") ) - For attribute names with a namespace prefix, you must use the second form. Attribute - names are matched insensitive to upper/lower case. - - If just testing for C{class} (with or without a namespace), use C{L{withClass}}. - - To verify that the attribute exists, but without specifying a value, pass - C{withAttribute.ANY_VALUE} as the value. - - Example:: - html = ''' -
- Some text -
1 4 0 1 0
-
1,3 2,3 1,1
-
this has no type
-
- - ''' - div,div_end = makeHTMLTags("div") - - # only match div tag having a type attribute with value "grid" - div_grid = div().setParseAction(withAttribute(type="grid")) - grid_expr = div_grid + SkipTo(div | div_end)("body") - for grid_header in grid_expr.searchString(html): - print(grid_header.body) - - # construct a match with any div tag having a type attribute, regardless of the value - div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE)) - div_expr = div_any_type + SkipTo(div | div_end)("body") - for div_header in div_expr.searchString(html): - print(div_header.body) - prints:: - 1 4 0 1 0 - - 1 4 0 1 0 - 1,3 2,3 1,1 - """ - if args: - attrs = args[:] - else: - attrs = attrDict.items() - attrs = [(k,v) for k,v in attrs] - def pa(s,l,tokens): - for attrName,attrValue in attrs: - if attrName not in tokens: - raise ParseException(s,l,"no matching attribute " + attrName) - if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue: - raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" % - (attrName, tokens[attrName], attrValue)) - return pa -withAttribute.ANY_VALUE = object() - -def withClass(classname, namespace=''): - """ - Simplified version of C{L{withAttribute}} when matching on a div class - made - difficult because C{class} is a reserved word in Python. - - Example:: - html = ''' -
- Some text -
1 4 0 1 0
-
1,3 2,3 1,1
-
this <div> has no class
-
- - ''' - div,div_end = makeHTMLTags("div") - div_grid = div().setParseAction(withClass("grid")) - - grid_expr = div_grid + SkipTo(div | div_end)("body") - for grid_header in grid_expr.searchString(html): - print(grid_header.body) - - div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE)) - div_expr = div_any_type + SkipTo(div | div_end)("body") - for div_header in div_expr.searchString(html): - print(div_header.body) - prints:: - 1 4 0 1 0 - - 1 4 0 1 0 - 1,3 2,3 1,1 - """ - classattr = "%s:class" % namespace if namespace else "class" - return withAttribute(**{classattr : classname}) - -opAssoc = _Constants() -opAssoc.LEFT = object() -opAssoc.RIGHT = object() - -def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ): - """ - Helper method for constructing grammars of expressions made up of - operators working in a precedence hierarchy. Operators may be unary or - binary, left- or right-associative. Parse actions can also be attached - to operator expressions. The generated parser will also recognize the use - of parentheses to override operator precedences (see example below). - - Note: if you define a deep operator list, you may see performance issues - when using infixNotation. See L{ParserElement.enablePackrat} for a - mechanism to potentially improve your parser performance. - - Parameters: - - baseExpr - expression representing the most basic element for the nested - - opList - list of tuples, one for each operator precedence level in the - expression grammar; each tuple is of the form - (opExpr, numTerms, rightLeftAssoc, parseAction), where: - - opExpr is the pyparsing expression for the operator; - may also be a string, which will be converted to a Literal; - if numTerms is 3, opExpr is a tuple of two expressions, for the - two operators separating the 3 terms - - numTerms is the number of terms for this operator (must - be 1, 2, or 3) - - rightLeftAssoc is the indicator whether the operator is - right or left associative, using the pyparsing-defined - constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}. - - parseAction is the parse action to be associated with - expressions matching this operator expression (the - parse action tuple member may be omitted); if the parse action - is passed a tuple or list of functions, this is equivalent to - calling C{setParseAction(*fn)} (L{ParserElement.setParseAction}) - - lpar - expression for matching left-parentheses (default=C{Suppress('(')}) - - rpar - expression for matching right-parentheses (default=C{Suppress(')')}) - - Example:: - # simple example of four-function arithmetic with ints and variable names - integer = pyparsing_common.signed_integer - varname = pyparsing_common.identifier - - arith_expr = infixNotation(integer | varname, - [ - ('-', 1, opAssoc.RIGHT), - (oneOf('* /'), 2, opAssoc.LEFT), - (oneOf('+ -'), 2, opAssoc.LEFT), - ]) - - arith_expr.runTests(''' - 5+3*6 - (5+3)*6 - -2--11 - ''', fullDump=False) - prints:: - 5+3*6 - [[5, '+', [3, '*', 6]]] - - (5+3)*6 - [[[5, '+', 3], '*', 6]] - - -2--11 - [[['-', 2], '-', ['-', 11]]] - """ - ret = Forward() - lastExpr = baseExpr | ( lpar + ret + rpar ) - for i,operDef in enumerate(opList): - opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4] - termName = "%s term" % opExpr if arity < 3 else "%s%s term" % opExpr - if arity == 3: - if opExpr is None or len(opExpr) != 2: - raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions") - opExpr1, opExpr2 = opExpr - thisExpr = Forward().setName(termName) - if rightLeftAssoc == opAssoc.LEFT: - if arity == 1: - matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) ) - elif arity == 2: - if opExpr is not None: - matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) ) - else: - matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) ) - elif arity == 3: - matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \ - Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr ) - else: - raise ValueError("operator must be unary (1), binary (2), or ternary (3)") - elif rightLeftAssoc == opAssoc.RIGHT: - if arity == 1: - # try to avoid LR with this extra test - if not isinstance(opExpr, Optional): - opExpr = Optional(opExpr) - matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr ) - elif arity == 2: - if opExpr is not None: - matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) ) - else: - matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) ) - elif arity == 3: - matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \ - Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr ) - else: - raise ValueError("operator must be unary (1), binary (2), or ternary (3)") - else: - raise ValueError("operator must indicate right or left associativity") - if pa: - if isinstance(pa, (tuple, list)): - matchExpr.setParseAction(*pa) - else: - matchExpr.setParseAction(pa) - thisExpr <<= ( matchExpr.setName(termName) | lastExpr ) - lastExpr = thisExpr - ret <<= lastExpr - return ret - -operatorPrecedence = infixNotation -"""(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release.""" - -dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"').setName("string enclosed in double quotes") -sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("string enclosed in single quotes") -quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"'| - Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("quotedString using single or double quotes") -unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal") - -def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()): - """ - Helper method for defining nested lists enclosed in opening and closing - delimiters ("(" and ")" are the default). - - Parameters: - - opener - opening character for a nested list (default=C{"("}); can also be a pyparsing expression - - closer - closing character for a nested list (default=C{")"}); can also be a pyparsing expression - - content - expression for items within the nested lists (default=C{None}) - - ignoreExpr - expression for ignoring opening and closing delimiters (default=C{quotedString}) - - If an expression is not provided for the content argument, the nested - expression will capture all whitespace-delimited content between delimiters - as a list of separate values. - - Use the C{ignoreExpr} argument to define expressions that may contain - opening or closing characters that should not be treated as opening - or closing characters for nesting, such as quotedString or a comment - expression. Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}. - The default is L{quotedString}, but if no expressions are to be ignored, - then pass C{None} for this argument. - - Example:: - data_type = oneOf("void int short long char float double") - decl_data_type = Combine(data_type + Optional(Word('*'))) - ident = Word(alphas+'_', alphanums+'_') - number = pyparsing_common.number - arg = Group(decl_data_type + ident) - LPAR,RPAR = map(Suppress, "()") - - code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment)) - - c_function = (decl_data_type("type") - + ident("name") - + LPAR + Optional(delimitedList(arg), [])("args") + RPAR - + code_body("body")) - c_function.ignore(cStyleComment) - - source_code = ''' - int is_odd(int x) { - return (x%2); - } - - int dec_to_hex(char hchar) { - if (hchar >= '0' && hchar <= '9') { - return (ord(hchar)-ord('0')); - } else { - return (10+ord(hchar)-ord('A')); - } - } - ''' - for func in c_function.searchString(source_code): - print("%(name)s (%(type)s) args: %(args)s" % func) - - prints:: - is_odd (int) args: [['int', 'x']] - dec_to_hex (int) args: [['char', 'hchar']] - """ - if opener == closer: - raise ValueError("opening and closing strings cannot be the same") - if content is None: - if isinstance(opener,basestring) and isinstance(closer,basestring): - if len(opener) == 1 and len(closer)==1: - if ignoreExpr is not None: - content = (Combine(OneOrMore(~ignoreExpr + - CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1)) - ).setParseAction(lambda t:t[0].strip())) - else: - content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS - ).setParseAction(lambda t:t[0].strip())) - else: - if ignoreExpr is not None: - content = (Combine(OneOrMore(~ignoreExpr + - ~Literal(opener) + ~Literal(closer) + - CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1)) - ).setParseAction(lambda t:t[0].strip())) - else: - content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) + - CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1)) - ).setParseAction(lambda t:t[0].strip())) - else: - raise ValueError("opening and closing arguments must be strings if no content expression is given") - ret = Forward() - if ignoreExpr is not None: - ret <<= Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) ) - else: - ret <<= Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) ) - ret.setName('nested %s%s expression' % (opener,closer)) - return ret - -def indentedBlock(blockStatementExpr, indentStack, indent=True): - """ - Helper method for defining space-delimited indentation blocks, such as - those used to define block statements in Python source code. - - Parameters: - - blockStatementExpr - expression defining syntax of statement that - is repeated within the indented block - - indentStack - list created by caller to manage indentation stack - (multiple statementWithIndentedBlock expressions within a single grammar - should share a common indentStack) - - indent - boolean indicating whether block must be indented beyond the - the current level; set to False for block of left-most statements - (default=C{True}) - - A valid block must contain at least one C{blockStatement}. - - Example:: - data = ''' - def A(z): - A1 - B = 100 - G = A2 - A2 - A3 - B - def BB(a,b,c): - BB1 - def BBA(): - bba1 - bba2 - bba3 - C - D - def spam(x,y): - def eggs(z): - pass - ''' - - - indentStack = [1] - stmt = Forward() - - identifier = Word(alphas, alphanums) - funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":") - func_body = indentedBlock(stmt, indentStack) - funcDef = Group( funcDecl + func_body ) - - rvalue = Forward() - funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")") - rvalue << (funcCall | identifier | Word(nums)) - assignment = Group(identifier + "=" + rvalue) - stmt << ( funcDef | assignment | identifier ) - - module_body = OneOrMore(stmt) - - parseTree = module_body.parseString(data) - parseTree.pprint() - prints:: - [['def', - 'A', - ['(', 'z', ')'], - ':', - [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]], - 'B', - ['def', - 'BB', - ['(', 'a', 'b', 'c', ')'], - ':', - [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]], - 'C', - 'D', - ['def', - 'spam', - ['(', 'x', 'y', ')'], - ':', - [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]] - """ - def checkPeerIndent(s,l,t): - if l >= len(s): return - curCol = col(l,s) - if curCol != indentStack[-1]: - if curCol > indentStack[-1]: - raise ParseFatalException(s,l,"illegal nesting") - raise ParseException(s,l,"not a peer entry") - - def checkSubIndent(s,l,t): - curCol = col(l,s) - if curCol > indentStack[-1]: - indentStack.append( curCol ) - else: - raise ParseException(s,l,"not a subentry") - - def checkUnindent(s,l,t): - if l >= len(s): return - curCol = col(l,s) - if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]): - raise ParseException(s,l,"not an unindent") - indentStack.pop() - - NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress()) - INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT') - PEER = Empty().setParseAction(checkPeerIndent).setName('') - UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT') - if indent: - smExpr = Group( Optional(NL) + - #~ FollowedBy(blockStatementExpr) + - INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT) - else: - smExpr = Group( Optional(NL) + - (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) ) - blockStatementExpr.ignore(_bslash + LineEnd()) - return smExpr.setName('indented block') - -alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]") -punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]") - -anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag')) -_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\'')) -commonHTMLEntity = Regex('&(?P' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity") -def replaceHTMLEntity(t): - """Helper parser action to replace common HTML entities with their special characters""" - return _htmlEntityMap.get(t.entity) - -# it's easy to get these comment structures wrong - they're very common, so may as well make them available -cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment") -"Comment of the form C{/* ... */}" - -htmlComment = Regex(r"").setName("HTML comment") -"Comment of the form C{}" - -restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line") -dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment") -"Comment of the form C{// ... (to end of line)}" - -cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'| dblSlashComment).setName("C++ style comment") -"Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}" - -javaStyleComment = cppStyleComment -"Same as C{L{cppStyleComment}}" - -pythonStyleComment = Regex(r"#.*").setName("Python style comment") -"Comment of the form C{# ... (to end of line)}" - -_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') + - Optional( Word(" \t") + - ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem") -commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList") -"""(Deprecated) Predefined expression of 1 or more printable words or quoted strings, separated by commas. - This expression is deprecated in favor of L{pyparsing_common.comma_separated_list}.""" - -# some other useful expressions - using lower-case class name since we are really using this as a namespace -class pyparsing_common: - """ - Here are some common low-level expressions that may be useful in jump-starting parser development: - - numeric forms (L{integers}, L{reals}, L{scientific notation}) - - common L{programming identifiers} - - network addresses (L{MAC}, L{IPv4}, L{IPv6}) - - ISO8601 L{dates} and L{datetime} - - L{UUID} - - L{comma-separated list} - Parse actions: - - C{L{convertToInteger}} - - C{L{convertToFloat}} - - C{L{convertToDate}} - - C{L{convertToDatetime}} - - C{L{stripHTMLTags}} - - C{L{upcaseTokens}} - - C{L{downcaseTokens}} - - Example:: - pyparsing_common.number.runTests(''' - # any int or real number, returned as the appropriate type - 100 - -100 - +100 - 3.14159 - 6.02e23 - 1e-12 - ''') - - pyparsing_common.fnumber.runTests(''' - # any int or real number, returned as float - 100 - -100 - +100 - 3.14159 - 6.02e23 - 1e-12 - ''') - - pyparsing_common.hex_integer.runTests(''' - # hex numbers - 100 - FF - ''') - - pyparsing_common.fraction.runTests(''' - # fractions - 1/2 - -3/4 - ''') - - pyparsing_common.mixed_integer.runTests(''' - # mixed fractions - 1 - 1/2 - -3/4 - 1-3/4 - ''') - - import uuid - pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID)) - pyparsing_common.uuid.runTests(''' - # uuid - 12345678-1234-5678-1234-567812345678 - ''') - prints:: - # any int or real number, returned as the appropriate type - 100 - [100] - - -100 - [-100] - - +100 - [100] - - 3.14159 - [3.14159] - - 6.02e23 - [6.02e+23] - - 1e-12 - [1e-12] - - # any int or real number, returned as float - 100 - [100.0] - - -100 - [-100.0] - - +100 - [100.0] - - 3.14159 - [3.14159] - - 6.02e23 - [6.02e+23] - - 1e-12 - [1e-12] - - # hex numbers - 100 - [256] - - FF - [255] - - # fractions - 1/2 - [0.5] - - -3/4 - [-0.75] - - # mixed fractions - 1 - [1] - - 1/2 - [0.5] - - -3/4 - [-0.75] - - 1-3/4 - [1.75] - - # uuid - 12345678-1234-5678-1234-567812345678 - [UUID('12345678-1234-5678-1234-567812345678')] - """ - - convertToInteger = tokenMap(int) - """ - Parse action for converting parsed integers to Python int - """ - - convertToFloat = tokenMap(float) - """ - Parse action for converting parsed numbers to Python float - """ - - integer = Word(nums).setName("integer").setParseAction(convertToInteger) - """expression that parses an unsigned integer, returns an int""" - - hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16)) - """expression that parses a hexadecimal integer, returns an int""" - - signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger) - """expression that parses an integer with optional leading sign, returns an int""" - - fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction") - """fractional expression of an integer divided by an integer, returns a float""" - fraction.addParseAction(lambda t: t[0]/t[-1]) - - mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction") - """mixed integer of the form 'integer - fraction', with optional leading integer, returns float""" - mixed_integer.addParseAction(sum) - - real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat) - """expression that parses a floating point number and returns a float""" - - sci_real = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat) - """expression that parses a floating point number with optional scientific notation and returns a float""" - - # streamlining this expression makes the docs nicer-looking - number = (sci_real | real | signed_integer).streamline() - """any numeric expression, returns the corresponding Python type""" - - fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat) - """any int or real number, returned as float""" - - identifier = Word(alphas+'_', alphanums+'_').setName("identifier") - """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')""" - - ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address") - "IPv4 address (C{0.0.0.0 - 255.255.255.255})" - - _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer") - _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address") - _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address") - _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8) - _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address") - ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address") - "IPv6 address (long, short, or mixed form)" - - mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address") - "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)" - - @staticmethod - def convertToDate(fmt="%Y-%m-%d"): - """ - Helper to create a parse action for converting parsed date string to Python datetime.date - - Params - - - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"}) - - Example:: - date_expr = pyparsing_common.iso8601_date.copy() - date_expr.setParseAction(pyparsing_common.convertToDate()) - print(date_expr.parseString("1999-12-31")) - prints:: - [datetime.date(1999, 12, 31)] - """ - def cvt_fn(s,l,t): - try: - return datetime.strptime(t[0], fmt).date() - except ValueError as ve: - raise ParseException(s, l, str(ve)) - return cvt_fn - - @staticmethod - def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"): - """ - Helper to create a parse action for converting parsed datetime string to Python datetime.datetime - - Params - - - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"}) - - Example:: - dt_expr = pyparsing_common.iso8601_datetime.copy() - dt_expr.setParseAction(pyparsing_common.convertToDatetime()) - print(dt_expr.parseString("1999-12-31T23:59:59.999")) - prints:: - [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)] - """ - def cvt_fn(s,l,t): - try: - return datetime.strptime(t[0], fmt) - except ValueError as ve: - raise ParseException(s, l, str(ve)) - return cvt_fn - - iso8601_date = Regex(r'(?P\d{4})(?:-(?P\d\d)(?:-(?P\d\d))?)?').setName("ISO8601 date") - "ISO8601 date (C{yyyy-mm-dd})" - - iso8601_datetime = Regex(r'(?P\d{4})-(?P\d\d)-(?P\d\d)[T ](?P\d\d):(?P\d\d)(:(?P\d\d(\.\d*)?)?)?(?PZ|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime") - "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}" - - uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID") - "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})" - - _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress() - @staticmethod - def stripHTMLTags(s, l, tokens): - """ - Parse action to remove HTML tags from web page HTML source - - Example:: - # strip HTML links from normal text - text = 'More info at the
pyparsing wiki page' - td,td_end = makeHTMLTags("TD") - table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end - - print(table_text.parseString(text).body) # -> 'More info at the pyparsing wiki page' - """ - return pyparsing_common._html_stripper.transformString(tokens[0]) - - _commasepitem = Combine(OneOrMore(~Literal(",") + ~LineEnd() + Word(printables, excludeChars=',') - + Optional( White(" \t") ) ) ).streamline().setName("commaItem") - comma_separated_list = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("comma separated list") - """Predefined expression of 1 or more printable words or quoted strings, separated by commas.""" - - upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper())) - """Parse action to convert tokens to upper case.""" - - downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower())) - """Parse action to convert tokens to lower case.""" - - -if __name__ == "__main__": - - selectToken = CaselessLiteral("select") - fromToken = CaselessLiteral("from") - - ident = Word(alphas, alphanums + "_$") - - columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens) - columnNameList = Group(delimitedList(columnName)).setName("columns") - columnSpec = ('*' | columnNameList) - - tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens) - tableNameList = Group(delimitedList(tableName)).setName("tables") - - simpleSQL = selectToken("command") + columnSpec("columns") + fromToken + tableNameList("tables") - - # demo runTests method, including embedded comments in test string - simpleSQL.runTests(""" - # '*' as column list and dotted table name - select * from SYS.XYZZY - - # caseless match on "SELECT", and casts back to "select" - SELECT * from XYZZY, ABC - - # list of column names, and mixed case SELECT keyword - Select AA,BB,CC from Sys.dual - - # multiple tables - Select A, B, C from Sys.dual, Table2 - - # invalid SELECT keyword - should fail - Xelect A, B, C from Sys.dual - - # incomplete command - should fail - Select - - # invalid column name - should fail - Select ^^^ frox Sys.dual - - """) - - pyparsing_common.number.runTests(""" - 100 - -100 - +100 - 3.14159 - 6.02e23 - 1e-12 - """) - - # any int or real number, returned as float - pyparsing_common.fnumber.runTests(""" - 100 - -100 - +100 - 3.14159 - 6.02e23 - 1e-12 - """) - - pyparsing_common.hex_integer.runTests(""" - 100 - FF - """) - - import uuid - pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID)) - pyparsing_common.uuid.runTests(""" - 12345678-1234-5678-1234-567812345678 - """) diff --git a/libs/common/setuptools/_vendor/pyparsing/__init__.py b/libs/common/setuptools/_vendor/pyparsing/__init__.py new file mode 100644 index 00000000..7802ff15 --- /dev/null +++ b/libs/common/setuptools/_vendor/pyparsing/__init__.py @@ -0,0 +1,331 @@ +# module pyparsing.py +# +# Copyright (c) 2003-2022 Paul T. McGuire +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# + +__doc__ = """ +pyparsing module - Classes and methods to define and execute parsing grammars +============================================================================= + +The pyparsing module is an alternative approach to creating and +executing simple grammars, vs. the traditional lex/yacc approach, or the +use of regular expressions. With pyparsing, you don't need to learn +a new syntax for defining grammars or matching expressions - the parsing +module provides a library of classes that you use to construct the +grammar directly in Python. + +Here is a program to parse "Hello, World!" (or any greeting of the form +``", !"``), built up using :class:`Word`, +:class:`Literal`, and :class:`And` elements +(the :meth:`'+'` operators create :class:`And` expressions, +and the strings are auto-converted to :class:`Literal` expressions):: + + from pyparsing import Word, alphas + + # define grammar of a greeting + greet = Word(alphas) + "," + Word(alphas) + "!" + + hello = "Hello, World!" + print(hello, "->", greet.parse_string(hello)) + +The program outputs the following:: + + Hello, World! -> ['Hello', ',', 'World', '!'] + +The Python representation of the grammar is quite readable, owing to the +self-explanatory class names, and the use of :class:`'+'`, +:class:`'|'`, :class:`'^'` and :class:`'&'` operators. + +The :class:`ParseResults` object returned from +:class:`ParserElement.parseString` can be +accessed as a nested list, a dictionary, or an object with named +attributes. + +The pyparsing module handles some of the problems that are typically +vexing when writing text parsers: + + - extra or missing whitespace (the above program will also handle + "Hello,World!", "Hello , World !", etc.) + - quoted strings + - embedded comments + + +Getting Started - +----------------- +Visit the classes :class:`ParserElement` and :class:`ParseResults` to +see the base classes that most other pyparsing +classes inherit from. Use the docstrings for examples of how to: + + - construct literal match expressions from :class:`Literal` and + :class:`CaselessLiteral` classes + - construct character word-group expressions using the :class:`Word` + class + - see how to create repetitive expressions using :class:`ZeroOrMore` + and :class:`OneOrMore` classes + - use :class:`'+'`, :class:`'|'`, :class:`'^'`, + and :class:`'&'` operators to combine simple expressions into + more complex ones + - associate names with your parsed results using + :class:`ParserElement.setResultsName` + - access the parsed data, which is returned as a :class:`ParseResults` + object + - find some helpful expression short-cuts like :class:`delimitedList` + and :class:`oneOf` + - find more useful common expressions in the :class:`pyparsing_common` + namespace class +""" +from typing import NamedTuple + + +class version_info(NamedTuple): + major: int + minor: int + micro: int + releaselevel: str + serial: int + + @property + def __version__(self): + return ( + "{}.{}.{}".format(self.major, self.minor, self.micro) + + ( + "{}{}{}".format( + "r" if self.releaselevel[0] == "c" else "", + self.releaselevel[0], + self.serial, + ), + "", + )[self.releaselevel == "final"] + ) + + def __str__(self): + return "{} {} / {}".format(__name__, self.__version__, __version_time__) + + def __repr__(self): + return "{}.{}({})".format( + __name__, + type(self).__name__, + ", ".join("{}={!r}".format(*nv) for nv in zip(self._fields, self)), + ) + + +__version_info__ = version_info(3, 0, 9, "final", 0) +__version_time__ = "05 May 2022 07:02 UTC" +__version__ = __version_info__.__version__ +__versionTime__ = __version_time__ +__author__ = "Paul McGuire " + +from .util import * +from .exceptions import * +from .actions import * +from .core import __diag__, __compat__ +from .results import * +from .core import * +from .core import _builtin_exprs as core_builtin_exprs +from .helpers import * +from .helpers import _builtin_exprs as helper_builtin_exprs + +from .unicode import unicode_set, UnicodeRangeList, pyparsing_unicode as unicode +from .testing import pyparsing_test as testing +from .common import ( + pyparsing_common as common, + _builtin_exprs as common_builtin_exprs, +) + +# define backward compat synonyms +if "pyparsing_unicode" not in globals(): + pyparsing_unicode = unicode +if "pyparsing_common" not in globals(): + pyparsing_common = common +if "pyparsing_test" not in globals(): + pyparsing_test = testing + +core_builtin_exprs += common_builtin_exprs + helper_builtin_exprs + + +__all__ = [ + "__version__", + "__version_time__", + "__author__", + "__compat__", + "__diag__", + "And", + "AtLineStart", + "AtStringStart", + "CaselessKeyword", + "CaselessLiteral", + "CharsNotIn", + "Combine", + "Dict", + "Each", + "Empty", + "FollowedBy", + "Forward", + "GoToColumn", + "Group", + "IndentedBlock", + "Keyword", + "LineEnd", + "LineStart", + "Literal", + "Located", + "PrecededBy", + "MatchFirst", + "NoMatch", + "NotAny", + "OneOrMore", + "OnlyOnce", + "OpAssoc", + "Opt", + "Optional", + "Or", + "ParseBaseException", + "ParseElementEnhance", + "ParseException", + "ParseExpression", + "ParseFatalException", + "ParseResults", + "ParseSyntaxException", + "ParserElement", + "PositionToken", + "QuotedString", + "RecursiveGrammarException", + "Regex", + "SkipTo", + "StringEnd", + "StringStart", + "Suppress", + "Token", + "TokenConverter", + "White", + "Word", + "WordEnd", + "WordStart", + "ZeroOrMore", + "Char", + "alphanums", + "alphas", + "alphas8bit", + "any_close_tag", + "any_open_tag", + "c_style_comment", + "col", + "common_html_entity", + "counted_array", + "cpp_style_comment", + "dbl_quoted_string", + "dbl_slash_comment", + "delimited_list", + "dict_of", + "empty", + "hexnums", + "html_comment", + "identchars", + "identbodychars", + "java_style_comment", + "line", + "line_end", + "line_start", + "lineno", + "make_html_tags", + "make_xml_tags", + "match_only_at_col", + "match_previous_expr", + "match_previous_literal", + "nested_expr", + "null_debug_action", + "nums", + "one_of", + "printables", + "punc8bit", + "python_style_comment", + "quoted_string", + "remove_quotes", + "replace_with", + "replace_html_entity", + "rest_of_line", + "sgl_quoted_string", + "srange", + "string_end", + "string_start", + "trace_parse_action", + "unicode_string", + "with_attribute", + "indentedBlock", + "original_text_for", + "ungroup", + "infix_notation", + "locatedExpr", + "with_class", + "CloseMatch", + "token_map", + "pyparsing_common", + "pyparsing_unicode", + "unicode_set", + "condition_as_parse_action", + "pyparsing_test", + # pre-PEP8 compatibility names + "__versionTime__", + "anyCloseTag", + "anyOpenTag", + "cStyleComment", + "commonHTMLEntity", + "countedArray", + "cppStyleComment", + "dblQuotedString", + "dblSlashComment", + "delimitedList", + "dictOf", + "htmlComment", + "javaStyleComment", + "lineEnd", + "lineStart", + "makeHTMLTags", + "makeXMLTags", + "matchOnlyAtCol", + "matchPreviousExpr", + "matchPreviousLiteral", + "nestedExpr", + "nullDebugAction", + "oneOf", + "opAssoc", + "pythonStyleComment", + "quotedString", + "removeQuotes", + "replaceHTMLEntity", + "replaceWith", + "restOfLine", + "sglQuotedString", + "stringEnd", + "stringStart", + "traceParseAction", + "unicodeString", + "withAttribute", + "indentedBlock", + "originalTextFor", + "infixNotation", + "locatedExpr", + "withClass", + "tokenMap", + "conditionAsParseAction", + "autoname_elements", +] diff --git a/libs/common/setuptools/_vendor/pyparsing/actions.py b/libs/common/setuptools/_vendor/pyparsing/actions.py new file mode 100644 index 00000000..f72c66e7 --- /dev/null +++ b/libs/common/setuptools/_vendor/pyparsing/actions.py @@ -0,0 +1,207 @@ +# actions.py + +from .exceptions import ParseException +from .util import col + + +class OnlyOnce: + """ + Wrapper for parse actions, to ensure they are only called once. + """ + + def __init__(self, method_call): + from .core import _trim_arity + + self.callable = _trim_arity(method_call) + self.called = False + + def __call__(self, s, l, t): + if not self.called: + results = self.callable(s, l, t) + self.called = True + return results + raise ParseException(s, l, "OnlyOnce obj called multiple times w/out reset") + + def reset(self): + """ + Allow the associated parse action to be called once more. + """ + + self.called = False + + +def match_only_at_col(n): + """ + Helper method for defining parse actions that require matching at + a specific column in the input text. + """ + + def verify_col(strg, locn, toks): + if col(locn, strg) != n: + raise ParseException(strg, locn, "matched token not at column {}".format(n)) + + return verify_col + + +def replace_with(repl_str): + """ + Helper method for common parse actions that simply return + a literal value. Especially useful when used with + :class:`transform_string` (). + + Example:: + + num = Word(nums).set_parse_action(lambda toks: int(toks[0])) + na = one_of("N/A NA").set_parse_action(replace_with(math.nan)) + term = na | num + + term[1, ...].parse_string("324 234 N/A 234") # -> [324, 234, nan, 234] + """ + return lambda s, l, t: [repl_str] + + +def remove_quotes(s, l, t): + """ + Helper parse action for removing quotation marks from parsed + quoted strings. + + Example:: + + # by default, quotation marks are included in parsed results + quoted_string.parse_string("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"] + + # use remove_quotes to strip quotation marks from parsed results + quoted_string.set_parse_action(remove_quotes) + quoted_string.parse_string("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"] + """ + return t[0][1:-1] + + +def with_attribute(*args, **attr_dict): + """ + Helper to create a validating parse action to be used with start + tags created with :class:`make_xml_tags` or + :class:`make_html_tags`. Use ``with_attribute`` to qualify + a starting tag with a required attribute value, to avoid false + matches on common tags such as ```` or ``
``. + + Call ``with_attribute`` with a series of attribute names and + values. Specify the list of filter attributes names and values as: + + - keyword arguments, as in ``(align="right")``, or + - as an explicit dict with ``**`` operator, when an attribute + name is also a Python reserved word, as in ``**{"class":"Customer", "align":"right"}`` + - a list of name-value tuples, as in ``(("ns1:class", "Customer"), ("ns2:align", "right"))`` + + For attribute names with a namespace prefix, you must use the second + form. Attribute names are matched insensitive to upper/lower case. + + If just testing for ``class`` (with or without a namespace), use + :class:`with_class`. + + To verify that the attribute exists, but without specifying a value, + pass ``with_attribute.ANY_VALUE`` as the value. + + Example:: + + html = ''' +
+ Some text +
1 4 0 1 0
+
1,3 2,3 1,1
+
this has no type
+
+ + ''' + div,div_end = make_html_tags("div") + + # only match div tag having a type attribute with value "grid" + div_grid = div().set_parse_action(with_attribute(type="grid")) + grid_expr = div_grid + SkipTo(div | div_end)("body") + for grid_header in grid_expr.search_string(html): + print(grid_header.body) + + # construct a match with any div tag having a type attribute, regardless of the value + div_any_type = div().set_parse_action(with_attribute(type=with_attribute.ANY_VALUE)) + div_expr = div_any_type + SkipTo(div | div_end)("body") + for div_header in div_expr.search_string(html): + print(div_header.body) + + prints:: + + 1 4 0 1 0 + + 1 4 0 1 0 + 1,3 2,3 1,1 + """ + if args: + attrs = args[:] + else: + attrs = attr_dict.items() + attrs = [(k, v) for k, v in attrs] + + def pa(s, l, tokens): + for attrName, attrValue in attrs: + if attrName not in tokens: + raise ParseException(s, l, "no matching attribute " + attrName) + if attrValue != with_attribute.ANY_VALUE and tokens[attrName] != attrValue: + raise ParseException( + s, + l, + "attribute {!r} has value {!r}, must be {!r}".format( + attrName, tokens[attrName], attrValue + ), + ) + + return pa + + +with_attribute.ANY_VALUE = object() + + +def with_class(classname, namespace=""): + """ + Simplified version of :class:`with_attribute` when + matching on a div class - made difficult because ``class`` is + a reserved word in Python. + + Example:: + + html = ''' +
+ Some text +
1 4 0 1 0
+
1,3 2,3 1,1
+
this <div> has no class
+
+ + ''' + div,div_end = make_html_tags("div") + div_grid = div().set_parse_action(with_class("grid")) + + grid_expr = div_grid + SkipTo(div | div_end)("body") + for grid_header in grid_expr.search_string(html): + print(grid_header.body) + + div_any_type = div().set_parse_action(with_class(withAttribute.ANY_VALUE)) + div_expr = div_any_type + SkipTo(div | div_end)("body") + for div_header in div_expr.search_string(html): + print(div_header.body) + + prints:: + + 1 4 0 1 0 + + 1 4 0 1 0 + 1,3 2,3 1,1 + """ + classattr = "{}:class".format(namespace) if namespace else "class" + return with_attribute(**{classattr: classname}) + + +# pre-PEP8 compatibility symbols +replaceWith = replace_with +removeQuotes = remove_quotes +withAttribute = with_attribute +withClass = with_class +matchOnlyAtCol = match_only_at_col diff --git a/libs/common/setuptools/_vendor/pyparsing/common.py b/libs/common/setuptools/_vendor/pyparsing/common.py new file mode 100644 index 00000000..1859fb79 --- /dev/null +++ b/libs/common/setuptools/_vendor/pyparsing/common.py @@ -0,0 +1,424 @@ +# common.py +from .core import * +from .helpers import delimited_list, any_open_tag, any_close_tag +from datetime import datetime + + +# some other useful expressions - using lower-case class name since we are really using this as a namespace +class pyparsing_common: + """Here are some common low-level expressions that may be useful in + jump-starting parser development: + + - numeric forms (:class:`integers`, :class:`reals`, + :class:`scientific notation`) + - common :class:`programming identifiers` + - network addresses (:class:`MAC`, + :class:`IPv4`, :class:`IPv6`) + - ISO8601 :class:`dates` and + :class:`datetime` + - :class:`UUID` + - :class:`comma-separated list` + - :class:`url` + + Parse actions: + + - :class:`convertToInteger` + - :class:`convertToFloat` + - :class:`convertToDate` + - :class:`convertToDatetime` + - :class:`stripHTMLTags` + - :class:`upcaseTokens` + - :class:`downcaseTokens` + + Example:: + + pyparsing_common.number.runTests(''' + # any int or real number, returned as the appropriate type + 100 + -100 + +100 + 3.14159 + 6.02e23 + 1e-12 + ''') + + pyparsing_common.fnumber.runTests(''' + # any int or real number, returned as float + 100 + -100 + +100 + 3.14159 + 6.02e23 + 1e-12 + ''') + + pyparsing_common.hex_integer.runTests(''' + # hex numbers + 100 + FF + ''') + + pyparsing_common.fraction.runTests(''' + # fractions + 1/2 + -3/4 + ''') + + pyparsing_common.mixed_integer.runTests(''' + # mixed fractions + 1 + 1/2 + -3/4 + 1-3/4 + ''') + + import uuid + pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID)) + pyparsing_common.uuid.runTests(''' + # uuid + 12345678-1234-5678-1234-567812345678 + ''') + + prints:: + + # any int or real number, returned as the appropriate type + 100 + [100] + + -100 + [-100] + + +100 + [100] + + 3.14159 + [3.14159] + + 6.02e23 + [6.02e+23] + + 1e-12 + [1e-12] + + # any int or real number, returned as float + 100 + [100.0] + + -100 + [-100.0] + + +100 + [100.0] + + 3.14159 + [3.14159] + + 6.02e23 + [6.02e+23] + + 1e-12 + [1e-12] + + # hex numbers + 100 + [256] + + FF + [255] + + # fractions + 1/2 + [0.5] + + -3/4 + [-0.75] + + # mixed fractions + 1 + [1] + + 1/2 + [0.5] + + -3/4 + [-0.75] + + 1-3/4 + [1.75] + + # uuid + 12345678-1234-5678-1234-567812345678 + [UUID('12345678-1234-5678-1234-567812345678')] + """ + + convert_to_integer = token_map(int) + """ + Parse action for converting parsed integers to Python int + """ + + convert_to_float = token_map(float) + """ + Parse action for converting parsed numbers to Python float + """ + + integer = Word(nums).set_name("integer").set_parse_action(convert_to_integer) + """expression that parses an unsigned integer, returns an int""" + + hex_integer = ( + Word(hexnums).set_name("hex integer").set_parse_action(token_map(int, 16)) + ) + """expression that parses a hexadecimal integer, returns an int""" + + signed_integer = ( + Regex(r"[+-]?\d+") + .set_name("signed integer") + .set_parse_action(convert_to_integer) + ) + """expression that parses an integer with optional leading sign, returns an int""" + + fraction = ( + signed_integer().set_parse_action(convert_to_float) + + "/" + + signed_integer().set_parse_action(convert_to_float) + ).set_name("fraction") + """fractional expression of an integer divided by an integer, returns a float""" + fraction.add_parse_action(lambda tt: tt[0] / tt[-1]) + + mixed_integer = ( + fraction | signed_integer + Opt(Opt("-").suppress() + fraction) + ).set_name("fraction or mixed integer-fraction") + """mixed integer of the form 'integer - fraction', with optional leading integer, returns float""" + mixed_integer.add_parse_action(sum) + + real = ( + Regex(r"[+-]?(?:\d+\.\d*|\.\d+)") + .set_name("real number") + .set_parse_action(convert_to_float) + ) + """expression that parses a floating point number and returns a float""" + + sci_real = ( + Regex(r"[+-]?(?:\d+(?:[eE][+-]?\d+)|(?:\d+\.\d*|\.\d+)(?:[eE][+-]?\d+)?)") + .set_name("real number with scientific notation") + .set_parse_action(convert_to_float) + ) + """expression that parses a floating point number with optional + scientific notation and returns a float""" + + # streamlining this expression makes the docs nicer-looking + number = (sci_real | real | signed_integer).setName("number").streamline() + """any numeric expression, returns the corresponding Python type""" + + fnumber = ( + Regex(r"[+-]?\d+\.?\d*([eE][+-]?\d+)?") + .set_name("fnumber") + .set_parse_action(convert_to_float) + ) + """any int or real number, returned as float""" + + identifier = Word(identchars, identbodychars).set_name("identifier") + """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')""" + + ipv4_address = Regex( + r"(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}" + ).set_name("IPv4 address") + "IPv4 address (``0.0.0.0 - 255.255.255.255``)" + + _ipv6_part = Regex(r"[0-9a-fA-F]{1,4}").set_name("hex_integer") + _full_ipv6_address = (_ipv6_part + (":" + _ipv6_part) * 7).set_name( + "full IPv6 address" + ) + _short_ipv6_address = ( + Opt(_ipv6_part + (":" + _ipv6_part) * (0, 6)) + + "::" + + Opt(_ipv6_part + (":" + _ipv6_part) * (0, 6)) + ).set_name("short IPv6 address") + _short_ipv6_address.add_condition( + lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8 + ) + _mixed_ipv6_address = ("::ffff:" + ipv4_address).set_name("mixed IPv6 address") + ipv6_address = Combine( + (_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).set_name( + "IPv6 address" + ) + ).set_name("IPv6 address") + "IPv6 address (long, short, or mixed form)" + + mac_address = Regex( + r"[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}" + ).set_name("MAC address") + "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)" + + @staticmethod + def convert_to_date(fmt: str = "%Y-%m-%d"): + """ + Helper to create a parse action for converting parsed date string to Python datetime.date + + Params - + - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%d"``) + + Example:: + + date_expr = pyparsing_common.iso8601_date.copy() + date_expr.setParseAction(pyparsing_common.convertToDate()) + print(date_expr.parseString("1999-12-31")) + + prints:: + + [datetime.date(1999, 12, 31)] + """ + + def cvt_fn(ss, ll, tt): + try: + return datetime.strptime(tt[0], fmt).date() + except ValueError as ve: + raise ParseException(ss, ll, str(ve)) + + return cvt_fn + + @staticmethod + def convert_to_datetime(fmt: str = "%Y-%m-%dT%H:%M:%S.%f"): + """Helper to create a parse action for converting parsed + datetime string to Python datetime.datetime + + Params - + - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%dT%H:%M:%S.%f"``) + + Example:: + + dt_expr = pyparsing_common.iso8601_datetime.copy() + dt_expr.setParseAction(pyparsing_common.convertToDatetime()) + print(dt_expr.parseString("1999-12-31T23:59:59.999")) + + prints:: + + [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)] + """ + + def cvt_fn(s, l, t): + try: + return datetime.strptime(t[0], fmt) + except ValueError as ve: + raise ParseException(s, l, str(ve)) + + return cvt_fn + + iso8601_date = Regex( + r"(?P\d{4})(?:-(?P\d\d)(?:-(?P\d\d))?)?" + ).set_name("ISO8601 date") + "ISO8601 date (``yyyy-mm-dd``)" + + iso8601_datetime = Regex( + r"(?P\d{4})-(?P\d\d)-(?P\d\d)[T ](?P\d\d):(?P\d\d)(:(?P\d\d(\.\d*)?)?)?(?PZ|[+-]\d\d:?\d\d)?" + ).set_name("ISO8601 datetime") + "ISO8601 datetime (``yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)``) - trailing seconds, milliseconds, and timezone optional; accepts separating ``'T'`` or ``' '``" + + uuid = Regex(r"[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}").set_name("UUID") + "UUID (``xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx``)" + + _html_stripper = any_open_tag.suppress() | any_close_tag.suppress() + + @staticmethod + def strip_html_tags(s: str, l: int, tokens: ParseResults): + """Parse action to remove HTML tags from web page HTML source + + Example:: + + # strip HTML links from normal text + text = 'More info at the pyparsing wiki page' + td, td_end = makeHTMLTags("TD") + table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end + print(table_text.parseString(text).body) + + Prints:: + + More info at the pyparsing wiki page + """ + return pyparsing_common._html_stripper.transform_string(tokens[0]) + + _commasepitem = ( + Combine( + OneOrMore( + ~Literal(",") + + ~LineEnd() + + Word(printables, exclude_chars=",") + + Opt(White(" \t") + ~FollowedBy(LineEnd() | ",")) + ) + ) + .streamline() + .set_name("commaItem") + ) + comma_separated_list = delimited_list( + Opt(quoted_string.copy() | _commasepitem, default="") + ).set_name("comma separated list") + """Predefined expression of 1 or more printable words or quoted strings, separated by commas.""" + + upcase_tokens = staticmethod(token_map(lambda t: t.upper())) + """Parse action to convert tokens to upper case.""" + + downcase_tokens = staticmethod(token_map(lambda t: t.lower())) + """Parse action to convert tokens to lower case.""" + + # fmt: off + url = Regex( + # https://mathiasbynens.be/demo/url-regex + # https://gist.github.com/dperini/729294 + r"^" + + # protocol identifier (optional) + # short syntax // still required + r"(?:(?:(?Phttps?|ftp):)?\/\/)" + + # user:pass BasicAuth (optional) + r"(?:(?P\S+(?::\S*)?)@)?" + + r"(?P" + + # IP address exclusion + # private & local networks + r"(?!(?:10|127)(?:\.\d{1,3}){3})" + + r"(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})" + + r"(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})" + + # IP address dotted notation octets + # excludes loopback network 0.0.0.0 + # excludes reserved space >= 224.0.0.0 + # excludes network & broadcast addresses + # (first & last IP address of each class) + r"(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])" + + r"(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}" + + r"(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))" + + r"|" + + # host & domain names, may end with dot + # can be replaced by a shortest alternative + # (?![-_])(?:[-\w\u00a1-\uffff]{0,63}[^-_]\.)+ + r"(?:" + + r"(?:" + + r"[a-z0-9\u00a1-\uffff]" + + r"[a-z0-9\u00a1-\uffff_-]{0,62}" + + r")?" + + r"[a-z0-9\u00a1-\uffff]\." + + r")+" + + # TLD identifier name, may end with dot + r"(?:[a-z\u00a1-\uffff]{2,}\.?)" + + r")" + + # port number (optional) + r"(:(?P\d{2,5}))?" + + # resource path (optional) + r"(?P\/[^?# ]*)?" + + # query string (optional) + r"(\?(?P[^#]*))?" + + # fragment (optional) + r"(#(?P\S*))?" + + r"$" + ).set_name("url") + # fmt: on + + # pre-PEP8 compatibility names + convertToInteger = convert_to_integer + convertToFloat = convert_to_float + convertToDate = convert_to_date + convertToDatetime = convert_to_datetime + stripHTMLTags = strip_html_tags + upcaseTokens = upcase_tokens + downcaseTokens = downcase_tokens + + +_builtin_exprs = [ + v for v in vars(pyparsing_common).values() if isinstance(v, ParserElement) +] diff --git a/libs/common/setuptools/_vendor/pyparsing/core.py b/libs/common/setuptools/_vendor/pyparsing/core.py new file mode 100644 index 00000000..9acba3f3 --- /dev/null +++ b/libs/common/setuptools/_vendor/pyparsing/core.py @@ -0,0 +1,5814 @@ +# +# core.py +# +import os +import typing +from typing import ( + NamedTuple, + Union, + Callable, + Any, + Generator, + Tuple, + List, + TextIO, + Set, + Sequence, +) +from abc import ABC, abstractmethod +from enum import Enum +import string +import copy +import warnings +import re +import sys +from collections.abc import Iterable +import traceback +import types +from operator import itemgetter +from functools import wraps +from threading import RLock +from pathlib import Path + +from .util import ( + _FifoCache, + _UnboundedCache, + __config_flags, + _collapse_string_to_ranges, + _escape_regex_range_chars, + _bslash, + _flatten, + LRUMemo as _LRUMemo, + UnboundedMemo as _UnboundedMemo, +) +from .exceptions import * +from .actions import * +from .results import ParseResults, _ParseResultsWithOffset +from .unicode import pyparsing_unicode + +_MAX_INT = sys.maxsize +str_type: Tuple[type, ...] = (str, bytes) + +# +# Copyright (c) 2003-2022 Paul T. McGuire +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# + + +if sys.version_info >= (3, 8): + from functools import cached_property +else: + + class cached_property: + def __init__(self, func): + self._func = func + + def __get__(self, instance, owner=None): + ret = instance.__dict__[self._func.__name__] = self._func(instance) + return ret + + +class __compat__(__config_flags): + """ + A cross-version compatibility configuration for pyparsing features that will be + released in a future version. By setting values in this configuration to True, + those features can be enabled in prior versions for compatibility development + and testing. + + - ``collect_all_And_tokens`` - flag to enable fix for Issue #63 that fixes erroneous grouping + of results names when an :class:`And` expression is nested within an :class:`Or` or :class:`MatchFirst`; + maintained for compatibility, but setting to ``False`` no longer restores pre-2.3.1 + behavior + """ + + _type_desc = "compatibility" + + collect_all_And_tokens = True + + _all_names = [__ for __ in locals() if not __.startswith("_")] + _fixed_names = """ + collect_all_And_tokens + """.split() + + +class __diag__(__config_flags): + _type_desc = "diagnostic" + + warn_multiple_tokens_in_named_alternation = False + warn_ungrouped_named_tokens_in_collection = False + warn_name_set_on_empty_Forward = False + warn_on_parse_using_empty_Forward = False + warn_on_assignment_to_Forward = False + warn_on_multiple_string_args_to_oneof = False + warn_on_match_first_with_lshift_operator = False + enable_debug_on_named_expressions = False + + _all_names = [__ for __ in locals() if not __.startswith("_")] + _warning_names = [name for name in _all_names if name.startswith("warn")] + _debug_names = [name for name in _all_names if name.startswith("enable_debug")] + + @classmethod + def enable_all_warnings(cls) -> None: + for name in cls._warning_names: + cls.enable(name) + + +class Diagnostics(Enum): + """ + Diagnostic configuration (all default to disabled) + - ``warn_multiple_tokens_in_named_alternation`` - flag to enable warnings when a results + name is defined on a :class:`MatchFirst` or :class:`Or` expression with one or more :class:`And` subexpressions + - ``warn_ungrouped_named_tokens_in_collection`` - flag to enable warnings when a results + name is defined on a containing expression with ungrouped subexpressions that also + have results names + - ``warn_name_set_on_empty_Forward`` - flag to enable warnings when a :class:`Forward` is defined + with a results name, but has no contents defined + - ``warn_on_parse_using_empty_Forward`` - flag to enable warnings when a :class:`Forward` is + defined in a grammar but has never had an expression attached to it + - ``warn_on_assignment_to_Forward`` - flag to enable warnings when a :class:`Forward` is defined + but is overwritten by assigning using ``'='`` instead of ``'<<='`` or ``'<<'`` + - ``warn_on_multiple_string_args_to_oneof`` - flag to enable warnings when :class:`one_of` is + incorrectly called with multiple str arguments + - ``enable_debug_on_named_expressions`` - flag to auto-enable debug on all subsequent + calls to :class:`ParserElement.set_name` + + Diagnostics are enabled/disabled by calling :class:`enable_diag` and :class:`disable_diag`. + All warnings can be enabled by calling :class:`enable_all_warnings`. + """ + + warn_multiple_tokens_in_named_alternation = 0 + warn_ungrouped_named_tokens_in_collection = 1 + warn_name_set_on_empty_Forward = 2 + warn_on_parse_using_empty_Forward = 3 + warn_on_assignment_to_Forward = 4 + warn_on_multiple_string_args_to_oneof = 5 + warn_on_match_first_with_lshift_operator = 6 + enable_debug_on_named_expressions = 7 + + +def enable_diag(diag_enum: Diagnostics) -> None: + """ + Enable a global pyparsing diagnostic flag (see :class:`Diagnostics`). + """ + __diag__.enable(diag_enum.name) + + +def disable_diag(diag_enum: Diagnostics) -> None: + """ + Disable a global pyparsing diagnostic flag (see :class:`Diagnostics`). + """ + __diag__.disable(diag_enum.name) + + +def enable_all_warnings() -> None: + """ + Enable all global pyparsing diagnostic warnings (see :class:`Diagnostics`). + """ + __diag__.enable_all_warnings() + + +# hide abstract class +del __config_flags + + +def _should_enable_warnings( + cmd_line_warn_options: typing.Iterable[str], warn_env_var: typing.Optional[str] +) -> bool: + enable = bool(warn_env_var) + for warn_opt in cmd_line_warn_options: + w_action, w_message, w_category, w_module, w_line = (warn_opt + "::::").split( + ":" + )[:5] + if not w_action.lower().startswith("i") and ( + not (w_message or w_category or w_module) or w_module == "pyparsing" + ): + enable = True + elif w_action.lower().startswith("i") and w_module in ("pyparsing", ""): + enable = False + return enable + + +if _should_enable_warnings( + sys.warnoptions, os.environ.get("PYPARSINGENABLEALLWARNINGS") +): + enable_all_warnings() + + +# build list of single arg builtins, that can be used as parse actions +_single_arg_builtins = { + sum, + len, + sorted, + reversed, + list, + tuple, + set, + any, + all, + min, + max, +} + +_generatorType = types.GeneratorType +ParseAction = Union[ + Callable[[], Any], + Callable[[ParseResults], Any], + Callable[[int, ParseResults], Any], + Callable[[str, int, ParseResults], Any], +] +ParseCondition = Union[ + Callable[[], bool], + Callable[[ParseResults], bool], + Callable[[int, ParseResults], bool], + Callable[[str, int, ParseResults], bool], +] +ParseFailAction = Callable[[str, int, "ParserElement", Exception], None] +DebugStartAction = Callable[[str, int, "ParserElement", bool], None] +DebugSuccessAction = Callable[ + [str, int, int, "ParserElement", ParseResults, bool], None +] +DebugExceptionAction = Callable[[str, int, "ParserElement", Exception, bool], None] + + +alphas = string.ascii_uppercase + string.ascii_lowercase +identchars = pyparsing_unicode.Latin1.identchars +identbodychars = pyparsing_unicode.Latin1.identbodychars +nums = "0123456789" +hexnums = nums + "ABCDEFabcdef" +alphanums = alphas + nums +printables = "".join([c for c in string.printable if c not in string.whitespace]) + +_trim_arity_call_line: traceback.StackSummary = None + + +def _trim_arity(func, max_limit=3): + """decorator to trim function calls to match the arity of the target""" + global _trim_arity_call_line + + if func in _single_arg_builtins: + return lambda s, l, t: func(t) + + limit = 0 + found_arity = False + + def extract_tb(tb, limit=0): + frames = traceback.extract_tb(tb, limit=limit) + frame_summary = frames[-1] + return [frame_summary[:2]] + + # synthesize what would be returned by traceback.extract_stack at the call to + # user's parse action 'func', so that we don't incur call penalty at parse time + + # fmt: off + LINE_DIFF = 7 + # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND + # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!! + _trim_arity_call_line = (_trim_arity_call_line or traceback.extract_stack(limit=2)[-1]) + pa_call_line_synth = (_trim_arity_call_line[0], _trim_arity_call_line[1] + LINE_DIFF) + + def wrapper(*args): + nonlocal found_arity, limit + while 1: + try: + ret = func(*args[limit:]) + found_arity = True + return ret + except TypeError as te: + # re-raise TypeErrors if they did not come from our arity testing + if found_arity: + raise + else: + tb = te.__traceback__ + trim_arity_type_error = ( + extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth + ) + del tb + + if trim_arity_type_error: + if limit < max_limit: + limit += 1 + continue + + raise + # fmt: on + + # copy func name to wrapper for sensible debug output + # (can't use functools.wraps, since that messes with function signature) + func_name = getattr(func, "__name__", getattr(func, "__class__").__name__) + wrapper.__name__ = func_name + wrapper.__doc__ = func.__doc__ + + return wrapper + + +def condition_as_parse_action( + fn: ParseCondition, message: str = None, fatal: bool = False +) -> ParseAction: + """ + Function to convert a simple predicate function that returns ``True`` or ``False`` + into a parse action. Can be used in places when a parse action is required + and :class:`ParserElement.add_condition` cannot be used (such as when adding a condition + to an operator level in :class:`infix_notation`). + + Optional keyword arguments: + + - ``message`` - define a custom message to be used in the raised exception + - ``fatal`` - if True, will raise :class:`ParseFatalException` to stop parsing immediately; + otherwise will raise :class:`ParseException` + + """ + msg = message if message is not None else "failed user-defined condition" + exc_type = ParseFatalException if fatal else ParseException + fn = _trim_arity(fn) + + @wraps(fn) + def pa(s, l, t): + if not bool(fn(s, l, t)): + raise exc_type(s, l, msg) + + return pa + + +def _default_start_debug_action( + instring: str, loc: int, expr: "ParserElement", cache_hit: bool = False +): + cache_hit_str = "*" if cache_hit else "" + print( + ( + "{}Match {} at loc {}({},{})\n {}\n {}^".format( + cache_hit_str, + expr, + loc, + lineno(loc, instring), + col(loc, instring), + line(loc, instring), + " " * (col(loc, instring) - 1), + ) + ) + ) + + +def _default_success_debug_action( + instring: str, + startloc: int, + endloc: int, + expr: "ParserElement", + toks: ParseResults, + cache_hit: bool = False, +): + cache_hit_str = "*" if cache_hit else "" + print("{}Matched {} -> {}".format(cache_hit_str, expr, toks.as_list())) + + +def _default_exception_debug_action( + instring: str, + loc: int, + expr: "ParserElement", + exc: Exception, + cache_hit: bool = False, +): + cache_hit_str = "*" if cache_hit else "" + print( + "{}Match {} failed, {} raised: {}".format( + cache_hit_str, expr, type(exc).__name__, exc + ) + ) + + +def null_debug_action(*args): + """'Do-nothing' debug action, to suppress debugging output during parsing.""" + + +class ParserElement(ABC): + """Abstract base level parser element class.""" + + DEFAULT_WHITE_CHARS: str = " \n\t\r" + verbose_stacktrace: bool = False + _literalStringClass: typing.Optional[type] = None + + @staticmethod + def set_default_whitespace_chars(chars: str) -> None: + r""" + Overrides the default whitespace chars + + Example:: + + # default whitespace chars are space, and newline + Word(alphas)[1, ...].parse_string("abc def\nghi jkl") # -> ['abc', 'def', 'ghi', 'jkl'] + + # change to just treat newline as significant + ParserElement.set_default_whitespace_chars(" \t") + Word(alphas)[1, ...].parse_string("abc def\nghi jkl") # -> ['abc', 'def'] + """ + ParserElement.DEFAULT_WHITE_CHARS = chars + + # update whitespace all parse expressions defined in this module + for expr in _builtin_exprs: + if expr.copyDefaultWhiteChars: + expr.whiteChars = set(chars) + + @staticmethod + def inline_literals_using(cls: type) -> None: + """ + Set class to be used for inclusion of string literals into a parser. + + Example:: + + # default literal class used is Literal + integer = Word(nums) + date_str = integer("year") + '/' + integer("month") + '/' + integer("day") + + date_str.parse_string("1999/12/31") # -> ['1999', '/', '12', '/', '31'] + + + # change to Suppress + ParserElement.inline_literals_using(Suppress) + date_str = integer("year") + '/' + integer("month") + '/' + integer("day") + + date_str.parse_string("1999/12/31") # -> ['1999', '12', '31'] + """ + ParserElement._literalStringClass = cls + + class DebugActions(NamedTuple): + debug_try: typing.Optional[DebugStartAction] + debug_match: typing.Optional[DebugSuccessAction] + debug_fail: typing.Optional[DebugExceptionAction] + + def __init__(self, savelist: bool = False): + self.parseAction: List[ParseAction] = list() + self.failAction: typing.Optional[ParseFailAction] = None + self.customName = None + self._defaultName = None + self.resultsName = None + self.saveAsList = savelist + self.skipWhitespace = True + self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS) + self.copyDefaultWhiteChars = True + # used when checking for left-recursion + self.mayReturnEmpty = False + self.keepTabs = False + self.ignoreExprs: List["ParserElement"] = list() + self.debug = False + self.streamlined = False + # optimize exception handling for subclasses that don't advance parse index + self.mayIndexError = True + self.errmsg = "" + # mark results names as modal (report only last) or cumulative (list all) + self.modalResults = True + # custom debug actions + self.debugActions = self.DebugActions(None, None, None) + # avoid redundant calls to preParse + self.callPreparse = True + self.callDuringTry = False + self.suppress_warnings_: List[Diagnostics] = [] + + def suppress_warning(self, warning_type: Diagnostics) -> "ParserElement": + """ + Suppress warnings emitted for a particular diagnostic on this expression. + + Example:: + + base = pp.Forward() + base.suppress_warning(Diagnostics.warn_on_parse_using_empty_Forward) + + # statement would normally raise a warning, but is now suppressed + print(base.parseString("x")) + + """ + self.suppress_warnings_.append(warning_type) + return self + + def copy(self) -> "ParserElement": + """ + Make a copy of this :class:`ParserElement`. Useful for defining + different parse actions for the same parsing pattern, using copies of + the original parse element. + + Example:: + + integer = Word(nums).set_parse_action(lambda toks: int(toks[0])) + integerK = integer.copy().add_parse_action(lambda toks: toks[0] * 1024) + Suppress("K") + integerM = integer.copy().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M") + + print((integerK | integerM | integer)[1, ...].parse_string("5K 100 640K 256M")) + + prints:: + + [5120, 100, 655360, 268435456] + + Equivalent form of ``expr.copy()`` is just ``expr()``:: + + integerM = integer().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M") + """ + cpy = copy.copy(self) + cpy.parseAction = self.parseAction[:] + cpy.ignoreExprs = self.ignoreExprs[:] + if self.copyDefaultWhiteChars: + cpy.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS) + return cpy + + def set_results_name( + self, name: str, list_all_matches: bool = False, *, listAllMatches: bool = False + ) -> "ParserElement": + """ + Define name for referencing matching tokens as a nested attribute + of the returned parse results. + + Normally, results names are assigned as you would assign keys in a dict: + any existing value is overwritten by later values. If it is necessary to + keep all values captured for a particular results name, call ``set_results_name`` + with ``list_all_matches`` = True. + + NOTE: ``set_results_name`` returns a *copy* of the original :class:`ParserElement` object; + this is so that the client can define a basic element, such as an + integer, and reference it in multiple places with different names. + + You can also set results names using the abbreviated syntax, + ``expr("name")`` in place of ``expr.set_results_name("name")`` + - see :class:`__call__`. If ``list_all_matches`` is required, use + ``expr("name*")``. + + Example:: + + date_str = (integer.set_results_name("year") + '/' + + integer.set_results_name("month") + '/' + + integer.set_results_name("day")) + + # equivalent form: + date_str = integer("year") + '/' + integer("month") + '/' + integer("day") + """ + listAllMatches = listAllMatches or list_all_matches + return self._setResultsName(name, listAllMatches) + + def _setResultsName(self, name, listAllMatches=False): + if name is None: + return self + newself = self.copy() + if name.endswith("*"): + name = name[:-1] + listAllMatches = True + newself.resultsName = name + newself.modalResults = not listAllMatches + return newself + + def set_break(self, break_flag: bool = True) -> "ParserElement": + """ + Method to invoke the Python pdb debugger when this element is + about to be parsed. Set ``break_flag`` to ``True`` to enable, ``False`` to + disable. + """ + if break_flag: + _parseMethod = self._parse + + def breaker(instring, loc, doActions=True, callPreParse=True): + import pdb + + # this call to pdb.set_trace() is intentional, not a checkin error + pdb.set_trace() + return _parseMethod(instring, loc, doActions, callPreParse) + + breaker._originalParseMethod = _parseMethod + self._parse = breaker + else: + if hasattr(self._parse, "_originalParseMethod"): + self._parse = self._parse._originalParseMethod + return self + + def set_parse_action(self, *fns: ParseAction, **kwargs) -> "ParserElement": + """ + Define one or more actions to perform when successfully matching parse element definition. + + Parse actions can be called to perform data conversions, do extra validation, + update external data structures, or enhance or replace the parsed tokens. + Each parse action ``fn`` is a callable method with 0-3 arguments, called as + ``fn(s, loc, toks)`` , ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where: + + - s = the original string being parsed (see note below) + - loc = the location of the matching substring + - toks = a list of the matched tokens, packaged as a :class:`ParseResults` object + + The parsed tokens are passed to the parse action as ParseResults. They can be + modified in place using list-style append, extend, and pop operations to update + the parsed list elements; and with dictionary-style item set and del operations + to add, update, or remove any named results. If the tokens are modified in place, + it is not necessary to return them with a return statement. + + Parse actions can also completely replace the given tokens, with another ``ParseResults`` + object, or with some entirely different object (common for parse actions that perform data + conversions). A convenient way to build a new parse result is to define the values + using a dict, and then create the return value using :class:`ParseResults.from_dict`. + + If None is passed as the ``fn`` parse action, all previously added parse actions for this + expression are cleared. + + Optional keyword arguments: + + - call_during_try = (default= ``False``) indicate if parse action should be run during + lookaheads and alternate testing. For parse actions that have side effects, it is + important to only call the parse action once it is determined that it is being + called as part of a successful parse. For parse actions that perform additional + validation, then call_during_try should be passed as True, so that the validation + code is included in the preliminary "try" parses. + + Note: the default parsing behavior is to expand tabs in the input string + before starting the parsing process. See :class:`parse_string` for more + information on parsing strings containing ```` s, and suggested + methods to maintain a consistent view of the parsed string, the parse + location, and line and column positions within the parsed string. + + Example:: + + # parse dates in the form YYYY/MM/DD + + # use parse action to convert toks from str to int at parse time + def convert_to_int(toks): + return int(toks[0]) + + # use a parse action to verify that the date is a valid date + def is_valid_date(instring, loc, toks): + from datetime import date + year, month, day = toks[::2] + try: + date(year, month, day) + except ValueError: + raise ParseException(instring, loc, "invalid date given") + + integer = Word(nums) + date_str = integer + '/' + integer + '/' + integer + + # add parse actions + integer.set_parse_action(convert_to_int) + date_str.set_parse_action(is_valid_date) + + # note that integer fields are now ints, not strings + date_str.run_tests(''' + # successful parse - note that integer fields were converted to ints + 1999/12/31 + + # fail - invalid date + 1999/13/31 + ''') + """ + if list(fns) == [None]: + self.parseAction = [] + else: + if not all(callable(fn) for fn in fns): + raise TypeError("parse actions must be callable") + self.parseAction = [_trim_arity(fn) for fn in fns] + self.callDuringTry = kwargs.get( + "call_during_try", kwargs.get("callDuringTry", False) + ) + return self + + def add_parse_action(self, *fns: ParseAction, **kwargs) -> "ParserElement": + """ + Add one or more parse actions to expression's list of parse actions. See :class:`set_parse_action`. + + See examples in :class:`copy`. + """ + self.parseAction += [_trim_arity(fn) for fn in fns] + self.callDuringTry = self.callDuringTry or kwargs.get( + "call_during_try", kwargs.get("callDuringTry", False) + ) + return self + + def add_condition(self, *fns: ParseCondition, **kwargs) -> "ParserElement": + """Add a boolean predicate function to expression's list of parse actions. See + :class:`set_parse_action` for function call signatures. Unlike ``set_parse_action``, + functions passed to ``add_condition`` need to return boolean success/fail of the condition. + + Optional keyword arguments: + + - message = define a custom message to be used in the raised exception + - fatal = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise + ParseException + - call_during_try = boolean to indicate if this method should be called during internal tryParse calls, + default=False + + Example:: + + integer = Word(nums).set_parse_action(lambda toks: int(toks[0])) + year_int = integer.copy() + year_int.add_condition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later") + date_str = year_int + '/' + integer + '/' + integer + + result = date_str.parse_string("1999/12/31") # -> Exception: Only support years 2000 and later (at char 0), + (line:1, col:1) + """ + for fn in fns: + self.parseAction.append( + condition_as_parse_action( + fn, message=kwargs.get("message"), fatal=kwargs.get("fatal", False) + ) + ) + + self.callDuringTry = self.callDuringTry or kwargs.get( + "call_during_try", kwargs.get("callDuringTry", False) + ) + return self + + def set_fail_action(self, fn: ParseFailAction) -> "ParserElement": + """ + Define action to perform if parsing fails at this expression. + Fail acton fn is a callable function that takes the arguments + ``fn(s, loc, expr, err)`` where: + + - s = string being parsed + - loc = location where expression match was attempted and failed + - expr = the parse expression that failed + - err = the exception thrown + + The function returns no value. It may throw :class:`ParseFatalException` + if it is desired to stop parsing immediately.""" + self.failAction = fn + return self + + def _skipIgnorables(self, instring, loc): + exprsFound = True + while exprsFound: + exprsFound = False + for e in self.ignoreExprs: + try: + while 1: + loc, dummy = e._parse(instring, loc) + exprsFound = True + except ParseException: + pass + return loc + + def preParse(self, instring, loc): + if self.ignoreExprs: + loc = self._skipIgnorables(instring, loc) + + if self.skipWhitespace: + instrlen = len(instring) + white_chars = self.whiteChars + while loc < instrlen and instring[loc] in white_chars: + loc += 1 + + return loc + + def parseImpl(self, instring, loc, doActions=True): + return loc, [] + + def postParse(self, instring, loc, tokenlist): + return tokenlist + + # @profile + def _parseNoCache( + self, instring, loc, doActions=True, callPreParse=True + ) -> Tuple[int, ParseResults]: + TRY, MATCH, FAIL = 0, 1, 2 + debugging = self.debug # and doActions) + len_instring = len(instring) + + if debugging or self.failAction: + # print("Match {} at loc {}({}, {})".format(self, loc, lineno(loc, instring), col(loc, instring))) + try: + if callPreParse and self.callPreparse: + pre_loc = self.preParse(instring, loc) + else: + pre_loc = loc + tokens_start = pre_loc + if self.debugActions.debug_try: + self.debugActions.debug_try(instring, tokens_start, self, False) + if self.mayIndexError or pre_loc >= len_instring: + try: + loc, tokens = self.parseImpl(instring, pre_loc, doActions) + except IndexError: + raise ParseException(instring, len_instring, self.errmsg, self) + else: + loc, tokens = self.parseImpl(instring, pre_loc, doActions) + except Exception as err: + # print("Exception raised:", err) + if self.debugActions.debug_fail: + self.debugActions.debug_fail( + instring, tokens_start, self, err, False + ) + if self.failAction: + self.failAction(instring, tokens_start, self, err) + raise + else: + if callPreParse and self.callPreparse: + pre_loc = self.preParse(instring, loc) + else: + pre_loc = loc + tokens_start = pre_loc + if self.mayIndexError or pre_loc >= len_instring: + try: + loc, tokens = self.parseImpl(instring, pre_loc, doActions) + except IndexError: + raise ParseException(instring, len_instring, self.errmsg, self) + else: + loc, tokens = self.parseImpl(instring, pre_loc, doActions) + + tokens = self.postParse(instring, loc, tokens) + + ret_tokens = ParseResults( + tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults + ) + if self.parseAction and (doActions or self.callDuringTry): + if debugging: + try: + for fn in self.parseAction: + try: + tokens = fn(instring, tokens_start, ret_tokens) + except IndexError as parse_action_exc: + exc = ParseException("exception raised in parse action") + raise exc from parse_action_exc + + if tokens is not None and tokens is not ret_tokens: + ret_tokens = ParseResults( + tokens, + self.resultsName, + asList=self.saveAsList + and isinstance(tokens, (ParseResults, list)), + modal=self.modalResults, + ) + except Exception as err: + # print "Exception raised in user parse action:", err + if self.debugActions.debug_fail: + self.debugActions.debug_fail( + instring, tokens_start, self, err, False + ) + raise + else: + for fn in self.parseAction: + try: + tokens = fn(instring, tokens_start, ret_tokens) + except IndexError as parse_action_exc: + exc = ParseException("exception raised in parse action") + raise exc from parse_action_exc + + if tokens is not None and tokens is not ret_tokens: + ret_tokens = ParseResults( + tokens, + self.resultsName, + asList=self.saveAsList + and isinstance(tokens, (ParseResults, list)), + modal=self.modalResults, + ) + if debugging: + # print("Matched", self, "->", ret_tokens.as_list()) + if self.debugActions.debug_match: + self.debugActions.debug_match( + instring, tokens_start, loc, self, ret_tokens, False + ) + + return loc, ret_tokens + + def try_parse(self, instring: str, loc: int, raise_fatal: bool = False) -> int: + try: + return self._parse(instring, loc, doActions=False)[0] + except ParseFatalException: + if raise_fatal: + raise + raise ParseException(instring, loc, self.errmsg, self) + + def can_parse_next(self, instring: str, loc: int) -> bool: + try: + self.try_parse(instring, loc) + except (ParseException, IndexError): + return False + else: + return True + + # cache for left-recursion in Forward references + recursion_lock = RLock() + recursion_memos: typing.Dict[ + Tuple[int, "Forward", bool], Tuple[int, Union[ParseResults, Exception]] + ] = {} + + # argument cache for optimizing repeated calls when backtracking through recursive expressions + packrat_cache = ( + {} + ) # this is set later by enabled_packrat(); this is here so that reset_cache() doesn't fail + packrat_cache_lock = RLock() + packrat_cache_stats = [0, 0] + + # this method gets repeatedly called during backtracking with the same arguments - + # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression + def _parseCache( + self, instring, loc, doActions=True, callPreParse=True + ) -> Tuple[int, ParseResults]: + HIT, MISS = 0, 1 + TRY, MATCH, FAIL = 0, 1, 2 + lookup = (self, instring, loc, callPreParse, doActions) + with ParserElement.packrat_cache_lock: + cache = ParserElement.packrat_cache + value = cache.get(lookup) + if value is cache.not_in_cache: + ParserElement.packrat_cache_stats[MISS] += 1 + try: + value = self._parseNoCache(instring, loc, doActions, callPreParse) + except ParseBaseException as pe: + # cache a copy of the exception, without the traceback + cache.set(lookup, pe.__class__(*pe.args)) + raise + else: + cache.set(lookup, (value[0], value[1].copy(), loc)) + return value + else: + ParserElement.packrat_cache_stats[HIT] += 1 + if self.debug and self.debugActions.debug_try: + try: + self.debugActions.debug_try(instring, loc, self, cache_hit=True) + except TypeError: + pass + if isinstance(value, Exception): + if self.debug and self.debugActions.debug_fail: + try: + self.debugActions.debug_fail( + instring, loc, self, value, cache_hit=True + ) + except TypeError: + pass + raise value + + loc_, result, endloc = value[0], value[1].copy(), value[2] + if self.debug and self.debugActions.debug_match: + try: + self.debugActions.debug_match( + instring, loc_, endloc, self, result, cache_hit=True + ) + except TypeError: + pass + + return loc_, result + + _parse = _parseNoCache + + @staticmethod + def reset_cache() -> None: + ParserElement.packrat_cache.clear() + ParserElement.packrat_cache_stats[:] = [0] * len( + ParserElement.packrat_cache_stats + ) + ParserElement.recursion_memos.clear() + + _packratEnabled = False + _left_recursion_enabled = False + + @staticmethod + def disable_memoization() -> None: + """ + Disables active Packrat or Left Recursion parsing and their memoization + + This method also works if neither Packrat nor Left Recursion are enabled. + This makes it safe to call before activating Packrat nor Left Recursion + to clear any previous settings. + """ + ParserElement.reset_cache() + ParserElement._left_recursion_enabled = False + ParserElement._packratEnabled = False + ParserElement._parse = ParserElement._parseNoCache + + @staticmethod + def enable_left_recursion( + cache_size_limit: typing.Optional[int] = None, *, force=False + ) -> None: + """ + Enables "bounded recursion" parsing, which allows for both direct and indirect + left-recursion. During parsing, left-recursive :class:`Forward` elements are + repeatedly matched with a fixed recursion depth that is gradually increased + until finding the longest match. + + Example:: + + import pyparsing as pp + pp.ParserElement.enable_left_recursion() + + E = pp.Forward("E") + num = pp.Word(pp.nums) + # match `num`, or `num '+' num`, or `num '+' num '+' num`, ... + E <<= E + '+' - num | num + + print(E.parse_string("1+2+3")) + + Recursion search naturally memoizes matches of ``Forward`` elements and may + thus skip reevaluation of parse actions during backtracking. This may break + programs with parse actions which rely on strict ordering of side-effects. + + Parameters: + + - cache_size_limit - (default=``None``) - memoize at most this many + ``Forward`` elements during matching; if ``None`` (the default), + memoize all ``Forward`` elements. + + Bounded Recursion parsing works similar but not identical to Packrat parsing, + thus the two cannot be used together. Use ``force=True`` to disable any + previous, conflicting settings. + """ + if force: + ParserElement.disable_memoization() + elif ParserElement._packratEnabled: + raise RuntimeError("Packrat and Bounded Recursion are not compatible") + if cache_size_limit is None: + ParserElement.recursion_memos = _UnboundedMemo() + elif cache_size_limit > 0: + ParserElement.recursion_memos = _LRUMemo(capacity=cache_size_limit) + else: + raise NotImplementedError("Memo size of %s" % cache_size_limit) + ParserElement._left_recursion_enabled = True + + @staticmethod + def enable_packrat(cache_size_limit: int = 128, *, force: bool = False) -> None: + """ + Enables "packrat" parsing, which adds memoizing to the parsing logic. + Repeated parse attempts at the same string location (which happens + often in many complex grammars) can immediately return a cached value, + instead of re-executing parsing/validating code. Memoizing is done of + both valid results and parsing exceptions. + + Parameters: + + - cache_size_limit - (default= ``128``) - if an integer value is provided + will limit the size of the packrat cache; if None is passed, then + the cache size will be unbounded; if 0 is passed, the cache will + be effectively disabled. + + This speedup may break existing programs that use parse actions that + have side-effects. For this reason, packrat parsing is disabled when + you first import pyparsing. To activate the packrat feature, your + program must call the class method :class:`ParserElement.enable_packrat`. + For best results, call ``enable_packrat()`` immediately after + importing pyparsing. + + Example:: + + import pyparsing + pyparsing.ParserElement.enable_packrat() + + Packrat parsing works similar but not identical to Bounded Recursion parsing, + thus the two cannot be used together. Use ``force=True`` to disable any + previous, conflicting settings. + """ + if force: + ParserElement.disable_memoization() + elif ParserElement._left_recursion_enabled: + raise RuntimeError("Packrat and Bounded Recursion are not compatible") + if not ParserElement._packratEnabled: + ParserElement._packratEnabled = True + if cache_size_limit is None: + ParserElement.packrat_cache = _UnboundedCache() + else: + ParserElement.packrat_cache = _FifoCache(cache_size_limit) + ParserElement._parse = ParserElement._parseCache + + def parse_string( + self, instring: str, parse_all: bool = False, *, parseAll: bool = False + ) -> ParseResults: + """ + Parse a string with respect to the parser definition. This function is intended as the primary interface to the + client code. + + :param instring: The input string to be parsed. + :param parse_all: If set, the entire input string must match the grammar. + :param parseAll: retained for pre-PEP8 compatibility, will be removed in a future release. + :raises ParseException: Raised if ``parse_all`` is set and the input string does not match the whole grammar. + :returns: the parsed data as a :class:`ParseResults` object, which may be accessed as a `list`, a `dict`, or + an object with attributes if the given parser includes results names. + + If the input string is required to match the entire grammar, ``parse_all`` flag must be set to ``True``. This + is also equivalent to ending the grammar with :class:`StringEnd`(). + + To report proper column numbers, ``parse_string`` operates on a copy of the input string where all tabs are + converted to spaces (8 spaces per tab, as per the default in ``string.expandtabs``). If the input string + contains tabs and the grammar uses parse actions that use the ``loc`` argument to index into the string + being parsed, one can ensure a consistent view of the input string by doing one of the following: + + - calling ``parse_with_tabs`` on your grammar before calling ``parse_string`` (see :class:`parse_with_tabs`), + - define your parse action using the full ``(s,loc,toks)`` signature, and reference the input string using the + parse action's ``s`` argument, or + - explicitly expand the tabs in your input string before calling ``parse_string``. + + Examples: + + By default, partial matches are OK. + + >>> res = Word('a').parse_string('aaaaabaaa') + >>> print(res) + ['aaaaa'] + + The parsing behavior varies by the inheriting class of this abstract class. Please refer to the children + directly to see more examples. + + It raises an exception if parse_all flag is set and instring does not match the whole grammar. + + >>> res = Word('a').parse_string('aaaaabaaa', parse_all=True) + Traceback (most recent call last): + ... + pyparsing.ParseException: Expected end of text, found 'b' (at char 5), (line:1, col:6) + """ + parseAll = parse_all or parseAll + + ParserElement.reset_cache() + if not self.streamlined: + self.streamline() + for e in self.ignoreExprs: + e.streamline() + if not self.keepTabs: + instring = instring.expandtabs() + try: + loc, tokens = self._parse(instring, 0) + if parseAll: + loc = self.preParse(instring, loc) + se = Empty() + StringEnd() + se._parse(instring, loc) + except ParseBaseException as exc: + if ParserElement.verbose_stacktrace: + raise + else: + # catch and re-raise exception from here, clearing out pyparsing internal stack trace + raise exc.with_traceback(None) + else: + return tokens + + def scan_string( + self, + instring: str, + max_matches: int = _MAX_INT, + overlap: bool = False, + *, + debug: bool = False, + maxMatches: int = _MAX_INT, + ) -> Generator[Tuple[ParseResults, int, int], None, None]: + """ + Scan the input string for expression matches. Each match will return the + matching tokens, start location, and end location. May be called with optional + ``max_matches`` argument, to clip scanning after 'n' matches are found. If + ``overlap`` is specified, then overlapping matches will be reported. + + Note that the start and end locations are reported relative to the string + being parsed. See :class:`parse_string` for more information on parsing + strings with embedded tabs. + + Example:: + + source = "sldjf123lsdjjkf345sldkjf879lkjsfd987" + print(source) + for tokens, start, end in Word(alphas).scan_string(source): + print(' '*start + '^'*(end-start)) + print(' '*start + tokens[0]) + + prints:: + + sldjf123lsdjjkf345sldkjf879lkjsfd987 + ^^^^^ + sldjf + ^^^^^^^ + lsdjjkf + ^^^^^^ + sldkjf + ^^^^^^ + lkjsfd + """ + maxMatches = min(maxMatches, max_matches) + if not self.streamlined: + self.streamline() + for e in self.ignoreExprs: + e.streamline() + + if not self.keepTabs: + instring = str(instring).expandtabs() + instrlen = len(instring) + loc = 0 + preparseFn = self.preParse + parseFn = self._parse + ParserElement.resetCache() + matches = 0 + try: + while loc <= instrlen and matches < maxMatches: + try: + preloc = preparseFn(instring, loc) + nextLoc, tokens = parseFn(instring, preloc, callPreParse=False) + except ParseException: + loc = preloc + 1 + else: + if nextLoc > loc: + matches += 1 + if debug: + print( + { + "tokens": tokens.asList(), + "start": preloc, + "end": nextLoc, + } + ) + yield tokens, preloc, nextLoc + if overlap: + nextloc = preparseFn(instring, loc) + if nextloc > loc: + loc = nextLoc + else: + loc += 1 + else: + loc = nextLoc + else: + loc = preloc + 1 + except ParseBaseException as exc: + if ParserElement.verbose_stacktrace: + raise + else: + # catch and re-raise exception from here, clears out pyparsing internal stack trace + raise exc.with_traceback(None) + + def transform_string(self, instring: str, *, debug: bool = False) -> str: + """ + Extension to :class:`scan_string`, to modify matching text with modified tokens that may + be returned from a parse action. To use ``transform_string``, define a grammar and + attach a parse action to it that modifies the returned token list. + Invoking ``transform_string()`` on a target string will then scan for matches, + and replace the matched text patterns according to the logic in the parse + action. ``transform_string()`` returns the resulting transformed string. + + Example:: + + wd = Word(alphas) + wd.set_parse_action(lambda toks: toks[0].title()) + + print(wd.transform_string("now is the winter of our discontent made glorious summer by this sun of york.")) + + prints:: + + Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York. + """ + out: List[str] = [] + lastE = 0 + # force preservation of s, to minimize unwanted transformation of string, and to + # keep string locs straight between transform_string and scan_string + self.keepTabs = True + try: + for t, s, e in self.scan_string(instring, debug=debug): + out.append(instring[lastE:s]) + if t: + if isinstance(t, ParseResults): + out += t.as_list() + elif isinstance(t, Iterable) and not isinstance(t, str_type): + out.extend(t) + else: + out.append(t) + lastE = e + out.append(instring[lastE:]) + out = [o for o in out if o] + return "".join([str(s) for s in _flatten(out)]) + except ParseBaseException as exc: + if ParserElement.verbose_stacktrace: + raise + else: + # catch and re-raise exception from here, clears out pyparsing internal stack trace + raise exc.with_traceback(None) + + def search_string( + self, + instring: str, + max_matches: int = _MAX_INT, + *, + debug: bool = False, + maxMatches: int = _MAX_INT, + ) -> ParseResults: + """ + Another extension to :class:`scan_string`, simplifying the access to the tokens found + to match the given parse expression. May be called with optional + ``max_matches`` argument, to clip searching after 'n' matches are found. + + Example:: + + # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters + cap_word = Word(alphas.upper(), alphas.lower()) + + print(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity")) + + # the sum() builtin can be used to merge results into a single ParseResults object + print(sum(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity"))) + + prints:: + + [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']] + ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity'] + """ + maxMatches = min(maxMatches, max_matches) + try: + return ParseResults( + [t for t, s, e in self.scan_string(instring, maxMatches, debug=debug)] + ) + except ParseBaseException as exc: + if ParserElement.verbose_stacktrace: + raise + else: + # catch and re-raise exception from here, clears out pyparsing internal stack trace + raise exc.with_traceback(None) + + def split( + self, + instring: str, + maxsplit: int = _MAX_INT, + include_separators: bool = False, + *, + includeSeparators=False, + ) -> Generator[str, None, None]: + """ + Generator method to split a string using the given expression as a separator. + May be called with optional ``maxsplit`` argument, to limit the number of splits; + and the optional ``include_separators`` argument (default= ``False``), if the separating + matching text should be included in the split results. + + Example:: + + punc = one_of(list(".,;:/-!?")) + print(list(punc.split("This, this?, this sentence, is badly punctuated!"))) + + prints:: + + ['This', ' this', '', ' this sentence', ' is badly punctuated', ''] + """ + includeSeparators = includeSeparators or include_separators + last = 0 + for t, s, e in self.scan_string(instring, max_matches=maxsplit): + yield instring[last:s] + if includeSeparators: + yield t[0] + last = e + yield instring[last:] + + def __add__(self, other) -> "ParserElement": + """ + Implementation of ``+`` operator - returns :class:`And`. Adding strings to a :class:`ParserElement` + converts them to :class:`Literal`s by default. + + Example:: + + greet = Word(alphas) + "," + Word(alphas) + "!" + hello = "Hello, World!" + print(hello, "->", greet.parse_string(hello)) + + prints:: + + Hello, World! -> ['Hello', ',', 'World', '!'] + + ``...`` may be used as a parse expression as a short form of :class:`SkipTo`. + + Literal('start') + ... + Literal('end') + + is equivalent to: + + Literal('start') + SkipTo('end')("_skipped*") + Literal('end') + + Note that the skipped text is returned with '_skipped' as a results name, + and to support having multiple skips in the same parser, the value returned is + a list of all skipped text. + """ + if other is Ellipsis: + return _PendingSkip(self) + + if isinstance(other, str_type): + other = self._literalStringClass(other) + if not isinstance(other, ParserElement): + raise TypeError( + "Cannot combine element of type {} with ParserElement".format( + type(other).__name__ + ) + ) + return And([self, other]) + + def __radd__(self, other) -> "ParserElement": + """ + Implementation of ``+`` operator when left operand is not a :class:`ParserElement` + """ + if other is Ellipsis: + return SkipTo(self)("_skipped*") + self + + if isinstance(other, str_type): + other = self._literalStringClass(other) + if not isinstance(other, ParserElement): + raise TypeError( + "Cannot combine element of type {} with ParserElement".format( + type(other).__name__ + ) + ) + return other + self + + def __sub__(self, other) -> "ParserElement": + """ + Implementation of ``-`` operator, returns :class:`And` with error stop + """ + if isinstance(other, str_type): + other = self._literalStringClass(other) + if not isinstance(other, ParserElement): + raise TypeError( + "Cannot combine element of type {} with ParserElement".format( + type(other).__name__ + ) + ) + return self + And._ErrorStop() + other + + def __rsub__(self, other) -> "ParserElement": + """ + Implementation of ``-`` operator when left operand is not a :class:`ParserElement` + """ + if isinstance(other, str_type): + other = self._literalStringClass(other) + if not isinstance(other, ParserElement): + raise TypeError( + "Cannot combine element of type {} with ParserElement".format( + type(other).__name__ + ) + ) + return other - self + + def __mul__(self, other) -> "ParserElement": + """ + Implementation of ``*`` operator, allows use of ``expr * 3`` in place of + ``expr + expr + expr``. Expressions may also be multiplied by a 2-integer + tuple, similar to ``{min, max}`` multipliers in regular expressions. Tuples + may also include ``None`` as in: + - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent + to ``expr*n + ZeroOrMore(expr)`` + (read as "at least n instances of ``expr``") + - ``expr*(None, n)`` is equivalent to ``expr*(0, n)`` + (read as "0 to n instances of ``expr``") + - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)`` + - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)`` + + Note that ``expr*(None, n)`` does not raise an exception if + more than n exprs exist in the input stream; that is, + ``expr*(None, n)`` does not enforce a maximum number of expr + occurrences. If this behavior is desired, then write + ``expr*(None, n) + ~expr`` + """ + if other is Ellipsis: + other = (0, None) + elif isinstance(other, tuple) and other[:1] == (Ellipsis,): + other = ((0,) + other[1:] + (None,))[:2] + + if isinstance(other, int): + minElements, optElements = other, 0 + elif isinstance(other, tuple): + other = tuple(o if o is not Ellipsis else None for o in other) + other = (other + (None, None))[:2] + if other[0] is None: + other = (0, other[1]) + if isinstance(other[0], int) and other[1] is None: + if other[0] == 0: + return ZeroOrMore(self) + if other[0] == 1: + return OneOrMore(self) + else: + return self * other[0] + ZeroOrMore(self) + elif isinstance(other[0], int) and isinstance(other[1], int): + minElements, optElements = other + optElements -= minElements + else: + raise TypeError( + "cannot multiply ParserElement and ({}) objects".format( + ",".join(type(item).__name__ for item in other) + ) + ) + else: + raise TypeError( + "cannot multiply ParserElement and {} objects".format( + type(other).__name__ + ) + ) + + if minElements < 0: + raise ValueError("cannot multiply ParserElement by negative value") + if optElements < 0: + raise ValueError( + "second tuple value must be greater or equal to first tuple value" + ) + if minElements == optElements == 0: + return And([]) + + if optElements: + + def makeOptionalList(n): + if n > 1: + return Opt(self + makeOptionalList(n - 1)) + else: + return Opt(self) + + if minElements: + if minElements == 1: + ret = self + makeOptionalList(optElements) + else: + ret = And([self] * minElements) + makeOptionalList(optElements) + else: + ret = makeOptionalList(optElements) + else: + if minElements == 1: + ret = self + else: + ret = And([self] * minElements) + return ret + + def __rmul__(self, other) -> "ParserElement": + return self.__mul__(other) + + def __or__(self, other) -> "ParserElement": + """ + Implementation of ``|`` operator - returns :class:`MatchFirst` + """ + if other is Ellipsis: + return _PendingSkip(self, must_skip=True) + + if isinstance(other, str_type): + other = self._literalStringClass(other) + if not isinstance(other, ParserElement): + raise TypeError( + "Cannot combine element of type {} with ParserElement".format( + type(other).__name__ + ) + ) + return MatchFirst([self, other]) + + def __ror__(self, other) -> "ParserElement": + """ + Implementation of ``|`` operator when left operand is not a :class:`ParserElement` + """ + if isinstance(other, str_type): + other = self._literalStringClass(other) + if not isinstance(other, ParserElement): + raise TypeError( + "Cannot combine element of type {} with ParserElement".format( + type(other).__name__ + ) + ) + return other | self + + def __xor__(self, other) -> "ParserElement": + """ + Implementation of ``^`` operator - returns :class:`Or` + """ + if isinstance(other, str_type): + other = self._literalStringClass(other) + if not isinstance(other, ParserElement): + raise TypeError( + "Cannot combine element of type {} with ParserElement".format( + type(other).__name__ + ) + ) + return Or([self, other]) + + def __rxor__(self, other) -> "ParserElement": + """ + Implementation of ``^`` operator when left operand is not a :class:`ParserElement` + """ + if isinstance(other, str_type): + other = self._literalStringClass(other) + if not isinstance(other, ParserElement): + raise TypeError( + "Cannot combine element of type {} with ParserElement".format( + type(other).__name__ + ) + ) + return other ^ self + + def __and__(self, other) -> "ParserElement": + """ + Implementation of ``&`` operator - returns :class:`Each` + """ + if isinstance(other, str_type): + other = self._literalStringClass(other) + if not isinstance(other, ParserElement): + raise TypeError( + "Cannot combine element of type {} with ParserElement".format( + type(other).__name__ + ) + ) + return Each([self, other]) + + def __rand__(self, other) -> "ParserElement": + """ + Implementation of ``&`` operator when left operand is not a :class:`ParserElement` + """ + if isinstance(other, str_type): + other = self._literalStringClass(other) + if not isinstance(other, ParserElement): + raise TypeError( + "Cannot combine element of type {} with ParserElement".format( + type(other).__name__ + ) + ) + return other & self + + def __invert__(self) -> "ParserElement": + """ + Implementation of ``~`` operator - returns :class:`NotAny` + """ + return NotAny(self) + + # disable __iter__ to override legacy use of sequential access to __getitem__ to + # iterate over a sequence + __iter__ = None + + def __getitem__(self, key): + """ + use ``[]`` indexing notation as a short form for expression repetition: + + - ``expr[n]`` is equivalent to ``expr*n`` + - ``expr[m, n]`` is equivalent to ``expr*(m, n)`` + - ``expr[n, ...]`` or ``expr[n,]`` is equivalent + to ``expr*n + ZeroOrMore(expr)`` + (read as "at least n instances of ``expr``") + - ``expr[..., n]`` is equivalent to ``expr*(0, n)`` + (read as "0 to n instances of ``expr``") + - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)`` + - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)`` + + ``None`` may be used in place of ``...``. + + Note that ``expr[..., n]`` and ``expr[m, n]``do not raise an exception + if more than ``n`` ``expr``s exist in the input stream. If this behavior is + desired, then write ``expr[..., n] + ~expr``. + """ + + # convert single arg keys to tuples + try: + if isinstance(key, str_type): + key = (key,) + iter(key) + except TypeError: + key = (key, key) + + if len(key) > 2: + raise TypeError( + "only 1 or 2 index arguments supported ({}{})".format( + key[:5], "... [{}]".format(len(key)) if len(key) > 5 else "" + ) + ) + + # clip to 2 elements + ret = self * tuple(key[:2]) + return ret + + def __call__(self, name: str = None) -> "ParserElement": + """ + Shortcut for :class:`set_results_name`, with ``list_all_matches=False``. + + If ``name`` is given with a trailing ``'*'`` character, then ``list_all_matches`` will be + passed as ``True``. + + If ``name` is omitted, same as calling :class:`copy`. + + Example:: + + # these are equivalent + userdata = Word(alphas).set_results_name("name") + Word(nums + "-").set_results_name("socsecno") + userdata = Word(alphas)("name") + Word(nums + "-")("socsecno") + """ + if name is not None: + return self._setResultsName(name) + else: + return self.copy() + + def suppress(self) -> "ParserElement": + """ + Suppresses the output of this :class:`ParserElement`; useful to keep punctuation from + cluttering up returned output. + """ + return Suppress(self) + + def ignore_whitespace(self, recursive: bool = True) -> "ParserElement": + """ + Enables the skipping of whitespace before matching the characters in the + :class:`ParserElement`'s defined pattern. + + :param recursive: If ``True`` (the default), also enable whitespace skipping in child elements (if any) + """ + self.skipWhitespace = True + return self + + def leave_whitespace(self, recursive: bool = True) -> "ParserElement": + """ + Disables the skipping of whitespace before matching the characters in the + :class:`ParserElement`'s defined pattern. This is normally only used internally by + the pyparsing module, but may be needed in some whitespace-sensitive grammars. + + :param recursive: If true (the default), also disable whitespace skipping in child elements (if any) + """ + self.skipWhitespace = False + return self + + def set_whitespace_chars( + self, chars: Union[Set[str], str], copy_defaults: bool = False + ) -> "ParserElement": + """ + Overrides the default whitespace chars + """ + self.skipWhitespace = True + self.whiteChars = set(chars) + self.copyDefaultWhiteChars = copy_defaults + return self + + def parse_with_tabs(self) -> "ParserElement": + """ + Overrides default behavior to expand ```` s to spaces before parsing the input string. + Must be called before ``parse_string`` when the input grammar contains elements that + match ```` characters. + """ + self.keepTabs = True + return self + + def ignore(self, other: "ParserElement") -> "ParserElement": + """ + Define expression to be ignored (e.g., comments) while doing pattern + matching; may be called repeatedly, to define multiple comment or other + ignorable patterns. + + Example:: + + patt = Word(alphas)[1, ...] + patt.parse_string('ablaj /* comment */ lskjd') + # -> ['ablaj'] + + patt.ignore(c_style_comment) + patt.parse_string('ablaj /* comment */ lskjd') + # -> ['ablaj', 'lskjd'] + """ + import typing + + if isinstance(other, str_type): + other = Suppress(other) + + if isinstance(other, Suppress): + if other not in self.ignoreExprs: + self.ignoreExprs.append(other) + else: + self.ignoreExprs.append(Suppress(other.copy())) + return self + + def set_debug_actions( + self, + start_action: DebugStartAction, + success_action: DebugSuccessAction, + exception_action: DebugExceptionAction, + ) -> "ParserElement": + """ + Customize display of debugging messages while doing pattern matching: + + - ``start_action`` - method to be called when an expression is about to be parsed; + should have the signature ``fn(input_string: str, location: int, expression: ParserElement, cache_hit: bool)`` + + - ``success_action`` - method to be called when an expression has successfully parsed; + should have the signature ``fn(input_string: str, start_location: int, end_location: int, expression: ParserELement, parsed_tokens: ParseResults, cache_hit: bool)`` + + - ``exception_action`` - method to be called when expression fails to parse; + should have the signature ``fn(input_string: str, location: int, expression: ParserElement, exception: Exception, cache_hit: bool)`` + """ + self.debugActions = self.DebugActions( + start_action or _default_start_debug_action, + success_action or _default_success_debug_action, + exception_action or _default_exception_debug_action, + ) + self.debug = True + return self + + def set_debug(self, flag: bool = True) -> "ParserElement": + """ + Enable display of debugging messages while doing pattern matching. + Set ``flag`` to ``True`` to enable, ``False`` to disable. + + Example:: + + wd = Word(alphas).set_name("alphaword") + integer = Word(nums).set_name("numword") + term = wd | integer + + # turn on debugging for wd + wd.set_debug() + + term[1, ...].parse_string("abc 123 xyz 890") + + prints:: + + Match alphaword at loc 0(1,1) + Matched alphaword -> ['abc'] + Match alphaword at loc 3(1,4) + Exception raised:Expected alphaword (at char 4), (line:1, col:5) + Match alphaword at loc 7(1,8) + Matched alphaword -> ['xyz'] + Match alphaword at loc 11(1,12) + Exception raised:Expected alphaword (at char 12), (line:1, col:13) + Match alphaword at loc 15(1,16) + Exception raised:Expected alphaword (at char 15), (line:1, col:16) + + The output shown is that produced by the default debug actions - custom debug actions can be + specified using :class:`set_debug_actions`. Prior to attempting + to match the ``wd`` expression, the debugging message ``"Match at loc (,)"`` + is shown. Then if the parse succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"`` + message is shown. Also note the use of :class:`set_name` to assign a human-readable name to the expression, + which makes debugging and exception messages easier to understand - for instance, the default + name created for the :class:`Word` expression without calling ``set_name`` is ``"W:(A-Za-z)"``. + """ + if flag: + self.set_debug_actions( + _default_start_debug_action, + _default_success_debug_action, + _default_exception_debug_action, + ) + else: + self.debug = False + return self + + @property + def default_name(self) -> str: + if self._defaultName is None: + self._defaultName = self._generateDefaultName() + return self._defaultName + + @abstractmethod + def _generateDefaultName(self): + """ + Child classes must define this method, which defines how the ``default_name`` is set. + """ + + def set_name(self, name: str) -> "ParserElement": + """ + Define name for this expression, makes debugging and exception messages clearer. + Example:: + Word(nums).parse_string("ABC") # -> Exception: Expected W:(0-9) (at char 0), (line:1, col:1) + Word(nums).set_name("integer").parse_string("ABC") # -> Exception: Expected integer (at char 0), (line:1, col:1) + """ + self.customName = name + self.errmsg = "Expected " + self.name + if __diag__.enable_debug_on_named_expressions: + self.set_debug() + return self + + @property + def name(self) -> str: + # This will use a user-defined name if available, but otherwise defaults back to the auto-generated name + return self.customName if self.customName is not None else self.default_name + + def __str__(self) -> str: + return self.name + + def __repr__(self) -> str: + return str(self) + + def streamline(self) -> "ParserElement": + self.streamlined = True + self._defaultName = None + return self + + def recurse(self) -> Sequence["ParserElement"]: + return [] + + def _checkRecursion(self, parseElementList): + subRecCheckList = parseElementList[:] + [self] + for e in self.recurse(): + e._checkRecursion(subRecCheckList) + + def validate(self, validateTrace=None) -> None: + """ + Check defined expressions for valid structure, check for infinite recursive definitions. + """ + self._checkRecursion([]) + + def parse_file( + self, + file_or_filename: Union[str, Path, TextIO], + encoding: str = "utf-8", + parse_all: bool = False, + *, + parseAll: bool = False, + ) -> ParseResults: + """ + Execute the parse expression on the given file or filename. + If a filename is specified (instead of a file object), + the entire file is opened, read, and closed before parsing. + """ + parseAll = parseAll or parse_all + try: + file_contents = file_or_filename.read() + except AttributeError: + with open(file_or_filename, "r", encoding=encoding) as f: + file_contents = f.read() + try: + return self.parse_string(file_contents, parseAll) + except ParseBaseException as exc: + if ParserElement.verbose_stacktrace: + raise + else: + # catch and re-raise exception from here, clears out pyparsing internal stack trace + raise exc.with_traceback(None) + + def __eq__(self, other): + if self is other: + return True + elif isinstance(other, str_type): + return self.matches(other, parse_all=True) + elif isinstance(other, ParserElement): + return vars(self) == vars(other) + return False + + def __hash__(self): + return id(self) + + def matches( + self, test_string: str, parse_all: bool = True, *, parseAll: bool = True + ) -> bool: + """ + Method for quick testing of a parser against a test string. Good for simple + inline microtests of sub expressions while building up larger parser. + + Parameters: + - ``test_string`` - to test against this expression for a match + - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests + + Example:: + + expr = Word(nums) + assert expr.matches("100") + """ + parseAll = parseAll and parse_all + try: + self.parse_string(str(test_string), parse_all=parseAll) + return True + except ParseBaseException: + return False + + def run_tests( + self, + tests: Union[str, List[str]], + parse_all: bool = True, + comment: typing.Optional[Union["ParserElement", str]] = "#", + full_dump: bool = True, + print_results: bool = True, + failure_tests: bool = False, + post_parse: Callable[[str, ParseResults], str] = None, + file: typing.Optional[TextIO] = None, + with_line_numbers: bool = False, + *, + parseAll: bool = True, + fullDump: bool = True, + printResults: bool = True, + failureTests: bool = False, + postParse: Callable[[str, ParseResults], str] = None, + ) -> Tuple[bool, List[Tuple[str, Union[ParseResults, Exception]]]]: + """ + Execute the parse expression on a series of test strings, showing each + test, the parsed results or where the parse failed. Quick and easy way to + run a parse expression against a list of sample strings. + + Parameters: + - ``tests`` - a list of separate test strings, or a multiline string of test strings + - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests + - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test + string; pass None to disable comment filtering + - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline; + if False, only dump nested list + - ``print_results`` - (default= ``True``) prints test output to stdout + - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing + - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as + `fn(test_string, parse_results)` and returns a string to be added to the test output + - ``file`` - (default= ``None``) optional file-like object to which test output will be written; + if None, will default to ``sys.stdout`` + - ``with_line_numbers`` - default= ``False``) show test strings with line and column numbers + + Returns: a (success, results) tuple, where success indicates that all tests succeeded + (or failed if ``failure_tests`` is True), and the results contain a list of lines of each + test's output + + Example:: + + number_expr = pyparsing_common.number.copy() + + result = number_expr.run_tests(''' + # unsigned integer + 100 + # negative integer + -100 + # float with scientific notation + 6.02e23 + # integer with scientific notation + 1e-12 + ''') + print("Success" if result[0] else "Failed!") + + result = number_expr.run_tests(''' + # stray character + 100Z + # missing leading digit before '.' + -.100 + # too many '.' + 3.14.159 + ''', failure_tests=True) + print("Success" if result[0] else "Failed!") + + prints:: + + # unsigned integer + 100 + [100] + + # negative integer + -100 + [-100] + + # float with scientific notation + 6.02e23 + [6.02e+23] + + # integer with scientific notation + 1e-12 + [1e-12] + + Success + + # stray character + 100Z + ^ + FAIL: Expected end of text (at char 3), (line:1, col:4) + + # missing leading digit before '.' + -.100 + ^ + FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1) + + # too many '.' + 3.14.159 + ^ + FAIL: Expected end of text (at char 4), (line:1, col:5) + + Success + + Each test string must be on a single line. If you want to test a string that spans multiple + lines, create a test like this:: + + expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines") + + (Note that this is a raw string literal, you must include the leading ``'r'``.) + """ + from .testing import pyparsing_test + + parseAll = parseAll and parse_all + fullDump = fullDump and full_dump + printResults = printResults and print_results + failureTests = failureTests or failure_tests + postParse = postParse or post_parse + if isinstance(tests, str_type): + line_strip = type(tests).strip + tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()] + if isinstance(comment, str_type): + comment = Literal(comment) + if file is None: + file = sys.stdout + print_ = file.write + + result: Union[ParseResults, Exception] + allResults = [] + comments = [] + success = True + NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string) + BOM = "\ufeff" + for t in tests: + if comment is not None and comment.matches(t, False) or comments and not t: + comments.append( + pyparsing_test.with_line_numbers(t) if with_line_numbers else t + ) + continue + if not t: + continue + out = [ + "\n" + "\n".join(comments) if comments else "", + pyparsing_test.with_line_numbers(t) if with_line_numbers else t, + ] + comments = [] + try: + # convert newline marks to actual newlines, and strip leading BOM if present + t = NL.transform_string(t.lstrip(BOM)) + result = self.parse_string(t, parse_all=parseAll) + except ParseBaseException as pe: + fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else "" + out.append(pe.explain()) + out.append("FAIL: " + str(pe)) + if ParserElement.verbose_stacktrace: + out.extend(traceback.format_tb(pe.__traceback__)) + success = success and failureTests + result = pe + except Exception as exc: + out.append("FAIL-EXCEPTION: {}: {}".format(type(exc).__name__, exc)) + if ParserElement.verbose_stacktrace: + out.extend(traceback.format_tb(exc.__traceback__)) + success = success and failureTests + result = exc + else: + success = success and not failureTests + if postParse is not None: + try: + pp_value = postParse(t, result) + if pp_value is not None: + if isinstance(pp_value, ParseResults): + out.append(pp_value.dump()) + else: + out.append(str(pp_value)) + else: + out.append(result.dump()) + except Exception as e: + out.append(result.dump(full=fullDump)) + out.append( + "{} failed: {}: {}".format( + postParse.__name__, type(e).__name__, e + ) + ) + else: + out.append(result.dump(full=fullDump)) + out.append("") + + if printResults: + print_("\n".join(out)) + + allResults.append((t, result)) + + return success, allResults + + def create_diagram( + self, + output_html: Union[TextIO, Path, str], + vertical: int = 3, + show_results_names: bool = False, + show_groups: bool = False, + **kwargs, + ) -> None: + """ + Create a railroad diagram for the parser. + + Parameters: + - output_html (str or file-like object) - output target for generated + diagram HTML + - vertical (int) - threshold for formatting multiple alternatives vertically + instead of horizontally (default=3) + - show_results_names - bool flag whether diagram should show annotations for + defined results names + - show_groups - bool flag whether groups should be highlighted with an unlabeled surrounding box + Additional diagram-formatting keyword arguments can also be included; + see railroad.Diagram class. + """ + + try: + from .diagram import to_railroad, railroad_to_html + except ImportError as ie: + raise Exception( + "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams" + ) from ie + + self.streamline() + + railroad = to_railroad( + self, + vertical=vertical, + show_results_names=show_results_names, + show_groups=show_groups, + diagram_kwargs=kwargs, + ) + if isinstance(output_html, (str, Path)): + with open(output_html, "w", encoding="utf-8") as diag_file: + diag_file.write(railroad_to_html(railroad)) + else: + # we were passed a file-like object, just write to it + output_html.write(railroad_to_html(railroad)) + + setDefaultWhitespaceChars = set_default_whitespace_chars + inlineLiteralsUsing = inline_literals_using + setResultsName = set_results_name + setBreak = set_break + setParseAction = set_parse_action + addParseAction = add_parse_action + addCondition = add_condition + setFailAction = set_fail_action + tryParse = try_parse + canParseNext = can_parse_next + resetCache = reset_cache + enableLeftRecursion = enable_left_recursion + enablePackrat = enable_packrat + parseString = parse_string + scanString = scan_string + searchString = search_string + transformString = transform_string + setWhitespaceChars = set_whitespace_chars + parseWithTabs = parse_with_tabs + setDebugActions = set_debug_actions + setDebug = set_debug + defaultName = default_name + setName = set_name + parseFile = parse_file + runTests = run_tests + ignoreWhitespace = ignore_whitespace + leaveWhitespace = leave_whitespace + + +class _PendingSkip(ParserElement): + # internal placeholder class to hold a place were '...' is added to a parser element, + # once another ParserElement is added, this placeholder will be replaced with a SkipTo + def __init__(self, expr: ParserElement, must_skip: bool = False): + super().__init__() + self.anchor = expr + self.must_skip = must_skip + + def _generateDefaultName(self): + return str(self.anchor + Empty()).replace("Empty", "...") + + def __add__(self, other) -> "ParserElement": + skipper = SkipTo(other).set_name("...")("_skipped*") + if self.must_skip: + + def must_skip(t): + if not t._skipped or t._skipped.as_list() == [""]: + del t[0] + t.pop("_skipped", None) + + def show_skip(t): + if t._skipped.as_list()[-1:] == [""]: + t.pop("_skipped") + t["_skipped"] = "missing <" + repr(self.anchor) + ">" + + return ( + self.anchor + skipper().add_parse_action(must_skip) + | skipper().add_parse_action(show_skip) + ) + other + + return self.anchor + skipper + other + + def __repr__(self): + return self.defaultName + + def parseImpl(self, *args): + raise Exception( + "use of `...` expression without following SkipTo target expression" + ) + + +class Token(ParserElement): + """Abstract :class:`ParserElement` subclass, for defining atomic + matching patterns. + """ + + def __init__(self): + super().__init__(savelist=False) + + def _generateDefaultName(self): + return type(self).__name__ + + +class Empty(Token): + """ + An empty token, will always match. + """ + + def __init__(self): + super().__init__() + self.mayReturnEmpty = True + self.mayIndexError = False + + +class NoMatch(Token): + """ + A token that will never match. + """ + + def __init__(self): + super().__init__() + self.mayReturnEmpty = True + self.mayIndexError = False + self.errmsg = "Unmatchable token" + + def parseImpl(self, instring, loc, doActions=True): + raise ParseException(instring, loc, self.errmsg, self) + + +class Literal(Token): + """ + Token to exactly match a specified string. + + Example:: + + Literal('blah').parse_string('blah') # -> ['blah'] + Literal('blah').parse_string('blahfooblah') # -> ['blah'] + Literal('blah').parse_string('bla') # -> Exception: Expected "blah" + + For case-insensitive matching, use :class:`CaselessLiteral`. + + For keyword matching (force word break before and after the matched string), + use :class:`Keyword` or :class:`CaselessKeyword`. + """ + + def __init__(self, match_string: str = "", *, matchString: str = ""): + super().__init__() + match_string = matchString or match_string + self.match = match_string + self.matchLen = len(match_string) + try: + self.firstMatchChar = match_string[0] + except IndexError: + raise ValueError("null string passed to Literal; use Empty() instead") + self.errmsg = "Expected " + self.name + self.mayReturnEmpty = False + self.mayIndexError = False + + # Performance tuning: modify __class__ to select + # a parseImpl optimized for single-character check + if self.matchLen == 1 and type(self) is Literal: + self.__class__ = _SingleCharLiteral + + def _generateDefaultName(self): + return repr(self.match) + + def parseImpl(self, instring, loc, doActions=True): + if instring[loc] == self.firstMatchChar and instring.startswith( + self.match, loc + ): + return loc + self.matchLen, self.match + raise ParseException(instring, loc, self.errmsg, self) + + +class _SingleCharLiteral(Literal): + def parseImpl(self, instring, loc, doActions=True): + if instring[loc] == self.firstMatchChar: + return loc + 1, self.match + raise ParseException(instring, loc, self.errmsg, self) + + +ParserElement._literalStringClass = Literal + + +class Keyword(Token): + """ + Token to exactly match a specified string as a keyword, that is, + it must be immediately followed by a non-keyword character. Compare + with :class:`Literal`: + + - ``Literal("if")`` will match the leading ``'if'`` in + ``'ifAndOnlyIf'``. + - ``Keyword("if")`` will not; it will only match the leading + ``'if'`` in ``'if x=1'``, or ``'if(y==2)'`` + + Accepts two optional constructor arguments in addition to the + keyword string: + + - ``identChars`` is a string of characters that would be valid + identifier characters, defaulting to all alphanumerics + "_" and + "$" + - ``caseless`` allows case-insensitive matching, default is ``False``. + + Example:: + + Keyword("start").parse_string("start") # -> ['start'] + Keyword("start").parse_string("starting") # -> Exception + + For case-insensitive matching, use :class:`CaselessKeyword`. + """ + + DEFAULT_KEYWORD_CHARS = alphanums + "_$" + + def __init__( + self, + match_string: str = "", + ident_chars: typing.Optional[str] = None, + caseless: bool = False, + *, + matchString: str = "", + identChars: typing.Optional[str] = None, + ): + super().__init__() + identChars = identChars or ident_chars + if identChars is None: + identChars = Keyword.DEFAULT_KEYWORD_CHARS + match_string = matchString or match_string + self.match = match_string + self.matchLen = len(match_string) + try: + self.firstMatchChar = match_string[0] + except IndexError: + raise ValueError("null string passed to Keyword; use Empty() instead") + self.errmsg = "Expected {} {}".format(type(self).__name__, self.name) + self.mayReturnEmpty = False + self.mayIndexError = False + self.caseless = caseless + if caseless: + self.caselessmatch = match_string.upper() + identChars = identChars.upper() + self.identChars = set(identChars) + + def _generateDefaultName(self): + return repr(self.match) + + def parseImpl(self, instring, loc, doActions=True): + errmsg = self.errmsg + errloc = loc + if self.caseless: + if instring[loc : loc + self.matchLen].upper() == self.caselessmatch: + if loc == 0 or instring[loc - 1].upper() not in self.identChars: + if ( + loc >= len(instring) - self.matchLen + or instring[loc + self.matchLen].upper() not in self.identChars + ): + return loc + self.matchLen, self.match + else: + # followed by keyword char + errmsg += ", was immediately followed by keyword character" + errloc = loc + self.matchLen + else: + # preceded by keyword char + errmsg += ", keyword was immediately preceded by keyword character" + errloc = loc - 1 + # else no match just raise plain exception + + else: + if ( + instring[loc] == self.firstMatchChar + and self.matchLen == 1 + or instring.startswith(self.match, loc) + ): + if loc == 0 or instring[loc - 1] not in self.identChars: + if ( + loc >= len(instring) - self.matchLen + or instring[loc + self.matchLen] not in self.identChars + ): + return loc + self.matchLen, self.match + else: + # followed by keyword char + errmsg += ( + ", keyword was immediately followed by keyword character" + ) + errloc = loc + self.matchLen + else: + # preceded by keyword char + errmsg += ", keyword was immediately preceded by keyword character" + errloc = loc - 1 + # else no match just raise plain exception + + raise ParseException(instring, errloc, errmsg, self) + + @staticmethod + def set_default_keyword_chars(chars) -> None: + """ + Overrides the default characters used by :class:`Keyword` expressions. + """ + Keyword.DEFAULT_KEYWORD_CHARS = chars + + setDefaultKeywordChars = set_default_keyword_chars + + +class CaselessLiteral(Literal): + """ + Token to match a specified string, ignoring case of letters. + Note: the matched results will always be in the case of the given + match string, NOT the case of the input text. + + Example:: + + CaselessLiteral("CMD")[1, ...].parse_string("cmd CMD Cmd10") + # -> ['CMD', 'CMD', 'CMD'] + + (Contrast with example for :class:`CaselessKeyword`.) + """ + + def __init__(self, match_string: str = "", *, matchString: str = ""): + match_string = matchString or match_string + super().__init__(match_string.upper()) + # Preserve the defining literal. + self.returnString = match_string + self.errmsg = "Expected " + self.name + + def parseImpl(self, instring, loc, doActions=True): + if instring[loc : loc + self.matchLen].upper() == self.match: + return loc + self.matchLen, self.returnString + raise ParseException(instring, loc, self.errmsg, self) + + +class CaselessKeyword(Keyword): + """ + Caseless version of :class:`Keyword`. + + Example:: + + CaselessKeyword("CMD")[1, ...].parse_string("cmd CMD Cmd10") + # -> ['CMD', 'CMD'] + + (Contrast with example for :class:`CaselessLiteral`.) + """ + + def __init__( + self, + match_string: str = "", + ident_chars: typing.Optional[str] = None, + *, + matchString: str = "", + identChars: typing.Optional[str] = None, + ): + identChars = identChars or ident_chars + match_string = matchString or match_string + super().__init__(match_string, identChars, caseless=True) + + +class CloseMatch(Token): + """A variation on :class:`Literal` which matches "close" matches, + that is, strings with at most 'n' mismatching characters. + :class:`CloseMatch` takes parameters: + + - ``match_string`` - string to be matched + - ``caseless`` - a boolean indicating whether to ignore casing when comparing characters + - ``max_mismatches`` - (``default=1``) maximum number of + mismatches allowed to count as a match + + The results from a successful parse will contain the matched text + from the input string and the following named results: + + - ``mismatches`` - a list of the positions within the + match_string where mismatches were found + - ``original`` - the original match_string used to compare + against the input string + + If ``mismatches`` is an empty list, then the match was an exact + match. + + Example:: + + patt = CloseMatch("ATCATCGAATGGA") + patt.parse_string("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']}) + patt.parse_string("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1) + + # exact match + patt.parse_string("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']}) + + # close match allowing up to 2 mismatches + patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2) + patt.parse_string("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']}) + """ + + def __init__( + self, + match_string: str, + max_mismatches: int = None, + *, + maxMismatches: int = 1, + caseless=False, + ): + maxMismatches = max_mismatches if max_mismatches is not None else maxMismatches + super().__init__() + self.match_string = match_string + self.maxMismatches = maxMismatches + self.errmsg = "Expected {!r} (with up to {} mismatches)".format( + self.match_string, self.maxMismatches + ) + self.caseless = caseless + self.mayIndexError = False + self.mayReturnEmpty = False + + def _generateDefaultName(self): + return "{}:{!r}".format(type(self).__name__, self.match_string) + + def parseImpl(self, instring, loc, doActions=True): + start = loc + instrlen = len(instring) + maxloc = start + len(self.match_string) + + if maxloc <= instrlen: + match_string = self.match_string + match_stringloc = 0 + mismatches = [] + maxMismatches = self.maxMismatches + + for match_stringloc, s_m in enumerate( + zip(instring[loc:maxloc], match_string) + ): + src, mat = s_m + if self.caseless: + src, mat = src.lower(), mat.lower() + + if src != mat: + mismatches.append(match_stringloc) + if len(mismatches) > maxMismatches: + break + else: + loc = start + match_stringloc + 1 + results = ParseResults([instring[start:loc]]) + results["original"] = match_string + results["mismatches"] = mismatches + return loc, results + + raise ParseException(instring, loc, self.errmsg, self) + + +class Word(Token): + """Token for matching words composed of allowed character sets. + Parameters: + - ``init_chars`` - string of all characters that should be used to + match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.; + if ``body_chars`` is also specified, then this is the string of + initial characters + - ``body_chars`` - string of characters that + can be used for matching after a matched initial character as + given in ``init_chars``; if omitted, same as the initial characters + (default=``None``) + - ``min`` - minimum number of characters to match (default=1) + - ``max`` - maximum number of characters to match (default=0) + - ``exact`` - exact number of characters to match (default=0) + - ``as_keyword`` - match as a keyword (default=``False``) + - ``exclude_chars`` - characters that might be + found in the input ``body_chars`` string but which should not be + accepted for matching ;useful to define a word of all + printables except for one or two characters, for instance + (default=``None``) + + :class:`srange` is useful for defining custom character set strings + for defining :class:`Word` expressions, using range notation from + regular expression character sets. + + A common mistake is to use :class:`Word` to match a specific literal + string, as in ``Word("Address")``. Remember that :class:`Word` + uses the string argument to define *sets* of matchable characters. + This expression would match "Add", "AAA", "dAred", or any other word + made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an + exact literal string, use :class:`Literal` or :class:`Keyword`. + + pyparsing includes helper strings for building Words: + + - :class:`alphas` + - :class:`nums` + - :class:`alphanums` + - :class:`hexnums` + - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255 + - accented, tilded, umlauted, etc.) + - :class:`punc8bit` (non-alphabetic characters in ASCII range + 128-255 - currency, symbols, superscripts, diacriticals, etc.) + - :class:`printables` (any non-whitespace character) + + ``alphas``, ``nums``, and ``printables`` are also defined in several + Unicode sets - see :class:`pyparsing_unicode``. + + Example:: + + # a word composed of digits + integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9")) + + # a word with a leading capital, and zero or more lowercase + capital_word = Word(alphas.upper(), alphas.lower()) + + # hostnames are alphanumeric, with leading alpha, and '-' + hostname = Word(alphas, alphanums + '-') + + # roman numeral (not a strict parser, accepts invalid mix of characters) + roman = Word("IVXLCDM") + + # any string of non-whitespace characters, except for ',' + csv_value = Word(printables, exclude_chars=",") + """ + + def __init__( + self, + init_chars: str = "", + body_chars: typing.Optional[str] = None, + min: int = 1, + max: int = 0, + exact: int = 0, + as_keyword: bool = False, + exclude_chars: typing.Optional[str] = None, + *, + initChars: typing.Optional[str] = None, + bodyChars: typing.Optional[str] = None, + asKeyword: bool = False, + excludeChars: typing.Optional[str] = None, + ): + initChars = initChars or init_chars + bodyChars = bodyChars or body_chars + asKeyword = asKeyword or as_keyword + excludeChars = excludeChars or exclude_chars + super().__init__() + if not initChars: + raise ValueError( + "invalid {}, initChars cannot be empty string".format( + type(self).__name__ + ) + ) + + initChars = set(initChars) + self.initChars = initChars + if excludeChars: + excludeChars = set(excludeChars) + initChars -= excludeChars + if bodyChars: + bodyChars = set(bodyChars) - excludeChars + self.initCharsOrig = "".join(sorted(initChars)) + + if bodyChars: + self.bodyCharsOrig = "".join(sorted(bodyChars)) + self.bodyChars = set(bodyChars) + else: + self.bodyCharsOrig = "".join(sorted(initChars)) + self.bodyChars = set(initChars) + + self.maxSpecified = max > 0 + + if min < 1: + raise ValueError( + "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted" + ) + + self.minLen = min + + if max > 0: + self.maxLen = max + else: + self.maxLen = _MAX_INT + + if exact > 0: + self.maxLen = exact + self.minLen = exact + + self.errmsg = "Expected " + self.name + self.mayIndexError = False + self.asKeyword = asKeyword + + # see if we can make a regex for this Word + if " " not in self.initChars | self.bodyChars and (min == 1 and exact == 0): + if self.bodyChars == self.initChars: + if max == 0: + repeat = "+" + elif max == 1: + repeat = "" + else: + repeat = "{{{},{}}}".format( + self.minLen, "" if self.maxLen == _MAX_INT else self.maxLen + ) + self.reString = "[{}]{}".format( + _collapse_string_to_ranges(self.initChars), + repeat, + ) + elif len(self.initChars) == 1: + if max == 0: + repeat = "*" + else: + repeat = "{{0,{}}}".format(max - 1) + self.reString = "{}[{}]{}".format( + re.escape(self.initCharsOrig), + _collapse_string_to_ranges(self.bodyChars), + repeat, + ) + else: + if max == 0: + repeat = "*" + elif max == 2: + repeat = "" + else: + repeat = "{{0,{}}}".format(max - 1) + self.reString = "[{}][{}]{}".format( + _collapse_string_to_ranges(self.initChars), + _collapse_string_to_ranges(self.bodyChars), + repeat, + ) + if self.asKeyword: + self.reString = r"\b" + self.reString + r"\b" + + try: + self.re = re.compile(self.reString) + except re.error: + self.re = None + else: + self.re_match = self.re.match + self.__class__ = _WordRegex + + def _generateDefaultName(self): + def charsAsStr(s): + max_repr_len = 16 + s = _collapse_string_to_ranges(s, re_escape=False) + if len(s) > max_repr_len: + return s[: max_repr_len - 3] + "..." + else: + return s + + if self.initChars != self.bodyChars: + base = "W:({}, {})".format( + charsAsStr(self.initChars), charsAsStr(self.bodyChars) + ) + else: + base = "W:({})".format(charsAsStr(self.initChars)) + + # add length specification + if self.minLen > 1 or self.maxLen != _MAX_INT: + if self.minLen == self.maxLen: + if self.minLen == 1: + return base[2:] + else: + return base + "{{{}}}".format(self.minLen) + elif self.maxLen == _MAX_INT: + return base + "{{{},...}}".format(self.minLen) + else: + return base + "{{{},{}}}".format(self.minLen, self.maxLen) + return base + + def parseImpl(self, instring, loc, doActions=True): + if instring[loc] not in self.initChars: + raise ParseException(instring, loc, self.errmsg, self) + + start = loc + loc += 1 + instrlen = len(instring) + bodychars = self.bodyChars + maxloc = start + self.maxLen + maxloc = min(maxloc, instrlen) + while loc < maxloc and instring[loc] in bodychars: + loc += 1 + + throwException = False + if loc - start < self.minLen: + throwException = True + elif self.maxSpecified and loc < instrlen and instring[loc] in bodychars: + throwException = True + elif self.asKeyword: + if ( + start > 0 + and instring[start - 1] in bodychars + or loc < instrlen + and instring[loc] in bodychars + ): + throwException = True + + if throwException: + raise ParseException(instring, loc, self.errmsg, self) + + return loc, instring[start:loc] + + +class _WordRegex(Word): + def parseImpl(self, instring, loc, doActions=True): + result = self.re_match(instring, loc) + if not result: + raise ParseException(instring, loc, self.errmsg, self) + + loc = result.end() + return loc, result.group() + + +class Char(_WordRegex): + """A short-cut class for defining :class:`Word` ``(characters, exact=1)``, + when defining a match of any single character in a string of + characters. + """ + + def __init__( + self, + charset: str, + as_keyword: bool = False, + exclude_chars: typing.Optional[str] = None, + *, + asKeyword: bool = False, + excludeChars: typing.Optional[str] = None, + ): + asKeyword = asKeyword or as_keyword + excludeChars = excludeChars or exclude_chars + super().__init__( + charset, exact=1, asKeyword=asKeyword, excludeChars=excludeChars + ) + self.reString = "[{}]".format(_collapse_string_to_ranges(self.initChars)) + if asKeyword: + self.reString = r"\b{}\b".format(self.reString) + self.re = re.compile(self.reString) + self.re_match = self.re.match + + +class Regex(Token): + r"""Token for matching strings that match a given regular + expression. Defined with string specifying the regular expression in + a form recognized by the stdlib Python `re module `_. + If the given regex contains named groups (defined using ``(?P...)``), + these will be preserved as named :class:`ParseResults`. + + If instead of the Python stdlib ``re`` module you wish to use a different RE module + (such as the ``regex`` module), you can do so by building your ``Regex`` object with + a compiled RE that was compiled using ``regex``. + + Example:: + + realnum = Regex(r"[+-]?\d+\.\d*") + # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression + roman = Regex(r"M{0,4}(CM|CD|D?{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})") + + # named fields in a regex will be returned as named results + date = Regex(r'(?P\d{4})-(?P\d\d?)-(?P\d\d?)') + + # the Regex class will accept re's compiled using the regex module + import regex + parser = pp.Regex(regex.compile(r'[0-9]')) + """ + + def __init__( + self, + pattern: Any, + flags: Union[re.RegexFlag, int] = 0, + as_group_list: bool = False, + as_match: bool = False, + *, + asGroupList: bool = False, + asMatch: bool = False, + ): + """The parameters ``pattern`` and ``flags`` are passed + to the ``re.compile()`` function as-is. See the Python + `re module `_ module for an + explanation of the acceptable patterns and flags. + """ + super().__init__() + asGroupList = asGroupList or as_group_list + asMatch = asMatch or as_match + + if isinstance(pattern, str_type): + if not pattern: + raise ValueError("null string passed to Regex; use Empty() instead") + + self._re = None + self.reString = self.pattern = pattern + self.flags = flags + + elif hasattr(pattern, "pattern") and hasattr(pattern, "match"): + self._re = pattern + self.pattern = self.reString = pattern.pattern + self.flags = flags + + else: + raise TypeError( + "Regex may only be constructed with a string or a compiled RE object" + ) + + self.errmsg = "Expected " + self.name + self.mayIndexError = False + self.asGroupList = asGroupList + self.asMatch = asMatch + if self.asGroupList: + self.parseImpl = self.parseImplAsGroupList + if self.asMatch: + self.parseImpl = self.parseImplAsMatch + + @cached_property + def re(self): + if self._re: + return self._re + else: + try: + return re.compile(self.pattern, self.flags) + except re.error: + raise ValueError( + "invalid pattern ({!r}) passed to Regex".format(self.pattern) + ) + + @cached_property + def re_match(self): + return self.re.match + + @cached_property + def mayReturnEmpty(self): + return self.re_match("") is not None + + def _generateDefaultName(self): + return "Re:({})".format(repr(self.pattern).replace("\\\\", "\\")) + + def parseImpl(self, instring, loc, doActions=True): + result = self.re_match(instring, loc) + if not result: + raise ParseException(instring, loc, self.errmsg, self) + + loc = result.end() + ret = ParseResults(result.group()) + d = result.groupdict() + if d: + for k, v in d.items(): + ret[k] = v + return loc, ret + + def parseImplAsGroupList(self, instring, loc, doActions=True): + result = self.re_match(instring, loc) + if not result: + raise ParseException(instring, loc, self.errmsg, self) + + loc = result.end() + ret = result.groups() + return loc, ret + + def parseImplAsMatch(self, instring, loc, doActions=True): + result = self.re_match(instring, loc) + if not result: + raise ParseException(instring, loc, self.errmsg, self) + + loc = result.end() + ret = result + return loc, ret + + def sub(self, repl: str) -> ParserElement: + r""" + Return :class:`Regex` with an attached parse action to transform the parsed + result as if called using `re.sub(expr, repl, string) `_. + + Example:: + + make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2") + print(make_html.transform_string("h1:main title:")) + # prints "

main title

" + """ + if self.asGroupList: + raise TypeError("cannot use sub() with Regex(asGroupList=True)") + + if self.asMatch and callable(repl): + raise TypeError("cannot use sub() with a callable with Regex(asMatch=True)") + + if self.asMatch: + + def pa(tokens): + return tokens[0].expand(repl) + + else: + + def pa(tokens): + return self.re.sub(repl, tokens[0]) + + return self.add_parse_action(pa) + + +class QuotedString(Token): + r""" + Token for matching strings that are delimited by quoting characters. + + Defined with the following parameters: + + - ``quote_char`` - string of one or more characters defining the + quote delimiting string + - ``esc_char`` - character to re_escape quotes, typically backslash + (default= ``None``) + - ``esc_quote`` - special quote sequence to re_escape an embedded quote + string (such as SQL's ``""`` to re_escape an embedded ``"``) + (default= ``None``) + - ``multiline`` - boolean indicating whether quotes can span + multiple lines (default= ``False``) + - ``unquote_results`` - boolean indicating whether the matched text + should be unquoted (default= ``True``) + - ``end_quote_char`` - string of one or more characters defining the + end of the quote delimited string (default= ``None`` => same as + quote_char) + - ``convert_whitespace_escapes`` - convert escaped whitespace + (``'\t'``, ``'\n'``, etc.) to actual whitespace + (default= ``True``) + + Example:: + + qs = QuotedString('"') + print(qs.search_string('lsjdf "This is the quote" sldjf')) + complex_qs = QuotedString('{{', end_quote_char='}}') + print(complex_qs.search_string('lsjdf {{This is the "quote"}} sldjf')) + sql_qs = QuotedString('"', esc_quote='""') + print(sql_qs.search_string('lsjdf "This is the quote with ""embedded"" quotes" sldjf')) + + prints:: + + [['This is the quote']] + [['This is the "quote"']] + [['This is the quote with "embedded" quotes']] + """ + ws_map = ((r"\t", "\t"), (r"\n", "\n"), (r"\f", "\f"), (r"\r", "\r")) + + def __init__( + self, + quote_char: str = "", + esc_char: typing.Optional[str] = None, + esc_quote: typing.Optional[str] = None, + multiline: bool = False, + unquote_results: bool = True, + end_quote_char: typing.Optional[str] = None, + convert_whitespace_escapes: bool = True, + *, + quoteChar: str = "", + escChar: typing.Optional[str] = None, + escQuote: typing.Optional[str] = None, + unquoteResults: bool = True, + endQuoteChar: typing.Optional[str] = None, + convertWhitespaceEscapes: bool = True, + ): + super().__init__() + escChar = escChar or esc_char + escQuote = escQuote or esc_quote + unquoteResults = unquoteResults and unquote_results + endQuoteChar = endQuoteChar or end_quote_char + convertWhitespaceEscapes = ( + convertWhitespaceEscapes and convert_whitespace_escapes + ) + quote_char = quoteChar or quote_char + + # remove white space from quote chars - wont work anyway + quote_char = quote_char.strip() + if not quote_char: + raise ValueError("quote_char cannot be the empty string") + + if endQuoteChar is None: + endQuoteChar = quote_char + else: + endQuoteChar = endQuoteChar.strip() + if not endQuoteChar: + raise ValueError("endQuoteChar cannot be the empty string") + + self.quoteChar = quote_char + self.quoteCharLen = len(quote_char) + self.firstQuoteChar = quote_char[0] + self.endQuoteChar = endQuoteChar + self.endQuoteCharLen = len(endQuoteChar) + self.escChar = escChar + self.escQuote = escQuote + self.unquoteResults = unquoteResults + self.convertWhitespaceEscapes = convertWhitespaceEscapes + + sep = "" + inner_pattern = "" + + if escQuote: + inner_pattern += r"{}(?:{})".format(sep, re.escape(escQuote)) + sep = "|" + + if escChar: + inner_pattern += r"{}(?:{}.)".format(sep, re.escape(escChar)) + sep = "|" + self.escCharReplacePattern = re.escape(self.escChar) + "(.)" + + if len(self.endQuoteChar) > 1: + inner_pattern += ( + "{}(?:".format(sep) + + "|".join( + "(?:{}(?!{}))".format( + re.escape(self.endQuoteChar[:i]), + re.escape(self.endQuoteChar[i:]), + ) + for i in range(len(self.endQuoteChar) - 1, 0, -1) + ) + + ")" + ) + sep = "|" + + if multiline: + self.flags = re.MULTILINE | re.DOTALL + inner_pattern += r"{}(?:[^{}{}])".format( + sep, + _escape_regex_range_chars(self.endQuoteChar[0]), + (_escape_regex_range_chars(escChar) if escChar is not None else ""), + ) + else: + self.flags = 0 + inner_pattern += r"{}(?:[^{}\n\r{}])".format( + sep, + _escape_regex_range_chars(self.endQuoteChar[0]), + (_escape_regex_range_chars(escChar) if escChar is not None else ""), + ) + + self.pattern = "".join( + [ + re.escape(self.quoteChar), + "(?:", + inner_pattern, + ")*", + re.escape(self.endQuoteChar), + ] + ) + + try: + self.re = re.compile(self.pattern, self.flags) + self.reString = self.pattern + self.re_match = self.re.match + except re.error: + raise ValueError( + "invalid pattern {!r} passed to Regex".format(self.pattern) + ) + + self.errmsg = "Expected " + self.name + self.mayIndexError = False + self.mayReturnEmpty = True + + def _generateDefaultName(self): + if self.quoteChar == self.endQuoteChar and isinstance(self.quoteChar, str_type): + return "string enclosed in {!r}".format(self.quoteChar) + + return "quoted string, starting with {} ending with {}".format( + self.quoteChar, self.endQuoteChar + ) + + def parseImpl(self, instring, loc, doActions=True): + result = ( + instring[loc] == self.firstQuoteChar + and self.re_match(instring, loc) + or None + ) + if not result: + raise ParseException(instring, loc, self.errmsg, self) + + loc = result.end() + ret = result.group() + + if self.unquoteResults: + + # strip off quotes + ret = ret[self.quoteCharLen : -self.endQuoteCharLen] + + if isinstance(ret, str_type): + # replace escaped whitespace + if "\\" in ret and self.convertWhitespaceEscapes: + for wslit, wschar in self.ws_map: + ret = ret.replace(wslit, wschar) + + # replace escaped characters + if self.escChar: + ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret) + + # replace escaped quotes + if self.escQuote: + ret = ret.replace(self.escQuote, self.endQuoteChar) + + return loc, ret + + +class CharsNotIn(Token): + """Token for matching words composed of characters *not* in a given + set (will include whitespace in matched characters if not listed in + the provided exclusion set - see example). Defined with string + containing all disallowed characters, and an optional minimum, + maximum, and/or exact length. The default value for ``min`` is + 1 (a minimum value < 1 is not valid); the default values for + ``max`` and ``exact`` are 0, meaning no maximum or exact + length restriction. + + Example:: + + # define a comma-separated-value as anything that is not a ',' + csv_value = CharsNotIn(',') + print(delimited_list(csv_value).parse_string("dkls,lsdkjf,s12 34,@!#,213")) + + prints:: + + ['dkls', 'lsdkjf', 's12 34', '@!#', '213'] + """ + + def __init__( + self, + not_chars: str = "", + min: int = 1, + max: int = 0, + exact: int = 0, + *, + notChars: str = "", + ): + super().__init__() + self.skipWhitespace = False + self.notChars = not_chars or notChars + self.notCharsSet = set(self.notChars) + + if min < 1: + raise ValueError( + "cannot specify a minimum length < 1; use " + "Opt(CharsNotIn()) if zero-length char group is permitted" + ) + + self.minLen = min + + if max > 0: + self.maxLen = max + else: + self.maxLen = _MAX_INT + + if exact > 0: + self.maxLen = exact + self.minLen = exact + + self.errmsg = "Expected " + self.name + self.mayReturnEmpty = self.minLen == 0 + self.mayIndexError = False + + def _generateDefaultName(self): + not_chars_str = _collapse_string_to_ranges(self.notChars) + if len(not_chars_str) > 16: + return "!W:({}...)".format(self.notChars[: 16 - 3]) + else: + return "!W:({})".format(self.notChars) + + def parseImpl(self, instring, loc, doActions=True): + notchars = self.notCharsSet + if instring[loc] in notchars: + raise ParseException(instring, loc, self.errmsg, self) + + start = loc + loc += 1 + maxlen = min(start + self.maxLen, len(instring)) + while loc < maxlen and instring[loc] not in notchars: + loc += 1 + + if loc - start < self.minLen: + raise ParseException(instring, loc, self.errmsg, self) + + return loc, instring[start:loc] + + +class White(Token): + """Special matching class for matching whitespace. Normally, + whitespace is ignored by pyparsing grammars. This class is included + when some whitespace structures are significant. Define with + a string containing the whitespace characters to be matched; default + is ``" \\t\\r\\n"``. Also takes optional ``min``, + ``max``, and ``exact`` arguments, as defined for the + :class:`Word` class. + """ + + whiteStrs = { + " ": "", + "\t": "", + "\n": "", + "\r": "", + "\f": "", + "\u00A0": "", + "\u1680": "", + "\u180E": "", + "\u2000": "", + "\u2001": "", + "\u2002": "", + "\u2003": "", + "\u2004": "", + "\u2005": "", + "\u2006": "", + "\u2007": "", + "\u2008": "", + "\u2009": "", + "\u200A": "", + "\u200B": "", + "\u202F": "", + "\u205F": "", + "\u3000": "", + } + + def __init__(self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0): + super().__init__() + self.matchWhite = ws + self.set_whitespace_chars( + "".join(c for c in self.whiteStrs if c not in self.matchWhite), + copy_defaults=True, + ) + # self.leave_whitespace() + self.mayReturnEmpty = True + self.errmsg = "Expected " + self.name + + self.minLen = min + + if max > 0: + self.maxLen = max + else: + self.maxLen = _MAX_INT + + if exact > 0: + self.maxLen = exact + self.minLen = exact + + def _generateDefaultName(self): + return "".join(White.whiteStrs[c] for c in self.matchWhite) + + def parseImpl(self, instring, loc, doActions=True): + if instring[loc] not in self.matchWhite: + raise ParseException(instring, loc, self.errmsg, self) + start = loc + loc += 1 + maxloc = start + self.maxLen + maxloc = min(maxloc, len(instring)) + while loc < maxloc and instring[loc] in self.matchWhite: + loc += 1 + + if loc - start < self.minLen: + raise ParseException(instring, loc, self.errmsg, self) + + return loc, instring[start:loc] + + +class PositionToken(Token): + def __init__(self): + super().__init__() + self.mayReturnEmpty = True + self.mayIndexError = False + + +class GoToColumn(PositionToken): + """Token to advance to a specific column of input text; useful for + tabular report scraping. + """ + + def __init__(self, colno: int): + super().__init__() + self.col = colno + + def preParse(self, instring, loc): + if col(loc, instring) != self.col: + instrlen = len(instring) + if self.ignoreExprs: + loc = self._skipIgnorables(instring, loc) + while ( + loc < instrlen + and instring[loc].isspace() + and col(loc, instring) != self.col + ): + loc += 1 + return loc + + def parseImpl(self, instring, loc, doActions=True): + thiscol = col(loc, instring) + if thiscol > self.col: + raise ParseException(instring, loc, "Text not in expected column", self) + newloc = loc + self.col - thiscol + ret = instring[loc:newloc] + return newloc, ret + + +class LineStart(PositionToken): + r"""Matches if current position is at the beginning of a line within + the parse string + + Example:: + + test = '''\ + AAA this line + AAA and this line + AAA but not this one + B AAA and definitely not this one + ''' + + for t in (LineStart() + 'AAA' + restOfLine).search_string(test): + print(t) + + prints:: + + ['AAA', ' this line'] + ['AAA', ' and this line'] + + """ + + def __init__(self): + super().__init__() + self.leave_whitespace() + self.orig_whiteChars = set() | self.whiteChars + self.whiteChars.discard("\n") + self.skipper = Empty().set_whitespace_chars(self.whiteChars) + self.errmsg = "Expected start of line" + + def preParse(self, instring, loc): + if loc == 0: + return loc + else: + ret = self.skipper.preParse(instring, loc) + if "\n" in self.orig_whiteChars: + while instring[ret : ret + 1] == "\n": + ret = self.skipper.preParse(instring, ret + 1) + return ret + + def parseImpl(self, instring, loc, doActions=True): + if col(loc, instring) == 1: + return loc, [] + raise ParseException(instring, loc, self.errmsg, self) + + +class LineEnd(PositionToken): + """Matches if current position is at the end of a line within the + parse string + """ + + def __init__(self): + super().__init__() + self.whiteChars.discard("\n") + self.set_whitespace_chars(self.whiteChars, copy_defaults=False) + self.errmsg = "Expected end of line" + + def parseImpl(self, instring, loc, doActions=True): + if loc < len(instring): + if instring[loc] == "\n": + return loc + 1, "\n" + else: + raise ParseException(instring, loc, self.errmsg, self) + elif loc == len(instring): + return loc + 1, [] + else: + raise ParseException(instring, loc, self.errmsg, self) + + +class StringStart(PositionToken): + """Matches if current position is at the beginning of the parse + string + """ + + def __init__(self): + super().__init__() + self.errmsg = "Expected start of text" + + def parseImpl(self, instring, loc, doActions=True): + if loc != 0: + # see if entire string up to here is just whitespace and ignoreables + if loc != self.preParse(instring, 0): + raise ParseException(instring, loc, self.errmsg, self) + return loc, [] + + +class StringEnd(PositionToken): + """ + Matches if current position is at the end of the parse string + """ + + def __init__(self): + super().__init__() + self.errmsg = "Expected end of text" + + def parseImpl(self, instring, loc, doActions=True): + if loc < len(instring): + raise ParseException(instring, loc, self.errmsg, self) + elif loc == len(instring): + return loc + 1, [] + elif loc > len(instring): + return loc, [] + else: + raise ParseException(instring, loc, self.errmsg, self) + + +class WordStart(PositionToken): + """Matches if the current position is at the beginning of a + :class:`Word`, and is not preceded by any character in a given + set of ``word_chars`` (default= ``printables``). To emulate the + ``\b`` behavior of regular expressions, use + ``WordStart(alphanums)``. ``WordStart`` will also match at + the beginning of the string being parsed, or at the beginning of + a line. + """ + + def __init__(self, word_chars: str = printables, *, wordChars: str = printables): + wordChars = word_chars if wordChars == printables else wordChars + super().__init__() + self.wordChars = set(wordChars) + self.errmsg = "Not at the start of a word" + + def parseImpl(self, instring, loc, doActions=True): + if loc != 0: + if ( + instring[loc - 1] in self.wordChars + or instring[loc] not in self.wordChars + ): + raise ParseException(instring, loc, self.errmsg, self) + return loc, [] + + +class WordEnd(PositionToken): + """Matches if the current position is at the end of a :class:`Word`, + and is not followed by any character in a given set of ``word_chars`` + (default= ``printables``). To emulate the ``\b`` behavior of + regular expressions, use ``WordEnd(alphanums)``. ``WordEnd`` + will also match at the end of the string being parsed, or at the end + of a line. + """ + + def __init__(self, word_chars: str = printables, *, wordChars: str = printables): + wordChars = word_chars if wordChars == printables else wordChars + super().__init__() + self.wordChars = set(wordChars) + self.skipWhitespace = False + self.errmsg = "Not at the end of a word" + + def parseImpl(self, instring, loc, doActions=True): + instrlen = len(instring) + if instrlen > 0 and loc < instrlen: + if ( + instring[loc] in self.wordChars + or instring[loc - 1] not in self.wordChars + ): + raise ParseException(instring, loc, self.errmsg, self) + return loc, [] + + +class ParseExpression(ParserElement): + """Abstract subclass of ParserElement, for combining and + post-processing parsed tokens. + """ + + def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False): + super().__init__(savelist) + self.exprs: List[ParserElement] + if isinstance(exprs, _generatorType): + exprs = list(exprs) + + if isinstance(exprs, str_type): + self.exprs = [self._literalStringClass(exprs)] + elif isinstance(exprs, ParserElement): + self.exprs = [exprs] + elif isinstance(exprs, Iterable): + exprs = list(exprs) + # if sequence of strings provided, wrap with Literal + if any(isinstance(expr, str_type) for expr in exprs): + exprs = ( + self._literalStringClass(e) if isinstance(e, str_type) else e + for e in exprs + ) + self.exprs = list(exprs) + else: + try: + self.exprs = list(exprs) + except TypeError: + self.exprs = [exprs] + self.callPreparse = False + + def recurse(self) -> Sequence[ParserElement]: + return self.exprs[:] + + def append(self, other) -> ParserElement: + self.exprs.append(other) + self._defaultName = None + return self + + def leave_whitespace(self, recursive: bool = True) -> ParserElement: + """ + Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on + all contained expressions. + """ + super().leave_whitespace(recursive) + + if recursive: + self.exprs = [e.copy() for e in self.exprs] + for e in self.exprs: + e.leave_whitespace(recursive) + return self + + def ignore_whitespace(self, recursive: bool = True) -> ParserElement: + """ + Extends ``ignore_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on + all contained expressions. + """ + super().ignore_whitespace(recursive) + if recursive: + self.exprs = [e.copy() for e in self.exprs] + for e in self.exprs: + e.ignore_whitespace(recursive) + return self + + def ignore(self, other) -> ParserElement: + if isinstance(other, Suppress): + if other not in self.ignoreExprs: + super().ignore(other) + for e in self.exprs: + e.ignore(self.ignoreExprs[-1]) + else: + super().ignore(other) + for e in self.exprs: + e.ignore(self.ignoreExprs[-1]) + return self + + def _generateDefaultName(self): + return "{}:({})".format(self.__class__.__name__, str(self.exprs)) + + def streamline(self) -> ParserElement: + if self.streamlined: + return self + + super().streamline() + + for e in self.exprs: + e.streamline() + + # collapse nested :class:`And`'s of the form ``And(And(And(a, b), c), d)`` to ``And(a, b, c, d)`` + # but only if there are no parse actions or resultsNames on the nested And's + # (likewise for :class:`Or`'s and :class:`MatchFirst`'s) + if len(self.exprs) == 2: + other = self.exprs[0] + if ( + isinstance(other, self.__class__) + and not other.parseAction + and other.resultsName is None + and not other.debug + ): + self.exprs = other.exprs[:] + [self.exprs[1]] + self._defaultName = None + self.mayReturnEmpty |= other.mayReturnEmpty + self.mayIndexError |= other.mayIndexError + + other = self.exprs[-1] + if ( + isinstance(other, self.__class__) + and not other.parseAction + and other.resultsName is None + and not other.debug + ): + self.exprs = self.exprs[:-1] + other.exprs[:] + self._defaultName = None + self.mayReturnEmpty |= other.mayReturnEmpty + self.mayIndexError |= other.mayIndexError + + self.errmsg = "Expected " + str(self) + + return self + + def validate(self, validateTrace=None) -> None: + tmp = (validateTrace if validateTrace is not None else [])[:] + [self] + for e in self.exprs: + e.validate(tmp) + self._checkRecursion([]) + + def copy(self) -> ParserElement: + ret = super().copy() + ret.exprs = [e.copy() for e in self.exprs] + return ret + + def _setResultsName(self, name, listAllMatches=False): + if ( + __diag__.warn_ungrouped_named_tokens_in_collection + and Diagnostics.warn_ungrouped_named_tokens_in_collection + not in self.suppress_warnings_ + ): + for e in self.exprs: + if ( + isinstance(e, ParserElement) + and e.resultsName + and Diagnostics.warn_ungrouped_named_tokens_in_collection + not in e.suppress_warnings_ + ): + warnings.warn( + "{}: setting results name {!r} on {} expression " + "collides with {!r} on contained expression".format( + "warn_ungrouped_named_tokens_in_collection", + name, + type(self).__name__, + e.resultsName, + ), + stacklevel=3, + ) + + return super()._setResultsName(name, listAllMatches) + + ignoreWhitespace = ignore_whitespace + leaveWhitespace = leave_whitespace + + +class And(ParseExpression): + """ + Requires all given :class:`ParseExpression` s to be found in the given order. + Expressions may be separated by whitespace. + May be constructed using the ``'+'`` operator. + May also be constructed using the ``'-'`` operator, which will + suppress backtracking. + + Example:: + + integer = Word(nums) + name_expr = Word(alphas)[1, ...] + + expr = And([integer("id"), name_expr("name"), integer("age")]) + # more easily written as: + expr = integer("id") + name_expr("name") + integer("age") + """ + + class _ErrorStop(Empty): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.leave_whitespace() + + def _generateDefaultName(self): + return "-" + + def __init__( + self, exprs_arg: typing.Iterable[ParserElement], savelist: bool = True + ): + exprs: List[ParserElement] = list(exprs_arg) + if exprs and Ellipsis in exprs: + tmp = [] + for i, expr in enumerate(exprs): + if expr is Ellipsis: + if i < len(exprs) - 1: + skipto_arg: ParserElement = (Empty() + exprs[i + 1]).exprs[-1] + tmp.append(SkipTo(skipto_arg)("_skipped*")) + else: + raise Exception( + "cannot construct And with sequence ending in ..." + ) + else: + tmp.append(expr) + exprs[:] = tmp + super().__init__(exprs, savelist) + if self.exprs: + self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) + if not isinstance(self.exprs[0], White): + self.set_whitespace_chars( + self.exprs[0].whiteChars, + copy_defaults=self.exprs[0].copyDefaultWhiteChars, + ) + self.skipWhitespace = self.exprs[0].skipWhitespace + else: + self.skipWhitespace = False + else: + self.mayReturnEmpty = True + self.callPreparse = True + + def streamline(self) -> ParserElement: + # collapse any _PendingSkip's + if self.exprs: + if any( + isinstance(e, ParseExpression) + and e.exprs + and isinstance(e.exprs[-1], _PendingSkip) + for e in self.exprs[:-1] + ): + for i, e in enumerate(self.exprs[:-1]): + if e is None: + continue + if ( + isinstance(e, ParseExpression) + and e.exprs + and isinstance(e.exprs[-1], _PendingSkip) + ): + e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1] + self.exprs[i + 1] = None + self.exprs = [e for e in self.exprs if e is not None] + + super().streamline() + + # link any IndentedBlocks to the prior expression + for prev, cur in zip(self.exprs, self.exprs[1:]): + # traverse cur or any first embedded expr of cur looking for an IndentedBlock + # (but watch out for recursive grammar) + seen = set() + while cur: + if id(cur) in seen: + break + seen.add(id(cur)) + if isinstance(cur, IndentedBlock): + prev.add_parse_action( + lambda s, l, t, cur_=cur: setattr( + cur_, "parent_anchor", col(l, s) + ) + ) + break + subs = cur.recurse() + cur = next(iter(subs), None) + + self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) + return self + + def parseImpl(self, instring, loc, doActions=True): + # pass False as callPreParse arg to _parse for first element, since we already + # pre-parsed the string as part of our And pre-parsing + loc, resultlist = self.exprs[0]._parse( + instring, loc, doActions, callPreParse=False + ) + errorStop = False + for e in self.exprs[1:]: + # if isinstance(e, And._ErrorStop): + if type(e) is And._ErrorStop: + errorStop = True + continue + if errorStop: + try: + loc, exprtokens = e._parse(instring, loc, doActions) + except ParseSyntaxException: + raise + except ParseBaseException as pe: + pe.__traceback__ = None + raise ParseSyntaxException._from_exception(pe) + except IndexError: + raise ParseSyntaxException( + instring, len(instring), self.errmsg, self + ) + else: + loc, exprtokens = e._parse(instring, loc, doActions) + if exprtokens or exprtokens.haskeys(): + resultlist += exprtokens + return loc, resultlist + + def __iadd__(self, other): + if isinstance(other, str_type): + other = self._literalStringClass(other) + return self.append(other) # And([self, other]) + + def _checkRecursion(self, parseElementList): + subRecCheckList = parseElementList[:] + [self] + for e in self.exprs: + e._checkRecursion(subRecCheckList) + if not e.mayReturnEmpty: + break + + def _generateDefaultName(self): + inner = " ".join(str(e) for e in self.exprs) + # strip off redundant inner {}'s + while len(inner) > 1 and inner[0 :: len(inner) - 1] == "{}": + inner = inner[1:-1] + return "{" + inner + "}" + + +class Or(ParseExpression): + """Requires that at least one :class:`ParseExpression` is found. If + two expressions match, the expression that matches the longest + string will be used. May be constructed using the ``'^'`` + operator. + + Example:: + + # construct Or using '^' operator + + number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums)) + print(number.search_string("123 3.1416 789")) + + prints:: + + [['123'], ['3.1416'], ['789']] + """ + + def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False): + super().__init__(exprs, savelist) + if self.exprs: + self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) + self.skipWhitespace = all(e.skipWhitespace for e in self.exprs) + else: + self.mayReturnEmpty = True + + def streamline(self) -> ParserElement: + super().streamline() + if self.exprs: + self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) + self.saveAsList = any(e.saveAsList for e in self.exprs) + self.skipWhitespace = all( + e.skipWhitespace and not isinstance(e, White) for e in self.exprs + ) + else: + self.saveAsList = False + return self + + def parseImpl(self, instring, loc, doActions=True): + maxExcLoc = -1 + maxException = None + matches = [] + fatals = [] + if all(e.callPreparse for e in self.exprs): + loc = self.preParse(instring, loc) + for e in self.exprs: + try: + loc2 = e.try_parse(instring, loc, raise_fatal=True) + except ParseFatalException as pfe: + pfe.__traceback__ = None + pfe.parserElement = e + fatals.append(pfe) + maxException = None + maxExcLoc = -1 + except ParseException as err: + if not fatals: + err.__traceback__ = None + if err.loc > maxExcLoc: + maxException = err + maxExcLoc = err.loc + except IndexError: + if len(instring) > maxExcLoc: + maxException = ParseException( + instring, len(instring), e.errmsg, self + ) + maxExcLoc = len(instring) + else: + # save match among all matches, to retry longest to shortest + matches.append((loc2, e)) + + if matches: + # re-evaluate all matches in descending order of length of match, in case attached actions + # might change whether or how much they match of the input. + matches.sort(key=itemgetter(0), reverse=True) + + if not doActions: + # no further conditions or parse actions to change the selection of + # alternative, so the first match will be the best match + best_expr = matches[0][1] + return best_expr._parse(instring, loc, doActions) + + longest = -1, None + for loc1, expr1 in matches: + if loc1 <= longest[0]: + # already have a longer match than this one will deliver, we are done + return longest + + try: + loc2, toks = expr1._parse(instring, loc, doActions) + except ParseException as err: + err.__traceback__ = None + if err.loc > maxExcLoc: + maxException = err + maxExcLoc = err.loc + else: + if loc2 >= loc1: + return loc2, toks + # didn't match as much as before + elif loc2 > longest[0]: + longest = loc2, toks + + if longest != (-1, None): + return longest + + if fatals: + if len(fatals) > 1: + fatals.sort(key=lambda e: -e.loc) + if fatals[0].loc == fatals[1].loc: + fatals.sort(key=lambda e: (-e.loc, -len(str(e.parserElement)))) + max_fatal = fatals[0] + raise max_fatal + + if maxException is not None: + maxException.msg = self.errmsg + raise maxException + else: + raise ParseException( + instring, loc, "no defined alternatives to match", self + ) + + def __ixor__(self, other): + if isinstance(other, str_type): + other = self._literalStringClass(other) + return self.append(other) # Or([self, other]) + + def _generateDefaultName(self): + return "{" + " ^ ".join(str(e) for e in self.exprs) + "}" + + def _setResultsName(self, name, listAllMatches=False): + if ( + __diag__.warn_multiple_tokens_in_named_alternation + and Diagnostics.warn_multiple_tokens_in_named_alternation + not in self.suppress_warnings_ + ): + if any( + isinstance(e, And) + and Diagnostics.warn_multiple_tokens_in_named_alternation + not in e.suppress_warnings_ + for e in self.exprs + ): + warnings.warn( + "{}: setting results name {!r} on {} expression " + "will return a list of all parsed tokens in an And alternative, " + "in prior versions only the first token was returned; enclose " + "contained argument in Group".format( + "warn_multiple_tokens_in_named_alternation", + name, + type(self).__name__, + ), + stacklevel=3, + ) + + return super()._setResultsName(name, listAllMatches) + + +class MatchFirst(ParseExpression): + """Requires that at least one :class:`ParseExpression` is found. If + more than one expression matches, the first one listed is the one that will + match. May be constructed using the ``'|'`` operator. + + Example:: + + # construct MatchFirst using '|' operator + + # watch the order of expressions to match + number = Word(nums) | Combine(Word(nums) + '.' + Word(nums)) + print(number.search_string("123 3.1416 789")) # Fail! -> [['123'], ['3'], ['1416'], ['789']] + + # put more selective expression first + number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums) + print(number.search_string("123 3.1416 789")) # Better -> [['123'], ['3.1416'], ['789']] + """ + + def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False): + super().__init__(exprs, savelist) + if self.exprs: + self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) + self.skipWhitespace = all(e.skipWhitespace for e in self.exprs) + else: + self.mayReturnEmpty = True + + def streamline(self) -> ParserElement: + if self.streamlined: + return self + + super().streamline() + if self.exprs: + self.saveAsList = any(e.saveAsList for e in self.exprs) + self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) + self.skipWhitespace = all( + e.skipWhitespace and not isinstance(e, White) for e in self.exprs + ) + else: + self.saveAsList = False + self.mayReturnEmpty = True + return self + + def parseImpl(self, instring, loc, doActions=True): + maxExcLoc = -1 + maxException = None + + for e in self.exprs: + try: + return e._parse( + instring, + loc, + doActions, + ) + except ParseFatalException as pfe: + pfe.__traceback__ = None + pfe.parserElement = e + raise + except ParseException as err: + if err.loc > maxExcLoc: + maxException = err + maxExcLoc = err.loc + except IndexError: + if len(instring) > maxExcLoc: + maxException = ParseException( + instring, len(instring), e.errmsg, self + ) + maxExcLoc = len(instring) + + if maxException is not None: + maxException.msg = self.errmsg + raise maxException + else: + raise ParseException( + instring, loc, "no defined alternatives to match", self + ) + + def __ior__(self, other): + if isinstance(other, str_type): + other = self._literalStringClass(other) + return self.append(other) # MatchFirst([self, other]) + + def _generateDefaultName(self): + return "{" + " | ".join(str(e) for e in self.exprs) + "}" + + def _setResultsName(self, name, listAllMatches=False): + if ( + __diag__.warn_multiple_tokens_in_named_alternation + and Diagnostics.warn_multiple_tokens_in_named_alternation + not in self.suppress_warnings_ + ): + if any( + isinstance(e, And) + and Diagnostics.warn_multiple_tokens_in_named_alternation + not in e.suppress_warnings_ + for e in self.exprs + ): + warnings.warn( + "{}: setting results name {!r} on {} expression " + "will return a list of all parsed tokens in an And alternative, " + "in prior versions only the first token was returned; enclose " + "contained argument in Group".format( + "warn_multiple_tokens_in_named_alternation", + name, + type(self).__name__, + ), + stacklevel=3, + ) + + return super()._setResultsName(name, listAllMatches) + + +class Each(ParseExpression): + """Requires all given :class:`ParseExpression` s to be found, but in + any order. Expressions may be separated by whitespace. + + May be constructed using the ``'&'`` operator. + + Example:: + + color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN") + shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON") + integer = Word(nums) + shape_attr = "shape:" + shape_type("shape") + posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn") + color_attr = "color:" + color("color") + size_attr = "size:" + integer("size") + + # use Each (using operator '&') to accept attributes in any order + # (shape and posn are required, color and size are optional) + shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr) + + shape_spec.run_tests(''' + shape: SQUARE color: BLACK posn: 100, 120 + shape: CIRCLE size: 50 color: BLUE posn: 50,80 + color:GREEN size:20 shape:TRIANGLE posn:20,40 + ''' + ) + + prints:: + + shape: SQUARE color: BLACK posn: 100, 120 + ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']] + - color: BLACK + - posn: ['100', ',', '120'] + - x: 100 + - y: 120 + - shape: SQUARE + + + shape: CIRCLE size: 50 color: BLUE posn: 50,80 + ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']] + - color: BLUE + - posn: ['50', ',', '80'] + - x: 50 + - y: 80 + - shape: CIRCLE + - size: 50 + + + color: GREEN size: 20 shape: TRIANGLE posn: 20,40 + ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']] + - color: GREEN + - posn: ['20', ',', '40'] + - x: 20 + - y: 40 + - shape: TRIANGLE + - size: 20 + """ + + def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = True): + super().__init__(exprs, savelist) + if self.exprs: + self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) + else: + self.mayReturnEmpty = True + self.skipWhitespace = True + self.initExprGroups = True + self.saveAsList = True + + def streamline(self) -> ParserElement: + super().streamline() + if self.exprs: + self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) + else: + self.mayReturnEmpty = True + return self + + def parseImpl(self, instring, loc, doActions=True): + if self.initExprGroups: + self.opt1map = dict( + (id(e.expr), e) for e in self.exprs if isinstance(e, Opt) + ) + opt1 = [e.expr for e in self.exprs if isinstance(e, Opt)] + opt2 = [ + e + for e in self.exprs + if e.mayReturnEmpty and not isinstance(e, (Opt, Regex, ZeroOrMore)) + ] + self.optionals = opt1 + opt2 + self.multioptionals = [ + e.expr.set_results_name(e.resultsName, list_all_matches=True) + for e in self.exprs + if isinstance(e, _MultipleMatch) + ] + self.multirequired = [ + e.expr.set_results_name(e.resultsName, list_all_matches=True) + for e in self.exprs + if isinstance(e, OneOrMore) + ] + self.required = [ + e for e in self.exprs if not isinstance(e, (Opt, ZeroOrMore, OneOrMore)) + ] + self.required += self.multirequired + self.initExprGroups = False + + tmpLoc = loc + tmpReqd = self.required[:] + tmpOpt = self.optionals[:] + multis = self.multioptionals[:] + matchOrder = [] + + keepMatching = True + failed = [] + fatals = [] + while keepMatching: + tmpExprs = tmpReqd + tmpOpt + multis + failed.clear() + fatals.clear() + for e in tmpExprs: + try: + tmpLoc = e.try_parse(instring, tmpLoc, raise_fatal=True) + except ParseFatalException as pfe: + pfe.__traceback__ = None + pfe.parserElement = e + fatals.append(pfe) + failed.append(e) + except ParseException: + failed.append(e) + else: + matchOrder.append(self.opt1map.get(id(e), e)) + if e in tmpReqd: + tmpReqd.remove(e) + elif e in tmpOpt: + tmpOpt.remove(e) + if len(failed) == len(tmpExprs): + keepMatching = False + + # look for any ParseFatalExceptions + if fatals: + if len(fatals) > 1: + fatals.sort(key=lambda e: -e.loc) + if fatals[0].loc == fatals[1].loc: + fatals.sort(key=lambda e: (-e.loc, -len(str(e.parserElement)))) + max_fatal = fatals[0] + raise max_fatal + + if tmpReqd: + missing = ", ".join([str(e) for e in tmpReqd]) + raise ParseException( + instring, + loc, + "Missing one or more required elements ({})".format(missing), + ) + + # add any unmatched Opts, in case they have default values defined + matchOrder += [e for e in self.exprs if isinstance(e, Opt) and e.expr in tmpOpt] + + total_results = ParseResults([]) + for e in matchOrder: + loc, results = e._parse(instring, loc, doActions) + total_results += results + + return loc, total_results + + def _generateDefaultName(self): + return "{" + " & ".join(str(e) for e in self.exprs) + "}" + + +class ParseElementEnhance(ParserElement): + """Abstract subclass of :class:`ParserElement`, for combining and + post-processing parsed tokens. + """ + + def __init__(self, expr: Union[ParserElement, str], savelist: bool = False): + super().__init__(savelist) + if isinstance(expr, str_type): + if issubclass(self._literalStringClass, Token): + expr = self._literalStringClass(expr) + elif issubclass(type(self), self._literalStringClass): + expr = Literal(expr) + else: + expr = self._literalStringClass(Literal(expr)) + self.expr = expr + if expr is not None: + self.mayIndexError = expr.mayIndexError + self.mayReturnEmpty = expr.mayReturnEmpty + self.set_whitespace_chars( + expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars + ) + self.skipWhitespace = expr.skipWhitespace + self.saveAsList = expr.saveAsList + self.callPreparse = expr.callPreparse + self.ignoreExprs.extend(expr.ignoreExprs) + + def recurse(self) -> Sequence[ParserElement]: + return [self.expr] if self.expr is not None else [] + + def parseImpl(self, instring, loc, doActions=True): + if self.expr is not None: + return self.expr._parse(instring, loc, doActions, callPreParse=False) + else: + raise ParseException(instring, loc, "No expression defined", self) + + def leave_whitespace(self, recursive: bool = True) -> ParserElement: + super().leave_whitespace(recursive) + + if recursive: + self.expr = self.expr.copy() + if self.expr is not None: + self.expr.leave_whitespace(recursive) + return self + + def ignore_whitespace(self, recursive: bool = True) -> ParserElement: + super().ignore_whitespace(recursive) + + if recursive: + self.expr = self.expr.copy() + if self.expr is not None: + self.expr.ignore_whitespace(recursive) + return self + + def ignore(self, other) -> ParserElement: + if isinstance(other, Suppress): + if other not in self.ignoreExprs: + super().ignore(other) + if self.expr is not None: + self.expr.ignore(self.ignoreExprs[-1]) + else: + super().ignore(other) + if self.expr is not None: + self.expr.ignore(self.ignoreExprs[-1]) + return self + + def streamline(self) -> ParserElement: + super().streamline() + if self.expr is not None: + self.expr.streamline() + return self + + def _checkRecursion(self, parseElementList): + if self in parseElementList: + raise RecursiveGrammarException(parseElementList + [self]) + subRecCheckList = parseElementList[:] + [self] + if self.expr is not None: + self.expr._checkRecursion(subRecCheckList) + + def validate(self, validateTrace=None) -> None: + if validateTrace is None: + validateTrace = [] + tmp = validateTrace[:] + [self] + if self.expr is not None: + self.expr.validate(tmp) + self._checkRecursion([]) + + def _generateDefaultName(self): + return "{}:({})".format(self.__class__.__name__, str(self.expr)) + + ignoreWhitespace = ignore_whitespace + leaveWhitespace = leave_whitespace + + +class IndentedBlock(ParseElementEnhance): + """ + Expression to match one or more expressions at a given indentation level. + Useful for parsing text where structure is implied by indentation (like Python source code). + """ + + class _Indent(Empty): + def __init__(self, ref_col: int): + super().__init__() + self.errmsg = "expected indent at column {}".format(ref_col) + self.add_condition(lambda s, l, t: col(l, s) == ref_col) + + class _IndentGreater(Empty): + def __init__(self, ref_col: int): + super().__init__() + self.errmsg = "expected indent at column greater than {}".format(ref_col) + self.add_condition(lambda s, l, t: col(l, s) > ref_col) + + def __init__( + self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True + ): + super().__init__(expr, savelist=True) + # if recursive: + # raise NotImplementedError("IndentedBlock with recursive is not implemented") + self._recursive = recursive + self._grouped = grouped + self.parent_anchor = 1 + + def parseImpl(self, instring, loc, doActions=True): + # advance parse position to non-whitespace by using an Empty() + # this should be the column to be used for all subsequent indented lines + anchor_loc = Empty().preParse(instring, loc) + + # see if self.expr matches at the current location - if not it will raise an exception + # and no further work is necessary + self.expr.try_parse(instring, anchor_loc, doActions) + + indent_col = col(anchor_loc, instring) + peer_detect_expr = self._Indent(indent_col) + + inner_expr = Empty() + peer_detect_expr + self.expr + if self._recursive: + sub_indent = self._IndentGreater(indent_col) + nested_block = IndentedBlock( + self.expr, recursive=self._recursive, grouped=self._grouped + ) + nested_block.set_debug(self.debug) + nested_block.parent_anchor = indent_col + inner_expr += Opt(sub_indent + nested_block) + + inner_expr.set_name(f"inner {hex(id(inner_expr))[-4:].upper()}@{indent_col}") + block = OneOrMore(inner_expr) + + trailing_undent = self._Indent(self.parent_anchor) | StringEnd() + + if self._grouped: + wrapper = Group + else: + wrapper = lambda expr: expr + return (wrapper(block) + Optional(trailing_undent)).parseImpl( + instring, anchor_loc, doActions + ) + + +class AtStringStart(ParseElementEnhance): + """Matches if expression matches at the beginning of the parse + string:: + + AtStringStart(Word(nums)).parse_string("123") + # prints ["123"] + + AtStringStart(Word(nums)).parse_string(" 123") + # raises ParseException + """ + + def __init__(self, expr: Union[ParserElement, str]): + super().__init__(expr) + self.callPreparse = False + + def parseImpl(self, instring, loc, doActions=True): + if loc != 0: + raise ParseException(instring, loc, "not found at string start") + return super().parseImpl(instring, loc, doActions) + + +class AtLineStart(ParseElementEnhance): + r"""Matches if an expression matches at the beginning of a line within + the parse string + + Example:: + + test = '''\ + AAA this line + AAA and this line + AAA but not this one + B AAA and definitely not this one + ''' + + for t in (AtLineStart('AAA') + restOfLine).search_string(test): + print(t) + + prints:: + + ['AAA', ' this line'] + ['AAA', ' and this line'] + + """ + + def __init__(self, expr: Union[ParserElement, str]): + super().__init__(expr) + self.callPreparse = False + + def parseImpl(self, instring, loc, doActions=True): + if col(loc, instring) != 1: + raise ParseException(instring, loc, "not found at line start") + return super().parseImpl(instring, loc, doActions) + + +class FollowedBy(ParseElementEnhance): + """Lookahead matching of the given parse expression. + ``FollowedBy`` does *not* advance the parsing position within + the input string, it only verifies that the specified parse + expression matches at the current position. ``FollowedBy`` + always returns a null token list. If any results names are defined + in the lookahead expression, those *will* be returned for access by + name. + + Example:: + + # use FollowedBy to match a label only if it is followed by a ':' + data_word = Word(alphas) + label = data_word + FollowedBy(':') + attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)) + + attr_expr[1, ...].parse_string("shape: SQUARE color: BLACK posn: upper left").pprint() + + prints:: + + [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']] + """ + + def __init__(self, expr: Union[ParserElement, str]): + super().__init__(expr) + self.mayReturnEmpty = True + + def parseImpl(self, instring, loc, doActions=True): + # by using self._expr.parse and deleting the contents of the returned ParseResults list + # we keep any named results that were defined in the FollowedBy expression + _, ret = self.expr._parse(instring, loc, doActions=doActions) + del ret[:] + + return loc, ret + + +class PrecededBy(ParseElementEnhance): + """Lookbehind matching of the given parse expression. + ``PrecededBy`` does not advance the parsing position within the + input string, it only verifies that the specified parse expression + matches prior to the current position. ``PrecededBy`` always + returns a null token list, but if a results name is defined on the + given expression, it is returned. + + Parameters: + + - expr - expression that must match prior to the current parse + location + - retreat - (default= ``None``) - (int) maximum number of characters + to lookbehind prior to the current parse location + + If the lookbehind expression is a string, :class:`Literal`, + :class:`Keyword`, or a :class:`Word` or :class:`CharsNotIn` + with a specified exact or maximum length, then the retreat + parameter is not required. Otherwise, retreat must be specified to + give a maximum number of characters to look back from + the current parse position for a lookbehind match. + + Example:: + + # VB-style variable names with type prefixes + int_var = PrecededBy("#") + pyparsing_common.identifier + str_var = PrecededBy("$") + pyparsing_common.identifier + + """ + + def __init__( + self, expr: Union[ParserElement, str], retreat: typing.Optional[int] = None + ): + super().__init__(expr) + self.expr = self.expr().leave_whitespace() + self.mayReturnEmpty = True + self.mayIndexError = False + self.exact = False + if isinstance(expr, str_type): + retreat = len(expr) + self.exact = True + elif isinstance(expr, (Literal, Keyword)): + retreat = expr.matchLen + self.exact = True + elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT: + retreat = expr.maxLen + self.exact = True + elif isinstance(expr, PositionToken): + retreat = 0 + self.exact = True + self.retreat = retreat + self.errmsg = "not preceded by " + str(expr) + self.skipWhitespace = False + self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None))) + + def parseImpl(self, instring, loc=0, doActions=True): + if self.exact: + if loc < self.retreat: + raise ParseException(instring, loc, self.errmsg) + start = loc - self.retreat + _, ret = self.expr._parse(instring, start) + else: + # retreat specified a maximum lookbehind window, iterate + test_expr = self.expr + StringEnd() + instring_slice = instring[max(0, loc - self.retreat) : loc] + last_expr = ParseException(instring, loc, self.errmsg) + for offset in range(1, min(loc, self.retreat + 1) + 1): + try: + # print('trying', offset, instring_slice, repr(instring_slice[loc - offset:])) + _, ret = test_expr._parse( + instring_slice, len(instring_slice) - offset + ) + except ParseBaseException as pbe: + last_expr = pbe + else: + break + else: + raise last_expr + return loc, ret + + +class Located(ParseElementEnhance): + """ + Decorates a returned token with its starting and ending + locations in the input string. + + This helper adds the following results names: + + - ``locn_start`` - location where matched expression begins + - ``locn_end`` - location where matched expression ends + - ``value`` - the actual parsed results + + Be careful if the input text contains ```` characters, you + may want to call :class:`ParserElement.parse_with_tabs` + + Example:: + + wd = Word(alphas) + for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"): + print(match) + + prints:: + + [0, ['ljsdf'], 5] + [8, ['lksdjjf'], 15] + [18, ['lkkjj'], 23] + + """ + + def parseImpl(self, instring, loc, doActions=True): + start = loc + loc, tokens = self.expr._parse(instring, start, doActions, callPreParse=False) + ret_tokens = ParseResults([start, tokens, loc]) + ret_tokens["locn_start"] = start + ret_tokens["value"] = tokens + ret_tokens["locn_end"] = loc + if self.resultsName: + # must return as a list, so that the name will be attached to the complete group + return loc, [ret_tokens] + else: + return loc, ret_tokens + + +class NotAny(ParseElementEnhance): + """ + Lookahead to disallow matching with the given parse expression. + ``NotAny`` does *not* advance the parsing position within the + input string, it only verifies that the specified parse expression + does *not* match at the current position. Also, ``NotAny`` does + *not* skip over leading whitespace. ``NotAny`` always returns + a null token list. May be constructed using the ``'~'`` operator. + + Example:: + + AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split()) + + # take care not to mistake keywords for identifiers + ident = ~(AND | OR | NOT) + Word(alphas) + boolean_term = Opt(NOT) + ident + + # very crude boolean expression - to support parenthesis groups and + # operation hierarchy, use infix_notation + boolean_expr = boolean_term + ((AND | OR) + boolean_term)[...] + + # integers that are followed by "." are actually floats + integer = Word(nums) + ~Char(".") + """ + + def __init__(self, expr: Union[ParserElement, str]): + super().__init__(expr) + # do NOT use self.leave_whitespace(), don't want to propagate to exprs + # self.leave_whitespace() + self.skipWhitespace = False + + self.mayReturnEmpty = True + self.errmsg = "Found unwanted token, " + str(self.expr) + + def parseImpl(self, instring, loc, doActions=True): + if self.expr.can_parse_next(instring, loc): + raise ParseException(instring, loc, self.errmsg, self) + return loc, [] + + def _generateDefaultName(self): + return "~{" + str(self.expr) + "}" + + +class _MultipleMatch(ParseElementEnhance): + def __init__( + self, + expr: ParserElement, + stop_on: typing.Optional[Union[ParserElement, str]] = None, + *, + stopOn: typing.Optional[Union[ParserElement, str]] = None, + ): + super().__init__(expr) + stopOn = stopOn or stop_on + self.saveAsList = True + ender = stopOn + if isinstance(ender, str_type): + ender = self._literalStringClass(ender) + self.stopOn(ender) + + def stopOn(self, ender) -> ParserElement: + if isinstance(ender, str_type): + ender = self._literalStringClass(ender) + self.not_ender = ~ender if ender is not None else None + return self + + def parseImpl(self, instring, loc, doActions=True): + self_expr_parse = self.expr._parse + self_skip_ignorables = self._skipIgnorables + check_ender = self.not_ender is not None + if check_ender: + try_not_ender = self.not_ender.tryParse + + # must be at least one (but first see if we are the stopOn sentinel; + # if so, fail) + if check_ender: + try_not_ender(instring, loc) + loc, tokens = self_expr_parse(instring, loc, doActions) + try: + hasIgnoreExprs = not not self.ignoreExprs + while 1: + if check_ender: + try_not_ender(instring, loc) + if hasIgnoreExprs: + preloc = self_skip_ignorables(instring, loc) + else: + preloc = loc + loc, tmptokens = self_expr_parse(instring, preloc, doActions) + if tmptokens or tmptokens.haskeys(): + tokens += tmptokens + except (ParseException, IndexError): + pass + + return loc, tokens + + def _setResultsName(self, name, listAllMatches=False): + if ( + __diag__.warn_ungrouped_named_tokens_in_collection + and Diagnostics.warn_ungrouped_named_tokens_in_collection + not in self.suppress_warnings_ + ): + for e in [self.expr] + self.expr.recurse(): + if ( + isinstance(e, ParserElement) + and e.resultsName + and Diagnostics.warn_ungrouped_named_tokens_in_collection + not in e.suppress_warnings_ + ): + warnings.warn( + "{}: setting results name {!r} on {} expression " + "collides with {!r} on contained expression".format( + "warn_ungrouped_named_tokens_in_collection", + name, + type(self).__name__, + e.resultsName, + ), + stacklevel=3, + ) + + return super()._setResultsName(name, listAllMatches) + + +class OneOrMore(_MultipleMatch): + """ + Repetition of one or more of the given expression. + + Parameters: + - expr - expression that must match one or more times + - stop_on - (default= ``None``) - expression for a terminating sentinel + (only required if the sentinel would ordinarily match the repetition + expression) + + Example:: + + data_word = Word(alphas) + label = data_word + FollowedBy(':') + attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).set_parse_action(' '.join)) + + text = "shape: SQUARE posn: upper left color: BLACK" + attr_expr[1, ...].parse_string(text).pprint() # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']] + + # use stop_on attribute for OneOrMore to avoid reading label string as part of the data + attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)) + OneOrMore(attr_expr).parse_string(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']] + + # could also be written as + (attr_expr * (1,)).parse_string(text).pprint() + """ + + def _generateDefaultName(self): + return "{" + str(self.expr) + "}..." + + +class ZeroOrMore(_MultipleMatch): + """ + Optional repetition of zero or more of the given expression. + + Parameters: + - ``expr`` - expression that must match zero or more times + - ``stop_on`` - expression for a terminating sentinel + (only required if the sentinel would ordinarily match the repetition + expression) - (default= ``None``) + + Example: similar to :class:`OneOrMore` + """ + + def __init__( + self, + expr: ParserElement, + stop_on: typing.Optional[Union[ParserElement, str]] = None, + *, + stopOn: typing.Optional[Union[ParserElement, str]] = None, + ): + super().__init__(expr, stopOn=stopOn or stop_on) + self.mayReturnEmpty = True + + def parseImpl(self, instring, loc, doActions=True): + try: + return super().parseImpl(instring, loc, doActions) + except (ParseException, IndexError): + return loc, ParseResults([], name=self.resultsName) + + def _generateDefaultName(self): + return "[" + str(self.expr) + "]..." + + +class _NullToken: + def __bool__(self): + return False + + def __str__(self): + return "" + + +class Opt(ParseElementEnhance): + """ + Optional matching of the given expression. + + Parameters: + - ``expr`` - expression that must match zero or more times + - ``default`` (optional) - value to be returned if the optional expression is not found. + + Example:: + + # US postal code can be a 5-digit zip, plus optional 4-digit qualifier + zip = Combine(Word(nums, exact=5) + Opt('-' + Word(nums, exact=4))) + zip.run_tests(''' + # traditional ZIP code + 12345 + + # ZIP+4 form + 12101-0001 + + # invalid ZIP + 98765- + ''') + + prints:: + + # traditional ZIP code + 12345 + ['12345'] + + # ZIP+4 form + 12101-0001 + ['12101-0001'] + + # invalid ZIP + 98765- + ^ + FAIL: Expected end of text (at char 5), (line:1, col:6) + """ + + __optionalNotMatched = _NullToken() + + def __init__( + self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched + ): + super().__init__(expr, savelist=False) + self.saveAsList = self.expr.saveAsList + self.defaultValue = default + self.mayReturnEmpty = True + + def parseImpl(self, instring, loc, doActions=True): + self_expr = self.expr + try: + loc, tokens = self_expr._parse(instring, loc, doActions, callPreParse=False) + except (ParseException, IndexError): + default_value = self.defaultValue + if default_value is not self.__optionalNotMatched: + if self_expr.resultsName: + tokens = ParseResults([default_value]) + tokens[self_expr.resultsName] = default_value + else: + tokens = [default_value] + else: + tokens = [] + return loc, tokens + + def _generateDefaultName(self): + inner = str(self.expr) + # strip off redundant inner {}'s + while len(inner) > 1 and inner[0 :: len(inner) - 1] == "{}": + inner = inner[1:-1] + return "[" + inner + "]" + + +Optional = Opt + + +class SkipTo(ParseElementEnhance): + """ + Token for skipping over all undefined text until the matched + expression is found. + + Parameters: + - ``expr`` - target expression marking the end of the data to be skipped + - ``include`` - if ``True``, the target expression is also parsed + (the skipped text and target expression are returned as a 2-element + list) (default= ``False``). + - ``ignore`` - (default= ``None``) used to define grammars (typically quoted strings and + comments) that might contain false matches to the target expression + - ``fail_on`` - (default= ``None``) define expressions that are not allowed to be + included in the skipped test; if found before the target expression is found, + the :class:`SkipTo` is not a match + + Example:: + + report = ''' + Outstanding Issues Report - 1 Jan 2000 + + # | Severity | Description | Days Open + -----+----------+-------------------------------------------+----------- + 101 | Critical | Intermittent system crash | 6 + 94 | Cosmetic | Spelling error on Login ('log|n') | 14 + 79 | Minor | System slow when running too many reports | 47 + ''' + integer = Word(nums) + SEP = Suppress('|') + # use SkipTo to simply match everything up until the next SEP + # - ignore quoted strings, so that a '|' character inside a quoted string does not match + # - parse action will call token.strip() for each matched token, i.e., the description body + string_data = SkipTo(SEP, ignore=quoted_string) + string_data.set_parse_action(token_map(str.strip)) + ticket_expr = (integer("issue_num") + SEP + + string_data("sev") + SEP + + string_data("desc") + SEP + + integer("days_open")) + + for tkt in ticket_expr.search_string(report): + print tkt.dump() + + prints:: + + ['101', 'Critical', 'Intermittent system crash', '6'] + - days_open: '6' + - desc: 'Intermittent system crash' + - issue_num: '101' + - sev: 'Critical' + ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14'] + - days_open: '14' + - desc: "Spelling error on Login ('log|n')" + - issue_num: '94' + - sev: 'Cosmetic' + ['79', 'Minor', 'System slow when running too many reports', '47'] + - days_open: '47' + - desc: 'System slow when running too many reports' + - issue_num: '79' + - sev: 'Minor' + """ + + def __init__( + self, + other: Union[ParserElement, str], + include: bool = False, + ignore: bool = None, + fail_on: typing.Optional[Union[ParserElement, str]] = None, + *, + failOn: Union[ParserElement, str] = None, + ): + super().__init__(other) + failOn = failOn or fail_on + self.ignoreExpr = ignore + self.mayReturnEmpty = True + self.mayIndexError = False + self.includeMatch = include + self.saveAsList = False + if isinstance(failOn, str_type): + self.failOn = self._literalStringClass(failOn) + else: + self.failOn = failOn + self.errmsg = "No match found for " + str(self.expr) + + def parseImpl(self, instring, loc, doActions=True): + startloc = loc + instrlen = len(instring) + self_expr_parse = self.expr._parse + self_failOn_canParseNext = ( + self.failOn.canParseNext if self.failOn is not None else None + ) + self_ignoreExpr_tryParse = ( + self.ignoreExpr.tryParse if self.ignoreExpr is not None else None + ) + + tmploc = loc + while tmploc <= instrlen: + if self_failOn_canParseNext is not None: + # break if failOn expression matches + if self_failOn_canParseNext(instring, tmploc): + break + + if self_ignoreExpr_tryParse is not None: + # advance past ignore expressions + while 1: + try: + tmploc = self_ignoreExpr_tryParse(instring, tmploc) + except ParseBaseException: + break + + try: + self_expr_parse(instring, tmploc, doActions=False, callPreParse=False) + except (ParseException, IndexError): + # no match, advance loc in string + tmploc += 1 + else: + # matched skipto expr, done + break + + else: + # ran off the end of the input string without matching skipto expr, fail + raise ParseException(instring, loc, self.errmsg, self) + + # build up return values + loc = tmploc + skiptext = instring[startloc:loc] + skipresult = ParseResults(skiptext) + + if self.includeMatch: + loc, mat = self_expr_parse(instring, loc, doActions, callPreParse=False) + skipresult += mat + + return loc, skipresult + + +class Forward(ParseElementEnhance): + """ + Forward declaration of an expression to be defined later - + used for recursive grammars, such as algebraic infix notation. + When the expression is known, it is assigned to the ``Forward`` + variable using the ``'<<'`` operator. + + Note: take care when assigning to ``Forward`` not to overlook + precedence of operators. + + Specifically, ``'|'`` has a lower precedence than ``'<<'``, so that:: + + fwd_expr << a | b | c + + will actually be evaluated as:: + + (fwd_expr << a) | b | c + + thereby leaving b and c out as parseable alternatives. It is recommended that you + explicitly group the values inserted into the ``Forward``:: + + fwd_expr << (a | b | c) + + Converting to use the ``'<<='`` operator instead will avoid this problem. + + See :class:`ParseResults.pprint` for an example of a recursive + parser created using ``Forward``. + """ + + def __init__(self, other: typing.Optional[Union[ParserElement, str]] = None): + self.caller_frame = traceback.extract_stack(limit=2)[0] + super().__init__(other, savelist=False) + self.lshift_line = None + + def __lshift__(self, other): + if hasattr(self, "caller_frame"): + del self.caller_frame + if isinstance(other, str_type): + other = self._literalStringClass(other) + self.expr = other + self.mayIndexError = self.expr.mayIndexError + self.mayReturnEmpty = self.expr.mayReturnEmpty + self.set_whitespace_chars( + self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars + ) + self.skipWhitespace = self.expr.skipWhitespace + self.saveAsList = self.expr.saveAsList + self.ignoreExprs.extend(self.expr.ignoreExprs) + self.lshift_line = traceback.extract_stack(limit=2)[-2] + return self + + def __ilshift__(self, other): + return self << other + + def __or__(self, other): + caller_line = traceback.extract_stack(limit=2)[-2] + if ( + __diag__.warn_on_match_first_with_lshift_operator + and caller_line == self.lshift_line + and Diagnostics.warn_on_match_first_with_lshift_operator + not in self.suppress_warnings_ + ): + warnings.warn( + "using '<<' operator with '|' is probably an error, use '<<='", + stacklevel=2, + ) + ret = super().__or__(other) + return ret + + def __del__(self): + # see if we are getting dropped because of '=' reassignment of var instead of '<<=' or '<<' + if ( + self.expr is None + and __diag__.warn_on_assignment_to_Forward + and Diagnostics.warn_on_assignment_to_Forward not in self.suppress_warnings_ + ): + warnings.warn_explicit( + "Forward defined here but no expression attached later using '<<=' or '<<'", + UserWarning, + filename=self.caller_frame.filename, + lineno=self.caller_frame.lineno, + ) + + def parseImpl(self, instring, loc, doActions=True): + if ( + self.expr is None + and __diag__.warn_on_parse_using_empty_Forward + and Diagnostics.warn_on_parse_using_empty_Forward + not in self.suppress_warnings_ + ): + # walk stack until parse_string, scan_string, search_string, or transform_string is found + parse_fns = [ + "parse_string", + "scan_string", + "search_string", + "transform_string", + ] + tb = traceback.extract_stack(limit=200) + for i, frm in enumerate(reversed(tb), start=1): + if frm.name in parse_fns: + stacklevel = i + 1 + break + else: + stacklevel = 2 + warnings.warn( + "Forward expression was never assigned a value, will not parse any input", + stacklevel=stacklevel, + ) + if not ParserElement._left_recursion_enabled: + return super().parseImpl(instring, loc, doActions) + # ## Bounded Recursion algorithm ## + # Recursion only needs to be processed at ``Forward`` elements, since they are + # the only ones that can actually refer to themselves. The general idea is + # to handle recursion stepwise: We start at no recursion, then recurse once, + # recurse twice, ..., until more recursion offers no benefit (we hit the bound). + # + # The "trick" here is that each ``Forward`` gets evaluated in two contexts + # - to *match* a specific recursion level, and + # - to *search* the bounded recursion level + # and the two run concurrently. The *search* must *match* each recursion level + # to find the best possible match. This is handled by a memo table, which + # provides the previous match to the next level match attempt. + # + # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al. + # + # There is a complication since we not only *parse* but also *transform* via + # actions: We do not want to run the actions too often while expanding. Thus, + # we expand using `doActions=False` and only run `doActions=True` if the next + # recursion level is acceptable. + with ParserElement.recursion_lock: + memo = ParserElement.recursion_memos + try: + # we are parsing at a specific recursion expansion - use it as-is + prev_loc, prev_result = memo[loc, self, doActions] + if isinstance(prev_result, Exception): + raise prev_result + return prev_loc, prev_result.copy() + except KeyError: + act_key = (loc, self, True) + peek_key = (loc, self, False) + # we are searching for the best recursion expansion - keep on improving + # both `doActions` cases must be tracked separately here! + prev_loc, prev_peek = memo[peek_key] = ( + loc - 1, + ParseException( + instring, loc, "Forward recursion without base case", self + ), + ) + if doActions: + memo[act_key] = memo[peek_key] + while True: + try: + new_loc, new_peek = super().parseImpl(instring, loc, False) + except ParseException: + # we failed before getting any match – do not hide the error + if isinstance(prev_peek, Exception): + raise + new_loc, new_peek = prev_loc, prev_peek + # the match did not get better: we are done + if new_loc <= prev_loc: + if doActions: + # replace the match for doActions=False as well, + # in case the action did backtrack + prev_loc, prev_result = memo[peek_key] = memo[act_key] + del memo[peek_key], memo[act_key] + return prev_loc, prev_result.copy() + del memo[peek_key] + return prev_loc, prev_peek.copy() + # the match did get better: see if we can improve further + else: + if doActions: + try: + memo[act_key] = super().parseImpl(instring, loc, True) + except ParseException as e: + memo[peek_key] = memo[act_key] = (new_loc, e) + raise + prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek + + def leave_whitespace(self, recursive: bool = True) -> ParserElement: + self.skipWhitespace = False + return self + + def ignore_whitespace(self, recursive: bool = True) -> ParserElement: + self.skipWhitespace = True + return self + + def streamline(self) -> ParserElement: + if not self.streamlined: + self.streamlined = True + if self.expr is not None: + self.expr.streamline() + return self + + def validate(self, validateTrace=None) -> None: + if validateTrace is None: + validateTrace = [] + + if self not in validateTrace: + tmp = validateTrace[:] + [self] + if self.expr is not None: + self.expr.validate(tmp) + self._checkRecursion([]) + + def _generateDefaultName(self): + # Avoid infinite recursion by setting a temporary _defaultName + self._defaultName = ": ..." + + # Use the string representation of main expression. + retString = "..." + try: + if self.expr is not None: + retString = str(self.expr)[:1000] + else: + retString = "None" + finally: + return self.__class__.__name__ + ": " + retString + + def copy(self) -> ParserElement: + if self.expr is not None: + return super().copy() + else: + ret = Forward() + ret <<= self + return ret + + def _setResultsName(self, name, list_all_matches=False): + if ( + __diag__.warn_name_set_on_empty_Forward + and Diagnostics.warn_name_set_on_empty_Forward + not in self.suppress_warnings_ + ): + if self.expr is None: + warnings.warn( + "{}: setting results name {!r} on {} expression " + "that has no contained expression".format( + "warn_name_set_on_empty_Forward", name, type(self).__name__ + ), + stacklevel=3, + ) + + return super()._setResultsName(name, list_all_matches) + + ignoreWhitespace = ignore_whitespace + leaveWhitespace = leave_whitespace + + +class TokenConverter(ParseElementEnhance): + """ + Abstract subclass of :class:`ParseExpression`, for converting parsed results. + """ + + def __init__(self, expr: Union[ParserElement, str], savelist=False): + super().__init__(expr) # , savelist) + self.saveAsList = False + + +class Combine(TokenConverter): + """Converter to concatenate all matching tokens to a single string. + By default, the matching patterns must also be contiguous in the + input string; this can be disabled by specifying + ``'adjacent=False'`` in the constructor. + + Example:: + + real = Word(nums) + '.' + Word(nums) + print(real.parse_string('3.1416')) # -> ['3', '.', '1416'] + # will also erroneously match the following + print(real.parse_string('3. 1416')) # -> ['3', '.', '1416'] + + real = Combine(Word(nums) + '.' + Word(nums)) + print(real.parse_string('3.1416')) # -> ['3.1416'] + # no match when there are internal spaces + print(real.parse_string('3. 1416')) # -> Exception: Expected W:(0123...) + """ + + def __init__( + self, + expr: ParserElement, + join_string: str = "", + adjacent: bool = True, + *, + joinString: typing.Optional[str] = None, + ): + super().__init__(expr) + joinString = joinString if joinString is not None else join_string + # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself + if adjacent: + self.leave_whitespace() + self.adjacent = adjacent + self.skipWhitespace = True + self.joinString = joinString + self.callPreparse = True + + def ignore(self, other) -> ParserElement: + if self.adjacent: + ParserElement.ignore(self, other) + else: + super().ignore(other) + return self + + def postParse(self, instring, loc, tokenlist): + retToks = tokenlist.copy() + del retToks[:] + retToks += ParseResults( + ["".join(tokenlist._asStringList(self.joinString))], modal=self.modalResults + ) + + if self.resultsName and retToks.haskeys(): + return [retToks] + else: + return retToks + + +class Group(TokenConverter): + """Converter to return the matched tokens as a list - useful for + returning tokens of :class:`ZeroOrMore` and :class:`OneOrMore` expressions. + + The optional ``aslist`` argument when set to True will return the + parsed tokens as a Python list instead of a pyparsing ParseResults. + + Example:: + + ident = Word(alphas) + num = Word(nums) + term = ident | num + func = ident + Opt(delimited_list(term)) + print(func.parse_string("fn a, b, 100")) + # -> ['fn', 'a', 'b', '100'] + + func = ident + Group(Opt(delimited_list(term))) + print(func.parse_string("fn a, b, 100")) + # -> ['fn', ['a', 'b', '100']] + """ + + def __init__(self, expr: ParserElement, aslist: bool = False): + super().__init__(expr) + self.saveAsList = True + self._asPythonList = aslist + + def postParse(self, instring, loc, tokenlist): + if self._asPythonList: + return ParseResults.List( + tokenlist.asList() + if isinstance(tokenlist, ParseResults) + else list(tokenlist) + ) + else: + return [tokenlist] + + +class Dict(TokenConverter): + """Converter to return a repetitive expression as a list, but also + as a dictionary. Each element can also be referenced using the first + token in the expression as its key. Useful for tabular report + scraping when the first column can be used as a item key. + + The optional ``asdict`` argument when set to True will return the + parsed tokens as a Python dict instead of a pyparsing ParseResults. + + Example:: + + data_word = Word(alphas) + label = data_word + FollowedBy(':') + + text = "shape: SQUARE posn: upper left color: light blue texture: burlap" + attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)) + + # print attributes as plain groups + print(attr_expr[1, ...].parse_string(text).dump()) + + # instead of OneOrMore(expr), parse using Dict(Group(expr)[1, ...]) - Dict will auto-assign names + result = Dict(Group(attr_expr)[1, ...]).parse_string(text) + print(result.dump()) + + # access named fields as dict entries, or output as dict + print(result['shape']) + print(result.as_dict()) + + prints:: + + ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap'] + [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']] + - color: 'light blue' + - posn: 'upper left' + - shape: 'SQUARE' + - texture: 'burlap' + SQUARE + {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'} + + See more examples at :class:`ParseResults` of accessing fields by results name. + """ + + def __init__(self, expr: ParserElement, asdict: bool = False): + super().__init__(expr) + self.saveAsList = True + self._asPythonDict = asdict + + def postParse(self, instring, loc, tokenlist): + for i, tok in enumerate(tokenlist): + if len(tok) == 0: + continue + + ikey = tok[0] + if isinstance(ikey, int): + ikey = str(ikey).strip() + + if len(tok) == 1: + tokenlist[ikey] = _ParseResultsWithOffset("", i) + + elif len(tok) == 2 and not isinstance(tok[1], ParseResults): + tokenlist[ikey] = _ParseResultsWithOffset(tok[1], i) + + else: + try: + dictvalue = tok.copy() # ParseResults(i) + except Exception: + exc = TypeError( + "could not extract dict values from parsed results" + " - Dict expression must contain Grouped expressions" + ) + raise exc from None + + del dictvalue[0] + + if len(dictvalue) != 1 or ( + isinstance(dictvalue, ParseResults) and dictvalue.haskeys() + ): + tokenlist[ikey] = _ParseResultsWithOffset(dictvalue, i) + else: + tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0], i) + + if self._asPythonDict: + return [tokenlist.as_dict()] if self.resultsName else tokenlist.as_dict() + else: + return [tokenlist] if self.resultsName else tokenlist + + +class Suppress(TokenConverter): + """Converter for ignoring the results of a parsed expression. + + Example:: + + source = "a, b, c,d" + wd = Word(alphas) + wd_list1 = wd + (',' + wd)[...] + print(wd_list1.parse_string(source)) + + # often, delimiters that are useful during parsing are just in the + # way afterward - use Suppress to keep them out of the parsed output + wd_list2 = wd + (Suppress(',') + wd)[...] + print(wd_list2.parse_string(source)) + + # Skipped text (using '...') can be suppressed as well + source = "lead in START relevant text END trailing text" + start_marker = Keyword("START") + end_marker = Keyword("END") + find_body = Suppress(...) + start_marker + ... + end_marker + print(find_body.parse_string(source) + + prints:: + + ['a', ',', 'b', ',', 'c', ',', 'd'] + ['a', 'b', 'c', 'd'] + ['START', 'relevant text ', 'END'] + + (See also :class:`delimited_list`.) + """ + + def __init__(self, expr: Union[ParserElement, str], savelist: bool = False): + if expr is ...: + expr = _PendingSkip(NoMatch()) + super().__init__(expr) + + def __add__(self, other) -> "ParserElement": + if isinstance(self.expr, _PendingSkip): + return Suppress(SkipTo(other)) + other + else: + return super().__add__(other) + + def __sub__(self, other) -> "ParserElement": + if isinstance(self.expr, _PendingSkip): + return Suppress(SkipTo(other)) - other + else: + return super().__sub__(other) + + def postParse(self, instring, loc, tokenlist): + return [] + + def suppress(self) -> ParserElement: + return self + + +def trace_parse_action(f: ParseAction) -> ParseAction: + """Decorator for debugging parse actions. + + When the parse action is called, this decorator will print + ``">> entering method-name(line:, , )"``. + When the parse action completes, the decorator will print + ``"<<"`` followed by the returned value, or any exception that the parse action raised. + + Example:: + + wd = Word(alphas) + + @trace_parse_action + def remove_duplicate_chars(tokens): + return ''.join(sorted(set(''.join(tokens)))) + + wds = wd[1, ...].set_parse_action(remove_duplicate_chars) + print(wds.parse_string("slkdjs sld sldd sdlf sdljf")) + + prints:: + + >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {})) + < 3: + thisFunc = paArgs[0].__class__.__name__ + "." + thisFunc + sys.stderr.write( + ">>entering {}(line: {!r}, {}, {!r})\n".format(thisFunc, line(l, s), l, t) + ) + try: + ret = f(*paArgs) + except Exception as exc: + sys.stderr.write("< str: + r"""Helper to easily define string ranges for use in :class:`Word` + construction. Borrows syntax from regexp ``'[]'`` string range + definitions:: + + srange("[0-9]") -> "0123456789" + srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz" + srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_" + + The input string must be enclosed in []'s, and the returned string + is the expanded character set joined into a single string. The + values enclosed in the []'s may be: + + - a single character + - an escaped character with a leading backslash (such as ``\-`` + or ``\]``) + - an escaped hex character with a leading ``'\x'`` + (``\x21``, which is a ``'!'`` character) (``\0x##`` + is also supported for backwards compatibility) + - an escaped octal character with a leading ``'\0'`` + (``\041``, which is a ``'!'`` character) + - a range of any of the above, separated by a dash (``'a-z'``, + etc.) + - any combination of the above (``'aeiouy'``, + ``'a-zA-Z0-9_$'``, etc.) + """ + _expanded = ( + lambda p: p + if not isinstance(p, ParseResults) + else "".join(chr(c) for c in range(ord(p[0]), ord(p[1]) + 1)) + ) + try: + return "".join(_expanded(part) for part in _reBracketExpr.parse_string(s).body) + except Exception: + return "" + + +def token_map(func, *args) -> ParseAction: + """Helper to define a parse action by mapping a function to all + elements of a :class:`ParseResults` list. If any additional args are passed, + they are forwarded to the given function as additional arguments + after the token, as in + ``hex_integer = Word(hexnums).set_parse_action(token_map(int, 16))``, + which will convert the parsed data to an integer using base 16. + + Example (compare the last to example in :class:`ParserElement.transform_string`:: + + hex_ints = Word(hexnums)[1, ...].set_parse_action(token_map(int, 16)) + hex_ints.run_tests(''' + 00 11 22 aa FF 0a 0d 1a + ''') + + upperword = Word(alphas).set_parse_action(token_map(str.upper)) + upperword[1, ...].run_tests(''' + my kingdom for a horse + ''') + + wd = Word(alphas).set_parse_action(token_map(str.title)) + wd[1, ...].set_parse_action(' '.join).run_tests(''' + now is the winter of our discontent made glorious summer by this sun of york + ''') + + prints:: + + 00 11 22 aa FF 0a 0d 1a + [0, 17, 34, 170, 255, 10, 13, 26] + + my kingdom for a horse + ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE'] + + now is the winter of our discontent made glorious summer by this sun of york + ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York'] + """ + + def pa(s, l, t): + return [func(tokn, *args) for tokn in t] + + func_name = getattr(func, "__name__", getattr(func, "__class__").__name__) + pa.__name__ = func_name + + return pa + + +def autoname_elements() -> None: + """ + Utility to simplify mass-naming of parser elements, for + generating railroad diagram with named subdiagrams. + """ + for name, var in sys._getframe().f_back.f_locals.items(): + if isinstance(var, ParserElement) and not var.customName: + var.set_name(name) + + +dbl_quoted_string = Combine( + Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"' +).set_name("string enclosed in double quotes") + +sgl_quoted_string = Combine( + Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'" +).set_name("string enclosed in single quotes") + +quoted_string = Combine( + Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"' + | Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'" +).set_name("quotedString using single or double quotes") + +unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal") + + +alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]") +punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]") + +# build list of built-in expressions, for future reference if a global default value +# gets updated +_builtin_exprs: List[ParserElement] = [ + v for v in vars().values() if isinstance(v, ParserElement) +] + +# backward compatibility names +tokenMap = token_map +conditionAsParseAction = condition_as_parse_action +nullDebugAction = null_debug_action +sglQuotedString = sgl_quoted_string +dblQuotedString = dbl_quoted_string +quotedString = quoted_string +unicodeString = unicode_string +lineStart = line_start +lineEnd = line_end +stringStart = string_start +stringEnd = string_end +traceParseAction = trace_parse_action diff --git a/libs/common/setuptools/_vendor/pyparsing/diagram/__init__.py b/libs/common/setuptools/_vendor/pyparsing/diagram/__init__.py new file mode 100644 index 00000000..89864475 --- /dev/null +++ b/libs/common/setuptools/_vendor/pyparsing/diagram/__init__.py @@ -0,0 +1,642 @@ +import railroad +import pyparsing +import typing +from typing import ( + List, + NamedTuple, + Generic, + TypeVar, + Dict, + Callable, + Set, + Iterable, +) +from jinja2 import Template +from io import StringIO +import inspect + + +jinja2_template_source = """\ + + + + {% if not head %} + + {% else %} + {{ head | safe }} + {% endif %} + + +{{ body | safe }} +{% for diagram in diagrams %} +
+

{{ diagram.title }}

+
{{ diagram.text }}
+
+ {{ diagram.svg }} +
+
+{% endfor %} + + +""" + +template = Template(jinja2_template_source) + +# Note: ideally this would be a dataclass, but we're supporting Python 3.5+ so we can't do this yet +NamedDiagram = NamedTuple( + "NamedDiagram", + [("name", str), ("diagram", typing.Optional[railroad.DiagramItem]), ("index", int)], +) +""" +A simple structure for associating a name with a railroad diagram +""" + +T = TypeVar("T") + + +class EachItem(railroad.Group): + """ + Custom railroad item to compose a: + - Group containing a + - OneOrMore containing a + - Choice of the elements in the Each + with the group label indicating that all must be matched + """ + + all_label = "[ALL]" + + def __init__(self, *items): + choice_item = railroad.Choice(len(items) - 1, *items) + one_or_more_item = railroad.OneOrMore(item=choice_item) + super().__init__(one_or_more_item, label=self.all_label) + + +class AnnotatedItem(railroad.Group): + """ + Simple subclass of Group that creates an annotation label + """ + + def __init__(self, label: str, item): + super().__init__(item=item, label="[{}]".format(label) if label else label) + + +class EditablePartial(Generic[T]): + """ + Acts like a functools.partial, but can be edited. In other words, it represents a type that hasn't yet been + constructed. + """ + + # We need this here because the railroad constructors actually transform the data, so can't be called until the + # entire tree is assembled + + def __init__(self, func: Callable[..., T], args: list, kwargs: dict): + self.func = func + self.args = args + self.kwargs = kwargs + + @classmethod + def from_call(cls, func: Callable[..., T], *args, **kwargs) -> "EditablePartial[T]": + """ + If you call this function in the same way that you would call the constructor, it will store the arguments + as you expect. For example EditablePartial.from_call(Fraction, 1, 3)() == Fraction(1, 3) + """ + return EditablePartial(func=func, args=list(args), kwargs=kwargs) + + @property + def name(self): + return self.kwargs["name"] + + def __call__(self) -> T: + """ + Evaluate the partial and return the result + """ + args = self.args.copy() + kwargs = self.kwargs.copy() + + # This is a helpful hack to allow you to specify varargs parameters (e.g. *args) as keyword args (e.g. + # args=['list', 'of', 'things']) + arg_spec = inspect.getfullargspec(self.func) + if arg_spec.varargs in self.kwargs: + args += kwargs.pop(arg_spec.varargs) + + return self.func(*args, **kwargs) + + +def railroad_to_html(diagrams: List[NamedDiagram], **kwargs) -> str: + """ + Given a list of NamedDiagram, produce a single HTML string that visualises those diagrams + :params kwargs: kwargs to be passed in to the template + """ + data = [] + for diagram in diagrams: + if diagram.diagram is None: + continue + io = StringIO() + diagram.diagram.writeSvg(io.write) + title = diagram.name + if diagram.index == 0: + title += " (root)" + data.append({"title": title, "text": "", "svg": io.getvalue()}) + + return template.render(diagrams=data, **kwargs) + + +def resolve_partial(partial: "EditablePartial[T]") -> T: + """ + Recursively resolves a collection of Partials into whatever type they are + """ + if isinstance(partial, EditablePartial): + partial.args = resolve_partial(partial.args) + partial.kwargs = resolve_partial(partial.kwargs) + return partial() + elif isinstance(partial, list): + return [resolve_partial(x) for x in partial] + elif isinstance(partial, dict): + return {key: resolve_partial(x) for key, x in partial.items()} + else: + return partial + + +def to_railroad( + element: pyparsing.ParserElement, + diagram_kwargs: typing.Optional[dict] = None, + vertical: int = 3, + show_results_names: bool = False, + show_groups: bool = False, +) -> List[NamedDiagram]: + """ + Convert a pyparsing element tree into a list of diagrams. This is the recommended entrypoint to diagram + creation if you want to access the Railroad tree before it is converted to HTML + :param element: base element of the parser being diagrammed + :param diagram_kwargs: kwargs to pass to the Diagram() constructor + :param vertical: (optional) - int - limit at which number of alternatives should be + shown vertically instead of horizontally + :param show_results_names - bool to indicate whether results name annotations should be + included in the diagram + :param show_groups - bool to indicate whether groups should be highlighted with an unlabeled + surrounding box + """ + # Convert the whole tree underneath the root + lookup = ConverterState(diagram_kwargs=diagram_kwargs or {}) + _to_diagram_element( + element, + lookup=lookup, + parent=None, + vertical=vertical, + show_results_names=show_results_names, + show_groups=show_groups, + ) + + root_id = id(element) + # Convert the root if it hasn't been already + if root_id in lookup: + if not element.customName: + lookup[root_id].name = "" + lookup[root_id].mark_for_extraction(root_id, lookup, force=True) + + # Now that we're finished, we can convert from intermediate structures into Railroad elements + diags = list(lookup.diagrams.values()) + if len(diags) > 1: + # collapse out duplicate diags with the same name + seen = set() + deduped_diags = [] + for d in diags: + # don't extract SkipTo elements, they are uninformative as subdiagrams + if d.name == "...": + continue + if d.name is not None and d.name not in seen: + seen.add(d.name) + deduped_diags.append(d) + resolved = [resolve_partial(partial) for partial in deduped_diags] + else: + # special case - if just one diagram, always display it, even if + # it has no name + resolved = [resolve_partial(partial) for partial in diags] + return sorted(resolved, key=lambda diag: diag.index) + + +def _should_vertical( + specification: int, exprs: Iterable[pyparsing.ParserElement] +) -> bool: + """ + Returns true if we should return a vertical list of elements + """ + if specification is None: + return False + else: + return len(_visible_exprs(exprs)) >= specification + + +class ElementState: + """ + State recorded for an individual pyparsing Element + """ + + # Note: this should be a dataclass, but we have to support Python 3.5 + def __init__( + self, + element: pyparsing.ParserElement, + converted: EditablePartial, + parent: EditablePartial, + number: int, + name: str = None, + parent_index: typing.Optional[int] = None, + ): + #: The pyparsing element that this represents + self.element: pyparsing.ParserElement = element + #: The name of the element + self.name: typing.Optional[str] = name + #: The output Railroad element in an unconverted state + self.converted: EditablePartial = converted + #: The parent Railroad element, which we store so that we can extract this if it's duplicated + self.parent: EditablePartial = parent + #: The order in which we found this element, used for sorting diagrams if this is extracted into a diagram + self.number: int = number + #: The index of this inside its parent + self.parent_index: typing.Optional[int] = parent_index + #: If true, we should extract this out into a subdiagram + self.extract: bool = False + #: If true, all of this element's children have been filled out + self.complete: bool = False + + def mark_for_extraction( + self, el_id: int, state: "ConverterState", name: str = None, force: bool = False + ): + """ + Called when this instance has been seen twice, and thus should eventually be extracted into a sub-diagram + :param el_id: id of the element + :param state: element/diagram state tracker + :param name: name to use for this element's text + :param force: If true, force extraction now, regardless of the state of this. Only useful for extracting the + root element when we know we're finished + """ + self.extract = True + + # Set the name + if not self.name: + if name: + # Allow forcing a custom name + self.name = name + elif self.element.customName: + self.name = self.element.customName + else: + self.name = "" + + # Just because this is marked for extraction doesn't mean we can do it yet. We may have to wait for children + # to be added + # Also, if this is just a string literal etc, don't bother extracting it + if force or (self.complete and _worth_extracting(self.element)): + state.extract_into_diagram(el_id) + + +class ConverterState: + """ + Stores some state that persists between recursions into the element tree + """ + + def __init__(self, diagram_kwargs: typing.Optional[dict] = None): + #: A dictionary mapping ParserElements to state relating to them + self._element_diagram_states: Dict[int, ElementState] = {} + #: A dictionary mapping ParserElement IDs to subdiagrams generated from them + self.diagrams: Dict[int, EditablePartial[NamedDiagram]] = {} + #: The index of the next unnamed element + self.unnamed_index: int = 1 + #: The index of the next element. This is used for sorting + self.index: int = 0 + #: Shared kwargs that are used to customize the construction of diagrams + self.diagram_kwargs: dict = diagram_kwargs or {} + self.extracted_diagram_names: Set[str] = set() + + def __setitem__(self, key: int, value: ElementState): + self._element_diagram_states[key] = value + + def __getitem__(self, key: int) -> ElementState: + return self._element_diagram_states[key] + + def __delitem__(self, key: int): + del self._element_diagram_states[key] + + def __contains__(self, key: int): + return key in self._element_diagram_states + + def generate_unnamed(self) -> int: + """ + Generate a number used in the name of an otherwise unnamed diagram + """ + self.unnamed_index += 1 + return self.unnamed_index + + def generate_index(self) -> int: + """ + Generate a number used to index a diagram + """ + self.index += 1 + return self.index + + def extract_into_diagram(self, el_id: int): + """ + Used when we encounter the same token twice in the same tree. When this + happens, we replace all instances of that token with a terminal, and + create a new subdiagram for the token + """ + position = self[el_id] + + # Replace the original definition of this element with a regular block + if position.parent: + ret = EditablePartial.from_call(railroad.NonTerminal, text=position.name) + if "item" in position.parent.kwargs: + position.parent.kwargs["item"] = ret + elif "items" in position.parent.kwargs: + position.parent.kwargs["items"][position.parent_index] = ret + + # If the element we're extracting is a group, skip to its content but keep the title + if position.converted.func == railroad.Group: + content = position.converted.kwargs["item"] + else: + content = position.converted + + self.diagrams[el_id] = EditablePartial.from_call( + NamedDiagram, + name=position.name, + diagram=EditablePartial.from_call( + railroad.Diagram, content, **self.diagram_kwargs + ), + index=position.number, + ) + + del self[el_id] + + +def _worth_extracting(element: pyparsing.ParserElement) -> bool: + """ + Returns true if this element is worth having its own sub-diagram. Simply, if any of its children + themselves have children, then its complex enough to extract + """ + children = element.recurse() + return any(child.recurse() for child in children) + + +def _apply_diagram_item_enhancements(fn): + """ + decorator to ensure enhancements to a diagram item (such as results name annotations) + get applied on return from _to_diagram_element (we do this since there are several + returns in _to_diagram_element) + """ + + def _inner( + element: pyparsing.ParserElement, + parent: typing.Optional[EditablePartial], + lookup: ConverterState = None, + vertical: int = None, + index: int = 0, + name_hint: str = None, + show_results_names: bool = False, + show_groups: bool = False, + ) -> typing.Optional[EditablePartial]: + + ret = fn( + element, + parent, + lookup, + vertical, + index, + name_hint, + show_results_names, + show_groups, + ) + + # apply annotation for results name, if present + if show_results_names and ret is not None: + element_results_name = element.resultsName + if element_results_name: + # add "*" to indicate if this is a "list all results" name + element_results_name += "" if element.modalResults else "*" + ret = EditablePartial.from_call( + railroad.Group, item=ret, label=element_results_name + ) + + return ret + + return _inner + + +def _visible_exprs(exprs: Iterable[pyparsing.ParserElement]): + non_diagramming_exprs = ( + pyparsing.ParseElementEnhance, + pyparsing.PositionToken, + pyparsing.And._ErrorStop, + ) + return [ + e + for e in exprs + if not (e.customName or e.resultsName or isinstance(e, non_diagramming_exprs)) + ] + + +@_apply_diagram_item_enhancements +def _to_diagram_element( + element: pyparsing.ParserElement, + parent: typing.Optional[EditablePartial], + lookup: ConverterState = None, + vertical: int = None, + index: int = 0, + name_hint: str = None, + show_results_names: bool = False, + show_groups: bool = False, +) -> typing.Optional[EditablePartial]: + """ + Recursively converts a PyParsing Element to a railroad Element + :param lookup: The shared converter state that keeps track of useful things + :param index: The index of this element within the parent + :param parent: The parent of this element in the output tree + :param vertical: Controls at what point we make a list of elements vertical. If this is an integer (the default), + it sets the threshold of the number of items before we go vertical. If True, always go vertical, if False, never + do so + :param name_hint: If provided, this will override the generated name + :param show_results_names: bool flag indicating whether to add annotations for results names + :returns: The converted version of the input element, but as a Partial that hasn't yet been constructed + :param show_groups: bool flag indicating whether to show groups using bounding box + """ + exprs = element.recurse() + name = name_hint or element.customName or element.__class__.__name__ + + # Python's id() is used to provide a unique identifier for elements + el_id = id(element) + + element_results_name = element.resultsName + + # Here we basically bypass processing certain wrapper elements if they contribute nothing to the diagram + if not element.customName: + if isinstance( + element, + ( + # pyparsing.TokenConverter, + # pyparsing.Forward, + pyparsing.Located, + ), + ): + # However, if this element has a useful custom name, and its child does not, we can pass it on to the child + if exprs: + if not exprs[0].customName: + propagated_name = name + else: + propagated_name = None + + return _to_diagram_element( + element.expr, + parent=parent, + lookup=lookup, + vertical=vertical, + index=index, + name_hint=propagated_name, + show_results_names=show_results_names, + show_groups=show_groups, + ) + + # If the element isn't worth extracting, we always treat it as the first time we say it + if _worth_extracting(element): + if el_id in lookup: + # If we've seen this element exactly once before, we are only just now finding out that it's a duplicate, + # so we have to extract it into a new diagram. + looked_up = lookup[el_id] + looked_up.mark_for_extraction(el_id, lookup, name=name_hint) + ret = EditablePartial.from_call(railroad.NonTerminal, text=looked_up.name) + return ret + + elif el_id in lookup.diagrams: + # If we have seen the element at least twice before, and have already extracted it into a subdiagram, we + # just put in a marker element that refers to the sub-diagram + ret = EditablePartial.from_call( + railroad.NonTerminal, text=lookup.diagrams[el_id].kwargs["name"] + ) + return ret + + # Recursively convert child elements + # Here we find the most relevant Railroad element for matching pyparsing Element + # We use ``items=[]`` here to hold the place for where the child elements will go once created + if isinstance(element, pyparsing.And): + # detect And's created with ``expr*N`` notation - for these use a OneOrMore with a repeat + # (all will have the same name, and resultsName) + if not exprs: + return None + if len(set((e.name, e.resultsName) for e in exprs)) == 1: + ret = EditablePartial.from_call( + railroad.OneOrMore, item="", repeat=str(len(exprs)) + ) + elif _should_vertical(vertical, exprs): + ret = EditablePartial.from_call(railroad.Stack, items=[]) + else: + ret = EditablePartial.from_call(railroad.Sequence, items=[]) + elif isinstance(element, (pyparsing.Or, pyparsing.MatchFirst)): + if not exprs: + return None + if _should_vertical(vertical, exprs): + ret = EditablePartial.from_call(railroad.Choice, 0, items=[]) + else: + ret = EditablePartial.from_call(railroad.HorizontalChoice, items=[]) + elif isinstance(element, pyparsing.Each): + if not exprs: + return None + ret = EditablePartial.from_call(EachItem, items=[]) + elif isinstance(element, pyparsing.NotAny): + ret = EditablePartial.from_call(AnnotatedItem, label="NOT", item="") + elif isinstance(element, pyparsing.FollowedBy): + ret = EditablePartial.from_call(AnnotatedItem, label="LOOKAHEAD", item="") + elif isinstance(element, pyparsing.PrecededBy): + ret = EditablePartial.from_call(AnnotatedItem, label="LOOKBEHIND", item="") + elif isinstance(element, pyparsing.Group): + if show_groups: + ret = EditablePartial.from_call(AnnotatedItem, label="", item="") + else: + ret = EditablePartial.from_call(railroad.Group, label="", item="") + elif isinstance(element, pyparsing.TokenConverter): + ret = EditablePartial.from_call( + AnnotatedItem, label=type(element).__name__.lower(), item="" + ) + elif isinstance(element, pyparsing.Opt): + ret = EditablePartial.from_call(railroad.Optional, item="") + elif isinstance(element, pyparsing.OneOrMore): + ret = EditablePartial.from_call(railroad.OneOrMore, item="") + elif isinstance(element, pyparsing.ZeroOrMore): + ret = EditablePartial.from_call(railroad.ZeroOrMore, item="") + elif isinstance(element, pyparsing.Group): + ret = EditablePartial.from_call( + railroad.Group, item=None, label=element_results_name + ) + elif isinstance(element, pyparsing.Empty) and not element.customName: + # Skip unnamed "Empty" elements + ret = None + elif len(exprs) > 1: + ret = EditablePartial.from_call(railroad.Sequence, items=[]) + elif len(exprs) > 0 and not element_results_name: + ret = EditablePartial.from_call(railroad.Group, item="", label=name) + else: + terminal = EditablePartial.from_call(railroad.Terminal, element.defaultName) + ret = terminal + + if ret is None: + return + + # Indicate this element's position in the tree so we can extract it if necessary + lookup[el_id] = ElementState( + element=element, + converted=ret, + parent=parent, + parent_index=index, + number=lookup.generate_index(), + ) + if element.customName: + lookup[el_id].mark_for_extraction(el_id, lookup, element.customName) + + i = 0 + for expr in exprs: + # Add a placeholder index in case we have to extract the child before we even add it to the parent + if "items" in ret.kwargs: + ret.kwargs["items"].insert(i, None) + + item = _to_diagram_element( + expr, + parent=ret, + lookup=lookup, + vertical=vertical, + index=i, + show_results_names=show_results_names, + show_groups=show_groups, + ) + + # Some elements don't need to be shown in the diagram + if item is not None: + if "item" in ret.kwargs: + ret.kwargs["item"] = item + elif "items" in ret.kwargs: + # If we've already extracted the child, don't touch this index, since it's occupied by a nonterminal + ret.kwargs["items"][i] = item + i += 1 + elif "items" in ret.kwargs: + # If we're supposed to skip this element, remove it from the parent + del ret.kwargs["items"][i] + + # If all this items children are none, skip this item + if ret and ( + ("items" in ret.kwargs and len(ret.kwargs["items"]) == 0) + or ("item" in ret.kwargs and ret.kwargs["item"] is None) + ): + ret = EditablePartial.from_call(railroad.Terminal, name) + + # Mark this element as "complete", ie it has all of its children + if el_id in lookup: + lookup[el_id].complete = True + + if el_id in lookup and lookup[el_id].extract and lookup[el_id].complete: + lookup.extract_into_diagram(el_id) + if ret is not None: + ret = EditablePartial.from_call( + railroad.NonTerminal, text=lookup.diagrams[el_id].kwargs["name"] + ) + + return ret diff --git a/libs/common/setuptools/_vendor/pyparsing/exceptions.py b/libs/common/setuptools/_vendor/pyparsing/exceptions.py new file mode 100644 index 00000000..a38447bb --- /dev/null +++ b/libs/common/setuptools/_vendor/pyparsing/exceptions.py @@ -0,0 +1,267 @@ +# exceptions.py + +import re +import sys +import typing + +from .util import col, line, lineno, _collapse_string_to_ranges +from .unicode import pyparsing_unicode as ppu + + +class ExceptionWordUnicode(ppu.Latin1, ppu.LatinA, ppu.LatinB, ppu.Greek, ppu.Cyrillic): + pass + + +_extract_alphanums = _collapse_string_to_ranges(ExceptionWordUnicode.alphanums) +_exception_word_extractor = re.compile("([" + _extract_alphanums + "]{1,16})|.") + + +class ParseBaseException(Exception): + """base exception class for all parsing runtime exceptions""" + + # Performance tuning: we construct a *lot* of these, so keep this + # constructor as small and fast as possible + def __init__( + self, + pstr: str, + loc: int = 0, + msg: typing.Optional[str] = None, + elem=None, + ): + self.loc = loc + if msg is None: + self.msg = pstr + self.pstr = "" + else: + self.msg = msg + self.pstr = pstr + self.parser_element = self.parserElement = elem + self.args = (pstr, loc, msg) + + @staticmethod + def explain_exception(exc, depth=16): + """ + Method to take an exception and translate the Python internal traceback into a list + of the pyparsing expressions that caused the exception to be raised. + + Parameters: + + - exc - exception raised during parsing (need not be a ParseException, in support + of Python exceptions that might be raised in a parse action) + - depth (default=16) - number of levels back in the stack trace to list expression + and function names; if None, the full stack trace names will be listed; if 0, only + the failing input line, marker, and exception string will be shown + + Returns a multi-line string listing the ParserElements and/or function names in the + exception's stack trace. + """ + import inspect + from .core import ParserElement + + if depth is None: + depth = sys.getrecursionlimit() + ret = [] + if isinstance(exc, ParseBaseException): + ret.append(exc.line) + ret.append(" " * (exc.column - 1) + "^") + ret.append("{}: {}".format(type(exc).__name__, exc)) + + if depth > 0: + callers = inspect.getinnerframes(exc.__traceback__, context=depth) + seen = set() + for i, ff in enumerate(callers[-depth:]): + frm = ff[0] + + f_self = frm.f_locals.get("self", None) + if isinstance(f_self, ParserElement): + if frm.f_code.co_name not in ("parseImpl", "_parseNoCache"): + continue + if id(f_self) in seen: + continue + seen.add(id(f_self)) + + self_type = type(f_self) + ret.append( + "{}.{} - {}".format( + self_type.__module__, self_type.__name__, f_self + ) + ) + + elif f_self is not None: + self_type = type(f_self) + ret.append("{}.{}".format(self_type.__module__, self_type.__name__)) + + else: + code = frm.f_code + if code.co_name in ("wrapper", ""): + continue + + ret.append("{}".format(code.co_name)) + + depth -= 1 + if not depth: + break + + return "\n".join(ret) + + @classmethod + def _from_exception(cls, pe): + """ + internal factory method to simplify creating one type of ParseException + from another - avoids having __init__ signature conflicts among subclasses + """ + return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement) + + @property + def line(self) -> str: + """ + Return the line of text where the exception occurred. + """ + return line(self.loc, self.pstr) + + @property + def lineno(self) -> int: + """ + Return the 1-based line number of text where the exception occurred. + """ + return lineno(self.loc, self.pstr) + + @property + def col(self) -> int: + """ + Return the 1-based column on the line of text where the exception occurred. + """ + return col(self.loc, self.pstr) + + @property + def column(self) -> int: + """ + Return the 1-based column on the line of text where the exception occurred. + """ + return col(self.loc, self.pstr) + + def __str__(self) -> str: + if self.pstr: + if self.loc >= len(self.pstr): + foundstr = ", found end of text" + else: + # pull out next word at error location + found_match = _exception_word_extractor.match(self.pstr, self.loc) + if found_match is not None: + found = found_match.group(0) + else: + found = self.pstr[self.loc : self.loc + 1] + foundstr = (", found %r" % found).replace(r"\\", "\\") + else: + foundstr = "" + return "{}{} (at char {}), (line:{}, col:{})".format( + self.msg, foundstr, self.loc, self.lineno, self.column + ) + + def __repr__(self): + return str(self) + + def mark_input_line(self, marker_string: str = None, *, markerString=">!<") -> str: + """ + Extracts the exception line from the input string, and marks + the location of the exception with a special symbol. + """ + markerString = marker_string if marker_string is not None else markerString + line_str = self.line + line_column = self.column - 1 + if markerString: + line_str = "".join( + (line_str[:line_column], markerString, line_str[line_column:]) + ) + return line_str.strip() + + def explain(self, depth=16) -> str: + """ + Method to translate the Python internal traceback into a list + of the pyparsing expressions that caused the exception to be raised. + + Parameters: + + - depth (default=16) - number of levels back in the stack trace to list expression + and function names; if None, the full stack trace names will be listed; if 0, only + the failing input line, marker, and exception string will be shown + + Returns a multi-line string listing the ParserElements and/or function names in the + exception's stack trace. + + Example:: + + expr = pp.Word(pp.nums) * 3 + try: + expr.parse_string("123 456 A789") + except pp.ParseException as pe: + print(pe.explain(depth=0)) + + prints:: + + 123 456 A789 + ^ + ParseException: Expected W:(0-9), found 'A' (at char 8), (line:1, col:9) + + Note: the diagnostic output will include string representations of the expressions + that failed to parse. These representations will be more helpful if you use `set_name` to + give identifiable names to your expressions. Otherwise they will use the default string + forms, which may be cryptic to read. + + Note: pyparsing's default truncation of exception tracebacks may also truncate the + stack of expressions that are displayed in the ``explain`` output. To get the full listing + of parser expressions, you may have to set ``ParserElement.verbose_stacktrace = True`` + """ + return self.explain_exception(self, depth) + + markInputline = mark_input_line + + +class ParseException(ParseBaseException): + """ + Exception thrown when a parse expression doesn't match the input string + + Example:: + + try: + Word(nums).set_name("integer").parse_string("ABC") + except ParseException as pe: + print(pe) + print("column: {}".format(pe.column)) + + prints:: + + Expected integer (at char 0), (line:1, col:1) + column: 1 + + """ + + +class ParseFatalException(ParseBaseException): + """ + User-throwable exception thrown when inconsistent parse content + is found; stops all parsing immediately + """ + + +class ParseSyntaxException(ParseFatalException): + """ + Just like :class:`ParseFatalException`, but thrown internally + when an :class:`ErrorStop` ('-' operator) indicates + that parsing is to stop immediately because an unbacktrackable + syntax error has been found. + """ + + +class RecursiveGrammarException(Exception): + """ + Exception thrown by :class:`ParserElement.validate` if the + grammar could be left-recursive; parser may need to enable + left recursion using :class:`ParserElement.enable_left_recursion` + """ + + def __init__(self, parseElementList): + self.parseElementTrace = parseElementList + + def __str__(self) -> str: + return "RecursiveGrammarException: {}".format(self.parseElementTrace) diff --git a/libs/common/setuptools/_vendor/pyparsing/helpers.py b/libs/common/setuptools/_vendor/pyparsing/helpers.py new file mode 100644 index 00000000..9588b3b7 --- /dev/null +++ b/libs/common/setuptools/_vendor/pyparsing/helpers.py @@ -0,0 +1,1088 @@ +# helpers.py +import html.entities +import re +import typing + +from . import __diag__ +from .core import * +from .util import _bslash, _flatten, _escape_regex_range_chars + + +# +# global helpers +# +def delimited_list( + expr: Union[str, ParserElement], + delim: Union[str, ParserElement] = ",", + combine: bool = False, + min: typing.Optional[int] = None, + max: typing.Optional[int] = None, + *, + allow_trailing_delim: bool = False, +) -> ParserElement: + """Helper to define a delimited list of expressions - the delimiter + defaults to ','. By default, the list elements and delimiters can + have intervening whitespace, and comments, but this can be + overridden by passing ``combine=True`` in the constructor. If + ``combine`` is set to ``True``, the matching tokens are + returned as a single token string, with the delimiters included; + otherwise, the matching tokens are returned as a list of tokens, + with the delimiters suppressed. + + If ``allow_trailing_delim`` is set to True, then the list may end with + a delimiter. + + Example:: + + delimited_list(Word(alphas)).parse_string("aa,bb,cc") # -> ['aa', 'bb', 'cc'] + delimited_list(Word(hexnums), delim=':', combine=True).parse_string("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE'] + """ + if isinstance(expr, str_type): + expr = ParserElement._literalStringClass(expr) + + dlName = "{expr} [{delim} {expr}]...{end}".format( + expr=str(expr.copy().streamline()), + delim=str(delim), + end=" [{}]".format(str(delim)) if allow_trailing_delim else "", + ) + + if not combine: + delim = Suppress(delim) + + if min is not None: + if min < 1: + raise ValueError("min must be greater than 0") + min -= 1 + if max is not None: + if min is not None and max <= min: + raise ValueError("max must be greater than, or equal to min") + max -= 1 + delimited_list_expr = expr + (delim + expr)[min, max] + + if allow_trailing_delim: + delimited_list_expr += Opt(delim) + + if combine: + return Combine(delimited_list_expr).set_name(dlName) + else: + return delimited_list_expr.set_name(dlName) + + +def counted_array( + expr: ParserElement, + int_expr: typing.Optional[ParserElement] = None, + *, + intExpr: typing.Optional[ParserElement] = None, +) -> ParserElement: + """Helper to define a counted list of expressions. + + This helper defines a pattern of the form:: + + integer expr expr expr... + + where the leading integer tells how many expr expressions follow. + The matched tokens returns the array of expr tokens as a list - the + leading count token is suppressed. + + If ``int_expr`` is specified, it should be a pyparsing expression + that produces an integer value. + + Example:: + + counted_array(Word(alphas)).parse_string('2 ab cd ef') # -> ['ab', 'cd'] + + # in this parser, the leading integer value is given in binary, + # '10' indicating that 2 values are in the array + binary_constant = Word('01').set_parse_action(lambda t: int(t[0], 2)) + counted_array(Word(alphas), int_expr=binary_constant).parse_string('10 ab cd ef') # -> ['ab', 'cd'] + + # if other fields must be parsed after the count but before the + # list items, give the fields results names and they will + # be preserved in the returned ParseResults: + count_with_metadata = integer + Word(alphas)("type") + typed_array = counted_array(Word(alphanums), int_expr=count_with_metadata)("items") + result = typed_array.parse_string("3 bool True True False") + print(result.dump()) + + # prints + # ['True', 'True', 'False'] + # - items: ['True', 'True', 'False'] + # - type: 'bool' + """ + intExpr = intExpr or int_expr + array_expr = Forward() + + def count_field_parse_action(s, l, t): + nonlocal array_expr + n = t[0] + array_expr <<= (expr * n) if n else Empty() + # clear list contents, but keep any named results + del t[:] + + if intExpr is None: + intExpr = Word(nums).set_parse_action(lambda t: int(t[0])) + else: + intExpr = intExpr.copy() + intExpr.set_name("arrayLen") + intExpr.add_parse_action(count_field_parse_action, call_during_try=True) + return (intExpr + array_expr).set_name("(len) " + str(expr) + "...") + + +def match_previous_literal(expr: ParserElement) -> ParserElement: + """Helper to define an expression that is indirectly defined from + the tokens matched in a previous expression, that is, it looks for + a 'repeat' of a previous expression. For example:: + + first = Word(nums) + second = match_previous_literal(first) + match_expr = first + ":" + second + + will match ``"1:1"``, but not ``"1:2"``. Because this + matches a previous literal, will also match the leading + ``"1:1"`` in ``"1:10"``. If this is not desired, use + :class:`match_previous_expr`. Do *not* use with packrat parsing + enabled. + """ + rep = Forward() + + def copy_token_to_repeater(s, l, t): + if t: + if len(t) == 1: + rep << t[0] + else: + # flatten t tokens + tflat = _flatten(t.as_list()) + rep << And(Literal(tt) for tt in tflat) + else: + rep << Empty() + + expr.add_parse_action(copy_token_to_repeater, callDuringTry=True) + rep.set_name("(prev) " + str(expr)) + return rep + + +def match_previous_expr(expr: ParserElement) -> ParserElement: + """Helper to define an expression that is indirectly defined from + the tokens matched in a previous expression, that is, it looks for + a 'repeat' of a previous expression. For example:: + + first = Word(nums) + second = match_previous_expr(first) + match_expr = first + ":" + second + + will match ``"1:1"``, but not ``"1:2"``. Because this + matches by expressions, will *not* match the leading ``"1:1"`` + in ``"1:10"``; the expressions are evaluated first, and then + compared, so ``"1"`` is compared with ``"10"``. Do *not* use + with packrat parsing enabled. + """ + rep = Forward() + e2 = expr.copy() + rep <<= e2 + + def copy_token_to_repeater(s, l, t): + matchTokens = _flatten(t.as_list()) + + def must_match_these_tokens(s, l, t): + theseTokens = _flatten(t.as_list()) + if theseTokens != matchTokens: + raise ParseException( + s, l, "Expected {}, found{}".format(matchTokens, theseTokens) + ) + + rep.set_parse_action(must_match_these_tokens, callDuringTry=True) + + expr.add_parse_action(copy_token_to_repeater, callDuringTry=True) + rep.set_name("(prev) " + str(expr)) + return rep + + +def one_of( + strs: Union[typing.Iterable[str], str], + caseless: bool = False, + use_regex: bool = True, + as_keyword: bool = False, + *, + useRegex: bool = True, + asKeyword: bool = False, +) -> ParserElement: + """Helper to quickly define a set of alternative :class:`Literal` s, + and makes sure to do longest-first testing when there is a conflict, + regardless of the input order, but returns + a :class:`MatchFirst` for best performance. + + Parameters: + + - ``strs`` - a string of space-delimited literals, or a collection of + string literals + - ``caseless`` - treat all literals as caseless - (default= ``False``) + - ``use_regex`` - as an optimization, will + generate a :class:`Regex` object; otherwise, will generate + a :class:`MatchFirst` object (if ``caseless=True`` or ``asKeyword=True``, or if + creating a :class:`Regex` raises an exception) - (default= ``True``) + - ``as_keyword`` - enforce :class:`Keyword`-style matching on the + generated expressions - (default= ``False``) + - ``asKeyword`` and ``useRegex`` are retained for pre-PEP8 compatibility, + but will be removed in a future release + + Example:: + + comp_oper = one_of("< = > <= >= !=") + var = Word(alphas) + number = Word(nums) + term = var | number + comparison_expr = term + comp_oper + term + print(comparison_expr.search_string("B = 12 AA=23 B<=AA AA>12")) + + prints:: + + [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']] + """ + asKeyword = asKeyword or as_keyword + useRegex = useRegex and use_regex + + if ( + isinstance(caseless, str_type) + and __diag__.warn_on_multiple_string_args_to_oneof + ): + warnings.warn( + "More than one string argument passed to one_of, pass" + " choices as a list or space-delimited string", + stacklevel=2, + ) + + if caseless: + isequal = lambda a, b: a.upper() == b.upper() + masks = lambda a, b: b.upper().startswith(a.upper()) + parseElementClass = CaselessKeyword if asKeyword else CaselessLiteral + else: + isequal = lambda a, b: a == b + masks = lambda a, b: b.startswith(a) + parseElementClass = Keyword if asKeyword else Literal + + symbols: List[str] = [] + if isinstance(strs, str_type): + symbols = strs.split() + elif isinstance(strs, Iterable): + symbols = list(strs) + else: + raise TypeError("Invalid argument to one_of, expected string or iterable") + if not symbols: + return NoMatch() + + # reorder given symbols to take care to avoid masking longer choices with shorter ones + # (but only if the given symbols are not just single characters) + if any(len(sym) > 1 for sym in symbols): + i = 0 + while i < len(symbols) - 1: + cur = symbols[i] + for j, other in enumerate(symbols[i + 1 :]): + if isequal(other, cur): + del symbols[i + j + 1] + break + elif masks(cur, other): + del symbols[i + j + 1] + symbols.insert(i, other) + break + else: + i += 1 + + if useRegex: + re_flags: int = re.IGNORECASE if caseless else 0 + + try: + if all(len(sym) == 1 for sym in symbols): + # symbols are just single characters, create range regex pattern + patt = "[{}]".format( + "".join(_escape_regex_range_chars(sym) for sym in symbols) + ) + else: + patt = "|".join(re.escape(sym) for sym in symbols) + + # wrap with \b word break markers if defining as keywords + if asKeyword: + patt = r"\b(?:{})\b".format(patt) + + ret = Regex(patt, flags=re_flags).set_name(" | ".join(symbols)) + + if caseless: + # add parse action to return symbols as specified, not in random + # casing as found in input string + symbol_map = {sym.lower(): sym for sym in symbols} + ret.add_parse_action(lambda s, l, t: symbol_map[t[0].lower()]) + + return ret + + except re.error: + warnings.warn( + "Exception creating Regex for one_of, building MatchFirst", stacklevel=2 + ) + + # last resort, just use MatchFirst + return MatchFirst(parseElementClass(sym) for sym in symbols).set_name( + " | ".join(symbols) + ) + + +def dict_of(key: ParserElement, value: ParserElement) -> ParserElement: + """Helper to easily and clearly define a dictionary by specifying + the respective patterns for the key and value. Takes care of + defining the :class:`Dict`, :class:`ZeroOrMore`, and + :class:`Group` tokens in the proper order. The key pattern + can include delimiting markers or punctuation, as long as they are + suppressed, thereby leaving the significant key text. The value + pattern can include named results, so that the :class:`Dict` results + can include named token fields. + + Example:: + + text = "shape: SQUARE posn: upper left color: light blue texture: burlap" + attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)) + print(attr_expr[1, ...].parse_string(text).dump()) + + attr_label = label + attr_value = Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join) + + # similar to Dict, but simpler call format + result = dict_of(attr_label, attr_value).parse_string(text) + print(result.dump()) + print(result['shape']) + print(result.shape) # object attribute access works too + print(result.as_dict()) + + prints:: + + [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']] + - color: 'light blue' + - posn: 'upper left' + - shape: 'SQUARE' + - texture: 'burlap' + SQUARE + SQUARE + {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'} + """ + return Dict(OneOrMore(Group(key + value))) + + +def original_text_for( + expr: ParserElement, as_string: bool = True, *, asString: bool = True +) -> ParserElement: + """Helper to return the original, untokenized text for a given + expression. Useful to restore the parsed fields of an HTML start + tag into the raw tag text itself, or to revert separate tokens with + intervening whitespace back to the original matching input text. By + default, returns astring containing the original parsed text. + + If the optional ``as_string`` argument is passed as + ``False``, then the return value is + a :class:`ParseResults` containing any results names that + were originally matched, and a single token containing the original + matched text from the input string. So if the expression passed to + :class:`original_text_for` contains expressions with defined + results names, you must set ``as_string`` to ``False`` if you + want to preserve those results name values. + + The ``asString`` pre-PEP8 argument is retained for compatibility, + but will be removed in a future release. + + Example:: + + src = "this is test bold text normal text " + for tag in ("b", "i"): + opener, closer = make_html_tags(tag) + patt = original_text_for(opener + SkipTo(closer) + closer) + print(patt.search_string(src)[0]) + + prints:: + + [' bold text '] + ['text'] + """ + asString = asString and as_string + + locMarker = Empty().set_parse_action(lambda s, loc, t: loc) + endlocMarker = locMarker.copy() + endlocMarker.callPreparse = False + matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end") + if asString: + extractText = lambda s, l, t: s[t._original_start : t._original_end] + else: + + def extractText(s, l, t): + t[:] = [s[t.pop("_original_start") : t.pop("_original_end")]] + + matchExpr.set_parse_action(extractText) + matchExpr.ignoreExprs = expr.ignoreExprs + matchExpr.suppress_warning(Diagnostics.warn_ungrouped_named_tokens_in_collection) + return matchExpr + + +def ungroup(expr: ParserElement) -> ParserElement: + """Helper to undo pyparsing's default grouping of And expressions, + even if all but one are non-empty. + """ + return TokenConverter(expr).add_parse_action(lambda t: t[0]) + + +def locatedExpr(expr: ParserElement) -> ParserElement: + """ + (DEPRECATED - future code should use the Located class) + Helper to decorate a returned token with its starting and ending + locations in the input string. + + This helper adds the following results names: + + - ``locn_start`` - location where matched expression begins + - ``locn_end`` - location where matched expression ends + - ``value`` - the actual parsed results + + Be careful if the input text contains ```` characters, you + may want to call :class:`ParserElement.parseWithTabs` + + Example:: + + wd = Word(alphas) + for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"): + print(match) + + prints:: + + [[0, 'ljsdf', 5]] + [[8, 'lksdjjf', 15]] + [[18, 'lkkjj', 23]] + """ + locator = Empty().set_parse_action(lambda ss, ll, tt: ll) + return Group( + locator("locn_start") + + expr("value") + + locator.copy().leaveWhitespace()("locn_end") + ) + + +def nested_expr( + opener: Union[str, ParserElement] = "(", + closer: Union[str, ParserElement] = ")", + content: typing.Optional[ParserElement] = None, + ignore_expr: ParserElement = quoted_string(), + *, + ignoreExpr: ParserElement = quoted_string(), +) -> ParserElement: + """Helper method for defining nested lists enclosed in opening and + closing delimiters (``"("`` and ``")"`` are the default). + + Parameters: + - ``opener`` - opening character for a nested list + (default= ``"("``); can also be a pyparsing expression + - ``closer`` - closing character for a nested list + (default= ``")"``); can also be a pyparsing expression + - ``content`` - expression for items within the nested lists + (default= ``None``) + - ``ignore_expr`` - expression for ignoring opening and closing delimiters + (default= :class:`quoted_string`) + - ``ignoreExpr`` - this pre-PEP8 argument is retained for compatibility + but will be removed in a future release + + If an expression is not provided for the content argument, the + nested expression will capture all whitespace-delimited content + between delimiters as a list of separate values. + + Use the ``ignore_expr`` argument to define expressions that may + contain opening or closing characters that should not be treated as + opening or closing characters for nesting, such as quoted_string or + a comment expression. Specify multiple expressions using an + :class:`Or` or :class:`MatchFirst`. The default is + :class:`quoted_string`, but if no expressions are to be ignored, then + pass ``None`` for this argument. + + Example:: + + data_type = one_of("void int short long char float double") + decl_data_type = Combine(data_type + Opt(Word('*'))) + ident = Word(alphas+'_', alphanums+'_') + number = pyparsing_common.number + arg = Group(decl_data_type + ident) + LPAR, RPAR = map(Suppress, "()") + + code_body = nested_expr('{', '}', ignore_expr=(quoted_string | c_style_comment)) + + c_function = (decl_data_type("type") + + ident("name") + + LPAR + Opt(delimited_list(arg), [])("args") + RPAR + + code_body("body")) + c_function.ignore(c_style_comment) + + source_code = ''' + int is_odd(int x) { + return (x%2); + } + + int dec_to_hex(char hchar) { + if (hchar >= '0' && hchar <= '9') { + return (ord(hchar)-ord('0')); + } else { + return (10+ord(hchar)-ord('A')); + } + } + ''' + for func in c_function.search_string(source_code): + print("%(name)s (%(type)s) args: %(args)s" % func) + + + prints:: + + is_odd (int) args: [['int', 'x']] + dec_to_hex (int) args: [['char', 'hchar']] + """ + if ignoreExpr != ignore_expr: + ignoreExpr = ignore_expr if ignoreExpr == quoted_string() else ignoreExpr + if opener == closer: + raise ValueError("opening and closing strings cannot be the same") + if content is None: + if isinstance(opener, str_type) and isinstance(closer, str_type): + if len(opener) == 1 and len(closer) == 1: + if ignoreExpr is not None: + content = Combine( + OneOrMore( + ~ignoreExpr + + CharsNotIn( + opener + closer + ParserElement.DEFAULT_WHITE_CHARS, + exact=1, + ) + ) + ).set_parse_action(lambda t: t[0].strip()) + else: + content = empty.copy() + CharsNotIn( + opener + closer + ParserElement.DEFAULT_WHITE_CHARS + ).set_parse_action(lambda t: t[0].strip()) + else: + if ignoreExpr is not None: + content = Combine( + OneOrMore( + ~ignoreExpr + + ~Literal(opener) + + ~Literal(closer) + + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1) + ) + ).set_parse_action(lambda t: t[0].strip()) + else: + content = Combine( + OneOrMore( + ~Literal(opener) + + ~Literal(closer) + + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1) + ) + ).set_parse_action(lambda t: t[0].strip()) + else: + raise ValueError( + "opening and closing arguments must be strings if no content expression is given" + ) + ret = Forward() + if ignoreExpr is not None: + ret <<= Group( + Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer) + ) + else: + ret <<= Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer)) + ret.set_name("nested %s%s expression" % (opener, closer)) + return ret + + +def _makeTags(tagStr, xml, suppress_LT=Suppress("<"), suppress_GT=Suppress(">")): + """Internal helper to construct opening and closing tag expressions, given a tag name""" + if isinstance(tagStr, str_type): + resname = tagStr + tagStr = Keyword(tagStr, caseless=not xml) + else: + resname = tagStr.name + + tagAttrName = Word(alphas, alphanums + "_-:") + if xml: + tagAttrValue = dbl_quoted_string.copy().set_parse_action(remove_quotes) + openTag = ( + suppress_LT + + tagStr("tag") + + Dict(ZeroOrMore(Group(tagAttrName + Suppress("=") + tagAttrValue))) + + Opt("/", default=[False])("empty").set_parse_action( + lambda s, l, t: t[0] == "/" + ) + + suppress_GT + ) + else: + tagAttrValue = quoted_string.copy().set_parse_action(remove_quotes) | Word( + printables, exclude_chars=">" + ) + openTag = ( + suppress_LT + + tagStr("tag") + + Dict( + ZeroOrMore( + Group( + tagAttrName.set_parse_action(lambda t: t[0].lower()) + + Opt(Suppress("=") + tagAttrValue) + ) + ) + ) + + Opt("/", default=[False])("empty").set_parse_action( + lambda s, l, t: t[0] == "/" + ) + + suppress_GT + ) + closeTag = Combine(Literal("", adjacent=False) + + openTag.set_name("<%s>" % resname) + # add start results name in parse action now that ungrouped names are not reported at two levels + openTag.add_parse_action( + lambda t: t.__setitem__( + "start" + "".join(resname.replace(":", " ").title().split()), t.copy() + ) + ) + closeTag = closeTag( + "end" + "".join(resname.replace(":", " ").title().split()) + ).set_name("" % resname) + openTag.tag = resname + closeTag.tag = resname + openTag.tag_body = SkipTo(closeTag()) + return openTag, closeTag + + +def make_html_tags( + tag_str: Union[str, ParserElement] +) -> Tuple[ParserElement, ParserElement]: + """Helper to construct opening and closing tag expressions for HTML, + given a tag name. Matches tags in either upper or lower case, + attributes with namespaces and with quoted or unquoted values. + + Example:: + + text = 'More info at the pyparsing wiki page' + # make_html_tags returns pyparsing expressions for the opening and + # closing tags as a 2-tuple + a, a_end = make_html_tags("A") + link_expr = a + SkipTo(a_end)("link_text") + a_end + + for link in link_expr.search_string(text): + # attributes in the tag (like "href" shown here) are + # also accessible as named results + print(link.link_text, '->', link.href) + + prints:: + + pyparsing -> https://github.com/pyparsing/pyparsing/wiki + """ + return _makeTags(tag_str, False) + + +def make_xml_tags( + tag_str: Union[str, ParserElement] +) -> Tuple[ParserElement, ParserElement]: + """Helper to construct opening and closing tag expressions for XML, + given a tag name. Matches tags only in the given upper/lower case. + + Example: similar to :class:`make_html_tags` + """ + return _makeTags(tag_str, True) + + +any_open_tag: ParserElement +any_close_tag: ParserElement +any_open_tag, any_close_tag = make_html_tags( + Word(alphas, alphanums + "_:").set_name("any tag") +) + +_htmlEntityMap = {k.rstrip(";"): v for k, v in html.entities.html5.items()} +common_html_entity = Regex("&(?P" + "|".join(_htmlEntityMap) + ");").set_name( + "common HTML entity" +) + + +def replace_html_entity(t): + """Helper parser action to replace common HTML entities with their special characters""" + return _htmlEntityMap.get(t.entity) + + +class OpAssoc(Enum): + LEFT = 1 + RIGHT = 2 + + +InfixNotationOperatorArgType = Union[ + ParserElement, str, Tuple[Union[ParserElement, str], Union[ParserElement, str]] +] +InfixNotationOperatorSpec = Union[ + Tuple[ + InfixNotationOperatorArgType, + int, + OpAssoc, + typing.Optional[ParseAction], + ], + Tuple[ + InfixNotationOperatorArgType, + int, + OpAssoc, + ], +] + + +def infix_notation( + base_expr: ParserElement, + op_list: List[InfixNotationOperatorSpec], + lpar: Union[str, ParserElement] = Suppress("("), + rpar: Union[str, ParserElement] = Suppress(")"), +) -> ParserElement: + """Helper method for constructing grammars of expressions made up of + operators working in a precedence hierarchy. Operators may be unary + or binary, left- or right-associative. Parse actions can also be + attached to operator expressions. The generated parser will also + recognize the use of parentheses to override operator precedences + (see example below). + + Note: if you define a deep operator list, you may see performance + issues when using infix_notation. See + :class:`ParserElement.enable_packrat` for a mechanism to potentially + improve your parser performance. + + Parameters: + - ``base_expr`` - expression representing the most basic operand to + be used in the expression + - ``op_list`` - list of tuples, one for each operator precedence level + in the expression grammar; each tuple is of the form ``(op_expr, + num_operands, right_left_assoc, (optional)parse_action)``, where: + + - ``op_expr`` is the pyparsing expression for the operator; may also + be a string, which will be converted to a Literal; if ``num_operands`` + is 3, ``op_expr`` is a tuple of two expressions, for the two + operators separating the 3 terms + - ``num_operands`` is the number of terms for this operator (must be 1, + 2, or 3) + - ``right_left_assoc`` is the indicator whether the operator is right + or left associative, using the pyparsing-defined constants + ``OpAssoc.RIGHT`` and ``OpAssoc.LEFT``. + - ``parse_action`` is the parse action to be associated with + expressions matching this operator expression (the parse action + tuple member may be omitted); if the parse action is passed + a tuple or list of functions, this is equivalent to calling + ``set_parse_action(*fn)`` + (:class:`ParserElement.set_parse_action`) + - ``lpar`` - expression for matching left-parentheses; if passed as a + str, then will be parsed as Suppress(lpar). If lpar is passed as + an expression (such as ``Literal('(')``), then it will be kept in + the parsed results, and grouped with them. (default= ``Suppress('(')``) + - ``rpar`` - expression for matching right-parentheses; if passed as a + str, then will be parsed as Suppress(rpar). If rpar is passed as + an expression (such as ``Literal(')')``), then it will be kept in + the parsed results, and grouped with them. (default= ``Suppress(')')``) + + Example:: + + # simple example of four-function arithmetic with ints and + # variable names + integer = pyparsing_common.signed_integer + varname = pyparsing_common.identifier + + arith_expr = infix_notation(integer | varname, + [ + ('-', 1, OpAssoc.RIGHT), + (one_of('* /'), 2, OpAssoc.LEFT), + (one_of('+ -'), 2, OpAssoc.LEFT), + ]) + + arith_expr.run_tests(''' + 5+3*6 + (5+3)*6 + -2--11 + ''', full_dump=False) + + prints:: + + 5+3*6 + [[5, '+', [3, '*', 6]]] + + (5+3)*6 + [[[5, '+', 3], '*', 6]] + + -2--11 + [[['-', 2], '-', ['-', 11]]] + """ + # captive version of FollowedBy that does not do parse actions or capture results names + class _FB(FollowedBy): + def parseImpl(self, instring, loc, doActions=True): + self.expr.try_parse(instring, loc) + return loc, [] + + _FB.__name__ = "FollowedBy>" + + ret = Forward() + if isinstance(lpar, str): + lpar = Suppress(lpar) + if isinstance(rpar, str): + rpar = Suppress(rpar) + + # if lpar and rpar are not suppressed, wrap in group + if not (isinstance(rpar, Suppress) and isinstance(rpar, Suppress)): + lastExpr = base_expr | Group(lpar + ret + rpar) + else: + lastExpr = base_expr | (lpar + ret + rpar) + + for i, operDef in enumerate(op_list): + opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4] + if isinstance(opExpr, str_type): + opExpr = ParserElement._literalStringClass(opExpr) + if arity == 3: + if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2: + raise ValueError( + "if numterms=3, opExpr must be a tuple or list of two expressions" + ) + opExpr1, opExpr2 = opExpr + term_name = "{}{} term".format(opExpr1, opExpr2) + else: + term_name = "{} term".format(opExpr) + + if not 1 <= arity <= 3: + raise ValueError("operator must be unary (1), binary (2), or ternary (3)") + + if rightLeftAssoc not in (OpAssoc.LEFT, OpAssoc.RIGHT): + raise ValueError("operator must indicate right or left associativity") + + thisExpr: Forward = Forward().set_name(term_name) + if rightLeftAssoc is OpAssoc.LEFT: + if arity == 1: + matchExpr = _FB(lastExpr + opExpr) + Group(lastExpr + opExpr[1, ...]) + elif arity == 2: + if opExpr is not None: + matchExpr = _FB(lastExpr + opExpr + lastExpr) + Group( + lastExpr + (opExpr + lastExpr)[1, ...] + ) + else: + matchExpr = _FB(lastExpr + lastExpr) + Group(lastExpr[2, ...]) + elif arity == 3: + matchExpr = _FB( + lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr + ) + Group(lastExpr + OneOrMore(opExpr1 + lastExpr + opExpr2 + lastExpr)) + elif rightLeftAssoc is OpAssoc.RIGHT: + if arity == 1: + # try to avoid LR with this extra test + if not isinstance(opExpr, Opt): + opExpr = Opt(opExpr) + matchExpr = _FB(opExpr.expr + thisExpr) + Group(opExpr + thisExpr) + elif arity == 2: + if opExpr is not None: + matchExpr = _FB(lastExpr + opExpr + thisExpr) + Group( + lastExpr + (opExpr + thisExpr)[1, ...] + ) + else: + matchExpr = _FB(lastExpr + thisExpr) + Group( + lastExpr + thisExpr[1, ...] + ) + elif arity == 3: + matchExpr = _FB( + lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr + ) + Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + if pa: + if isinstance(pa, (tuple, list)): + matchExpr.set_parse_action(*pa) + else: + matchExpr.set_parse_action(pa) + thisExpr <<= (matchExpr | lastExpr).setName(term_name) + lastExpr = thisExpr + ret <<= lastExpr + return ret + + +def indentedBlock(blockStatementExpr, indentStack, indent=True, backup_stacks=[]): + """ + (DEPRECATED - use IndentedBlock class instead) + Helper method for defining space-delimited indentation blocks, + such as those used to define block statements in Python source code. + + Parameters: + + - ``blockStatementExpr`` - expression defining syntax of statement that + is repeated within the indented block + - ``indentStack`` - list created by caller to manage indentation stack + (multiple ``statementWithIndentedBlock`` expressions within a single + grammar should share a common ``indentStack``) + - ``indent`` - boolean indicating whether block must be indented beyond + the current level; set to ``False`` for block of left-most statements + (default= ``True``) + + A valid block must contain at least one ``blockStatement``. + + (Note that indentedBlock uses internal parse actions which make it + incompatible with packrat parsing.) + + Example:: + + data = ''' + def A(z): + A1 + B = 100 + G = A2 + A2 + A3 + B + def BB(a,b,c): + BB1 + def BBA(): + bba1 + bba2 + bba3 + C + D + def spam(x,y): + def eggs(z): + pass + ''' + + + indentStack = [1] + stmt = Forward() + + identifier = Word(alphas, alphanums) + funcDecl = ("def" + identifier + Group("(" + Opt(delimitedList(identifier)) + ")") + ":") + func_body = indentedBlock(stmt, indentStack) + funcDef = Group(funcDecl + func_body) + + rvalue = Forward() + funcCall = Group(identifier + "(" + Opt(delimitedList(rvalue)) + ")") + rvalue << (funcCall | identifier | Word(nums)) + assignment = Group(identifier + "=" + rvalue) + stmt << (funcDef | assignment | identifier) + + module_body = stmt[1, ...] + + parseTree = module_body.parseString(data) + parseTree.pprint() + + prints:: + + [['def', + 'A', + ['(', 'z', ')'], + ':', + [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]], + 'B', + ['def', + 'BB', + ['(', 'a', 'b', 'c', ')'], + ':', + [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]], + 'C', + 'D', + ['def', + 'spam', + ['(', 'x', 'y', ')'], + ':', + [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]] + """ + backup_stacks.append(indentStack[:]) + + def reset_stack(): + indentStack[:] = backup_stacks[-1] + + def checkPeerIndent(s, l, t): + if l >= len(s): + return + curCol = col(l, s) + if curCol != indentStack[-1]: + if curCol > indentStack[-1]: + raise ParseException(s, l, "illegal nesting") + raise ParseException(s, l, "not a peer entry") + + def checkSubIndent(s, l, t): + curCol = col(l, s) + if curCol > indentStack[-1]: + indentStack.append(curCol) + else: + raise ParseException(s, l, "not a subentry") + + def checkUnindent(s, l, t): + if l >= len(s): + return + curCol = col(l, s) + if not (indentStack and curCol in indentStack): + raise ParseException(s, l, "not an unindent") + if curCol < indentStack[-1]: + indentStack.pop() + + NL = OneOrMore(LineEnd().set_whitespace_chars("\t ").suppress()) + INDENT = (Empty() + Empty().set_parse_action(checkSubIndent)).set_name("INDENT") + PEER = Empty().set_parse_action(checkPeerIndent).set_name("") + UNDENT = Empty().set_parse_action(checkUnindent).set_name("UNINDENT") + if indent: + smExpr = Group( + Opt(NL) + + INDENT + + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL)) + + UNDENT + ) + else: + smExpr = Group( + Opt(NL) + + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL)) + + Opt(UNDENT) + ) + + # add a parse action to remove backup_stack from list of backups + smExpr.add_parse_action( + lambda: backup_stacks.pop(-1) and None if backup_stacks else None + ) + smExpr.set_fail_action(lambda a, b, c, d: reset_stack()) + blockStatementExpr.ignore(_bslash + LineEnd()) + return smExpr.set_name("indented block") + + +# it's easy to get these comment structures wrong - they're very common, so may as well make them available +c_style_comment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/").set_name( + "C style comment" +) +"Comment of the form ``/* ... */``" + +html_comment = Regex(r"").set_name("HTML comment") +"Comment of the form ````" + +rest_of_line = Regex(r".*").leave_whitespace().set_name("rest of line") +dbl_slash_comment = Regex(r"//(?:\\\n|[^\n])*").set_name("// comment") +"Comment of the form ``// ... (to end of line)``" + +cpp_style_comment = Combine( + Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/" | dbl_slash_comment +).set_name("C++ style comment") +"Comment of either form :class:`c_style_comment` or :class:`dbl_slash_comment`" + +java_style_comment = cpp_style_comment +"Same as :class:`cpp_style_comment`" + +python_style_comment = Regex(r"#.*").set_name("Python style comment") +"Comment of the form ``# ... (to end of line)``" + + +# build list of built-in expressions, for future reference if a global default value +# gets updated +_builtin_exprs: List[ParserElement] = [ + v for v in vars().values() if isinstance(v, ParserElement) +] + + +# pre-PEP8 compatible names +delimitedList = delimited_list +countedArray = counted_array +matchPreviousLiteral = match_previous_literal +matchPreviousExpr = match_previous_expr +oneOf = one_of +dictOf = dict_of +originalTextFor = original_text_for +nestedExpr = nested_expr +makeHTMLTags = make_html_tags +makeXMLTags = make_xml_tags +anyOpenTag, anyCloseTag = any_open_tag, any_close_tag +commonHTMLEntity = common_html_entity +replaceHTMLEntity = replace_html_entity +opAssoc = OpAssoc +infixNotation = infix_notation +cStyleComment = c_style_comment +htmlComment = html_comment +restOfLine = rest_of_line +dblSlashComment = dbl_slash_comment +cppStyleComment = cpp_style_comment +javaStyleComment = java_style_comment +pythonStyleComment = python_style_comment diff --git a/libs/common/setuptools/_vendor/pyparsing/results.py b/libs/common/setuptools/_vendor/pyparsing/results.py new file mode 100644 index 00000000..00c9421d --- /dev/null +++ b/libs/common/setuptools/_vendor/pyparsing/results.py @@ -0,0 +1,760 @@ +# results.py +from collections.abc import MutableMapping, Mapping, MutableSequence, Iterator +import pprint +from weakref import ref as wkref +from typing import Tuple, Any + +str_type: Tuple[type, ...] = (str, bytes) +_generator_type = type((_ for _ in ())) + + +class _ParseResultsWithOffset: + __slots__ = ["tup"] + + def __init__(self, p1, p2): + self.tup = (p1, p2) + + def __getitem__(self, i): + return self.tup[i] + + def __getstate__(self): + return self.tup + + def __setstate__(self, *args): + self.tup = args[0] + + +class ParseResults: + """Structured parse results, to provide multiple means of access to + the parsed data: + + - as a list (``len(results)``) + - by list index (``results[0], results[1]``, etc.) + - by attribute (``results.`` - see :class:`ParserElement.set_results_name`) + + Example:: + + integer = Word(nums) + date_str = (integer.set_results_name("year") + '/' + + integer.set_results_name("month") + '/' + + integer.set_results_name("day")) + # equivalent form: + # date_str = (integer("year") + '/' + # + integer("month") + '/' + # + integer("day")) + + # parse_string returns a ParseResults object + result = date_str.parse_string("1999/12/31") + + def test(s, fn=repr): + print("{} -> {}".format(s, fn(eval(s)))) + test("list(result)") + test("result[0]") + test("result['month']") + test("result.day") + test("'month' in result") + test("'minutes' in result") + test("result.dump()", str) + + prints:: + + list(result) -> ['1999', '/', '12', '/', '31'] + result[0] -> '1999' + result['month'] -> '12' + result.day -> '31' + 'month' in result -> True + 'minutes' in result -> False + result.dump() -> ['1999', '/', '12', '/', '31'] + - day: '31' + - month: '12' + - year: '1999' + """ + + _null_values: Tuple[Any, ...] = (None, [], "", ()) + + __slots__ = [ + "_name", + "_parent", + "_all_names", + "_modal", + "_toklist", + "_tokdict", + "__weakref__", + ] + + class List(list): + """ + Simple wrapper class to distinguish parsed list results that should be preserved + as actual Python lists, instead of being converted to :class:`ParseResults`: + + LBRACK, RBRACK = map(pp.Suppress, "[]") + element = pp.Forward() + item = ppc.integer + element_list = LBRACK + pp.delimited_list(element) + RBRACK + + # add parse actions to convert from ParseResults to actual Python collection types + def as_python_list(t): + return pp.ParseResults.List(t.as_list()) + element_list.add_parse_action(as_python_list) + + element <<= item | element_list + + element.run_tests(''' + 100 + [2,3,4] + [[2, 1],3,4] + [(2, 1),3,4] + (2,3,4) + ''', post_parse=lambda s, r: (r[0], type(r[0]))) + + prints: + + 100 + (100, ) + + [2,3,4] + ([2, 3, 4], ) + + [[2, 1],3,4] + ([[2, 1], 3, 4], ) + + (Used internally by :class:`Group` when `aslist=True`.) + """ + + def __new__(cls, contained=None): + if contained is None: + contained = [] + + if not isinstance(contained, list): + raise TypeError( + "{} may only be constructed with a list," + " not {}".format(cls.__name__, type(contained).__name__) + ) + + return list.__new__(cls) + + def __new__(cls, toklist=None, name=None, **kwargs): + if isinstance(toklist, ParseResults): + return toklist + self = object.__new__(cls) + self._name = None + self._parent = None + self._all_names = set() + + if toklist is None: + self._toklist = [] + elif isinstance(toklist, (list, _generator_type)): + self._toklist = ( + [toklist[:]] + if isinstance(toklist, ParseResults.List) + else list(toklist) + ) + else: + self._toklist = [toklist] + self._tokdict = dict() + return self + + # Performance tuning: we construct a *lot* of these, so keep this + # constructor as small and fast as possible + def __init__( + self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance + ): + self._modal = modal + if name is not None and name != "": + if isinstance(name, int): + name = str(name) + if not modal: + self._all_names = {name} + self._name = name + if toklist not in self._null_values: + if isinstance(toklist, (str_type, type)): + toklist = [toklist] + if asList: + if isinstance(toklist, ParseResults): + self[name] = _ParseResultsWithOffset( + ParseResults(toklist._toklist), 0 + ) + else: + self[name] = _ParseResultsWithOffset( + ParseResults(toklist[0]), 0 + ) + self[name]._name = name + else: + try: + self[name] = toklist[0] + except (KeyError, TypeError, IndexError): + if toklist is not self: + self[name] = toklist + else: + self._name = name + + def __getitem__(self, i): + if isinstance(i, (int, slice)): + return self._toklist[i] + else: + if i not in self._all_names: + return self._tokdict[i][-1][0] + else: + return ParseResults([v[0] for v in self._tokdict[i]]) + + def __setitem__(self, k, v, isinstance=isinstance): + if isinstance(v, _ParseResultsWithOffset): + self._tokdict[k] = self._tokdict.get(k, list()) + [v] + sub = v[0] + elif isinstance(k, (int, slice)): + self._toklist[k] = v + sub = v + else: + self._tokdict[k] = self._tokdict.get(k, list()) + [ + _ParseResultsWithOffset(v, 0) + ] + sub = v + if isinstance(sub, ParseResults): + sub._parent = wkref(self) + + def __delitem__(self, i): + if isinstance(i, (int, slice)): + mylen = len(self._toklist) + del self._toklist[i] + + # convert int to slice + if isinstance(i, int): + if i < 0: + i += mylen + i = slice(i, i + 1) + # get removed indices + removed = list(range(*i.indices(mylen))) + removed.reverse() + # fixup indices in token dictionary + for name, occurrences in self._tokdict.items(): + for j in removed: + for k, (value, position) in enumerate(occurrences): + occurrences[k] = _ParseResultsWithOffset( + value, position - (position > j) + ) + else: + del self._tokdict[i] + + def __contains__(self, k) -> bool: + return k in self._tokdict + + def __len__(self) -> int: + return len(self._toklist) + + def __bool__(self) -> bool: + return not not (self._toklist or self._tokdict) + + def __iter__(self) -> Iterator: + return iter(self._toklist) + + def __reversed__(self) -> Iterator: + return iter(self._toklist[::-1]) + + def keys(self): + return iter(self._tokdict) + + def values(self): + return (self[k] for k in self.keys()) + + def items(self): + return ((k, self[k]) for k in self.keys()) + + def haskeys(self) -> bool: + """ + Since ``keys()`` returns an iterator, this method is helpful in bypassing + code that looks for the existence of any defined results names.""" + return bool(self._tokdict) + + def pop(self, *args, **kwargs): + """ + Removes and returns item at specified index (default= ``last``). + Supports both ``list`` and ``dict`` semantics for ``pop()``. If + passed no argument or an integer argument, it will use ``list`` + semantics and pop tokens from the list of parsed tokens. If passed + a non-integer argument (most likely a string), it will use ``dict`` + semantics and pop the corresponding value from any defined results + names. A second default return value argument is supported, just as in + ``dict.pop()``. + + Example:: + + numlist = Word(nums)[...] + print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321'] + + def remove_first(tokens): + tokens.pop(0) + numlist.add_parse_action(remove_first) + print(numlist.parse_string("0 123 321")) # -> ['123', '321'] + + label = Word(alphas) + patt = label("LABEL") + Word(nums)[1, ...] + print(patt.parse_string("AAB 123 321").dump()) + + # Use pop() in a parse action to remove named result (note that corresponding value is not + # removed from list form of results) + def remove_LABEL(tokens): + tokens.pop("LABEL") + return tokens + patt.add_parse_action(remove_LABEL) + print(patt.parse_string("AAB 123 321").dump()) + + prints:: + + ['AAB', '123', '321'] + - LABEL: 'AAB' + + ['AAB', '123', '321'] + """ + if not args: + args = [-1] + for k, v in kwargs.items(): + if k == "default": + args = (args[0], v) + else: + raise TypeError( + "pop() got an unexpected keyword argument {!r}".format(k) + ) + if isinstance(args[0], int) or len(args) == 1 or args[0] in self: + index = args[0] + ret = self[index] + del self[index] + return ret + else: + defaultvalue = args[1] + return defaultvalue + + def get(self, key, default_value=None): + """ + Returns named result matching the given key, or if there is no + such name, then returns the given ``default_value`` or ``None`` if no + ``default_value`` is specified. + + Similar to ``dict.get()``. + + Example:: + + integer = Word(nums) + date_str = integer("year") + '/' + integer("month") + '/' + integer("day") + + result = date_str.parse_string("1999/12/31") + print(result.get("year")) # -> '1999' + print(result.get("hour", "not specified")) # -> 'not specified' + print(result.get("hour")) # -> None + """ + if key in self: + return self[key] + else: + return default_value + + def insert(self, index, ins_string): + """ + Inserts new element at location index in the list of parsed tokens. + + Similar to ``list.insert()``. + + Example:: + + numlist = Word(nums)[...] + print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321'] + + # use a parse action to insert the parse location in the front of the parsed results + def insert_locn(locn, tokens): + tokens.insert(0, locn) + numlist.add_parse_action(insert_locn) + print(numlist.parse_string("0 123 321")) # -> [0, '0', '123', '321'] + """ + self._toklist.insert(index, ins_string) + # fixup indices in token dictionary + for name, occurrences in self._tokdict.items(): + for k, (value, position) in enumerate(occurrences): + occurrences[k] = _ParseResultsWithOffset( + value, position + (position > index) + ) + + def append(self, item): + """ + Add single element to end of ``ParseResults`` list of elements. + + Example:: + + numlist = Word(nums)[...] + print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321'] + + # use a parse action to compute the sum of the parsed integers, and add it to the end + def append_sum(tokens): + tokens.append(sum(map(int, tokens))) + numlist.add_parse_action(append_sum) + print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321', 444] + """ + self._toklist.append(item) + + def extend(self, itemseq): + """ + Add sequence of elements to end of ``ParseResults`` list of elements. + + Example:: + + patt = Word(alphas)[1, ...] + + # use a parse action to append the reverse of the matched strings, to make a palindrome + def make_palindrome(tokens): + tokens.extend(reversed([t[::-1] for t in tokens])) + return ''.join(tokens) + patt.add_parse_action(make_palindrome) + print(patt.parse_string("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl' + """ + if isinstance(itemseq, ParseResults): + self.__iadd__(itemseq) + else: + self._toklist.extend(itemseq) + + def clear(self): + """ + Clear all elements and results names. + """ + del self._toklist[:] + self._tokdict.clear() + + def __getattr__(self, name): + try: + return self[name] + except KeyError: + if name.startswith("__"): + raise AttributeError(name) + return "" + + def __add__(self, other) -> "ParseResults": + ret = self.copy() + ret += other + return ret + + def __iadd__(self, other) -> "ParseResults": + if other._tokdict: + offset = len(self._toklist) + addoffset = lambda a: offset if a < 0 else a + offset + otheritems = other._tokdict.items() + otherdictitems = [ + (k, _ParseResultsWithOffset(v[0], addoffset(v[1]))) + for k, vlist in otheritems + for v in vlist + ] + for k, v in otherdictitems: + self[k] = v + if isinstance(v[0], ParseResults): + v[0]._parent = wkref(self) + + self._toklist += other._toklist + self._all_names |= other._all_names + return self + + def __radd__(self, other) -> "ParseResults": + if isinstance(other, int) and other == 0: + # useful for merging many ParseResults using sum() builtin + return self.copy() + else: + # this may raise a TypeError - so be it + return other + self + + def __repr__(self) -> str: + return "{}({!r}, {})".format(type(self).__name__, self._toklist, self.as_dict()) + + def __str__(self) -> str: + return ( + "[" + + ", ".join( + [ + str(i) if isinstance(i, ParseResults) else repr(i) + for i in self._toklist + ] + ) + + "]" + ) + + def _asStringList(self, sep=""): + out = [] + for item in self._toklist: + if out and sep: + out.append(sep) + if isinstance(item, ParseResults): + out += item._asStringList() + else: + out.append(str(item)) + return out + + def as_list(self) -> list: + """ + Returns the parse results as a nested list of matching tokens, all converted to strings. + + Example:: + + patt = Word(alphas)[1, ...] + result = patt.parse_string("sldkj lsdkj sldkj") + # even though the result prints in string-like form, it is actually a pyparsing ParseResults + print(type(result), result) # -> ['sldkj', 'lsdkj', 'sldkj'] + + # Use as_list() to create an actual list + result_list = result.as_list() + print(type(result_list), result_list) # -> ['sldkj', 'lsdkj', 'sldkj'] + """ + return [ + res.as_list() if isinstance(res, ParseResults) else res + for res in self._toklist + ] + + def as_dict(self) -> dict: + """ + Returns the named parse results as a nested dictionary. + + Example:: + + integer = Word(nums) + date_str = integer("year") + '/' + integer("month") + '/' + integer("day") + + result = date_str.parse_string('12/31/1999') + print(type(result), repr(result)) # -> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]}) + + result_dict = result.as_dict() + print(type(result_dict), repr(result_dict)) # -> {'day': '1999', 'year': '12', 'month': '31'} + + # even though a ParseResults supports dict-like access, sometime you just need to have a dict + import json + print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable + print(json.dumps(result.as_dict())) # -> {"month": "31", "day": "1999", "year": "12"} + """ + + def to_item(obj): + if isinstance(obj, ParseResults): + return obj.as_dict() if obj.haskeys() else [to_item(v) for v in obj] + else: + return obj + + return dict((k, to_item(v)) for k, v in self.items()) + + def copy(self) -> "ParseResults": + """ + Returns a new copy of a :class:`ParseResults` object. + """ + ret = ParseResults(self._toklist) + ret._tokdict = self._tokdict.copy() + ret._parent = self._parent + ret._all_names |= self._all_names + ret._name = self._name + return ret + + def get_name(self): + r""" + Returns the results name for this token expression. Useful when several + different expressions might match at a particular location. + + Example:: + + integer = Word(nums) + ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d") + house_number_expr = Suppress('#') + Word(nums, alphanums) + user_data = (Group(house_number_expr)("house_number") + | Group(ssn_expr)("ssn") + | Group(integer)("age")) + user_info = user_data[1, ...] + + result = user_info.parse_string("22 111-22-3333 #221B") + for item in result: + print(item.get_name(), ':', item[0]) + + prints:: + + age : 22 + ssn : 111-22-3333 + house_number : 221B + """ + if self._name: + return self._name + elif self._parent: + par = self._parent() + + def find_in_parent(sub): + return next( + ( + k + for k, vlist in par._tokdict.items() + for v, loc in vlist + if sub is v + ), + None, + ) + + return find_in_parent(self) if par else None + elif ( + len(self) == 1 + and len(self._tokdict) == 1 + and next(iter(self._tokdict.values()))[0][1] in (0, -1) + ): + return next(iter(self._tokdict.keys())) + else: + return None + + def dump(self, indent="", full=True, include_list=True, _depth=0) -> str: + """ + Diagnostic method for listing out the contents of + a :class:`ParseResults`. Accepts an optional ``indent`` argument so + that this string can be embedded in a nested display of other data. + + Example:: + + integer = Word(nums) + date_str = integer("year") + '/' + integer("month") + '/' + integer("day") + + result = date_str.parse_string('1999/12/31') + print(result.dump()) + + prints:: + + ['1999', '/', '12', '/', '31'] + - day: '31' + - month: '12' + - year: '1999' + """ + out = [] + NL = "\n" + out.append(indent + str(self.as_list()) if include_list else "") + + if full: + if self.haskeys(): + items = sorted((str(k), v) for k, v in self.items()) + for k, v in items: + if out: + out.append(NL) + out.append("{}{}- {}: ".format(indent, (" " * _depth), k)) + if isinstance(v, ParseResults): + if v: + out.append( + v.dump( + indent=indent, + full=full, + include_list=include_list, + _depth=_depth + 1, + ) + ) + else: + out.append(str(v)) + else: + out.append(repr(v)) + if any(isinstance(vv, ParseResults) for vv in self): + v = self + for i, vv in enumerate(v): + if isinstance(vv, ParseResults): + out.append( + "\n{}{}[{}]:\n{}{}{}".format( + indent, + (" " * (_depth)), + i, + indent, + (" " * (_depth + 1)), + vv.dump( + indent=indent, + full=full, + include_list=include_list, + _depth=_depth + 1, + ), + ) + ) + else: + out.append( + "\n%s%s[%d]:\n%s%s%s" + % ( + indent, + (" " * (_depth)), + i, + indent, + (" " * (_depth + 1)), + str(vv), + ) + ) + + return "".join(out) + + def pprint(self, *args, **kwargs): + """ + Pretty-printer for parsed results as a list, using the + `pprint `_ module. + Accepts additional positional or keyword args as defined for + `pprint.pprint `_ . + + Example:: + + ident = Word(alphas, alphanums) + num = Word(nums) + func = Forward() + term = ident | num | Group('(' + func + ')') + func <<= ident + Group(Optional(delimited_list(term))) + result = func.parse_string("fna a,b,(fnb c,d,200),100") + result.pprint(width=40) + + prints:: + + ['fna', + ['a', + 'b', + ['(', 'fnb', ['c', 'd', '200'], ')'], + '100']] + """ + pprint.pprint(self.as_list(), *args, **kwargs) + + # add support for pickle protocol + def __getstate__(self): + return ( + self._toklist, + ( + self._tokdict.copy(), + self._parent is not None and self._parent() or None, + self._all_names, + self._name, + ), + ) + + def __setstate__(self, state): + self._toklist, (self._tokdict, par, inAccumNames, self._name) = state + self._all_names = set(inAccumNames) + if par is not None: + self._parent = wkref(par) + else: + self._parent = None + + def __getnewargs__(self): + return self._toklist, self._name + + def __dir__(self): + return dir(type(self)) + list(self.keys()) + + @classmethod + def from_dict(cls, other, name=None) -> "ParseResults": + """ + Helper classmethod to construct a ``ParseResults`` from a ``dict``, preserving the + name-value relations as results names. If an optional ``name`` argument is + given, a nested ``ParseResults`` will be returned. + """ + + def is_iterable(obj): + try: + iter(obj) + except Exception: + return False + else: + return not isinstance(obj, str_type) + + ret = cls([]) + for k, v in other.items(): + if isinstance(v, Mapping): + ret += cls.from_dict(v, name=k) + else: + ret += cls([v], name=k, asList=is_iterable(v)) + if name is not None: + ret = cls([ret], name=name) + return ret + + asList = as_list + asDict = as_dict + getName = get_name + + +MutableMapping.register(ParseResults) +MutableSequence.register(ParseResults) diff --git a/libs/common/setuptools/_vendor/pyparsing/testing.py b/libs/common/setuptools/_vendor/pyparsing/testing.py new file mode 100644 index 00000000..84a0ef17 --- /dev/null +++ b/libs/common/setuptools/_vendor/pyparsing/testing.py @@ -0,0 +1,331 @@ +# testing.py + +from contextlib import contextmanager +import typing + +from .core import ( + ParserElement, + ParseException, + Keyword, + __diag__, + __compat__, +) + + +class pyparsing_test: + """ + namespace class for classes useful in writing unit tests + """ + + class reset_pyparsing_context: + """ + Context manager to be used when writing unit tests that modify pyparsing config values: + - packrat parsing + - bounded recursion parsing + - default whitespace characters. + - default keyword characters + - literal string auto-conversion class + - __diag__ settings + + Example:: + + with reset_pyparsing_context(): + # test that literals used to construct a grammar are automatically suppressed + ParserElement.inlineLiteralsUsing(Suppress) + + term = Word(alphas) | Word(nums) + group = Group('(' + term[...] + ')') + + # assert that the '()' characters are not included in the parsed tokens + self.assertParseAndCheckList(group, "(abc 123 def)", ['abc', '123', 'def']) + + # after exiting context manager, literals are converted to Literal expressions again + """ + + def __init__(self): + self._save_context = {} + + def save(self): + self._save_context["default_whitespace"] = ParserElement.DEFAULT_WHITE_CHARS + self._save_context["default_keyword_chars"] = Keyword.DEFAULT_KEYWORD_CHARS + + self._save_context[ + "literal_string_class" + ] = ParserElement._literalStringClass + + self._save_context["verbose_stacktrace"] = ParserElement.verbose_stacktrace + + self._save_context["packrat_enabled"] = ParserElement._packratEnabled + if ParserElement._packratEnabled: + self._save_context[ + "packrat_cache_size" + ] = ParserElement.packrat_cache.size + else: + self._save_context["packrat_cache_size"] = None + self._save_context["packrat_parse"] = ParserElement._parse + self._save_context[ + "recursion_enabled" + ] = ParserElement._left_recursion_enabled + + self._save_context["__diag__"] = { + name: getattr(__diag__, name) for name in __diag__._all_names + } + + self._save_context["__compat__"] = { + "collect_all_And_tokens": __compat__.collect_all_And_tokens + } + + return self + + def restore(self): + # reset pyparsing global state + if ( + ParserElement.DEFAULT_WHITE_CHARS + != self._save_context["default_whitespace"] + ): + ParserElement.set_default_whitespace_chars( + self._save_context["default_whitespace"] + ) + + ParserElement.verbose_stacktrace = self._save_context["verbose_stacktrace"] + + Keyword.DEFAULT_KEYWORD_CHARS = self._save_context["default_keyword_chars"] + ParserElement.inlineLiteralsUsing( + self._save_context["literal_string_class"] + ) + + for name, value in self._save_context["__diag__"].items(): + (__diag__.enable if value else __diag__.disable)(name) + + ParserElement._packratEnabled = False + if self._save_context["packrat_enabled"]: + ParserElement.enable_packrat(self._save_context["packrat_cache_size"]) + else: + ParserElement._parse = self._save_context["packrat_parse"] + ParserElement._left_recursion_enabled = self._save_context[ + "recursion_enabled" + ] + + __compat__.collect_all_And_tokens = self._save_context["__compat__"] + + return self + + def copy(self): + ret = type(self)() + ret._save_context.update(self._save_context) + return ret + + def __enter__(self): + return self.save() + + def __exit__(self, *args): + self.restore() + + class TestParseResultsAsserts: + """ + A mixin class to add parse results assertion methods to normal unittest.TestCase classes. + """ + + def assertParseResultsEquals( + self, result, expected_list=None, expected_dict=None, msg=None + ): + """ + Unit test assertion to compare a :class:`ParseResults` object with an optional ``expected_list``, + and compare any defined results names with an optional ``expected_dict``. + """ + if expected_list is not None: + self.assertEqual(expected_list, result.as_list(), msg=msg) + if expected_dict is not None: + self.assertEqual(expected_dict, result.as_dict(), msg=msg) + + def assertParseAndCheckList( + self, expr, test_string, expected_list, msg=None, verbose=True + ): + """ + Convenience wrapper assert to test a parser element and input string, and assert that + the resulting ``ParseResults.asList()`` is equal to the ``expected_list``. + """ + result = expr.parse_string(test_string, parse_all=True) + if verbose: + print(result.dump()) + else: + print(result.as_list()) + self.assertParseResultsEquals(result, expected_list=expected_list, msg=msg) + + def assertParseAndCheckDict( + self, expr, test_string, expected_dict, msg=None, verbose=True + ): + """ + Convenience wrapper assert to test a parser element and input string, and assert that + the resulting ``ParseResults.asDict()`` is equal to the ``expected_dict``. + """ + result = expr.parse_string(test_string, parseAll=True) + if verbose: + print(result.dump()) + else: + print(result.as_list()) + self.assertParseResultsEquals(result, expected_dict=expected_dict, msg=msg) + + def assertRunTestResults( + self, run_tests_report, expected_parse_results=None, msg=None + ): + """ + Unit test assertion to evaluate output of ``ParserElement.runTests()``. If a list of + list-dict tuples is given as the ``expected_parse_results`` argument, then these are zipped + with the report tuples returned by ``runTests`` and evaluated using ``assertParseResultsEquals``. + Finally, asserts that the overall ``runTests()`` success value is ``True``. + + :param run_tests_report: tuple(bool, [tuple(str, ParseResults or Exception)]) returned from runTests + :param expected_parse_results (optional): [tuple(str, list, dict, Exception)] + """ + run_test_success, run_test_results = run_tests_report + + if expected_parse_results is not None: + merged = [ + (*rpt, expected) + for rpt, expected in zip(run_test_results, expected_parse_results) + ] + for test_string, result, expected in merged: + # expected should be a tuple containing a list and/or a dict or an exception, + # and optional failure message string + # an empty tuple will skip any result validation + fail_msg = next( + (exp for exp in expected if isinstance(exp, str)), None + ) + expected_exception = next( + ( + exp + for exp in expected + if isinstance(exp, type) and issubclass(exp, Exception) + ), + None, + ) + if expected_exception is not None: + with self.assertRaises( + expected_exception=expected_exception, msg=fail_msg or msg + ): + if isinstance(result, Exception): + raise result + else: + expected_list = next( + (exp for exp in expected if isinstance(exp, list)), None + ) + expected_dict = next( + (exp for exp in expected if isinstance(exp, dict)), None + ) + if (expected_list, expected_dict) != (None, None): + self.assertParseResultsEquals( + result, + expected_list=expected_list, + expected_dict=expected_dict, + msg=fail_msg or msg, + ) + else: + # warning here maybe? + print("no validation for {!r}".format(test_string)) + + # do this last, in case some specific test results can be reported instead + self.assertTrue( + run_test_success, msg=msg if msg is not None else "failed runTests" + ) + + @contextmanager + def assertRaisesParseException(self, exc_type=ParseException, msg=None): + with self.assertRaises(exc_type, msg=msg): + yield + + @staticmethod + def with_line_numbers( + s: str, + start_line: typing.Optional[int] = None, + end_line: typing.Optional[int] = None, + expand_tabs: bool = True, + eol_mark: str = "|", + mark_spaces: typing.Optional[str] = None, + mark_control: typing.Optional[str] = None, + ) -> str: + """ + Helpful method for debugging a parser - prints a string with line and column numbers. + (Line and column numbers are 1-based.) + + :param s: tuple(bool, str - string to be printed with line and column numbers + :param start_line: int - (optional) starting line number in s to print (default=1) + :param end_line: int - (optional) ending line number in s to print (default=len(s)) + :param expand_tabs: bool - (optional) expand tabs to spaces, to match the pyparsing default + :param eol_mark: str - (optional) string to mark the end of lines, helps visualize trailing spaces (default="|") + :param mark_spaces: str - (optional) special character to display in place of spaces + :param mark_control: str - (optional) convert non-printing control characters to a placeholding + character; valid values: + - "unicode" - replaces control chars with Unicode symbols, such as "␍" and "␊" + - any single character string - replace control characters with given string + - None (default) - string is displayed as-is + + :return: str - input string with leading line numbers and column number headers + """ + if expand_tabs: + s = s.expandtabs() + if mark_control is not None: + if mark_control == "unicode": + tbl = str.maketrans( + {c: u for c, u in zip(range(0, 33), range(0x2400, 0x2433))} + | {127: 0x2421} + ) + eol_mark = "" + else: + tbl = str.maketrans( + {c: mark_control for c in list(range(0, 32)) + [127]} + ) + s = s.translate(tbl) + if mark_spaces is not None and mark_spaces != " ": + if mark_spaces == "unicode": + tbl = str.maketrans({9: 0x2409, 32: 0x2423}) + s = s.translate(tbl) + else: + s = s.replace(" ", mark_spaces) + if start_line is None: + start_line = 1 + if end_line is None: + end_line = len(s) + end_line = min(end_line, len(s)) + start_line = min(max(1, start_line), end_line) + + if mark_control != "unicode": + s_lines = s.splitlines()[start_line - 1 : end_line] + else: + s_lines = [line + "␊" for line in s.split("␊")[start_line - 1 : end_line]] + if not s_lines: + return "" + + lineno_width = len(str(end_line)) + max_line_len = max(len(line) for line in s_lines) + lead = " " * (lineno_width + 1) + if max_line_len >= 99: + header0 = ( + lead + + "".join( + "{}{}".format(" " * 99, (i + 1) % 100) + for i in range(max(max_line_len // 100, 1)) + ) + + "\n" + ) + else: + header0 = "" + header1 = ( + header0 + + lead + + "".join( + " {}".format((i + 1) % 10) + for i in range(-(-max_line_len // 10)) + ) + + "\n" + ) + header2 = lead + "1234567890" * (-(-max_line_len // 10)) + "\n" + return ( + header1 + + header2 + + "\n".join( + "{:{}d}:{}{}".format(i, lineno_width, line, eol_mark) + for i, line in enumerate(s_lines, start=start_line) + ) + + "\n" + ) diff --git a/libs/common/setuptools/_vendor/pyparsing/unicode.py b/libs/common/setuptools/_vendor/pyparsing/unicode.py new file mode 100644 index 00000000..06526203 --- /dev/null +++ b/libs/common/setuptools/_vendor/pyparsing/unicode.py @@ -0,0 +1,352 @@ +# unicode.py + +import sys +from itertools import filterfalse +from typing import List, Tuple, Union + + +class _lazyclassproperty: + def __init__(self, fn): + self.fn = fn + self.__doc__ = fn.__doc__ + self.__name__ = fn.__name__ + + def __get__(self, obj, cls): + if cls is None: + cls = type(obj) + if not hasattr(cls, "_intern") or any( + cls._intern is getattr(superclass, "_intern", []) + for superclass in cls.__mro__[1:] + ): + cls._intern = {} + attrname = self.fn.__name__ + if attrname not in cls._intern: + cls._intern[attrname] = self.fn(cls) + return cls._intern[attrname] + + +UnicodeRangeList = List[Union[Tuple[int, int], Tuple[int]]] + + +class unicode_set: + """ + A set of Unicode characters, for language-specific strings for + ``alphas``, ``nums``, ``alphanums``, and ``printables``. + A unicode_set is defined by a list of ranges in the Unicode character + set, in a class attribute ``_ranges``. Ranges can be specified using + 2-tuples or a 1-tuple, such as:: + + _ranges = [ + (0x0020, 0x007e), + (0x00a0, 0x00ff), + (0x0100,), + ] + + Ranges are left- and right-inclusive. A 1-tuple of (x,) is treated as (x, x). + + A unicode set can also be defined using multiple inheritance of other unicode sets:: + + class CJK(Chinese, Japanese, Korean): + pass + """ + + _ranges: UnicodeRangeList = [] + + @_lazyclassproperty + def _chars_for_ranges(cls): + ret = [] + for cc in cls.__mro__: + if cc is unicode_set: + break + for rr in getattr(cc, "_ranges", ()): + ret.extend(range(rr[0], rr[-1] + 1)) + return [chr(c) for c in sorted(set(ret))] + + @_lazyclassproperty + def printables(cls): + "all non-whitespace characters in this range" + return "".join(filterfalse(str.isspace, cls._chars_for_ranges)) + + @_lazyclassproperty + def alphas(cls): + "all alphabetic characters in this range" + return "".join(filter(str.isalpha, cls._chars_for_ranges)) + + @_lazyclassproperty + def nums(cls): + "all numeric digit characters in this range" + return "".join(filter(str.isdigit, cls._chars_for_ranges)) + + @_lazyclassproperty + def alphanums(cls): + "all alphanumeric characters in this range" + return cls.alphas + cls.nums + + @_lazyclassproperty + def identchars(cls): + "all characters in this range that are valid identifier characters, plus underscore '_'" + return "".join( + sorted( + set( + "".join(filter(str.isidentifier, cls._chars_for_ranges)) + + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzªµº" + + "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ" + + "_" + ) + ) + ) + + @_lazyclassproperty + def identbodychars(cls): + """ + all characters in this range that are valid identifier body characters, + plus the digits 0-9 + """ + return "".join( + sorted( + set( + cls.identchars + + "0123456789" + + "".join( + [c for c in cls._chars_for_ranges if ("_" + c).isidentifier()] + ) + ) + ) + ) + + +class pyparsing_unicode(unicode_set): + """ + A namespace class for defining common language unicode_sets. + """ + + # fmt: off + + # define ranges in language character sets + _ranges: UnicodeRangeList = [ + (0x0020, sys.maxunicode), + ] + + class BasicMultilingualPlane(unicode_set): + "Unicode set for the Basic Multilingual Plane" + _ranges: UnicodeRangeList = [ + (0x0020, 0xFFFF), + ] + + class Latin1(unicode_set): + "Unicode set for Latin-1 Unicode Character Range" + _ranges: UnicodeRangeList = [ + (0x0020, 0x007E), + (0x00A0, 0x00FF), + ] + + class LatinA(unicode_set): + "Unicode set for Latin-A Unicode Character Range" + _ranges: UnicodeRangeList = [ + (0x0100, 0x017F), + ] + + class LatinB(unicode_set): + "Unicode set for Latin-B Unicode Character Range" + _ranges: UnicodeRangeList = [ + (0x0180, 0x024F), + ] + + class Greek(unicode_set): + "Unicode set for Greek Unicode Character Ranges" + _ranges: UnicodeRangeList = [ + (0x0342, 0x0345), + (0x0370, 0x0377), + (0x037A, 0x037F), + (0x0384, 0x038A), + (0x038C,), + (0x038E, 0x03A1), + (0x03A3, 0x03E1), + (0x03F0, 0x03FF), + (0x1D26, 0x1D2A), + (0x1D5E,), + (0x1D60,), + (0x1D66, 0x1D6A), + (0x1F00, 0x1F15), + (0x1F18, 0x1F1D), + (0x1F20, 0x1F45), + (0x1F48, 0x1F4D), + (0x1F50, 0x1F57), + (0x1F59,), + (0x1F5B,), + (0x1F5D,), + (0x1F5F, 0x1F7D), + (0x1F80, 0x1FB4), + (0x1FB6, 0x1FC4), + (0x1FC6, 0x1FD3), + (0x1FD6, 0x1FDB), + (0x1FDD, 0x1FEF), + (0x1FF2, 0x1FF4), + (0x1FF6, 0x1FFE), + (0x2129,), + (0x2719, 0x271A), + (0xAB65,), + (0x10140, 0x1018D), + (0x101A0,), + (0x1D200, 0x1D245), + (0x1F7A1, 0x1F7A7), + ] + + class Cyrillic(unicode_set): + "Unicode set for Cyrillic Unicode Character Range" + _ranges: UnicodeRangeList = [ + (0x0400, 0x052F), + (0x1C80, 0x1C88), + (0x1D2B,), + (0x1D78,), + (0x2DE0, 0x2DFF), + (0xA640, 0xA672), + (0xA674, 0xA69F), + (0xFE2E, 0xFE2F), + ] + + class Chinese(unicode_set): + "Unicode set for Chinese Unicode Character Range" + _ranges: UnicodeRangeList = [ + (0x2E80, 0x2E99), + (0x2E9B, 0x2EF3), + (0x31C0, 0x31E3), + (0x3400, 0x4DB5), + (0x4E00, 0x9FEF), + (0xA700, 0xA707), + (0xF900, 0xFA6D), + (0xFA70, 0xFAD9), + (0x16FE2, 0x16FE3), + (0x1F210, 0x1F212), + (0x1F214, 0x1F23B), + (0x1F240, 0x1F248), + (0x20000, 0x2A6D6), + (0x2A700, 0x2B734), + (0x2B740, 0x2B81D), + (0x2B820, 0x2CEA1), + (0x2CEB0, 0x2EBE0), + (0x2F800, 0x2FA1D), + ] + + class Japanese(unicode_set): + "Unicode set for Japanese Unicode Character Range, combining Kanji, Hiragana, and Katakana ranges" + _ranges: UnicodeRangeList = [] + + class Kanji(unicode_set): + "Unicode set for Kanji Unicode Character Range" + _ranges: UnicodeRangeList = [ + (0x4E00, 0x9FBF), + (0x3000, 0x303F), + ] + + class Hiragana(unicode_set): + "Unicode set for Hiragana Unicode Character Range" + _ranges: UnicodeRangeList = [ + (0x3041, 0x3096), + (0x3099, 0x30A0), + (0x30FC,), + (0xFF70,), + (0x1B001,), + (0x1B150, 0x1B152), + (0x1F200,), + ] + + class Katakana(unicode_set): + "Unicode set for Katakana Unicode Character Range" + _ranges: UnicodeRangeList = [ + (0x3099, 0x309C), + (0x30A0, 0x30FF), + (0x31F0, 0x31FF), + (0x32D0, 0x32FE), + (0xFF65, 0xFF9F), + (0x1B000,), + (0x1B164, 0x1B167), + (0x1F201, 0x1F202), + (0x1F213,), + ] + + class Hangul(unicode_set): + "Unicode set for Hangul (Korean) Unicode Character Range" + _ranges: UnicodeRangeList = [ + (0x1100, 0x11FF), + (0x302E, 0x302F), + (0x3131, 0x318E), + (0x3200, 0x321C), + (0x3260, 0x327B), + (0x327E,), + (0xA960, 0xA97C), + (0xAC00, 0xD7A3), + (0xD7B0, 0xD7C6), + (0xD7CB, 0xD7FB), + (0xFFA0, 0xFFBE), + (0xFFC2, 0xFFC7), + (0xFFCA, 0xFFCF), + (0xFFD2, 0xFFD7), + (0xFFDA, 0xFFDC), + ] + + Korean = Hangul + + class CJK(Chinese, Japanese, Hangul): + "Unicode set for combined Chinese, Japanese, and Korean (CJK) Unicode Character Range" + + class Thai(unicode_set): + "Unicode set for Thai Unicode Character Range" + _ranges: UnicodeRangeList = [ + (0x0E01, 0x0E3A), + (0x0E3F, 0x0E5B) + ] + + class Arabic(unicode_set): + "Unicode set for Arabic Unicode Character Range" + _ranges: UnicodeRangeList = [ + (0x0600, 0x061B), + (0x061E, 0x06FF), + (0x0700, 0x077F), + ] + + class Hebrew(unicode_set): + "Unicode set for Hebrew Unicode Character Range" + _ranges: UnicodeRangeList = [ + (0x0591, 0x05C7), + (0x05D0, 0x05EA), + (0x05EF, 0x05F4), + (0xFB1D, 0xFB36), + (0xFB38, 0xFB3C), + (0xFB3E,), + (0xFB40, 0xFB41), + (0xFB43, 0xFB44), + (0xFB46, 0xFB4F), + ] + + class Devanagari(unicode_set): + "Unicode set for Devanagari Unicode Character Range" + _ranges: UnicodeRangeList = [ + (0x0900, 0x097F), + (0xA8E0, 0xA8FF) + ] + + # fmt: on + + +pyparsing_unicode.Japanese._ranges = ( + pyparsing_unicode.Japanese.Kanji._ranges + + pyparsing_unicode.Japanese.Hiragana._ranges + + pyparsing_unicode.Japanese.Katakana._ranges +) + +pyparsing_unicode.BMP = pyparsing_unicode.BasicMultilingualPlane + +# add language identifiers using language Unicode +pyparsing_unicode.العربية = pyparsing_unicode.Arabic +pyparsing_unicode.中文 = pyparsing_unicode.Chinese +pyparsing_unicode.кириллица = pyparsing_unicode.Cyrillic +pyparsing_unicode.Ελληνικά = pyparsing_unicode.Greek +pyparsing_unicode.עִברִית = pyparsing_unicode.Hebrew +pyparsing_unicode.日本語 = pyparsing_unicode.Japanese +pyparsing_unicode.Japanese.漢字 = pyparsing_unicode.Japanese.Kanji +pyparsing_unicode.Japanese.カタカナ = pyparsing_unicode.Japanese.Katakana +pyparsing_unicode.Japanese.ひらがな = pyparsing_unicode.Japanese.Hiragana +pyparsing_unicode.한국어 = pyparsing_unicode.Korean +pyparsing_unicode.ไทย = pyparsing_unicode.Thai +pyparsing_unicode.देवनागरी = pyparsing_unicode.Devanagari diff --git a/libs/common/setuptools/_vendor/pyparsing/util.py b/libs/common/setuptools/_vendor/pyparsing/util.py new file mode 100644 index 00000000..34ce092c --- /dev/null +++ b/libs/common/setuptools/_vendor/pyparsing/util.py @@ -0,0 +1,235 @@ +# util.py +import warnings +import types +import collections +import itertools +from functools import lru_cache +from typing import List, Union, Iterable + +_bslash = chr(92) + + +class __config_flags: + """Internal class for defining compatibility and debugging flags""" + + _all_names: List[str] = [] + _fixed_names: List[str] = [] + _type_desc = "configuration" + + @classmethod + def _set(cls, dname, value): + if dname in cls._fixed_names: + warnings.warn( + "{}.{} {} is {} and cannot be overridden".format( + cls.__name__, + dname, + cls._type_desc, + str(getattr(cls, dname)).upper(), + ) + ) + return + if dname in cls._all_names: + setattr(cls, dname, value) + else: + raise ValueError("no such {} {!r}".format(cls._type_desc, dname)) + + enable = classmethod(lambda cls, name: cls._set(name, True)) + disable = classmethod(lambda cls, name: cls._set(name, False)) + + +@lru_cache(maxsize=128) +def col(loc: int, strg: str) -> int: + """ + Returns current column within a string, counting newlines as line separators. + The first column is number 1. + + Note: the default parsing behavior is to expand tabs in the input string + before starting the parsing process. See + :class:`ParserElement.parseString` for more + information on parsing strings containing ```` s, and suggested + methods to maintain a consistent view of the parsed string, the parse + location, and line and column positions within the parsed string. + """ + s = strg + return 1 if 0 < loc < len(s) and s[loc - 1] == "\n" else loc - s.rfind("\n", 0, loc) + + +@lru_cache(maxsize=128) +def lineno(loc: int, strg: str) -> int: + """Returns current line number within a string, counting newlines as line separators. + The first line is number 1. + + Note - the default parsing behavior is to expand tabs in the input string + before starting the parsing process. See :class:`ParserElement.parseString` + for more information on parsing strings containing ```` s, and + suggested methods to maintain a consistent view of the parsed string, the + parse location, and line and column positions within the parsed string. + """ + return strg.count("\n", 0, loc) + 1 + + +@lru_cache(maxsize=128) +def line(loc: int, strg: str) -> str: + """ + Returns the line of text containing loc within a string, counting newlines as line separators. + """ + last_cr = strg.rfind("\n", 0, loc) + next_cr = strg.find("\n", loc) + return strg[last_cr + 1 : next_cr] if next_cr >= 0 else strg[last_cr + 1 :] + + +class _UnboundedCache: + def __init__(self): + cache = {} + cache_get = cache.get + self.not_in_cache = not_in_cache = object() + + def get(_, key): + return cache_get(key, not_in_cache) + + def set_(_, key, value): + cache[key] = value + + def clear(_): + cache.clear() + + self.size = None + self.get = types.MethodType(get, self) + self.set = types.MethodType(set_, self) + self.clear = types.MethodType(clear, self) + + +class _FifoCache: + def __init__(self, size): + self.not_in_cache = not_in_cache = object() + cache = collections.OrderedDict() + cache_get = cache.get + + def get(_, key): + return cache_get(key, not_in_cache) + + def set_(_, key, value): + cache[key] = value + while len(cache) > size: + cache.popitem(last=False) + + def clear(_): + cache.clear() + + self.size = size + self.get = types.MethodType(get, self) + self.set = types.MethodType(set_, self) + self.clear = types.MethodType(clear, self) + + +class LRUMemo: + """ + A memoizing mapping that retains `capacity` deleted items + + The memo tracks retained items by their access order; once `capacity` items + are retained, the least recently used item is discarded. + """ + + def __init__(self, capacity): + self._capacity = capacity + self._active = {} + self._memory = collections.OrderedDict() + + def __getitem__(self, key): + try: + return self._active[key] + except KeyError: + self._memory.move_to_end(key) + return self._memory[key] + + def __setitem__(self, key, value): + self._memory.pop(key, None) + self._active[key] = value + + def __delitem__(self, key): + try: + value = self._active.pop(key) + except KeyError: + pass + else: + while len(self._memory) >= self._capacity: + self._memory.popitem(last=False) + self._memory[key] = value + + def clear(self): + self._active.clear() + self._memory.clear() + + +class UnboundedMemo(dict): + """ + A memoizing mapping that retains all deleted items + """ + + def __delitem__(self, key): + pass + + +def _escape_regex_range_chars(s: str) -> str: + # escape these chars: ^-[] + for c in r"\^-[]": + s = s.replace(c, _bslash + c) + s = s.replace("\n", r"\n") + s = s.replace("\t", r"\t") + return str(s) + + +def _collapse_string_to_ranges( + s: Union[str, Iterable[str]], re_escape: bool = True +) -> str: + def is_consecutive(c): + c_int = ord(c) + is_consecutive.prev, prev = c_int, is_consecutive.prev + if c_int - prev > 1: + is_consecutive.value = next(is_consecutive.counter) + return is_consecutive.value + + is_consecutive.prev = 0 + is_consecutive.counter = itertools.count() + is_consecutive.value = -1 + + def escape_re_range_char(c): + return "\\" + c if c in r"\^-][" else c + + def no_escape_re_range_char(c): + return c + + if not re_escape: + escape_re_range_char = no_escape_re_range_char + + ret = [] + s = "".join(sorted(set(s))) + if len(s) > 3: + for _, chars in itertools.groupby(s, key=is_consecutive): + first = last = next(chars) + last = collections.deque( + itertools.chain(iter([last]), chars), maxlen=1 + ).pop() + if first == last: + ret.append(escape_re_range_char(first)) + else: + sep = "" if ord(last) == ord(first) + 1 else "-" + ret.append( + "{}{}{}".format( + escape_re_range_char(first), sep, escape_re_range_char(last) + ) + ) + else: + ret = [escape_re_range_char(c) for c in s] + + return "".join(ret) + + +def _flatten(ll: list) -> list: + ret = [] + for i in ll: + if isinstance(i, list): + ret.extend(_flatten(i)) + else: + ret.append(i) + return ret diff --git a/libs/common/setuptools/_vendor/six.py b/libs/common/setuptools/_vendor/six.py deleted file mode 100644 index 190c0239..00000000 --- a/libs/common/setuptools/_vendor/six.py +++ /dev/null @@ -1,868 +0,0 @@ -"""Utilities for writing code that runs on Python 2 and 3""" - -# Copyright (c) 2010-2015 Benjamin Peterson -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -from __future__ import absolute_import - -import functools -import itertools -import operator -import sys -import types - -__author__ = "Benjamin Peterson " -__version__ = "1.10.0" - - -# Useful for very coarse version differentiation. -PY2 = sys.version_info[0] == 2 -PY3 = sys.version_info[0] == 3 -PY34 = sys.version_info[0:2] >= (3, 4) - -if PY3: - string_types = str, - integer_types = int, - class_types = type, - text_type = str - binary_type = bytes - - MAXSIZE = sys.maxsize -else: - string_types = basestring, - integer_types = (int, long) - class_types = (type, types.ClassType) - text_type = unicode - binary_type = str - - if sys.platform.startswith("java"): - # Jython always uses 32 bits. - MAXSIZE = int((1 << 31) - 1) - else: - # It's possible to have sizeof(long) != sizeof(Py_ssize_t). - class X(object): - - def __len__(self): - return 1 << 31 - try: - len(X()) - except OverflowError: - # 32-bit - MAXSIZE = int((1 << 31) - 1) - else: - # 64-bit - MAXSIZE = int((1 << 63) - 1) - del X - - -def _add_doc(func, doc): - """Add documentation to a function.""" - func.__doc__ = doc - - -def _import_module(name): - """Import module, returning the module after the last dot.""" - __import__(name) - return sys.modules[name] - - -class _LazyDescr(object): - - def __init__(self, name): - self.name = name - - def __get__(self, obj, tp): - result = self._resolve() - setattr(obj, self.name, result) # Invokes __set__. - try: - # This is a bit ugly, but it avoids running this again by - # removing this descriptor. - delattr(obj.__class__, self.name) - except AttributeError: - pass - return result - - -class MovedModule(_LazyDescr): - - def __init__(self, name, old, new=None): - super(MovedModule, self).__init__(name) - if PY3: - if new is None: - new = name - self.mod = new - else: - self.mod = old - - def _resolve(self): - return _import_module(self.mod) - - def __getattr__(self, attr): - _module = self._resolve() - value = getattr(_module, attr) - setattr(self, attr, value) - return value - - -class _LazyModule(types.ModuleType): - - def __init__(self, name): - super(_LazyModule, self).__init__(name) - self.__doc__ = self.__class__.__doc__ - - def __dir__(self): - attrs = ["__doc__", "__name__"] - attrs += [attr.name for attr in self._moved_attributes] - return attrs - - # Subclasses should override this - _moved_attributes = [] - - -class MovedAttribute(_LazyDescr): - - def __init__(self, name, old_mod, new_mod, old_attr=None, new_attr=None): - super(MovedAttribute, self).__init__(name) - if PY3: - if new_mod is None: - new_mod = name - self.mod = new_mod - if new_attr is None: - if old_attr is None: - new_attr = name - else: - new_attr = old_attr - self.attr = new_attr - else: - self.mod = old_mod - if old_attr is None: - old_attr = name - self.attr = old_attr - - def _resolve(self): - module = _import_module(self.mod) - return getattr(module, self.attr) - - -class _SixMetaPathImporter(object): - - """ - A meta path importer to import six.moves and its submodules. - - This class implements a PEP302 finder and loader. It should be compatible - with Python 2.5 and all existing versions of Python3 - """ - - def __init__(self, six_module_name): - self.name = six_module_name - self.known_modules = {} - - def _add_module(self, mod, *fullnames): - for fullname in fullnames: - self.known_modules[self.name + "." + fullname] = mod - - def _get_module(self, fullname): - return self.known_modules[self.name + "." + fullname] - - def find_module(self, fullname, path=None): - if fullname in self.known_modules: - return self - return None - - def __get_module(self, fullname): - try: - return self.known_modules[fullname] - except KeyError: - raise ImportError("This loader does not know module " + fullname) - - def load_module(self, fullname): - try: - # in case of a reload - return sys.modules[fullname] - except KeyError: - pass - mod = self.__get_module(fullname) - if isinstance(mod, MovedModule): - mod = mod._resolve() - else: - mod.__loader__ = self - sys.modules[fullname] = mod - return mod - - def is_package(self, fullname): - """ - Return true, if the named module is a package. - - We need this method to get correct spec objects with - Python 3.4 (see PEP451) - """ - return hasattr(self.__get_module(fullname), "__path__") - - def get_code(self, fullname): - """Return None - - Required, if is_package is implemented""" - self.__get_module(fullname) # eventually raises ImportError - return None - get_source = get_code # same as get_code - -_importer = _SixMetaPathImporter(__name__) - - -class _MovedItems(_LazyModule): - - """Lazy loading of moved objects""" - __path__ = [] # mark as package - - -_moved_attributes = [ - MovedAttribute("cStringIO", "cStringIO", "io", "StringIO"), - MovedAttribute("filter", "itertools", "builtins", "ifilter", "filter"), - MovedAttribute("filterfalse", "itertools", "itertools", "ifilterfalse", "filterfalse"), - MovedAttribute("input", "__builtin__", "builtins", "raw_input", "input"), - MovedAttribute("intern", "__builtin__", "sys"), - MovedAttribute("map", "itertools", "builtins", "imap", "map"), - MovedAttribute("getcwd", "os", "os", "getcwdu", "getcwd"), - MovedAttribute("getcwdb", "os", "os", "getcwd", "getcwdb"), - MovedAttribute("range", "__builtin__", "builtins", "xrange", "range"), - MovedAttribute("reload_module", "__builtin__", "importlib" if PY34 else "imp", "reload"), - MovedAttribute("reduce", "__builtin__", "functools"), - MovedAttribute("shlex_quote", "pipes", "shlex", "quote"), - MovedAttribute("StringIO", "StringIO", "io"), - MovedAttribute("UserDict", "UserDict", "collections"), - MovedAttribute("UserList", "UserList", "collections"), - MovedAttribute("UserString", "UserString", "collections"), - MovedAttribute("xrange", "__builtin__", "builtins", "xrange", "range"), - MovedAttribute("zip", "itertools", "builtins", "izip", "zip"), - MovedAttribute("zip_longest", "itertools", "itertools", "izip_longest", "zip_longest"), - MovedModule("builtins", "__builtin__"), - MovedModule("configparser", "ConfigParser"), - MovedModule("copyreg", "copy_reg"), - MovedModule("dbm_gnu", "gdbm", "dbm.gnu"), - MovedModule("_dummy_thread", "dummy_thread", "_dummy_thread"), - MovedModule("http_cookiejar", "cookielib", "http.cookiejar"), - MovedModule("http_cookies", "Cookie", "http.cookies"), - MovedModule("html_entities", "htmlentitydefs", "html.entities"), - MovedModule("html_parser", "HTMLParser", "html.parser"), - MovedModule("http_client", "httplib", "http.client"), - MovedModule("email_mime_multipart", "email.MIMEMultipart", "email.mime.multipart"), - MovedModule("email_mime_nonmultipart", "email.MIMENonMultipart", "email.mime.nonmultipart"), - MovedModule("email_mime_text", "email.MIMEText", "email.mime.text"), - MovedModule("email_mime_base", "email.MIMEBase", "email.mime.base"), - MovedModule("BaseHTTPServer", "BaseHTTPServer", "http.server"), - MovedModule("CGIHTTPServer", "CGIHTTPServer", "http.server"), - MovedModule("SimpleHTTPServer", "SimpleHTTPServer", "http.server"), - MovedModule("cPickle", "cPickle", "pickle"), - MovedModule("queue", "Queue"), - MovedModule("reprlib", "repr"), - MovedModule("socketserver", "SocketServer"), - MovedModule("_thread", "thread", "_thread"), - MovedModule("tkinter", "Tkinter"), - MovedModule("tkinter_dialog", "Dialog", "tkinter.dialog"), - MovedModule("tkinter_filedialog", "FileDialog", "tkinter.filedialog"), - MovedModule("tkinter_scrolledtext", "ScrolledText", "tkinter.scrolledtext"), - MovedModule("tkinter_simpledialog", "SimpleDialog", "tkinter.simpledialog"), - MovedModule("tkinter_tix", "Tix", "tkinter.tix"), - MovedModule("tkinter_ttk", "ttk", "tkinter.ttk"), - MovedModule("tkinter_constants", "Tkconstants", "tkinter.constants"), - MovedModule("tkinter_dnd", "Tkdnd", "tkinter.dnd"), - MovedModule("tkinter_colorchooser", "tkColorChooser", - "tkinter.colorchooser"), - MovedModule("tkinter_commondialog", "tkCommonDialog", - "tkinter.commondialog"), - MovedModule("tkinter_tkfiledialog", "tkFileDialog", "tkinter.filedialog"), - MovedModule("tkinter_font", "tkFont", "tkinter.font"), - MovedModule("tkinter_messagebox", "tkMessageBox", "tkinter.messagebox"), - MovedModule("tkinter_tksimpledialog", "tkSimpleDialog", - "tkinter.simpledialog"), - MovedModule("urllib_parse", __name__ + ".moves.urllib_parse", "urllib.parse"), - MovedModule("urllib_error", __name__ + ".moves.urllib_error", "urllib.error"), - MovedModule("urllib", __name__ + ".moves.urllib", __name__ + ".moves.urllib"), - MovedModule("urllib_robotparser", "robotparser", "urllib.robotparser"), - MovedModule("xmlrpc_client", "xmlrpclib", "xmlrpc.client"), - MovedModule("xmlrpc_server", "SimpleXMLRPCServer", "xmlrpc.server"), -] -# Add windows specific modules. -if sys.platform == "win32": - _moved_attributes += [ - MovedModule("winreg", "_winreg"), - ] - -for attr in _moved_attributes: - setattr(_MovedItems, attr.name, attr) - if isinstance(attr, MovedModule): - _importer._add_module(attr, "moves." + attr.name) -del attr - -_MovedItems._moved_attributes = _moved_attributes - -moves = _MovedItems(__name__ + ".moves") -_importer._add_module(moves, "moves") - - -class Module_six_moves_urllib_parse(_LazyModule): - - """Lazy loading of moved objects in six.moves.urllib_parse""" - - -_urllib_parse_moved_attributes = [ - MovedAttribute("ParseResult", "urlparse", "urllib.parse"), - MovedAttribute("SplitResult", "urlparse", "urllib.parse"), - MovedAttribute("parse_qs", "urlparse", "urllib.parse"), - MovedAttribute("parse_qsl", "urlparse", "urllib.parse"), - MovedAttribute("urldefrag", "urlparse", "urllib.parse"), - MovedAttribute("urljoin", "urlparse", "urllib.parse"), - MovedAttribute("urlparse", "urlparse", "urllib.parse"), - MovedAttribute("urlsplit", "urlparse", "urllib.parse"), - MovedAttribute("urlunparse", "urlparse", "urllib.parse"), - MovedAttribute("urlunsplit", "urlparse", "urllib.parse"), - MovedAttribute("quote", "urllib", "urllib.parse"), - MovedAttribute("quote_plus", "urllib", "urllib.parse"), - MovedAttribute("unquote", "urllib", "urllib.parse"), - MovedAttribute("unquote_plus", "urllib", "urllib.parse"), - MovedAttribute("urlencode", "urllib", "urllib.parse"), - MovedAttribute("splitquery", "urllib", "urllib.parse"), - MovedAttribute("splittag", "urllib", "urllib.parse"), - MovedAttribute("splituser", "urllib", "urllib.parse"), - MovedAttribute("uses_fragment", "urlparse", "urllib.parse"), - MovedAttribute("uses_netloc", "urlparse", "urllib.parse"), - MovedAttribute("uses_params", "urlparse", "urllib.parse"), - MovedAttribute("uses_query", "urlparse", "urllib.parse"), - MovedAttribute("uses_relative", "urlparse", "urllib.parse"), -] -for attr in _urllib_parse_moved_attributes: - setattr(Module_six_moves_urllib_parse, attr.name, attr) -del attr - -Module_six_moves_urllib_parse._moved_attributes = _urllib_parse_moved_attributes - -_importer._add_module(Module_six_moves_urllib_parse(__name__ + ".moves.urllib_parse"), - "moves.urllib_parse", "moves.urllib.parse") - - -class Module_six_moves_urllib_error(_LazyModule): - - """Lazy loading of moved objects in six.moves.urllib_error""" - - -_urllib_error_moved_attributes = [ - MovedAttribute("URLError", "urllib2", "urllib.error"), - MovedAttribute("HTTPError", "urllib2", "urllib.error"), - MovedAttribute("ContentTooShortError", "urllib", "urllib.error"), -] -for attr in _urllib_error_moved_attributes: - setattr(Module_six_moves_urllib_error, attr.name, attr) -del attr - -Module_six_moves_urllib_error._moved_attributes = _urllib_error_moved_attributes - -_importer._add_module(Module_six_moves_urllib_error(__name__ + ".moves.urllib.error"), - "moves.urllib_error", "moves.urllib.error") - - -class Module_six_moves_urllib_request(_LazyModule): - - """Lazy loading of moved objects in six.moves.urllib_request""" - - -_urllib_request_moved_attributes = [ - MovedAttribute("urlopen", "urllib2", "urllib.request"), - MovedAttribute("install_opener", "urllib2", "urllib.request"), - MovedAttribute("build_opener", "urllib2", "urllib.request"), - MovedAttribute("pathname2url", "urllib", "urllib.request"), - MovedAttribute("url2pathname", "urllib", "urllib.request"), - MovedAttribute("getproxies", "urllib", "urllib.request"), - MovedAttribute("Request", "urllib2", "urllib.request"), - MovedAttribute("OpenerDirector", "urllib2", "urllib.request"), - MovedAttribute("HTTPDefaultErrorHandler", "urllib2", "urllib.request"), - MovedAttribute("HTTPRedirectHandler", "urllib2", "urllib.request"), - MovedAttribute("HTTPCookieProcessor", "urllib2", "urllib.request"), - MovedAttribute("ProxyHandler", "urllib2", "urllib.request"), - MovedAttribute("BaseHandler", "urllib2", "urllib.request"), - MovedAttribute("HTTPPasswordMgr", "urllib2", "urllib.request"), - MovedAttribute("HTTPPasswordMgrWithDefaultRealm", "urllib2", "urllib.request"), - MovedAttribute("AbstractBasicAuthHandler", "urllib2", "urllib.request"), - MovedAttribute("HTTPBasicAuthHandler", "urllib2", "urllib.request"), - MovedAttribute("ProxyBasicAuthHandler", "urllib2", "urllib.request"), - MovedAttribute("AbstractDigestAuthHandler", "urllib2", "urllib.request"), - MovedAttribute("HTTPDigestAuthHandler", "urllib2", "urllib.request"), - MovedAttribute("ProxyDigestAuthHandler", "urllib2", "urllib.request"), - MovedAttribute("HTTPHandler", "urllib2", "urllib.request"), - MovedAttribute("HTTPSHandler", "urllib2", "urllib.request"), - MovedAttribute("FileHandler", "urllib2", "urllib.request"), - MovedAttribute("FTPHandler", "urllib2", "urllib.request"), - MovedAttribute("CacheFTPHandler", "urllib2", "urllib.request"), - MovedAttribute("UnknownHandler", "urllib2", "urllib.request"), - MovedAttribute("HTTPErrorProcessor", "urllib2", "urllib.request"), - MovedAttribute("urlretrieve", "urllib", "urllib.request"), - MovedAttribute("urlcleanup", "urllib", "urllib.request"), - MovedAttribute("URLopener", "urllib", "urllib.request"), - MovedAttribute("FancyURLopener", "urllib", "urllib.request"), - MovedAttribute("proxy_bypass", "urllib", "urllib.request"), -] -for attr in _urllib_request_moved_attributes: - setattr(Module_six_moves_urllib_request, attr.name, attr) -del attr - -Module_six_moves_urllib_request._moved_attributes = _urllib_request_moved_attributes - -_importer._add_module(Module_six_moves_urllib_request(__name__ + ".moves.urllib.request"), - "moves.urllib_request", "moves.urllib.request") - - -class Module_six_moves_urllib_response(_LazyModule): - - """Lazy loading of moved objects in six.moves.urllib_response""" - - -_urllib_response_moved_attributes = [ - MovedAttribute("addbase", "urllib", "urllib.response"), - MovedAttribute("addclosehook", "urllib", "urllib.response"), - MovedAttribute("addinfo", "urllib", "urllib.response"), - MovedAttribute("addinfourl", "urllib", "urllib.response"), -] -for attr in _urllib_response_moved_attributes: - setattr(Module_six_moves_urllib_response, attr.name, attr) -del attr - -Module_six_moves_urllib_response._moved_attributes = _urllib_response_moved_attributes - -_importer._add_module(Module_six_moves_urllib_response(__name__ + ".moves.urllib.response"), - "moves.urllib_response", "moves.urllib.response") - - -class Module_six_moves_urllib_robotparser(_LazyModule): - - """Lazy loading of moved objects in six.moves.urllib_robotparser""" - - -_urllib_robotparser_moved_attributes = [ - MovedAttribute("RobotFileParser", "robotparser", "urllib.robotparser"), -] -for attr in _urllib_robotparser_moved_attributes: - setattr(Module_six_moves_urllib_robotparser, attr.name, attr) -del attr - -Module_six_moves_urllib_robotparser._moved_attributes = _urllib_robotparser_moved_attributes - -_importer._add_module(Module_six_moves_urllib_robotparser(__name__ + ".moves.urllib.robotparser"), - "moves.urllib_robotparser", "moves.urllib.robotparser") - - -class Module_six_moves_urllib(types.ModuleType): - - """Create a six.moves.urllib namespace that resembles the Python 3 namespace""" - __path__ = [] # mark as package - parse = _importer._get_module("moves.urllib_parse") - error = _importer._get_module("moves.urllib_error") - request = _importer._get_module("moves.urllib_request") - response = _importer._get_module("moves.urllib_response") - robotparser = _importer._get_module("moves.urllib_robotparser") - - def __dir__(self): - return ['parse', 'error', 'request', 'response', 'robotparser'] - -_importer._add_module(Module_six_moves_urllib(__name__ + ".moves.urllib"), - "moves.urllib") - - -def add_move(move): - """Add an item to six.moves.""" - setattr(_MovedItems, move.name, move) - - -def remove_move(name): - """Remove item from six.moves.""" - try: - delattr(_MovedItems, name) - except AttributeError: - try: - del moves.__dict__[name] - except KeyError: - raise AttributeError("no such move, %r" % (name,)) - - -if PY3: - _meth_func = "__func__" - _meth_self = "__self__" - - _func_closure = "__closure__" - _func_code = "__code__" - _func_defaults = "__defaults__" - _func_globals = "__globals__" -else: - _meth_func = "im_func" - _meth_self = "im_self" - - _func_closure = "func_closure" - _func_code = "func_code" - _func_defaults = "func_defaults" - _func_globals = "func_globals" - - -try: - advance_iterator = next -except NameError: - def advance_iterator(it): - return it.next() -next = advance_iterator - - -try: - callable = callable -except NameError: - def callable(obj): - return any("__call__" in klass.__dict__ for klass in type(obj).__mro__) - - -if PY3: - def get_unbound_function(unbound): - return unbound - - create_bound_method = types.MethodType - - def create_unbound_method(func, cls): - return func - - Iterator = object -else: - def get_unbound_function(unbound): - return unbound.im_func - - def create_bound_method(func, obj): - return types.MethodType(func, obj, obj.__class__) - - def create_unbound_method(func, cls): - return types.MethodType(func, None, cls) - - class Iterator(object): - - def next(self): - return type(self).__next__(self) - - callable = callable -_add_doc(get_unbound_function, - """Get the function out of a possibly unbound function""") - - -get_method_function = operator.attrgetter(_meth_func) -get_method_self = operator.attrgetter(_meth_self) -get_function_closure = operator.attrgetter(_func_closure) -get_function_code = operator.attrgetter(_func_code) -get_function_defaults = operator.attrgetter(_func_defaults) -get_function_globals = operator.attrgetter(_func_globals) - - -if PY3: - def iterkeys(d, **kw): - return iter(d.keys(**kw)) - - def itervalues(d, **kw): - return iter(d.values(**kw)) - - def iteritems(d, **kw): - return iter(d.items(**kw)) - - def iterlists(d, **kw): - return iter(d.lists(**kw)) - - viewkeys = operator.methodcaller("keys") - - viewvalues = operator.methodcaller("values") - - viewitems = operator.methodcaller("items") -else: - def iterkeys(d, **kw): - return d.iterkeys(**kw) - - def itervalues(d, **kw): - return d.itervalues(**kw) - - def iteritems(d, **kw): - return d.iteritems(**kw) - - def iterlists(d, **kw): - return d.iterlists(**kw) - - viewkeys = operator.methodcaller("viewkeys") - - viewvalues = operator.methodcaller("viewvalues") - - viewitems = operator.methodcaller("viewitems") - -_add_doc(iterkeys, "Return an iterator over the keys of a dictionary.") -_add_doc(itervalues, "Return an iterator over the values of a dictionary.") -_add_doc(iteritems, - "Return an iterator over the (key, value) pairs of a dictionary.") -_add_doc(iterlists, - "Return an iterator over the (key, [values]) pairs of a dictionary.") - - -if PY3: - def b(s): - return s.encode("latin-1") - - def u(s): - return s - unichr = chr - import struct - int2byte = struct.Struct(">B").pack - del struct - byte2int = operator.itemgetter(0) - indexbytes = operator.getitem - iterbytes = iter - import io - StringIO = io.StringIO - BytesIO = io.BytesIO - _assertCountEqual = "assertCountEqual" - if sys.version_info[1] <= 1: - _assertRaisesRegex = "assertRaisesRegexp" - _assertRegex = "assertRegexpMatches" - else: - _assertRaisesRegex = "assertRaisesRegex" - _assertRegex = "assertRegex" -else: - def b(s): - return s - # Workaround for standalone backslash - - def u(s): - return unicode(s.replace(r'\\', r'\\\\'), "unicode_escape") - unichr = unichr - int2byte = chr - - def byte2int(bs): - return ord(bs[0]) - - def indexbytes(buf, i): - return ord(buf[i]) - iterbytes = functools.partial(itertools.imap, ord) - import StringIO - StringIO = BytesIO = StringIO.StringIO - _assertCountEqual = "assertItemsEqual" - _assertRaisesRegex = "assertRaisesRegexp" - _assertRegex = "assertRegexpMatches" -_add_doc(b, """Byte literal""") -_add_doc(u, """Text literal""") - - -def assertCountEqual(self, *args, **kwargs): - return getattr(self, _assertCountEqual)(*args, **kwargs) - - -def assertRaisesRegex(self, *args, **kwargs): - return getattr(self, _assertRaisesRegex)(*args, **kwargs) - - -def assertRegex(self, *args, **kwargs): - return getattr(self, _assertRegex)(*args, **kwargs) - - -if PY3: - exec_ = getattr(moves.builtins, "exec") - - def reraise(tp, value, tb=None): - if value is None: - value = tp() - if value.__traceback__ is not tb: - raise value.with_traceback(tb) - raise value - -else: - def exec_(_code_, _globs_=None, _locs_=None): - """Execute code in a namespace.""" - if _globs_ is None: - frame = sys._getframe(1) - _globs_ = frame.f_globals - if _locs_ is None: - _locs_ = frame.f_locals - del frame - elif _locs_ is None: - _locs_ = _globs_ - exec("""exec _code_ in _globs_, _locs_""") - - exec_("""def reraise(tp, value, tb=None): - raise tp, value, tb -""") - - -if sys.version_info[:2] == (3, 2): - exec_("""def raise_from(value, from_value): - if from_value is None: - raise value - raise value from from_value -""") -elif sys.version_info[:2] > (3, 2): - exec_("""def raise_from(value, from_value): - raise value from from_value -""") -else: - def raise_from(value, from_value): - raise value - - -print_ = getattr(moves.builtins, "print", None) -if print_ is None: - def print_(*args, **kwargs): - """The new-style print function for Python 2.4 and 2.5.""" - fp = kwargs.pop("file", sys.stdout) - if fp is None: - return - - def write(data): - if not isinstance(data, basestring): - data = str(data) - # If the file has an encoding, encode unicode with it. - if (isinstance(fp, file) and - isinstance(data, unicode) and - fp.encoding is not None): - errors = getattr(fp, "errors", None) - if errors is None: - errors = "strict" - data = data.encode(fp.encoding, errors) - fp.write(data) - want_unicode = False - sep = kwargs.pop("sep", None) - if sep is not None: - if isinstance(sep, unicode): - want_unicode = True - elif not isinstance(sep, str): - raise TypeError("sep must be None or a string") - end = kwargs.pop("end", None) - if end is not None: - if isinstance(end, unicode): - want_unicode = True - elif not isinstance(end, str): - raise TypeError("end must be None or a string") - if kwargs: - raise TypeError("invalid keyword arguments to print()") - if not want_unicode: - for arg in args: - if isinstance(arg, unicode): - want_unicode = True - break - if want_unicode: - newline = unicode("\n") - space = unicode(" ") - else: - newline = "\n" - space = " " - if sep is None: - sep = space - if end is None: - end = newline - for i, arg in enumerate(args): - if i: - write(sep) - write(arg) - write(end) -if sys.version_info[:2] < (3, 3): - _print = print_ - - def print_(*args, **kwargs): - fp = kwargs.get("file", sys.stdout) - flush = kwargs.pop("flush", False) - _print(*args, **kwargs) - if flush and fp is not None: - fp.flush() - -_add_doc(reraise, """Reraise an exception.""") - -if sys.version_info[0:2] < (3, 4): - def wraps(wrapped, assigned=functools.WRAPPER_ASSIGNMENTS, - updated=functools.WRAPPER_UPDATES): - def wrapper(f): - f = functools.wraps(wrapped, assigned, updated)(f) - f.__wrapped__ = wrapped - return f - return wrapper -else: - wraps = functools.wraps - - -def with_metaclass(meta, *bases): - """Create a base class with a metaclass.""" - # This requires a bit of explanation: the basic idea is to make a dummy - # metaclass for one level of class instantiation that replaces itself with - # the actual metaclass. - class metaclass(meta): - - def __new__(cls, name, this_bases, d): - return meta(name, bases, d) - return type.__new__(metaclass, 'temporary_class', (), {}) - - -def add_metaclass(metaclass): - """Class decorator for creating a class with a metaclass.""" - def wrapper(cls): - orig_vars = cls.__dict__.copy() - slots = orig_vars.get('__slots__') - if slots is not None: - if isinstance(slots, str): - slots = [slots] - for slots_var in slots: - orig_vars.pop(slots_var) - orig_vars.pop('__dict__', None) - orig_vars.pop('__weakref__', None) - return metaclass(cls.__name__, cls.__bases__, orig_vars) - return wrapper - - -def python_2_unicode_compatible(klass): - """ - A decorator that defines __unicode__ and __str__ methods under Python 2. - Under Python 3 it does nothing. - - To support Python 2 and 3 with a single code base, define a __str__ method - returning text and apply this decorator to the class. - """ - if PY2: - if '__str__' not in klass.__dict__: - raise ValueError("@python_2_unicode_compatible cannot be applied " - "to %s because it doesn't define __str__()." % - klass.__name__) - klass.__unicode__ = klass.__str__ - klass.__str__ = lambda self: self.__unicode__().encode('utf-8') - return klass - - -# Complete the moves implementation. -# This code is at the end of this module to speed up module loading. -# Turn this module into a package. -__path__ = [] # required for PEP 302 and PEP 451 -__package__ = __name__ # see PEP 366 @ReservedAssignment -if globals().get("__spec__") is not None: - __spec__.submodule_search_locations = [] # PEP 451 @UndefinedVariable -# Remove other six meta path importers, since they cause problems. This can -# happen if six is removed from sys.modules and then reloaded. (Setuptools does -# this for some reason.) -if sys.meta_path: - for i, importer in enumerate(sys.meta_path): - # Here's some real nastiness: Another "instance" of the six module might - # be floating around. Therefore, we can't use isinstance() to check for - # the six meta path importer, since the other six instance will have - # inserted an importer with different class. - if (type(importer).__name__ == "_SixMetaPathImporter" and - importer.name == __name__): - del sys.meta_path[i] - break - del i, importer -# Finally, add the importer to the meta path import hook. -sys.meta_path.append(_importer) diff --git a/libs/common/setuptools/_vendor/tomli/__init__.py b/libs/common/setuptools/_vendor/tomli/__init__.py new file mode 100644 index 00000000..4c6ec97e --- /dev/null +++ b/libs/common/setuptools/_vendor/tomli/__init__.py @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: 2021 Taneli Hukkinen +# Licensed to PSF under a Contributor Agreement. + +__all__ = ("loads", "load", "TOMLDecodeError") +__version__ = "2.0.1" # DO NOT EDIT THIS LINE MANUALLY. LET bump2version UTILITY DO IT + +from ._parser import TOMLDecodeError, load, loads + +# Pretend this exception was created here. +TOMLDecodeError.__module__ = __name__ diff --git a/libs/common/setuptools/_vendor/tomli/_parser.py b/libs/common/setuptools/_vendor/tomli/_parser.py new file mode 100644 index 00000000..f1bb0aa1 --- /dev/null +++ b/libs/common/setuptools/_vendor/tomli/_parser.py @@ -0,0 +1,691 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: 2021 Taneli Hukkinen +# Licensed to PSF under a Contributor Agreement. + +from __future__ import annotations + +from collections.abc import Iterable +import string +from types import MappingProxyType +from typing import Any, BinaryIO, NamedTuple + +from ._re import ( + RE_DATETIME, + RE_LOCALTIME, + RE_NUMBER, + match_to_datetime, + match_to_localtime, + match_to_number, +) +from ._types import Key, ParseFloat, Pos + +ASCII_CTRL = frozenset(chr(i) for i in range(32)) | frozenset(chr(127)) + +# Neither of these sets include quotation mark or backslash. They are +# currently handled as separate cases in the parser functions. +ILLEGAL_BASIC_STR_CHARS = ASCII_CTRL - frozenset("\t") +ILLEGAL_MULTILINE_BASIC_STR_CHARS = ASCII_CTRL - frozenset("\t\n") + +ILLEGAL_LITERAL_STR_CHARS = ILLEGAL_BASIC_STR_CHARS +ILLEGAL_MULTILINE_LITERAL_STR_CHARS = ILLEGAL_MULTILINE_BASIC_STR_CHARS + +ILLEGAL_COMMENT_CHARS = ILLEGAL_BASIC_STR_CHARS + +TOML_WS = frozenset(" \t") +TOML_WS_AND_NEWLINE = TOML_WS | frozenset("\n") +BARE_KEY_CHARS = frozenset(string.ascii_letters + string.digits + "-_") +KEY_INITIAL_CHARS = BARE_KEY_CHARS | frozenset("\"'") +HEXDIGIT_CHARS = frozenset(string.hexdigits) + +BASIC_STR_ESCAPE_REPLACEMENTS = MappingProxyType( + { + "\\b": "\u0008", # backspace + "\\t": "\u0009", # tab + "\\n": "\u000A", # linefeed + "\\f": "\u000C", # form feed + "\\r": "\u000D", # carriage return + '\\"': "\u0022", # quote + "\\\\": "\u005C", # backslash + } +) + + +class TOMLDecodeError(ValueError): + """An error raised if a document is not valid TOML.""" + + +def load(__fp: BinaryIO, *, parse_float: ParseFloat = float) -> dict[str, Any]: + """Parse TOML from a binary file object.""" + b = __fp.read() + try: + s = b.decode() + except AttributeError: + raise TypeError( + "File must be opened in binary mode, e.g. use `open('foo.toml', 'rb')`" + ) from None + return loads(s, parse_float=parse_float) + + +def loads(__s: str, *, parse_float: ParseFloat = float) -> dict[str, Any]: # noqa: C901 + """Parse TOML from a string.""" + + # The spec allows converting "\r\n" to "\n", even in string + # literals. Let's do so to simplify parsing. + src = __s.replace("\r\n", "\n") + pos = 0 + out = Output(NestedDict(), Flags()) + header: Key = () + parse_float = make_safe_parse_float(parse_float) + + # Parse one statement at a time + # (typically means one line in TOML source) + while True: + # 1. Skip line leading whitespace + pos = skip_chars(src, pos, TOML_WS) + + # 2. Parse rules. Expect one of the following: + # - end of file + # - end of line + # - comment + # - key/value pair + # - append dict to list (and move to its namespace) + # - create dict (and move to its namespace) + # Skip trailing whitespace when applicable. + try: + char = src[pos] + except IndexError: + break + if char == "\n": + pos += 1 + continue + if char in KEY_INITIAL_CHARS: + pos = key_value_rule(src, pos, out, header, parse_float) + pos = skip_chars(src, pos, TOML_WS) + elif char == "[": + try: + second_char: str | None = src[pos + 1] + except IndexError: + second_char = None + out.flags.finalize_pending() + if second_char == "[": + pos, header = create_list_rule(src, pos, out) + else: + pos, header = create_dict_rule(src, pos, out) + pos = skip_chars(src, pos, TOML_WS) + elif char != "#": + raise suffixed_err(src, pos, "Invalid statement") + + # 3. Skip comment + pos = skip_comment(src, pos) + + # 4. Expect end of line or end of file + try: + char = src[pos] + except IndexError: + break + if char != "\n": + raise suffixed_err( + src, pos, "Expected newline or end of document after a statement" + ) + pos += 1 + + return out.data.dict + + +class Flags: + """Flags that map to parsed keys/namespaces.""" + + # Marks an immutable namespace (inline array or inline table). + FROZEN = 0 + # Marks a nest that has been explicitly created and can no longer + # be opened using the "[table]" syntax. + EXPLICIT_NEST = 1 + + def __init__(self) -> None: + self._flags: dict[str, dict] = {} + self._pending_flags: set[tuple[Key, int]] = set() + + def add_pending(self, key: Key, flag: int) -> None: + self._pending_flags.add((key, flag)) + + def finalize_pending(self) -> None: + for key, flag in self._pending_flags: + self.set(key, flag, recursive=False) + self._pending_flags.clear() + + def unset_all(self, key: Key) -> None: + cont = self._flags + for k in key[:-1]: + if k not in cont: + return + cont = cont[k]["nested"] + cont.pop(key[-1], None) + + def set(self, key: Key, flag: int, *, recursive: bool) -> None: # noqa: A003 + cont = self._flags + key_parent, key_stem = key[:-1], key[-1] + for k in key_parent: + if k not in cont: + cont[k] = {"flags": set(), "recursive_flags": set(), "nested": {}} + cont = cont[k]["nested"] + if key_stem not in cont: + cont[key_stem] = {"flags": set(), "recursive_flags": set(), "nested": {}} + cont[key_stem]["recursive_flags" if recursive else "flags"].add(flag) + + def is_(self, key: Key, flag: int) -> bool: + if not key: + return False # document root has no flags + cont = self._flags + for k in key[:-1]: + if k not in cont: + return False + inner_cont = cont[k] + if flag in inner_cont["recursive_flags"]: + return True + cont = inner_cont["nested"] + key_stem = key[-1] + if key_stem in cont: + cont = cont[key_stem] + return flag in cont["flags"] or flag in cont["recursive_flags"] + return False + + +class NestedDict: + def __init__(self) -> None: + # The parsed content of the TOML document + self.dict: dict[str, Any] = {} + + def get_or_create_nest( + self, + key: Key, + *, + access_lists: bool = True, + ) -> dict: + cont: Any = self.dict + for k in key: + if k not in cont: + cont[k] = {} + cont = cont[k] + if access_lists and isinstance(cont, list): + cont = cont[-1] + if not isinstance(cont, dict): + raise KeyError("There is no nest behind this key") + return cont + + def append_nest_to_list(self, key: Key) -> None: + cont = self.get_or_create_nest(key[:-1]) + last_key = key[-1] + if last_key in cont: + list_ = cont[last_key] + if not isinstance(list_, list): + raise KeyError("An object other than list found behind this key") + list_.append({}) + else: + cont[last_key] = [{}] + + +class Output(NamedTuple): + data: NestedDict + flags: Flags + + +def skip_chars(src: str, pos: Pos, chars: Iterable[str]) -> Pos: + try: + while src[pos] in chars: + pos += 1 + except IndexError: + pass + return pos + + +def skip_until( + src: str, + pos: Pos, + expect: str, + *, + error_on: frozenset[str], + error_on_eof: bool, +) -> Pos: + try: + new_pos = src.index(expect, pos) + except ValueError: + new_pos = len(src) + if error_on_eof: + raise suffixed_err(src, new_pos, f"Expected {expect!r}") from None + + if not error_on.isdisjoint(src[pos:new_pos]): + while src[pos] not in error_on: + pos += 1 + raise suffixed_err(src, pos, f"Found invalid character {src[pos]!r}") + return new_pos + + +def skip_comment(src: str, pos: Pos) -> Pos: + try: + char: str | None = src[pos] + except IndexError: + char = None + if char == "#": + return skip_until( + src, pos + 1, "\n", error_on=ILLEGAL_COMMENT_CHARS, error_on_eof=False + ) + return pos + + +def skip_comments_and_array_ws(src: str, pos: Pos) -> Pos: + while True: + pos_before_skip = pos + pos = skip_chars(src, pos, TOML_WS_AND_NEWLINE) + pos = skip_comment(src, pos) + if pos == pos_before_skip: + return pos + + +def create_dict_rule(src: str, pos: Pos, out: Output) -> tuple[Pos, Key]: + pos += 1 # Skip "[" + pos = skip_chars(src, pos, TOML_WS) + pos, key = parse_key(src, pos) + + if out.flags.is_(key, Flags.EXPLICIT_NEST) or out.flags.is_(key, Flags.FROZEN): + raise suffixed_err(src, pos, f"Cannot declare {key} twice") + out.flags.set(key, Flags.EXPLICIT_NEST, recursive=False) + try: + out.data.get_or_create_nest(key) + except KeyError: + raise suffixed_err(src, pos, "Cannot overwrite a value") from None + + if not src.startswith("]", pos): + raise suffixed_err(src, pos, "Expected ']' at the end of a table declaration") + return pos + 1, key + + +def create_list_rule(src: str, pos: Pos, out: Output) -> tuple[Pos, Key]: + pos += 2 # Skip "[[" + pos = skip_chars(src, pos, TOML_WS) + pos, key = parse_key(src, pos) + + if out.flags.is_(key, Flags.FROZEN): + raise suffixed_err(src, pos, f"Cannot mutate immutable namespace {key}") + # Free the namespace now that it points to another empty list item... + out.flags.unset_all(key) + # ...but this key precisely is still prohibited from table declaration + out.flags.set(key, Flags.EXPLICIT_NEST, recursive=False) + try: + out.data.append_nest_to_list(key) + except KeyError: + raise suffixed_err(src, pos, "Cannot overwrite a value") from None + + if not src.startswith("]]", pos): + raise suffixed_err(src, pos, "Expected ']]' at the end of an array declaration") + return pos + 2, key + + +def key_value_rule( + src: str, pos: Pos, out: Output, header: Key, parse_float: ParseFloat +) -> Pos: + pos, key, value = parse_key_value_pair(src, pos, parse_float) + key_parent, key_stem = key[:-1], key[-1] + abs_key_parent = header + key_parent + + relative_path_cont_keys = (header + key[:i] for i in range(1, len(key))) + for cont_key in relative_path_cont_keys: + # Check that dotted key syntax does not redefine an existing table + if out.flags.is_(cont_key, Flags.EXPLICIT_NEST): + raise suffixed_err(src, pos, f"Cannot redefine namespace {cont_key}") + # Containers in the relative path can't be opened with the table syntax or + # dotted key/value syntax in following table sections. + out.flags.add_pending(cont_key, Flags.EXPLICIT_NEST) + + if out.flags.is_(abs_key_parent, Flags.FROZEN): + raise suffixed_err( + src, pos, f"Cannot mutate immutable namespace {abs_key_parent}" + ) + + try: + nest = out.data.get_or_create_nest(abs_key_parent) + except KeyError: + raise suffixed_err(src, pos, "Cannot overwrite a value") from None + if key_stem in nest: + raise suffixed_err(src, pos, "Cannot overwrite a value") + # Mark inline table and array namespaces recursively immutable + if isinstance(value, (dict, list)): + out.flags.set(header + key, Flags.FROZEN, recursive=True) + nest[key_stem] = value + return pos + + +def parse_key_value_pair( + src: str, pos: Pos, parse_float: ParseFloat +) -> tuple[Pos, Key, Any]: + pos, key = parse_key(src, pos) + try: + char: str | None = src[pos] + except IndexError: + char = None + if char != "=": + raise suffixed_err(src, pos, "Expected '=' after a key in a key/value pair") + pos += 1 + pos = skip_chars(src, pos, TOML_WS) + pos, value = parse_value(src, pos, parse_float) + return pos, key, value + + +def parse_key(src: str, pos: Pos) -> tuple[Pos, Key]: + pos, key_part = parse_key_part(src, pos) + key: Key = (key_part,) + pos = skip_chars(src, pos, TOML_WS) + while True: + try: + char: str | None = src[pos] + except IndexError: + char = None + if char != ".": + return pos, key + pos += 1 + pos = skip_chars(src, pos, TOML_WS) + pos, key_part = parse_key_part(src, pos) + key += (key_part,) + pos = skip_chars(src, pos, TOML_WS) + + +def parse_key_part(src: str, pos: Pos) -> tuple[Pos, str]: + try: + char: str | None = src[pos] + except IndexError: + char = None + if char in BARE_KEY_CHARS: + start_pos = pos + pos = skip_chars(src, pos, BARE_KEY_CHARS) + return pos, src[start_pos:pos] + if char == "'": + return parse_literal_str(src, pos) + if char == '"': + return parse_one_line_basic_str(src, pos) + raise suffixed_err(src, pos, "Invalid initial character for a key part") + + +def parse_one_line_basic_str(src: str, pos: Pos) -> tuple[Pos, str]: + pos += 1 + return parse_basic_str(src, pos, multiline=False) + + +def parse_array(src: str, pos: Pos, parse_float: ParseFloat) -> tuple[Pos, list]: + pos += 1 + array: list = [] + + pos = skip_comments_and_array_ws(src, pos) + if src.startswith("]", pos): + return pos + 1, array + while True: + pos, val = parse_value(src, pos, parse_float) + array.append(val) + pos = skip_comments_and_array_ws(src, pos) + + c = src[pos : pos + 1] + if c == "]": + return pos + 1, array + if c != ",": + raise suffixed_err(src, pos, "Unclosed array") + pos += 1 + + pos = skip_comments_and_array_ws(src, pos) + if src.startswith("]", pos): + return pos + 1, array + + +def parse_inline_table(src: str, pos: Pos, parse_float: ParseFloat) -> tuple[Pos, dict]: + pos += 1 + nested_dict = NestedDict() + flags = Flags() + + pos = skip_chars(src, pos, TOML_WS) + if src.startswith("}", pos): + return pos + 1, nested_dict.dict + while True: + pos, key, value = parse_key_value_pair(src, pos, parse_float) + key_parent, key_stem = key[:-1], key[-1] + if flags.is_(key, Flags.FROZEN): + raise suffixed_err(src, pos, f"Cannot mutate immutable namespace {key}") + try: + nest = nested_dict.get_or_create_nest(key_parent, access_lists=False) + except KeyError: + raise suffixed_err(src, pos, "Cannot overwrite a value") from None + if key_stem in nest: + raise suffixed_err(src, pos, f"Duplicate inline table key {key_stem!r}") + nest[key_stem] = value + pos = skip_chars(src, pos, TOML_WS) + c = src[pos : pos + 1] + if c == "}": + return pos + 1, nested_dict.dict + if c != ",": + raise suffixed_err(src, pos, "Unclosed inline table") + if isinstance(value, (dict, list)): + flags.set(key, Flags.FROZEN, recursive=True) + pos += 1 + pos = skip_chars(src, pos, TOML_WS) + + +def parse_basic_str_escape( + src: str, pos: Pos, *, multiline: bool = False +) -> tuple[Pos, str]: + escape_id = src[pos : pos + 2] + pos += 2 + if multiline and escape_id in {"\\ ", "\\\t", "\\\n"}: + # Skip whitespace until next non-whitespace character or end of + # the doc. Error if non-whitespace is found before newline. + if escape_id != "\\\n": + pos = skip_chars(src, pos, TOML_WS) + try: + char = src[pos] + except IndexError: + return pos, "" + if char != "\n": + raise suffixed_err(src, pos, "Unescaped '\\' in a string") + pos += 1 + pos = skip_chars(src, pos, TOML_WS_AND_NEWLINE) + return pos, "" + if escape_id == "\\u": + return parse_hex_char(src, pos, 4) + if escape_id == "\\U": + return parse_hex_char(src, pos, 8) + try: + return pos, BASIC_STR_ESCAPE_REPLACEMENTS[escape_id] + except KeyError: + raise suffixed_err(src, pos, "Unescaped '\\' in a string") from None + + +def parse_basic_str_escape_multiline(src: str, pos: Pos) -> tuple[Pos, str]: + return parse_basic_str_escape(src, pos, multiline=True) + + +def parse_hex_char(src: str, pos: Pos, hex_len: int) -> tuple[Pos, str]: + hex_str = src[pos : pos + hex_len] + if len(hex_str) != hex_len or not HEXDIGIT_CHARS.issuperset(hex_str): + raise suffixed_err(src, pos, "Invalid hex value") + pos += hex_len + hex_int = int(hex_str, 16) + if not is_unicode_scalar_value(hex_int): + raise suffixed_err(src, pos, "Escaped character is not a Unicode scalar value") + return pos, chr(hex_int) + + +def parse_literal_str(src: str, pos: Pos) -> tuple[Pos, str]: + pos += 1 # Skip starting apostrophe + start_pos = pos + pos = skip_until( + src, pos, "'", error_on=ILLEGAL_LITERAL_STR_CHARS, error_on_eof=True + ) + return pos + 1, src[start_pos:pos] # Skip ending apostrophe + + +def parse_multiline_str(src: str, pos: Pos, *, literal: bool) -> tuple[Pos, str]: + pos += 3 + if src.startswith("\n", pos): + pos += 1 + + if literal: + delim = "'" + end_pos = skip_until( + src, + pos, + "'''", + error_on=ILLEGAL_MULTILINE_LITERAL_STR_CHARS, + error_on_eof=True, + ) + result = src[pos:end_pos] + pos = end_pos + 3 + else: + delim = '"' + pos, result = parse_basic_str(src, pos, multiline=True) + + # Add at maximum two extra apostrophes/quotes if the end sequence + # is 4 or 5 chars long instead of just 3. + if not src.startswith(delim, pos): + return pos, result + pos += 1 + if not src.startswith(delim, pos): + return pos, result + delim + pos += 1 + return pos, result + (delim * 2) + + +def parse_basic_str(src: str, pos: Pos, *, multiline: bool) -> tuple[Pos, str]: + if multiline: + error_on = ILLEGAL_MULTILINE_BASIC_STR_CHARS + parse_escapes = parse_basic_str_escape_multiline + else: + error_on = ILLEGAL_BASIC_STR_CHARS + parse_escapes = parse_basic_str_escape + result = "" + start_pos = pos + while True: + try: + char = src[pos] + except IndexError: + raise suffixed_err(src, pos, "Unterminated string") from None + if char == '"': + if not multiline: + return pos + 1, result + src[start_pos:pos] + if src.startswith('"""', pos): + return pos + 3, result + src[start_pos:pos] + pos += 1 + continue + if char == "\\": + result += src[start_pos:pos] + pos, parsed_escape = parse_escapes(src, pos) + result += parsed_escape + start_pos = pos + continue + if char in error_on: + raise suffixed_err(src, pos, f"Illegal character {char!r}") + pos += 1 + + +def parse_value( # noqa: C901 + src: str, pos: Pos, parse_float: ParseFloat +) -> tuple[Pos, Any]: + try: + char: str | None = src[pos] + except IndexError: + char = None + + # IMPORTANT: order conditions based on speed of checking and likelihood + + # Basic strings + if char == '"': + if src.startswith('"""', pos): + return parse_multiline_str(src, pos, literal=False) + return parse_one_line_basic_str(src, pos) + + # Literal strings + if char == "'": + if src.startswith("'''", pos): + return parse_multiline_str(src, pos, literal=True) + return parse_literal_str(src, pos) + + # Booleans + if char == "t": + if src.startswith("true", pos): + return pos + 4, True + if char == "f": + if src.startswith("false", pos): + return pos + 5, False + + # Arrays + if char == "[": + return parse_array(src, pos, parse_float) + + # Inline tables + if char == "{": + return parse_inline_table(src, pos, parse_float) + + # Dates and times + datetime_match = RE_DATETIME.match(src, pos) + if datetime_match: + try: + datetime_obj = match_to_datetime(datetime_match) + except ValueError as e: + raise suffixed_err(src, pos, "Invalid date or datetime") from e + return datetime_match.end(), datetime_obj + localtime_match = RE_LOCALTIME.match(src, pos) + if localtime_match: + return localtime_match.end(), match_to_localtime(localtime_match) + + # Integers and "normal" floats. + # The regex will greedily match any type starting with a decimal + # char, so needs to be located after handling of dates and times. + number_match = RE_NUMBER.match(src, pos) + if number_match: + return number_match.end(), match_to_number(number_match, parse_float) + + # Special floats + first_three = src[pos : pos + 3] + if first_three in {"inf", "nan"}: + return pos + 3, parse_float(first_three) + first_four = src[pos : pos + 4] + if first_four in {"-inf", "+inf", "-nan", "+nan"}: + return pos + 4, parse_float(first_four) + + raise suffixed_err(src, pos, "Invalid value") + + +def suffixed_err(src: str, pos: Pos, msg: str) -> TOMLDecodeError: + """Return a `TOMLDecodeError` where error message is suffixed with + coordinates in source.""" + + def coord_repr(src: str, pos: Pos) -> str: + if pos >= len(src): + return "end of document" + line = src.count("\n", 0, pos) + 1 + if line == 1: + column = pos + 1 + else: + column = pos - src.rindex("\n", 0, pos) + return f"line {line}, column {column}" + + return TOMLDecodeError(f"{msg} (at {coord_repr(src, pos)})") + + +def is_unicode_scalar_value(codepoint: int) -> bool: + return (0 <= codepoint <= 55295) or (57344 <= codepoint <= 1114111) + + +def make_safe_parse_float(parse_float: ParseFloat) -> ParseFloat: + """A decorator to make `parse_float` safe. + + `parse_float` must not return dicts or lists, because these types + would be mixed with parsed TOML tables and arrays, thus confusing + the parser. The returned decorated callable raises `ValueError` + instead of returning illegal types. + """ + # The default `float` callable never returns illegal types. Optimize it. + if parse_float is float: # type: ignore[comparison-overlap] + return float + + def safe_parse_float(float_str: str) -> Any: + float_value = parse_float(float_str) + if isinstance(float_value, (dict, list)): + raise ValueError("parse_float must not return dicts or lists") + return float_value + + return safe_parse_float diff --git a/libs/common/setuptools/_vendor/tomli/_re.py b/libs/common/setuptools/_vendor/tomli/_re.py new file mode 100644 index 00000000..994bb749 --- /dev/null +++ b/libs/common/setuptools/_vendor/tomli/_re.py @@ -0,0 +1,107 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: 2021 Taneli Hukkinen +# Licensed to PSF under a Contributor Agreement. + +from __future__ import annotations + +from datetime import date, datetime, time, timedelta, timezone, tzinfo +from functools import lru_cache +import re +from typing import Any + +from ._types import ParseFloat + +# E.g. +# - 00:32:00.999999 +# - 00:32:00 +_TIME_RE_STR = r"([01][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])(?:\.([0-9]{1,6})[0-9]*)?" + +RE_NUMBER = re.compile( + r""" +0 +(?: + x[0-9A-Fa-f](?:_?[0-9A-Fa-f])* # hex + | + b[01](?:_?[01])* # bin + | + o[0-7](?:_?[0-7])* # oct +) +| +[+-]?(?:0|[1-9](?:_?[0-9])*) # dec, integer part +(?P + (?:\.[0-9](?:_?[0-9])*)? # optional fractional part + (?:[eE][+-]?[0-9](?:_?[0-9])*)? # optional exponent part +) +""", + flags=re.VERBOSE, +) +RE_LOCALTIME = re.compile(_TIME_RE_STR) +RE_DATETIME = re.compile( + rf""" +([0-9]{{4}})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01]) # date, e.g. 1988-10-27 +(?: + [Tt ] + {_TIME_RE_STR} + (?:([Zz])|([+-])([01][0-9]|2[0-3]):([0-5][0-9]))? # optional time offset +)? +""", + flags=re.VERBOSE, +) + + +def match_to_datetime(match: re.Match) -> datetime | date: + """Convert a `RE_DATETIME` match to `datetime.datetime` or `datetime.date`. + + Raises ValueError if the match does not correspond to a valid date + or datetime. + """ + ( + year_str, + month_str, + day_str, + hour_str, + minute_str, + sec_str, + micros_str, + zulu_time, + offset_sign_str, + offset_hour_str, + offset_minute_str, + ) = match.groups() + year, month, day = int(year_str), int(month_str), int(day_str) + if hour_str is None: + return date(year, month, day) + hour, minute, sec = int(hour_str), int(minute_str), int(sec_str) + micros = int(micros_str.ljust(6, "0")) if micros_str else 0 + if offset_sign_str: + tz: tzinfo | None = cached_tz( + offset_hour_str, offset_minute_str, offset_sign_str + ) + elif zulu_time: + tz = timezone.utc + else: # local date-time + tz = None + return datetime(year, month, day, hour, minute, sec, micros, tzinfo=tz) + + +@lru_cache(maxsize=None) +def cached_tz(hour_str: str, minute_str: str, sign_str: str) -> timezone: + sign = 1 if sign_str == "+" else -1 + return timezone( + timedelta( + hours=sign * int(hour_str), + minutes=sign * int(minute_str), + ) + ) + + +def match_to_localtime(match: re.Match) -> time: + hour_str, minute_str, sec_str, micros_str = match.groups() + micros = int(micros_str.ljust(6, "0")) if micros_str else 0 + return time(int(hour_str), int(minute_str), int(sec_str), micros) + + +def match_to_number(match: re.Match, parse_float: ParseFloat) -> Any: + if match.group("floatpart"): + return parse_float(match.group()) + return int(match.group(), 0) diff --git a/libs/common/setuptools/_vendor/tomli/_types.py b/libs/common/setuptools/_vendor/tomli/_types.py new file mode 100644 index 00000000..d949412e --- /dev/null +++ b/libs/common/setuptools/_vendor/tomli/_types.py @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: 2021 Taneli Hukkinen +# Licensed to PSF under a Contributor Agreement. + +from typing import Any, Callable, Tuple + +# Type annotations +ParseFloat = Callable[[str], Any] +Key = Tuple[str, ...] +Pos = int diff --git a/libs/common/setuptools/_vendor/typing_extensions.py b/libs/common/setuptools/_vendor/typing_extensions.py new file mode 100644 index 00000000..9f1c7aa3 --- /dev/null +++ b/libs/common/setuptools/_vendor/typing_extensions.py @@ -0,0 +1,2296 @@ +import abc +import collections +import collections.abc +import operator +import sys +import typing + +# After PEP 560, internal typing API was substantially reworked. +# This is especially important for Protocol class which uses internal APIs +# quite extensively. +PEP_560 = sys.version_info[:3] >= (3, 7, 0) + +if PEP_560: + GenericMeta = type +else: + # 3.6 + from typing import GenericMeta, _type_vars # noqa + +# The two functions below are copies of typing internal helpers. +# They are needed by _ProtocolMeta + + +def _no_slots_copy(dct): + dict_copy = dict(dct) + if '__slots__' in dict_copy: + for slot in dict_copy['__slots__']: + dict_copy.pop(slot, None) + return dict_copy + + +def _check_generic(cls, parameters): + if not cls.__parameters__: + raise TypeError(f"{cls} is not a generic class") + alen = len(parameters) + elen = len(cls.__parameters__) + if alen != elen: + raise TypeError(f"Too {'many' if alen > elen else 'few'} arguments for {cls};" + f" actual {alen}, expected {elen}") + + +# Please keep __all__ alphabetized within each category. +__all__ = [ + # Super-special typing primitives. + 'ClassVar', + 'Concatenate', + 'Final', + 'ParamSpec', + 'Self', + 'Type', + + # ABCs (from collections.abc). + 'Awaitable', + 'AsyncIterator', + 'AsyncIterable', + 'Coroutine', + 'AsyncGenerator', + 'AsyncContextManager', + 'ChainMap', + + # Concrete collection types. + 'ContextManager', + 'Counter', + 'Deque', + 'DefaultDict', + 'OrderedDict', + 'TypedDict', + + # Structural checks, a.k.a. protocols. + 'SupportsIndex', + + # One-off things. + 'Annotated', + 'final', + 'IntVar', + 'Literal', + 'NewType', + 'overload', + 'Protocol', + 'runtime', + 'runtime_checkable', + 'Text', + 'TypeAlias', + 'TypeGuard', + 'TYPE_CHECKING', +] + +if PEP_560: + __all__.extend(["get_args", "get_origin", "get_type_hints"]) + +# 3.6.2+ +if hasattr(typing, 'NoReturn'): + NoReturn = typing.NoReturn +# 3.6.0-3.6.1 +else: + class _NoReturn(typing._FinalTypingBase, _root=True): + """Special type indicating functions that never return. + Example:: + + from typing import NoReturn + + def stop() -> NoReturn: + raise Exception('no way') + + This type is invalid in other positions, e.g., ``List[NoReturn]`` + will fail in static type checkers. + """ + __slots__ = () + + def __instancecheck__(self, obj): + raise TypeError("NoReturn cannot be used with isinstance().") + + def __subclasscheck__(self, cls): + raise TypeError("NoReturn cannot be used with issubclass().") + + NoReturn = _NoReturn(_root=True) + +# Some unconstrained type variables. These are used by the container types. +# (These are not for export.) +T = typing.TypeVar('T') # Any type. +KT = typing.TypeVar('KT') # Key type. +VT = typing.TypeVar('VT') # Value type. +T_co = typing.TypeVar('T_co', covariant=True) # Any type covariant containers. +T_contra = typing.TypeVar('T_contra', contravariant=True) # Ditto contravariant. + +ClassVar = typing.ClassVar + +# On older versions of typing there is an internal class named "Final". +# 3.8+ +if hasattr(typing, 'Final') and sys.version_info[:2] >= (3, 7): + Final = typing.Final +# 3.7 +elif sys.version_info[:2] >= (3, 7): + class _FinalForm(typing._SpecialForm, _root=True): + + def __repr__(self): + return 'typing_extensions.' + self._name + + def __getitem__(self, parameters): + item = typing._type_check(parameters, + f'{self._name} accepts only single type') + return typing._GenericAlias(self, (item,)) + + Final = _FinalForm('Final', + doc="""A special typing construct to indicate that a name + cannot be re-assigned or overridden in a subclass. + For example: + + MAX_SIZE: Final = 9000 + MAX_SIZE += 1 # Error reported by type checker + + class Connection: + TIMEOUT: Final[int] = 10 + class FastConnector(Connection): + TIMEOUT = 1 # Error reported by type checker + + There is no runtime checking of these properties.""") +# 3.6 +else: + class _Final(typing._FinalTypingBase, _root=True): + """A special typing construct to indicate that a name + cannot be re-assigned or overridden in a subclass. + For example: + + MAX_SIZE: Final = 9000 + MAX_SIZE += 1 # Error reported by type checker + + class Connection: + TIMEOUT: Final[int] = 10 + class FastConnector(Connection): + TIMEOUT = 1 # Error reported by type checker + + There is no runtime checking of these properties. + """ + + __slots__ = ('__type__',) + + def __init__(self, tp=None, **kwds): + self.__type__ = tp + + def __getitem__(self, item): + cls = type(self) + if self.__type__ is None: + return cls(typing._type_check(item, + f'{cls.__name__[1:]} accepts only single type.'), + _root=True) + raise TypeError(f'{cls.__name__[1:]} cannot be further subscripted') + + def _eval_type(self, globalns, localns): + new_tp = typing._eval_type(self.__type__, globalns, localns) + if new_tp == self.__type__: + return self + return type(self)(new_tp, _root=True) + + def __repr__(self): + r = super().__repr__() + if self.__type__ is not None: + r += f'[{typing._type_repr(self.__type__)}]' + return r + + def __hash__(self): + return hash((type(self).__name__, self.__type__)) + + def __eq__(self, other): + if not isinstance(other, _Final): + return NotImplemented + if self.__type__ is not None: + return self.__type__ == other.__type__ + return self is other + + Final = _Final(_root=True) + + +# 3.8+ +if hasattr(typing, 'final'): + final = typing.final +# 3.6-3.7 +else: + def final(f): + """This decorator can be used to indicate to type checkers that + the decorated method cannot be overridden, and decorated class + cannot be subclassed. For example: + + class Base: + @final + def done(self) -> None: + ... + class Sub(Base): + def done(self) -> None: # Error reported by type checker + ... + @final + class Leaf: + ... + class Other(Leaf): # Error reported by type checker + ... + + There is no runtime checking of these properties. + """ + return f + + +def IntVar(name): + return typing.TypeVar(name) + + +# 3.8+: +if hasattr(typing, 'Literal'): + Literal = typing.Literal +# 3.7: +elif sys.version_info[:2] >= (3, 7): + class _LiteralForm(typing._SpecialForm, _root=True): + + def __repr__(self): + return 'typing_extensions.' + self._name + + def __getitem__(self, parameters): + return typing._GenericAlias(self, parameters) + + Literal = _LiteralForm('Literal', + doc="""A type that can be used to indicate to type checkers + that the corresponding value has a value literally equivalent + to the provided parameter. For example: + + var: Literal[4] = 4 + + The type checker understands that 'var' is literally equal to + the value 4 and no other value. + + Literal[...] cannot be subclassed. There is no runtime + checking verifying that the parameter is actually a value + instead of a type.""") +# 3.6: +else: + class _Literal(typing._FinalTypingBase, _root=True): + """A type that can be used to indicate to type checkers that the + corresponding value has a value literally equivalent to the + provided parameter. For example: + + var: Literal[4] = 4 + + The type checker understands that 'var' is literally equal to the + value 4 and no other value. + + Literal[...] cannot be subclassed. There is no runtime checking + verifying that the parameter is actually a value instead of a type. + """ + + __slots__ = ('__values__',) + + def __init__(self, values=None, **kwds): + self.__values__ = values + + def __getitem__(self, values): + cls = type(self) + if self.__values__ is None: + if not isinstance(values, tuple): + values = (values,) + return cls(values, _root=True) + raise TypeError(f'{cls.__name__[1:]} cannot be further subscripted') + + def _eval_type(self, globalns, localns): + return self + + def __repr__(self): + r = super().__repr__() + if self.__values__ is not None: + r += f'[{", ".join(map(typing._type_repr, self.__values__))}]' + return r + + def __hash__(self): + return hash((type(self).__name__, self.__values__)) + + def __eq__(self, other): + if not isinstance(other, _Literal): + return NotImplemented + if self.__values__ is not None: + return self.__values__ == other.__values__ + return self is other + + Literal = _Literal(_root=True) + + +_overload_dummy = typing._overload_dummy # noqa +overload = typing.overload + + +# This is not a real generic class. Don't use outside annotations. +Type = typing.Type + +# Various ABCs mimicking those in collections.abc. +# A few are simply re-exported for completeness. + + +class _ExtensionsGenericMeta(GenericMeta): + def __subclasscheck__(self, subclass): + """This mimics a more modern GenericMeta.__subclasscheck__() logic + (that does not have problems with recursion) to work around interactions + between collections, typing, and typing_extensions on older + versions of Python, see https://github.com/python/typing/issues/501. + """ + if self.__origin__ is not None: + if sys._getframe(1).f_globals['__name__'] not in ['abc', 'functools']: + raise TypeError("Parameterized generics cannot be used with class " + "or instance checks") + return False + if not self.__extra__: + return super().__subclasscheck__(subclass) + res = self.__extra__.__subclasshook__(subclass) + if res is not NotImplemented: + return res + if self.__extra__ in subclass.__mro__: + return True + for scls in self.__extra__.__subclasses__(): + if isinstance(scls, GenericMeta): + continue + if issubclass(subclass, scls): + return True + return False + + +Awaitable = typing.Awaitable +Coroutine = typing.Coroutine +AsyncIterable = typing.AsyncIterable +AsyncIterator = typing.AsyncIterator + +# 3.6.1+ +if hasattr(typing, 'Deque'): + Deque = typing.Deque +# 3.6.0 +else: + class Deque(collections.deque, typing.MutableSequence[T], + metaclass=_ExtensionsGenericMeta, + extra=collections.deque): + __slots__ = () + + def __new__(cls, *args, **kwds): + if cls._gorg is Deque: + return collections.deque(*args, **kwds) + return typing._generic_new(collections.deque, cls, *args, **kwds) + +ContextManager = typing.ContextManager +# 3.6.2+ +if hasattr(typing, 'AsyncContextManager'): + AsyncContextManager = typing.AsyncContextManager +# 3.6.0-3.6.1 +else: + from _collections_abc import _check_methods as _check_methods_in_mro # noqa + + class AsyncContextManager(typing.Generic[T_co]): + __slots__ = () + + async def __aenter__(self): + return self + + @abc.abstractmethod + async def __aexit__(self, exc_type, exc_value, traceback): + return None + + @classmethod + def __subclasshook__(cls, C): + if cls is AsyncContextManager: + return _check_methods_in_mro(C, "__aenter__", "__aexit__") + return NotImplemented + +DefaultDict = typing.DefaultDict + +# 3.7.2+ +if hasattr(typing, 'OrderedDict'): + OrderedDict = typing.OrderedDict +# 3.7.0-3.7.2 +elif (3, 7, 0) <= sys.version_info[:3] < (3, 7, 2): + OrderedDict = typing._alias(collections.OrderedDict, (KT, VT)) +# 3.6 +else: + class OrderedDict(collections.OrderedDict, typing.MutableMapping[KT, VT], + metaclass=_ExtensionsGenericMeta, + extra=collections.OrderedDict): + + __slots__ = () + + def __new__(cls, *args, **kwds): + if cls._gorg is OrderedDict: + return collections.OrderedDict(*args, **kwds) + return typing._generic_new(collections.OrderedDict, cls, *args, **kwds) + +# 3.6.2+ +if hasattr(typing, 'Counter'): + Counter = typing.Counter +# 3.6.0-3.6.1 +else: + class Counter(collections.Counter, + typing.Dict[T, int], + metaclass=_ExtensionsGenericMeta, extra=collections.Counter): + + __slots__ = () + + def __new__(cls, *args, **kwds): + if cls._gorg is Counter: + return collections.Counter(*args, **kwds) + return typing._generic_new(collections.Counter, cls, *args, **kwds) + +# 3.6.1+ +if hasattr(typing, 'ChainMap'): + ChainMap = typing.ChainMap +elif hasattr(collections, 'ChainMap'): + class ChainMap(collections.ChainMap, typing.MutableMapping[KT, VT], + metaclass=_ExtensionsGenericMeta, + extra=collections.ChainMap): + + __slots__ = () + + def __new__(cls, *args, **kwds): + if cls._gorg is ChainMap: + return collections.ChainMap(*args, **kwds) + return typing._generic_new(collections.ChainMap, cls, *args, **kwds) + +# 3.6.1+ +if hasattr(typing, 'AsyncGenerator'): + AsyncGenerator = typing.AsyncGenerator +# 3.6.0 +else: + class AsyncGenerator(AsyncIterator[T_co], typing.Generic[T_co, T_contra], + metaclass=_ExtensionsGenericMeta, + extra=collections.abc.AsyncGenerator): + __slots__ = () + +NewType = typing.NewType +Text = typing.Text +TYPE_CHECKING = typing.TYPE_CHECKING + + +def _gorg(cls): + """This function exists for compatibility with old typing versions.""" + assert isinstance(cls, GenericMeta) + if hasattr(cls, '_gorg'): + return cls._gorg + while cls.__origin__ is not None: + cls = cls.__origin__ + return cls + + +_PROTO_WHITELIST = ['Callable', 'Awaitable', + 'Iterable', 'Iterator', 'AsyncIterable', 'AsyncIterator', + 'Hashable', 'Sized', 'Container', 'Collection', 'Reversible', + 'ContextManager', 'AsyncContextManager'] + + +def _get_protocol_attrs(cls): + attrs = set() + for base in cls.__mro__[:-1]: # without object + if base.__name__ in ('Protocol', 'Generic'): + continue + annotations = getattr(base, '__annotations__', {}) + for attr in list(base.__dict__.keys()) + list(annotations.keys()): + if (not attr.startswith('_abc_') and attr not in ( + '__abstractmethods__', '__annotations__', '__weakref__', + '_is_protocol', '_is_runtime_protocol', '__dict__', + '__args__', '__slots__', + '__next_in_mro__', '__parameters__', '__origin__', + '__orig_bases__', '__extra__', '__tree_hash__', + '__doc__', '__subclasshook__', '__init__', '__new__', + '__module__', '_MutableMapping__marker', '_gorg')): + attrs.add(attr) + return attrs + + +def _is_callable_members_only(cls): + return all(callable(getattr(cls, attr, None)) for attr in _get_protocol_attrs(cls)) + + +# 3.8+ +if hasattr(typing, 'Protocol'): + Protocol = typing.Protocol +# 3.7 +elif PEP_560: + from typing import _collect_type_vars # noqa + + def _no_init(self, *args, **kwargs): + if type(self)._is_protocol: + raise TypeError('Protocols cannot be instantiated') + + class _ProtocolMeta(abc.ABCMeta): + # This metaclass is a bit unfortunate and exists only because of the lack + # of __instancehook__. + def __instancecheck__(cls, instance): + # We need this method for situations where attributes are + # assigned in __init__. + if ((not getattr(cls, '_is_protocol', False) or + _is_callable_members_only(cls)) and + issubclass(instance.__class__, cls)): + return True + if cls._is_protocol: + if all(hasattr(instance, attr) and + (not callable(getattr(cls, attr, None)) or + getattr(instance, attr) is not None) + for attr in _get_protocol_attrs(cls)): + return True + return super().__instancecheck__(instance) + + class Protocol(metaclass=_ProtocolMeta): + # There is quite a lot of overlapping code with typing.Generic. + # Unfortunately it is hard to avoid this while these live in two different + # modules. The duplicated code will be removed when Protocol is moved to typing. + """Base class for protocol classes. Protocol classes are defined as:: + + class Proto(Protocol): + def meth(self) -> int: + ... + + Such classes are primarily used with static type checkers that recognize + structural subtyping (static duck-typing), for example:: + + class C: + def meth(self) -> int: + return 0 + + def func(x: Proto) -> int: + return x.meth() + + func(C()) # Passes static type check + + See PEP 544 for details. Protocol classes decorated with + @typing_extensions.runtime act as simple-minded runtime protocol that checks + only the presence of given attributes, ignoring their type signatures. + + Protocol classes can be generic, they are defined as:: + + class GenProto(Protocol[T]): + def meth(self) -> T: + ... + """ + __slots__ = () + _is_protocol = True + + def __new__(cls, *args, **kwds): + if cls is Protocol: + raise TypeError("Type Protocol cannot be instantiated; " + "it can only be used as a base class") + return super().__new__(cls) + + @typing._tp_cache + def __class_getitem__(cls, params): + if not isinstance(params, tuple): + params = (params,) + if not params and cls is not typing.Tuple: + raise TypeError( + f"Parameter list to {cls.__qualname__}[...] cannot be empty") + msg = "Parameters to generic types must be types." + params = tuple(typing._type_check(p, msg) for p in params) # noqa + if cls is Protocol: + # Generic can only be subscripted with unique type variables. + if not all(isinstance(p, typing.TypeVar) for p in params): + i = 0 + while isinstance(params[i], typing.TypeVar): + i += 1 + raise TypeError( + "Parameters to Protocol[...] must all be type variables." + f" Parameter {i + 1} is {params[i]}") + if len(set(params)) != len(params): + raise TypeError( + "Parameters to Protocol[...] must all be unique") + else: + # Subscripting a regular Generic subclass. + _check_generic(cls, params) + return typing._GenericAlias(cls, params) + + def __init_subclass__(cls, *args, **kwargs): + tvars = [] + if '__orig_bases__' in cls.__dict__: + error = typing.Generic in cls.__orig_bases__ + else: + error = typing.Generic in cls.__bases__ + if error: + raise TypeError("Cannot inherit from plain Generic") + if '__orig_bases__' in cls.__dict__: + tvars = _collect_type_vars(cls.__orig_bases__) + # Look for Generic[T1, ..., Tn] or Protocol[T1, ..., Tn]. + # If found, tvars must be a subset of it. + # If not found, tvars is it. + # Also check for and reject plain Generic, + # and reject multiple Generic[...] and/or Protocol[...]. + gvars = None + for base in cls.__orig_bases__: + if (isinstance(base, typing._GenericAlias) and + base.__origin__ in (typing.Generic, Protocol)): + # for error messages + the_base = base.__origin__.__name__ + if gvars is not None: + raise TypeError( + "Cannot inherit from Generic[...]" + " and/or Protocol[...] multiple types.") + gvars = base.__parameters__ + if gvars is None: + gvars = tvars + else: + tvarset = set(tvars) + gvarset = set(gvars) + if not tvarset <= gvarset: + s_vars = ', '.join(str(t) for t in tvars if t not in gvarset) + s_args = ', '.join(str(g) for g in gvars) + raise TypeError(f"Some type variables ({s_vars}) are" + f" not listed in {the_base}[{s_args}]") + tvars = gvars + cls.__parameters__ = tuple(tvars) + + # Determine if this is a protocol or a concrete subclass. + if not cls.__dict__.get('_is_protocol', None): + cls._is_protocol = any(b is Protocol for b in cls.__bases__) + + # Set (or override) the protocol subclass hook. + def _proto_hook(other): + if not cls.__dict__.get('_is_protocol', None): + return NotImplemented + if not getattr(cls, '_is_runtime_protocol', False): + if sys._getframe(2).f_globals['__name__'] in ['abc', 'functools']: + return NotImplemented + raise TypeError("Instance and class checks can only be used with" + " @runtime protocols") + if not _is_callable_members_only(cls): + if sys._getframe(2).f_globals['__name__'] in ['abc', 'functools']: + return NotImplemented + raise TypeError("Protocols with non-method members" + " don't support issubclass()") + if not isinstance(other, type): + # Same error as for issubclass(1, int) + raise TypeError('issubclass() arg 1 must be a class') + for attr in _get_protocol_attrs(cls): + for base in other.__mro__: + if attr in base.__dict__: + if base.__dict__[attr] is None: + return NotImplemented + break + annotations = getattr(base, '__annotations__', {}) + if (isinstance(annotations, typing.Mapping) and + attr in annotations and + isinstance(other, _ProtocolMeta) and + other._is_protocol): + break + else: + return NotImplemented + return True + if '__subclasshook__' not in cls.__dict__: + cls.__subclasshook__ = _proto_hook + + # We have nothing more to do for non-protocols. + if not cls._is_protocol: + return + + # Check consistency of bases. + for base in cls.__bases__: + if not (base in (object, typing.Generic) or + base.__module__ == 'collections.abc' and + base.__name__ in _PROTO_WHITELIST or + isinstance(base, _ProtocolMeta) and base._is_protocol): + raise TypeError('Protocols can only inherit from other' + f' protocols, got {repr(base)}') + cls.__init__ = _no_init +# 3.6 +else: + from typing import _next_in_mro, _type_check # noqa + + def _no_init(self, *args, **kwargs): + if type(self)._is_protocol: + raise TypeError('Protocols cannot be instantiated') + + class _ProtocolMeta(GenericMeta): + """Internal metaclass for Protocol. + + This exists so Protocol classes can be generic without deriving + from Generic. + """ + def __new__(cls, name, bases, namespace, + tvars=None, args=None, origin=None, extra=None, orig_bases=None): + # This is just a version copied from GenericMeta.__new__ that + # includes "Protocol" special treatment. (Comments removed for brevity.) + assert extra is None # Protocols should not have extra + if tvars is not None: + assert origin is not None + assert all(isinstance(t, typing.TypeVar) for t in tvars), tvars + else: + tvars = _type_vars(bases) + gvars = None + for base in bases: + if base is typing.Generic: + raise TypeError("Cannot inherit from plain Generic") + if (isinstance(base, GenericMeta) and + base.__origin__ in (typing.Generic, Protocol)): + if gvars is not None: + raise TypeError( + "Cannot inherit from Generic[...] or" + " Protocol[...] multiple times.") + gvars = base.__parameters__ + if gvars is None: + gvars = tvars + else: + tvarset = set(tvars) + gvarset = set(gvars) + if not tvarset <= gvarset: + s_vars = ", ".join(str(t) for t in tvars if t not in gvarset) + s_args = ", ".join(str(g) for g in gvars) + cls_name = "Generic" if any(b.__origin__ is typing.Generic + for b in bases) else "Protocol" + raise TypeError(f"Some type variables ({s_vars}) are" + f" not listed in {cls_name}[{s_args}]") + tvars = gvars + + initial_bases = bases + if (extra is not None and type(extra) is abc.ABCMeta and + extra not in bases): + bases = (extra,) + bases + bases = tuple(_gorg(b) if isinstance(b, GenericMeta) else b + for b in bases) + if any(isinstance(b, GenericMeta) and b is not typing.Generic for b in bases): + bases = tuple(b for b in bases if b is not typing.Generic) + namespace.update({'__origin__': origin, '__extra__': extra}) + self = super(GenericMeta, cls).__new__(cls, name, bases, namespace, + _root=True) + super(GenericMeta, self).__setattr__('_gorg', + self if not origin else + _gorg(origin)) + self.__parameters__ = tvars + self.__args__ = tuple(... if a is typing._TypingEllipsis else + () if a is typing._TypingEmpty else + a for a in args) if args else None + self.__next_in_mro__ = _next_in_mro(self) + if orig_bases is None: + self.__orig_bases__ = initial_bases + elif origin is not None: + self._abc_registry = origin._abc_registry + self._abc_cache = origin._abc_cache + if hasattr(self, '_subs_tree'): + self.__tree_hash__ = (hash(self._subs_tree()) if origin else + super(GenericMeta, self).__hash__()) + return self + + def __init__(cls, *args, **kwargs): + super().__init__(*args, **kwargs) + if not cls.__dict__.get('_is_protocol', None): + cls._is_protocol = any(b is Protocol or + isinstance(b, _ProtocolMeta) and + b.__origin__ is Protocol + for b in cls.__bases__) + if cls._is_protocol: + for base in cls.__mro__[1:]: + if not (base in (object, typing.Generic) or + base.__module__ == 'collections.abc' and + base.__name__ in _PROTO_WHITELIST or + isinstance(base, typing.TypingMeta) and base._is_protocol or + isinstance(base, GenericMeta) and + base.__origin__ is typing.Generic): + raise TypeError(f'Protocols can only inherit from other' + f' protocols, got {repr(base)}') + + cls.__init__ = _no_init + + def _proto_hook(other): + if not cls.__dict__.get('_is_protocol', None): + return NotImplemented + if not isinstance(other, type): + # Same error as for issubclass(1, int) + raise TypeError('issubclass() arg 1 must be a class') + for attr in _get_protocol_attrs(cls): + for base in other.__mro__: + if attr in base.__dict__: + if base.__dict__[attr] is None: + return NotImplemented + break + annotations = getattr(base, '__annotations__', {}) + if (isinstance(annotations, typing.Mapping) and + attr in annotations and + isinstance(other, _ProtocolMeta) and + other._is_protocol): + break + else: + return NotImplemented + return True + if '__subclasshook__' not in cls.__dict__: + cls.__subclasshook__ = _proto_hook + + def __instancecheck__(self, instance): + # We need this method for situations where attributes are + # assigned in __init__. + if ((not getattr(self, '_is_protocol', False) or + _is_callable_members_only(self)) and + issubclass(instance.__class__, self)): + return True + if self._is_protocol: + if all(hasattr(instance, attr) and + (not callable(getattr(self, attr, None)) or + getattr(instance, attr) is not None) + for attr in _get_protocol_attrs(self)): + return True + return super(GenericMeta, self).__instancecheck__(instance) + + def __subclasscheck__(self, cls): + if self.__origin__ is not None: + if sys._getframe(1).f_globals['__name__'] not in ['abc', 'functools']: + raise TypeError("Parameterized generics cannot be used with class " + "or instance checks") + return False + if (self.__dict__.get('_is_protocol', None) and + not self.__dict__.get('_is_runtime_protocol', None)): + if sys._getframe(1).f_globals['__name__'] in ['abc', + 'functools', + 'typing']: + return False + raise TypeError("Instance and class checks can only be used with" + " @runtime protocols") + if (self.__dict__.get('_is_runtime_protocol', None) and + not _is_callable_members_only(self)): + if sys._getframe(1).f_globals['__name__'] in ['abc', + 'functools', + 'typing']: + return super(GenericMeta, self).__subclasscheck__(cls) + raise TypeError("Protocols with non-method members" + " don't support issubclass()") + return super(GenericMeta, self).__subclasscheck__(cls) + + @typing._tp_cache + def __getitem__(self, params): + # We also need to copy this from GenericMeta.__getitem__ to get + # special treatment of "Protocol". (Comments removed for brevity.) + if not isinstance(params, tuple): + params = (params,) + if not params and _gorg(self) is not typing.Tuple: + raise TypeError( + f"Parameter list to {self.__qualname__}[...] cannot be empty") + msg = "Parameters to generic types must be types." + params = tuple(_type_check(p, msg) for p in params) + if self in (typing.Generic, Protocol): + if not all(isinstance(p, typing.TypeVar) for p in params): + raise TypeError( + f"Parameters to {repr(self)}[...] must all be type variables") + if len(set(params)) != len(params): + raise TypeError( + f"Parameters to {repr(self)}[...] must all be unique") + tvars = params + args = params + elif self in (typing.Tuple, typing.Callable): + tvars = _type_vars(params) + args = params + elif self.__origin__ in (typing.Generic, Protocol): + raise TypeError(f"Cannot subscript already-subscripted {repr(self)}") + else: + _check_generic(self, params) + tvars = _type_vars(params) + args = params + + prepend = (self,) if self.__origin__ is None else () + return self.__class__(self.__name__, + prepend + self.__bases__, + _no_slots_copy(self.__dict__), + tvars=tvars, + args=args, + origin=self, + extra=self.__extra__, + orig_bases=self.__orig_bases__) + + class Protocol(metaclass=_ProtocolMeta): + """Base class for protocol classes. Protocol classes are defined as:: + + class Proto(Protocol): + def meth(self) -> int: + ... + + Such classes are primarily used with static type checkers that recognize + structural subtyping (static duck-typing), for example:: + + class C: + def meth(self) -> int: + return 0 + + def func(x: Proto) -> int: + return x.meth() + + func(C()) # Passes static type check + + See PEP 544 for details. Protocol classes decorated with + @typing_extensions.runtime act as simple-minded runtime protocol that checks + only the presence of given attributes, ignoring their type signatures. + + Protocol classes can be generic, they are defined as:: + + class GenProto(Protocol[T]): + def meth(self) -> T: + ... + """ + __slots__ = () + _is_protocol = True + + def __new__(cls, *args, **kwds): + if _gorg(cls) is Protocol: + raise TypeError("Type Protocol cannot be instantiated; " + "it can be used only as a base class") + return typing._generic_new(cls.__next_in_mro__, cls, *args, **kwds) + + +# 3.8+ +if hasattr(typing, 'runtime_checkable'): + runtime_checkable = typing.runtime_checkable +# 3.6-3.7 +else: + def runtime_checkable(cls): + """Mark a protocol class as a runtime protocol, so that it + can be used with isinstance() and issubclass(). Raise TypeError + if applied to a non-protocol class. + + This allows a simple-minded structural check very similar to the + one-offs in collections.abc such as Hashable. + """ + if not isinstance(cls, _ProtocolMeta) or not cls._is_protocol: + raise TypeError('@runtime_checkable can be only applied to protocol classes,' + f' got {cls!r}') + cls._is_runtime_protocol = True + return cls + + +# Exists for backwards compatibility. +runtime = runtime_checkable + + +# 3.8+ +if hasattr(typing, 'SupportsIndex'): + SupportsIndex = typing.SupportsIndex +# 3.6-3.7 +else: + @runtime_checkable + class SupportsIndex(Protocol): + __slots__ = () + + @abc.abstractmethod + def __index__(self) -> int: + pass + + +if sys.version_info >= (3, 9, 2): + # The standard library TypedDict in Python 3.8 does not store runtime information + # about which (if any) keys are optional. See https://bugs.python.org/issue38834 + # The standard library TypedDict in Python 3.9.0/1 does not honour the "total" + # keyword with old-style TypedDict(). See https://bugs.python.org/issue42059 + TypedDict = typing.TypedDict +else: + def _check_fails(cls, other): + try: + if sys._getframe(1).f_globals['__name__'] not in ['abc', + 'functools', + 'typing']: + # Typed dicts are only for static structural subtyping. + raise TypeError('TypedDict does not support instance and class checks') + except (AttributeError, ValueError): + pass + return False + + def _dict_new(*args, **kwargs): + if not args: + raise TypeError('TypedDict.__new__(): not enough arguments') + _, args = args[0], args[1:] # allow the "cls" keyword be passed + return dict(*args, **kwargs) + + _dict_new.__text_signature__ = '($cls, _typename, _fields=None, /, **kwargs)' + + def _typeddict_new(*args, total=True, **kwargs): + if not args: + raise TypeError('TypedDict.__new__(): not enough arguments') + _, args = args[0], args[1:] # allow the "cls" keyword be passed + if args: + typename, args = args[0], args[1:] # allow the "_typename" keyword be passed + elif '_typename' in kwargs: + typename = kwargs.pop('_typename') + import warnings + warnings.warn("Passing '_typename' as keyword argument is deprecated", + DeprecationWarning, stacklevel=2) + else: + raise TypeError("TypedDict.__new__() missing 1 required positional " + "argument: '_typename'") + if args: + try: + fields, = args # allow the "_fields" keyword be passed + except ValueError: + raise TypeError('TypedDict.__new__() takes from 2 to 3 ' + f'positional arguments but {len(args) + 2} ' + 'were given') + elif '_fields' in kwargs and len(kwargs) == 1: + fields = kwargs.pop('_fields') + import warnings + warnings.warn("Passing '_fields' as keyword argument is deprecated", + DeprecationWarning, stacklevel=2) + else: + fields = None + + if fields is None: + fields = kwargs + elif kwargs: + raise TypeError("TypedDict takes either a dict or keyword arguments," + " but not both") + + ns = {'__annotations__': dict(fields)} + try: + # Setting correct module is necessary to make typed dict classes pickleable. + ns['__module__'] = sys._getframe(1).f_globals.get('__name__', '__main__') + except (AttributeError, ValueError): + pass + + return _TypedDictMeta(typename, (), ns, total=total) + + _typeddict_new.__text_signature__ = ('($cls, _typename, _fields=None,' + ' /, *, total=True, **kwargs)') + + class _TypedDictMeta(type): + def __init__(cls, name, bases, ns, total=True): + super().__init__(name, bases, ns) + + def __new__(cls, name, bases, ns, total=True): + # Create new typed dict class object. + # This method is called directly when TypedDict is subclassed, + # or via _typeddict_new when TypedDict is instantiated. This way + # TypedDict supports all three syntaxes described in its docstring. + # Subclasses and instances of TypedDict return actual dictionaries + # via _dict_new. + ns['__new__'] = _typeddict_new if name == 'TypedDict' else _dict_new + tp_dict = super().__new__(cls, name, (dict,), ns) + + annotations = {} + own_annotations = ns.get('__annotations__', {}) + own_annotation_keys = set(own_annotations.keys()) + msg = "TypedDict('Name', {f0: t0, f1: t1, ...}); each t must be a type" + own_annotations = { + n: typing._type_check(tp, msg) for n, tp in own_annotations.items() + } + required_keys = set() + optional_keys = set() + + for base in bases: + annotations.update(base.__dict__.get('__annotations__', {})) + required_keys.update(base.__dict__.get('__required_keys__', ())) + optional_keys.update(base.__dict__.get('__optional_keys__', ())) + + annotations.update(own_annotations) + if total: + required_keys.update(own_annotation_keys) + else: + optional_keys.update(own_annotation_keys) + + tp_dict.__annotations__ = annotations + tp_dict.__required_keys__ = frozenset(required_keys) + tp_dict.__optional_keys__ = frozenset(optional_keys) + if not hasattr(tp_dict, '__total__'): + tp_dict.__total__ = total + return tp_dict + + __instancecheck__ = __subclasscheck__ = _check_fails + + TypedDict = _TypedDictMeta('TypedDict', (dict,), {}) + TypedDict.__module__ = __name__ + TypedDict.__doc__ = \ + """A simple typed name space. At runtime it is equivalent to a plain dict. + + TypedDict creates a dictionary type that expects all of its + instances to have a certain set of keys, with each key + associated with a value of a consistent type. This expectation + is not checked at runtime but is only enforced by type checkers. + Usage:: + + class Point2D(TypedDict): + x: int + y: int + label: str + + a: Point2D = {'x': 1, 'y': 2, 'label': 'good'} # OK + b: Point2D = {'z': 3, 'label': 'bad'} # Fails type check + + assert Point2D(x=1, y=2, label='first') == dict(x=1, y=2, label='first') + + The type info can be accessed via the Point2D.__annotations__ dict, and + the Point2D.__required_keys__ and Point2D.__optional_keys__ frozensets. + TypedDict supports two additional equivalent forms:: + + Point2D = TypedDict('Point2D', x=int, y=int, label=str) + Point2D = TypedDict('Point2D', {'x': int, 'y': int, 'label': str}) + + The class syntax is only supported in Python 3.6+, while two other + syntax forms work for Python 2.7 and 3.2+ + """ + + +# Python 3.9+ has PEP 593 (Annotated and modified get_type_hints) +if hasattr(typing, 'Annotated'): + Annotated = typing.Annotated + get_type_hints = typing.get_type_hints + # Not exported and not a public API, but needed for get_origin() and get_args() + # to work. + _AnnotatedAlias = typing._AnnotatedAlias +# 3.7-3.8 +elif PEP_560: + class _AnnotatedAlias(typing._GenericAlias, _root=True): + """Runtime representation of an annotated type. + + At its core 'Annotated[t, dec1, dec2, ...]' is an alias for the type 't' + with extra annotations. The alias behaves like a normal typing alias, + instantiating is the same as instantiating the underlying type, binding + it to types is also the same. + """ + def __init__(self, origin, metadata): + if isinstance(origin, _AnnotatedAlias): + metadata = origin.__metadata__ + metadata + origin = origin.__origin__ + super().__init__(origin, origin) + self.__metadata__ = metadata + + def copy_with(self, params): + assert len(params) == 1 + new_type = params[0] + return _AnnotatedAlias(new_type, self.__metadata__) + + def __repr__(self): + return (f"typing_extensions.Annotated[{typing._type_repr(self.__origin__)}, " + f"{', '.join(repr(a) for a in self.__metadata__)}]") + + def __reduce__(self): + return operator.getitem, ( + Annotated, (self.__origin__,) + self.__metadata__ + ) + + def __eq__(self, other): + if not isinstance(other, _AnnotatedAlias): + return NotImplemented + if self.__origin__ != other.__origin__: + return False + return self.__metadata__ == other.__metadata__ + + def __hash__(self): + return hash((self.__origin__, self.__metadata__)) + + class Annotated: + """Add context specific metadata to a type. + + Example: Annotated[int, runtime_check.Unsigned] indicates to the + hypothetical runtime_check module that this type is an unsigned int. + Every other consumer of this type can ignore this metadata and treat + this type as int. + + The first argument to Annotated must be a valid type (and will be in + the __origin__ field), the remaining arguments are kept as a tuple in + the __extra__ field. + + Details: + + - It's an error to call `Annotated` with less than two arguments. + - Nested Annotated are flattened:: + + Annotated[Annotated[T, Ann1, Ann2], Ann3] == Annotated[T, Ann1, Ann2, Ann3] + + - Instantiating an annotated type is equivalent to instantiating the + underlying type:: + + Annotated[C, Ann1](5) == C(5) + + - Annotated can be used as a generic type alias:: + + Optimized = Annotated[T, runtime.Optimize()] + Optimized[int] == Annotated[int, runtime.Optimize()] + + OptimizedList = Annotated[List[T], runtime.Optimize()] + OptimizedList[int] == Annotated[List[int], runtime.Optimize()] + """ + + __slots__ = () + + def __new__(cls, *args, **kwargs): + raise TypeError("Type Annotated cannot be instantiated.") + + @typing._tp_cache + def __class_getitem__(cls, params): + if not isinstance(params, tuple) or len(params) < 2: + raise TypeError("Annotated[...] should be used " + "with at least two arguments (a type and an " + "annotation).") + msg = "Annotated[t, ...]: t must be a type." + origin = typing._type_check(params[0], msg) + metadata = tuple(params[1:]) + return _AnnotatedAlias(origin, metadata) + + def __init_subclass__(cls, *args, **kwargs): + raise TypeError( + f"Cannot subclass {cls.__module__}.Annotated" + ) + + def _strip_annotations(t): + """Strips the annotations from a given type. + """ + if isinstance(t, _AnnotatedAlias): + return _strip_annotations(t.__origin__) + if isinstance(t, typing._GenericAlias): + stripped_args = tuple(_strip_annotations(a) for a in t.__args__) + if stripped_args == t.__args__: + return t + res = t.copy_with(stripped_args) + res._special = t._special + return res + return t + + def get_type_hints(obj, globalns=None, localns=None, include_extras=False): + """Return type hints for an object. + + This is often the same as obj.__annotations__, but it handles + forward references encoded as string literals, adds Optional[t] if a + default value equal to None is set and recursively replaces all + 'Annotated[T, ...]' with 'T' (unless 'include_extras=True'). + + The argument may be a module, class, method, or function. The annotations + are returned as a dictionary. For classes, annotations include also + inherited members. + + TypeError is raised if the argument is not of a type that can contain + annotations, and an empty dictionary is returned if no annotations are + present. + + BEWARE -- the behavior of globalns and localns is counterintuitive + (unless you are familiar with how eval() and exec() work). The + search order is locals first, then globals. + + - If no dict arguments are passed, an attempt is made to use the + globals from obj (or the respective module's globals for classes), + and these are also used as the locals. If the object does not appear + to have globals, an empty dictionary is used. + + - If one dict argument is passed, it is used for both globals and + locals. + + - If two dict arguments are passed, they specify globals and + locals, respectively. + """ + hint = typing.get_type_hints(obj, globalns=globalns, localns=localns) + if include_extras: + return hint + return {k: _strip_annotations(t) for k, t in hint.items()} +# 3.6 +else: + + def _is_dunder(name): + """Returns True if name is a __dunder_variable_name__.""" + return len(name) > 4 and name.startswith('__') and name.endswith('__') + + # Prior to Python 3.7 types did not have `copy_with`. A lot of the equality + # checks, argument expansion etc. are done on the _subs_tre. As a result we + # can't provide a get_type_hints function that strips out annotations. + + class AnnotatedMeta(typing.GenericMeta): + """Metaclass for Annotated""" + + def __new__(cls, name, bases, namespace, **kwargs): + if any(b is not object for b in bases): + raise TypeError("Cannot subclass " + str(Annotated)) + return super().__new__(cls, name, bases, namespace, **kwargs) + + @property + def __metadata__(self): + return self._subs_tree()[2] + + def _tree_repr(self, tree): + cls, origin, metadata = tree + if not isinstance(origin, tuple): + tp_repr = typing._type_repr(origin) + else: + tp_repr = origin[0]._tree_repr(origin) + metadata_reprs = ", ".join(repr(arg) for arg in metadata) + return f'{cls}[{tp_repr}, {metadata_reprs}]' + + def _subs_tree(self, tvars=None, args=None): # noqa + if self is Annotated: + return Annotated + res = super()._subs_tree(tvars=tvars, args=args) + # Flatten nested Annotated + if isinstance(res[1], tuple) and res[1][0] is Annotated: + sub_tp = res[1][1] + sub_annot = res[1][2] + return (Annotated, sub_tp, sub_annot + res[2]) + return res + + def _get_cons(self): + """Return the class used to create instance of this type.""" + if self.__origin__ is None: + raise TypeError("Cannot get the underlying type of a " + "non-specialized Annotated type.") + tree = self._subs_tree() + while isinstance(tree, tuple) and tree[0] is Annotated: + tree = tree[1] + if isinstance(tree, tuple): + return tree[0] + else: + return tree + + @typing._tp_cache + def __getitem__(self, params): + if not isinstance(params, tuple): + params = (params,) + if self.__origin__ is not None: # specializing an instantiated type + return super().__getitem__(params) + elif not isinstance(params, tuple) or len(params) < 2: + raise TypeError("Annotated[...] should be instantiated " + "with at least two arguments (a type and an " + "annotation).") + else: + msg = "Annotated[t, ...]: t must be a type." + tp = typing._type_check(params[0], msg) + metadata = tuple(params[1:]) + return self.__class__( + self.__name__, + self.__bases__, + _no_slots_copy(self.__dict__), + tvars=_type_vars((tp,)), + # Metadata is a tuple so it won't be touched by _replace_args et al. + args=(tp, metadata), + origin=self, + ) + + def __call__(self, *args, **kwargs): + cons = self._get_cons() + result = cons(*args, **kwargs) + try: + result.__orig_class__ = self + except AttributeError: + pass + return result + + def __getattr__(self, attr): + # For simplicity we just don't relay all dunder names + if self.__origin__ is not None and not _is_dunder(attr): + return getattr(self._get_cons(), attr) + raise AttributeError(attr) + + def __setattr__(self, attr, value): + if _is_dunder(attr) or attr.startswith('_abc_'): + super().__setattr__(attr, value) + elif self.__origin__ is None: + raise AttributeError(attr) + else: + setattr(self._get_cons(), attr, value) + + def __instancecheck__(self, obj): + raise TypeError("Annotated cannot be used with isinstance().") + + def __subclasscheck__(self, cls): + raise TypeError("Annotated cannot be used with issubclass().") + + class Annotated(metaclass=AnnotatedMeta): + """Add context specific metadata to a type. + + Example: Annotated[int, runtime_check.Unsigned] indicates to the + hypothetical runtime_check module that this type is an unsigned int. + Every other consumer of this type can ignore this metadata and treat + this type as int. + + The first argument to Annotated must be a valid type, the remaining + arguments are kept as a tuple in the __metadata__ field. + + Details: + + - It's an error to call `Annotated` with less than two arguments. + - Nested Annotated are flattened:: + + Annotated[Annotated[T, Ann1, Ann2], Ann3] == Annotated[T, Ann1, Ann2, Ann3] + + - Instantiating an annotated type is equivalent to instantiating the + underlying type:: + + Annotated[C, Ann1](5) == C(5) + + - Annotated can be used as a generic type alias:: + + Optimized = Annotated[T, runtime.Optimize()] + Optimized[int] == Annotated[int, runtime.Optimize()] + + OptimizedList = Annotated[List[T], runtime.Optimize()] + OptimizedList[int] == Annotated[List[int], runtime.Optimize()] + """ + +# Python 3.8 has get_origin() and get_args() but those implementations aren't +# Annotated-aware, so we can't use those. Python 3.9's versions don't support +# ParamSpecArgs and ParamSpecKwargs, so only Python 3.10's versions will do. +if sys.version_info[:2] >= (3, 10): + get_origin = typing.get_origin + get_args = typing.get_args +# 3.7-3.9 +elif PEP_560: + try: + # 3.9+ + from typing import _BaseGenericAlias + except ImportError: + _BaseGenericAlias = typing._GenericAlias + try: + # 3.9+ + from typing import GenericAlias + except ImportError: + GenericAlias = typing._GenericAlias + + def get_origin(tp): + """Get the unsubscripted version of a type. + + This supports generic types, Callable, Tuple, Union, Literal, Final, ClassVar + and Annotated. Return None for unsupported types. Examples:: + + get_origin(Literal[42]) is Literal + get_origin(int) is None + get_origin(ClassVar[int]) is ClassVar + get_origin(Generic) is Generic + get_origin(Generic[T]) is Generic + get_origin(Union[T, int]) is Union + get_origin(List[Tuple[T, T]][int]) == list + get_origin(P.args) is P + """ + if isinstance(tp, _AnnotatedAlias): + return Annotated + if isinstance(tp, (typing._GenericAlias, GenericAlias, _BaseGenericAlias, + ParamSpecArgs, ParamSpecKwargs)): + return tp.__origin__ + if tp is typing.Generic: + return typing.Generic + return None + + def get_args(tp): + """Get type arguments with all substitutions performed. + + For unions, basic simplifications used by Union constructor are performed. + Examples:: + get_args(Dict[str, int]) == (str, int) + get_args(int) == () + get_args(Union[int, Union[T, int], str][int]) == (int, str) + get_args(Union[int, Tuple[T, int]][str]) == (int, Tuple[str, int]) + get_args(Callable[[], T][int]) == ([], int) + """ + if isinstance(tp, _AnnotatedAlias): + return (tp.__origin__,) + tp.__metadata__ + if isinstance(tp, (typing._GenericAlias, GenericAlias)): + if getattr(tp, "_special", False): + return () + res = tp.__args__ + if get_origin(tp) is collections.abc.Callable and res[0] is not Ellipsis: + res = (list(res[:-1]), res[-1]) + return res + return () + + +# 3.10+ +if hasattr(typing, 'TypeAlias'): + TypeAlias = typing.TypeAlias +# 3.9 +elif sys.version_info[:2] >= (3, 9): + class _TypeAliasForm(typing._SpecialForm, _root=True): + def __repr__(self): + return 'typing_extensions.' + self._name + + @_TypeAliasForm + def TypeAlias(self, parameters): + """Special marker indicating that an assignment should + be recognized as a proper type alias definition by type + checkers. + + For example:: + + Predicate: TypeAlias = Callable[..., bool] + + It's invalid when used anywhere except as in the example above. + """ + raise TypeError(f"{self} is not subscriptable") +# 3.7-3.8 +elif sys.version_info[:2] >= (3, 7): + class _TypeAliasForm(typing._SpecialForm, _root=True): + def __repr__(self): + return 'typing_extensions.' + self._name + + TypeAlias = _TypeAliasForm('TypeAlias', + doc="""Special marker indicating that an assignment should + be recognized as a proper type alias definition by type + checkers. + + For example:: + + Predicate: TypeAlias = Callable[..., bool] + + It's invalid when used anywhere except as in the example + above.""") +# 3.6 +else: + class _TypeAliasMeta(typing.TypingMeta): + """Metaclass for TypeAlias""" + + def __repr__(self): + return 'typing_extensions.TypeAlias' + + class _TypeAliasBase(typing._FinalTypingBase, metaclass=_TypeAliasMeta, _root=True): + """Special marker indicating that an assignment should + be recognized as a proper type alias definition by type + checkers. + + For example:: + + Predicate: TypeAlias = Callable[..., bool] + + It's invalid when used anywhere except as in the example above. + """ + __slots__ = () + + def __instancecheck__(self, obj): + raise TypeError("TypeAlias cannot be used with isinstance().") + + def __subclasscheck__(self, cls): + raise TypeError("TypeAlias cannot be used with issubclass().") + + def __repr__(self): + return 'typing_extensions.TypeAlias' + + TypeAlias = _TypeAliasBase(_root=True) + + +# Python 3.10+ has PEP 612 +if hasattr(typing, 'ParamSpecArgs'): + ParamSpecArgs = typing.ParamSpecArgs + ParamSpecKwargs = typing.ParamSpecKwargs +# 3.6-3.9 +else: + class _Immutable: + """Mixin to indicate that object should not be copied.""" + __slots__ = () + + def __copy__(self): + return self + + def __deepcopy__(self, memo): + return self + + class ParamSpecArgs(_Immutable): + """The args for a ParamSpec object. + + Given a ParamSpec object P, P.args is an instance of ParamSpecArgs. + + ParamSpecArgs objects have a reference back to their ParamSpec: + + P.args.__origin__ is P + + This type is meant for runtime introspection and has no special meaning to + static type checkers. + """ + def __init__(self, origin): + self.__origin__ = origin + + def __repr__(self): + return f"{self.__origin__.__name__}.args" + + class ParamSpecKwargs(_Immutable): + """The kwargs for a ParamSpec object. + + Given a ParamSpec object P, P.kwargs is an instance of ParamSpecKwargs. + + ParamSpecKwargs objects have a reference back to their ParamSpec: + + P.kwargs.__origin__ is P + + This type is meant for runtime introspection and has no special meaning to + static type checkers. + """ + def __init__(self, origin): + self.__origin__ = origin + + def __repr__(self): + return f"{self.__origin__.__name__}.kwargs" + +# 3.10+ +if hasattr(typing, 'ParamSpec'): + ParamSpec = typing.ParamSpec +# 3.6-3.9 +else: + + # Inherits from list as a workaround for Callable checks in Python < 3.9.2. + class ParamSpec(list): + """Parameter specification variable. + + Usage:: + + P = ParamSpec('P') + + Parameter specification variables exist primarily for the benefit of static + type checkers. They are used to forward the parameter types of one + callable to another callable, a pattern commonly found in higher order + functions and decorators. They are only valid when used in ``Concatenate``, + or s the first argument to ``Callable``. In Python 3.10 and higher, + they are also supported in user-defined Generics at runtime. + See class Generic for more information on generic types. An + example for annotating a decorator:: + + T = TypeVar('T') + P = ParamSpec('P') + + def add_logging(f: Callable[P, T]) -> Callable[P, T]: + '''A type-safe decorator to add logging to a function.''' + def inner(*args: P.args, **kwargs: P.kwargs) -> T: + logging.info(f'{f.__name__} was called') + return f(*args, **kwargs) + return inner + + @add_logging + def add_two(x: float, y: float) -> float: + '''Add two numbers together.''' + return x + y + + Parameter specification variables defined with covariant=True or + contravariant=True can be used to declare covariant or contravariant + generic types. These keyword arguments are valid, but their actual semantics + are yet to be decided. See PEP 612 for details. + + Parameter specification variables can be introspected. e.g.: + + P.__name__ == 'T' + P.__bound__ == None + P.__covariant__ == False + P.__contravariant__ == False + + Note that only parameter specification variables defined in global scope can + be pickled. + """ + + # Trick Generic __parameters__. + __class__ = typing.TypeVar + + @property + def args(self): + return ParamSpecArgs(self) + + @property + def kwargs(self): + return ParamSpecKwargs(self) + + def __init__(self, name, *, bound=None, covariant=False, contravariant=False): + super().__init__([self]) + self.__name__ = name + self.__covariant__ = bool(covariant) + self.__contravariant__ = bool(contravariant) + if bound: + self.__bound__ = typing._type_check(bound, 'Bound must be a type.') + else: + self.__bound__ = None + + # for pickling: + try: + def_mod = sys._getframe(1).f_globals.get('__name__', '__main__') + except (AttributeError, ValueError): + def_mod = None + if def_mod != 'typing_extensions': + self.__module__ = def_mod + + def __repr__(self): + if self.__covariant__: + prefix = '+' + elif self.__contravariant__: + prefix = '-' + else: + prefix = '~' + return prefix + self.__name__ + + def __hash__(self): + return object.__hash__(self) + + def __eq__(self, other): + return self is other + + def __reduce__(self): + return self.__name__ + + # Hack to get typing._type_check to pass. + def __call__(self, *args, **kwargs): + pass + + if not PEP_560: + # Only needed in 3.6. + def _get_type_vars(self, tvars): + if self not in tvars: + tvars.append(self) + + +# 3.6-3.9 +if not hasattr(typing, 'Concatenate'): + # Inherits from list as a workaround for Callable checks in Python < 3.9.2. + class _ConcatenateGenericAlias(list): + + # Trick Generic into looking into this for __parameters__. + if PEP_560: + __class__ = typing._GenericAlias + else: + __class__ = typing._TypingBase + + # Flag in 3.8. + _special = False + # Attribute in 3.6 and earlier. + _gorg = typing.Generic + + def __init__(self, origin, args): + super().__init__(args) + self.__origin__ = origin + self.__args__ = args + + def __repr__(self): + _type_repr = typing._type_repr + return (f'{_type_repr(self.__origin__)}' + f'[{", ".join(_type_repr(arg) for arg in self.__args__)}]') + + def __hash__(self): + return hash((self.__origin__, self.__args__)) + + # Hack to get typing._type_check to pass in Generic. + def __call__(self, *args, **kwargs): + pass + + @property + def __parameters__(self): + return tuple( + tp for tp in self.__args__ if isinstance(tp, (typing.TypeVar, ParamSpec)) + ) + + if not PEP_560: + # Only required in 3.6. + def _get_type_vars(self, tvars): + if self.__origin__ and self.__parameters__: + typing._get_type_vars(self.__parameters__, tvars) + + +# 3.6-3.9 +@typing._tp_cache +def _concatenate_getitem(self, parameters): + if parameters == (): + raise TypeError("Cannot take a Concatenate of no types.") + if not isinstance(parameters, tuple): + parameters = (parameters,) + if not isinstance(parameters[-1], ParamSpec): + raise TypeError("The last parameter to Concatenate should be a " + "ParamSpec variable.") + msg = "Concatenate[arg, ...]: each arg must be a type." + parameters = tuple(typing._type_check(p, msg) for p in parameters) + return _ConcatenateGenericAlias(self, parameters) + + +# 3.10+ +if hasattr(typing, 'Concatenate'): + Concatenate = typing.Concatenate + _ConcatenateGenericAlias = typing._ConcatenateGenericAlias # noqa +# 3.9 +elif sys.version_info[:2] >= (3, 9): + @_TypeAliasForm + def Concatenate(self, parameters): + """Used in conjunction with ``ParamSpec`` and ``Callable`` to represent a + higher order function which adds, removes or transforms parameters of a + callable. + + For example:: + + Callable[Concatenate[int, P], int] + + See PEP 612 for detailed information. + """ + return _concatenate_getitem(self, parameters) +# 3.7-8 +elif sys.version_info[:2] >= (3, 7): + class _ConcatenateForm(typing._SpecialForm, _root=True): + def __repr__(self): + return 'typing_extensions.' + self._name + + def __getitem__(self, parameters): + return _concatenate_getitem(self, parameters) + + Concatenate = _ConcatenateForm( + 'Concatenate', + doc="""Used in conjunction with ``ParamSpec`` and ``Callable`` to represent a + higher order function which adds, removes or transforms parameters of a + callable. + + For example:: + + Callable[Concatenate[int, P], int] + + See PEP 612 for detailed information. + """) +# 3.6 +else: + class _ConcatenateAliasMeta(typing.TypingMeta): + """Metaclass for Concatenate.""" + + def __repr__(self): + return 'typing_extensions.Concatenate' + + class _ConcatenateAliasBase(typing._FinalTypingBase, + metaclass=_ConcatenateAliasMeta, + _root=True): + """Used in conjunction with ``ParamSpec`` and ``Callable`` to represent a + higher order function which adds, removes or transforms parameters of a + callable. + + For example:: + + Callable[Concatenate[int, P], int] + + See PEP 612 for detailed information. + """ + __slots__ = () + + def __instancecheck__(self, obj): + raise TypeError("Concatenate cannot be used with isinstance().") + + def __subclasscheck__(self, cls): + raise TypeError("Concatenate cannot be used with issubclass().") + + def __repr__(self): + return 'typing_extensions.Concatenate' + + def __getitem__(self, parameters): + return _concatenate_getitem(self, parameters) + + Concatenate = _ConcatenateAliasBase(_root=True) + +# 3.10+ +if hasattr(typing, 'TypeGuard'): + TypeGuard = typing.TypeGuard +# 3.9 +elif sys.version_info[:2] >= (3, 9): + class _TypeGuardForm(typing._SpecialForm, _root=True): + def __repr__(self): + return 'typing_extensions.' + self._name + + @_TypeGuardForm + def TypeGuard(self, parameters): + """Special typing form used to annotate the return type of a user-defined + type guard function. ``TypeGuard`` only accepts a single type argument. + At runtime, functions marked this way should return a boolean. + + ``TypeGuard`` aims to benefit *type narrowing* -- a technique used by static + type checkers to determine a more precise type of an expression within a + program's code flow. Usually type narrowing is done by analyzing + conditional code flow and applying the narrowing to a block of code. The + conditional expression here is sometimes referred to as a "type guard". + + Sometimes it would be convenient to use a user-defined boolean function + as a type guard. Such a function should use ``TypeGuard[...]`` as its + return type to alert static type checkers to this intention. + + Using ``-> TypeGuard`` tells the static type checker that for a given + function: + + 1. The return value is a boolean. + 2. If the return value is ``True``, the type of its argument + is the type inside ``TypeGuard``. + + For example:: + + def is_str(val: Union[str, float]): + # "isinstance" type guard + if isinstance(val, str): + # Type of ``val`` is narrowed to ``str`` + ... + else: + # Else, type of ``val`` is narrowed to ``float``. + ... + + Strict type narrowing is not enforced -- ``TypeB`` need not be a narrower + form of ``TypeA`` (it can even be a wider form) and this may lead to + type-unsafe results. The main reason is to allow for things like + narrowing ``List[object]`` to ``List[str]`` even though the latter is not + a subtype of the former, since ``List`` is invariant. The responsibility of + writing type-safe type guards is left to the user. + + ``TypeGuard`` also works with type variables. For more information, see + PEP 647 (User-Defined Type Guards). + """ + item = typing._type_check(parameters, f'{self} accepts only single type.') + return typing._GenericAlias(self, (item,)) +# 3.7-3.8 +elif sys.version_info[:2] >= (3, 7): + class _TypeGuardForm(typing._SpecialForm, _root=True): + + def __repr__(self): + return 'typing_extensions.' + self._name + + def __getitem__(self, parameters): + item = typing._type_check(parameters, + f'{self._name} accepts only a single type') + return typing._GenericAlias(self, (item,)) + + TypeGuard = _TypeGuardForm( + 'TypeGuard', + doc="""Special typing form used to annotate the return type of a user-defined + type guard function. ``TypeGuard`` only accepts a single type argument. + At runtime, functions marked this way should return a boolean. + + ``TypeGuard`` aims to benefit *type narrowing* -- a technique used by static + type checkers to determine a more precise type of an expression within a + program's code flow. Usually type narrowing is done by analyzing + conditional code flow and applying the narrowing to a block of code. The + conditional expression here is sometimes referred to as a "type guard". + + Sometimes it would be convenient to use a user-defined boolean function + as a type guard. Such a function should use ``TypeGuard[...]`` as its + return type to alert static type checkers to this intention. + + Using ``-> TypeGuard`` tells the static type checker that for a given + function: + + 1. The return value is a boolean. + 2. If the return value is ``True``, the type of its argument + is the type inside ``TypeGuard``. + + For example:: + + def is_str(val: Union[str, float]): + # "isinstance" type guard + if isinstance(val, str): + # Type of ``val`` is narrowed to ``str`` + ... + else: + # Else, type of ``val`` is narrowed to ``float``. + ... + + Strict type narrowing is not enforced -- ``TypeB`` need not be a narrower + form of ``TypeA`` (it can even be a wider form) and this may lead to + type-unsafe results. The main reason is to allow for things like + narrowing ``List[object]`` to ``List[str]`` even though the latter is not + a subtype of the former, since ``List`` is invariant. The responsibility of + writing type-safe type guards is left to the user. + + ``TypeGuard`` also works with type variables. For more information, see + PEP 647 (User-Defined Type Guards). + """) +# 3.6 +else: + class _TypeGuard(typing._FinalTypingBase, _root=True): + """Special typing form used to annotate the return type of a user-defined + type guard function. ``TypeGuard`` only accepts a single type argument. + At runtime, functions marked this way should return a boolean. + + ``TypeGuard`` aims to benefit *type narrowing* -- a technique used by static + type checkers to determine a more precise type of an expression within a + program's code flow. Usually type narrowing is done by analyzing + conditional code flow and applying the narrowing to a block of code. The + conditional expression here is sometimes referred to as a "type guard". + + Sometimes it would be convenient to use a user-defined boolean function + as a type guard. Such a function should use ``TypeGuard[...]`` as its + return type to alert static type checkers to this intention. + + Using ``-> TypeGuard`` tells the static type checker that for a given + function: + + 1. The return value is a boolean. + 2. If the return value is ``True``, the type of its argument + is the type inside ``TypeGuard``. + + For example:: + + def is_str(val: Union[str, float]): + # "isinstance" type guard + if isinstance(val, str): + # Type of ``val`` is narrowed to ``str`` + ... + else: + # Else, type of ``val`` is narrowed to ``float``. + ... + + Strict type narrowing is not enforced -- ``TypeB`` need not be a narrower + form of ``TypeA`` (it can even be a wider form) and this may lead to + type-unsafe results. The main reason is to allow for things like + narrowing ``List[object]`` to ``List[str]`` even though the latter is not + a subtype of the former, since ``List`` is invariant. The responsibility of + writing type-safe type guards is left to the user. + + ``TypeGuard`` also works with type variables. For more information, see + PEP 647 (User-Defined Type Guards). + """ + + __slots__ = ('__type__',) + + def __init__(self, tp=None, **kwds): + self.__type__ = tp + + def __getitem__(self, item): + cls = type(self) + if self.__type__ is None: + return cls(typing._type_check(item, + f'{cls.__name__[1:]} accepts only a single type.'), + _root=True) + raise TypeError(f'{cls.__name__[1:]} cannot be further subscripted') + + def _eval_type(self, globalns, localns): + new_tp = typing._eval_type(self.__type__, globalns, localns) + if new_tp == self.__type__: + return self + return type(self)(new_tp, _root=True) + + def __repr__(self): + r = super().__repr__() + if self.__type__ is not None: + r += f'[{typing._type_repr(self.__type__)}]' + return r + + def __hash__(self): + return hash((type(self).__name__, self.__type__)) + + def __eq__(self, other): + if not isinstance(other, _TypeGuard): + return NotImplemented + if self.__type__ is not None: + return self.__type__ == other.__type__ + return self is other + + TypeGuard = _TypeGuard(_root=True) + +if hasattr(typing, "Self"): + Self = typing.Self +elif sys.version_info[:2] >= (3, 7): + # Vendored from cpython typing._SpecialFrom + class _SpecialForm(typing._Final, _root=True): + __slots__ = ('_name', '__doc__', '_getitem') + + def __init__(self, getitem): + self._getitem = getitem + self._name = getitem.__name__ + self.__doc__ = getitem.__doc__ + + def __getattr__(self, item): + if item in {'__name__', '__qualname__'}: + return self._name + + raise AttributeError(item) + + def __mro_entries__(self, bases): + raise TypeError(f"Cannot subclass {self!r}") + + def __repr__(self): + return f'typing_extensions.{self._name}' + + def __reduce__(self): + return self._name + + def __call__(self, *args, **kwds): + raise TypeError(f"Cannot instantiate {self!r}") + + def __or__(self, other): + return typing.Union[self, other] + + def __ror__(self, other): + return typing.Union[other, self] + + def __instancecheck__(self, obj): + raise TypeError(f"{self} cannot be used with isinstance()") + + def __subclasscheck__(self, cls): + raise TypeError(f"{self} cannot be used with issubclass()") + + @typing._tp_cache + def __getitem__(self, parameters): + return self._getitem(self, parameters) + + @_SpecialForm + def Self(self, params): + """Used to spell the type of "self" in classes. + + Example:: + + from typing import Self + + class ReturnsSelf: + def parse(self, data: bytes) -> Self: + ... + return self + + """ + + raise TypeError(f"{self} is not subscriptable") +else: + class _Self(typing._FinalTypingBase, _root=True): + """Used to spell the type of "self" in classes. + + Example:: + + from typing import Self + + class ReturnsSelf: + def parse(self, data: bytes) -> Self: + ... + return self + + """ + + __slots__ = () + + def __instancecheck__(self, obj): + raise TypeError(f"{self} cannot be used with isinstance().") + + def __subclasscheck__(self, cls): + raise TypeError(f"{self} cannot be used with issubclass().") + + Self = _Self(_root=True) + + +if hasattr(typing, 'Required'): + Required = typing.Required + NotRequired = typing.NotRequired +elif sys.version_info[:2] >= (3, 9): + class _ExtensionsSpecialForm(typing._SpecialForm, _root=True): + def __repr__(self): + return 'typing_extensions.' + self._name + + @_ExtensionsSpecialForm + def Required(self, parameters): + """A special typing construct to mark a key of a total=False TypedDict + as required. For example: + + class Movie(TypedDict, total=False): + title: Required[str] + year: int + + m = Movie( + title='The Matrix', # typechecker error if key is omitted + year=1999, + ) + + There is no runtime checking that a required key is actually provided + when instantiating a related TypedDict. + """ + item = typing._type_check(parameters, f'{self._name} accepts only single type') + return typing._GenericAlias(self, (item,)) + + @_ExtensionsSpecialForm + def NotRequired(self, parameters): + """A special typing construct to mark a key of a TypedDict as + potentially missing. For example: + + class Movie(TypedDict): + title: str + year: NotRequired[int] + + m = Movie( + title='The Matrix', # typechecker error if key is omitted + year=1999, + ) + """ + item = typing._type_check(parameters, f'{self._name} accepts only single type') + return typing._GenericAlias(self, (item,)) + +elif sys.version_info[:2] >= (3, 7): + class _RequiredForm(typing._SpecialForm, _root=True): + def __repr__(self): + return 'typing_extensions.' + self._name + + def __getitem__(self, parameters): + item = typing._type_check(parameters, + '{} accepts only single type'.format(self._name)) + return typing._GenericAlias(self, (item,)) + + Required = _RequiredForm( + 'Required', + doc="""A special typing construct to mark a key of a total=False TypedDict + as required. For example: + + class Movie(TypedDict, total=False): + title: Required[str] + year: int + + m = Movie( + title='The Matrix', # typechecker error if key is omitted + year=1999, + ) + + There is no runtime checking that a required key is actually provided + when instantiating a related TypedDict. + """) + NotRequired = _RequiredForm( + 'NotRequired', + doc="""A special typing construct to mark a key of a TypedDict as + potentially missing. For example: + + class Movie(TypedDict): + title: str + year: NotRequired[int] + + m = Movie( + title='The Matrix', # typechecker error if key is omitted + year=1999, + ) + """) +else: + # NOTE: Modeled after _Final's implementation when _FinalTypingBase available + class _MaybeRequired(typing._FinalTypingBase, _root=True): + __slots__ = ('__type__',) + + def __init__(self, tp=None, **kwds): + self.__type__ = tp + + def __getitem__(self, item): + cls = type(self) + if self.__type__ is None: + return cls(typing._type_check(item, + '{} accepts only single type.'.format(cls.__name__[1:])), + _root=True) + raise TypeError('{} cannot be further subscripted' + .format(cls.__name__[1:])) + + def _eval_type(self, globalns, localns): + new_tp = typing._eval_type(self.__type__, globalns, localns) + if new_tp == self.__type__: + return self + return type(self)(new_tp, _root=True) + + def __repr__(self): + r = super().__repr__() + if self.__type__ is not None: + r += '[{}]'.format(typing._type_repr(self.__type__)) + return r + + def __hash__(self): + return hash((type(self).__name__, self.__type__)) + + def __eq__(self, other): + if not isinstance(other, type(self)): + return NotImplemented + if self.__type__ is not None: + return self.__type__ == other.__type__ + return self is other + + class _Required(_MaybeRequired, _root=True): + """A special typing construct to mark a key of a total=False TypedDict + as required. For example: + + class Movie(TypedDict, total=False): + title: Required[str] + year: int + + m = Movie( + title='The Matrix', # typechecker error if key is omitted + year=1999, + ) + + There is no runtime checking that a required key is actually provided + when instantiating a related TypedDict. + """ + + class _NotRequired(_MaybeRequired, _root=True): + """A special typing construct to mark a key of a TypedDict as + potentially missing. For example: + + class Movie(TypedDict): + title: str + year: NotRequired[int] + + m = Movie( + title='The Matrix', # typechecker error if key is omitted + year=1999, + ) + """ + + Required = _Required(_root=True) + NotRequired = _NotRequired(_root=True) diff --git a/libs/common/setuptools/_vendor/zipp.py b/libs/common/setuptools/_vendor/zipp.py new file mode 100644 index 00000000..26b723c1 --- /dev/null +++ b/libs/common/setuptools/_vendor/zipp.py @@ -0,0 +1,329 @@ +import io +import posixpath +import zipfile +import itertools +import contextlib +import sys +import pathlib + +if sys.version_info < (3, 7): + from collections import OrderedDict +else: + OrderedDict = dict + + +__all__ = ['Path'] + + +def _parents(path): + """ + Given a path with elements separated by + posixpath.sep, generate all parents of that path. + + >>> list(_parents('b/d')) + ['b'] + >>> list(_parents('/b/d/')) + ['/b'] + >>> list(_parents('b/d/f/')) + ['b/d', 'b'] + >>> list(_parents('b')) + [] + >>> list(_parents('')) + [] + """ + return itertools.islice(_ancestry(path), 1, None) + + +def _ancestry(path): + """ + Given a path with elements separated by + posixpath.sep, generate all elements of that path + + >>> list(_ancestry('b/d')) + ['b/d', 'b'] + >>> list(_ancestry('/b/d/')) + ['/b/d', '/b'] + >>> list(_ancestry('b/d/f/')) + ['b/d/f', 'b/d', 'b'] + >>> list(_ancestry('b')) + ['b'] + >>> list(_ancestry('')) + [] + """ + path = path.rstrip(posixpath.sep) + while path and path != posixpath.sep: + yield path + path, tail = posixpath.split(path) + + +_dedupe = OrderedDict.fromkeys +"""Deduplicate an iterable in original order""" + + +def _difference(minuend, subtrahend): + """ + Return items in minuend not in subtrahend, retaining order + with O(1) lookup. + """ + return itertools.filterfalse(set(subtrahend).__contains__, minuend) + + +class CompleteDirs(zipfile.ZipFile): + """ + A ZipFile subclass that ensures that implied directories + are always included in the namelist. + """ + + @staticmethod + def _implied_dirs(names): + parents = itertools.chain.from_iterable(map(_parents, names)) + as_dirs = (p + posixpath.sep for p in parents) + return _dedupe(_difference(as_dirs, names)) + + def namelist(self): + names = super(CompleteDirs, self).namelist() + return names + list(self._implied_dirs(names)) + + def _name_set(self): + return set(self.namelist()) + + def resolve_dir(self, name): + """ + If the name represents a directory, return that name + as a directory (with the trailing slash). + """ + names = self._name_set() + dirname = name + '/' + dir_match = name not in names and dirname in names + return dirname if dir_match else name + + @classmethod + def make(cls, source): + """ + Given a source (filename or zipfile), return an + appropriate CompleteDirs subclass. + """ + if isinstance(source, CompleteDirs): + return source + + if not isinstance(source, zipfile.ZipFile): + return cls(_pathlib_compat(source)) + + # Only allow for FastLookup when supplied zipfile is read-only + if 'r' not in source.mode: + cls = CompleteDirs + + source.__class__ = cls + return source + + +class FastLookup(CompleteDirs): + """ + ZipFile subclass to ensure implicit + dirs exist and are resolved rapidly. + """ + + def namelist(self): + with contextlib.suppress(AttributeError): + return self.__names + self.__names = super(FastLookup, self).namelist() + return self.__names + + def _name_set(self): + with contextlib.suppress(AttributeError): + return self.__lookup + self.__lookup = super(FastLookup, self)._name_set() + return self.__lookup + + +def _pathlib_compat(path): + """ + For path-like objects, convert to a filename for compatibility + on Python 3.6.1 and earlier. + """ + try: + return path.__fspath__() + except AttributeError: + return str(path) + + +class Path: + """ + A pathlib-compatible interface for zip files. + + Consider a zip file with this structure:: + + . + ├── a.txt + └── b + ├── c.txt + └── d + └── e.txt + + >>> data = io.BytesIO() + >>> zf = zipfile.ZipFile(data, 'w') + >>> zf.writestr('a.txt', 'content of a') + >>> zf.writestr('b/c.txt', 'content of c') + >>> zf.writestr('b/d/e.txt', 'content of e') + >>> zf.filename = 'mem/abcde.zip' + + Path accepts the zipfile object itself or a filename + + >>> root = Path(zf) + + From there, several path operations are available. + + Directory iteration (including the zip file itself): + + >>> a, b = root.iterdir() + >>> a + Path('mem/abcde.zip', 'a.txt') + >>> b + Path('mem/abcde.zip', 'b/') + + name property: + + >>> b.name + 'b' + + join with divide operator: + + >>> c = b / 'c.txt' + >>> c + Path('mem/abcde.zip', 'b/c.txt') + >>> c.name + 'c.txt' + + Read text: + + >>> c.read_text() + 'content of c' + + existence: + + >>> c.exists() + True + >>> (b / 'missing.txt').exists() + False + + Coercion to string: + + >>> import os + >>> str(c).replace(os.sep, posixpath.sep) + 'mem/abcde.zip/b/c.txt' + + At the root, ``name``, ``filename``, and ``parent`` + resolve to the zipfile. Note these attributes are not + valid and will raise a ``ValueError`` if the zipfile + has no filename. + + >>> root.name + 'abcde.zip' + >>> str(root.filename).replace(os.sep, posixpath.sep) + 'mem/abcde.zip' + >>> str(root.parent) + 'mem' + """ + + __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})" + + def __init__(self, root, at=""): + """ + Construct a Path from a ZipFile or filename. + + Note: When the source is an existing ZipFile object, + its type (__class__) will be mutated to a + specialized type. If the caller wishes to retain the + original type, the caller should either create a + separate ZipFile object or pass a filename. + """ + self.root = FastLookup.make(root) + self.at = at + + def open(self, mode='r', *args, pwd=None, **kwargs): + """ + Open this entry as text or binary following the semantics + of ``pathlib.Path.open()`` by passing arguments through + to io.TextIOWrapper(). + """ + if self.is_dir(): + raise IsADirectoryError(self) + zip_mode = mode[0] + if not self.exists() and zip_mode == 'r': + raise FileNotFoundError(self) + stream = self.root.open(self.at, zip_mode, pwd=pwd) + if 'b' in mode: + if args or kwargs: + raise ValueError("encoding args invalid for binary operation") + return stream + return io.TextIOWrapper(stream, *args, **kwargs) + + @property + def name(self): + return pathlib.Path(self.at).name or self.filename.name + + @property + def suffix(self): + return pathlib.Path(self.at).suffix or self.filename.suffix + + @property + def suffixes(self): + return pathlib.Path(self.at).suffixes or self.filename.suffixes + + @property + def stem(self): + return pathlib.Path(self.at).stem or self.filename.stem + + @property + def filename(self): + return pathlib.Path(self.root.filename).joinpath(self.at) + + def read_text(self, *args, **kwargs): + with self.open('r', *args, **kwargs) as strm: + return strm.read() + + def read_bytes(self): + with self.open('rb') as strm: + return strm.read() + + def _is_child(self, path): + return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/") + + def _next(self, at): + return self.__class__(self.root, at) + + def is_dir(self): + return not self.at or self.at.endswith("/") + + def is_file(self): + return self.exists() and not self.is_dir() + + def exists(self): + return self.at in self.root._name_set() + + def iterdir(self): + if not self.is_dir(): + raise ValueError("Can't listdir a file") + subs = map(self._next, self.root.namelist()) + return filter(self._is_child, subs) + + def __str__(self): + return posixpath.join(self.root.filename, self.at) + + def __repr__(self): + return self.__repr.format(self=self) + + def joinpath(self, *other): + next = posixpath.join(self.at, *map(_pathlib_compat, other)) + return self._next(self.root.resolve_dir(next)) + + __truediv__ = joinpath + + @property + def parent(self): + if not self.at: + return self.filename.parent + parent_at = posixpath.dirname(self.at.rstrip('/')) + if parent_at: + parent_at += '/' + return self._next(parent_at) diff --git a/libs/common/setuptools/archive_util.py b/libs/common/setuptools/archive_util.py index 81436044..d8e10c13 100644 --- a/libs/common/setuptools/archive_util.py +++ b/libs/common/setuptools/archive_util.py @@ -8,7 +8,7 @@ import posixpath import contextlib from distutils.errors import DistutilsError -from pkg_resources import ensure_directory +from ._path import ensure_directory __all__ = [ "unpack_archive", "unpack_zipfile", "unpack_tarfile", "default_filter", @@ -25,7 +25,8 @@ def default_filter(src, dst): return dst -def unpack_archive(filename, extract_dir, progress_filter=default_filter, +def unpack_archive( + filename, extract_dir, progress_filter=default_filter, drivers=None): """Unpack `filename` to `extract_dir`, or raise ``UnrecognizedFormat`` @@ -99,29 +100,87 @@ def unpack_zipfile(filename, extract_dir, progress_filter=default_filter): raise UnrecognizedFormat("%s is not a zip file" % (filename,)) with zipfile.ZipFile(filename) as z: - for info in z.infolist(): - name = info.filename + _unpack_zipfile_obj(z, extract_dir, progress_filter) + +def _unpack_zipfile_obj(zipfile_obj, extract_dir, progress_filter=default_filter): + """Internal/private API used by other parts of setuptools. + Similar to ``unpack_zipfile``, but receives an already opened :obj:`zipfile.ZipFile` + object instead of a filename. + """ + for info in zipfile_obj.infolist(): + name = info.filename + + # don't extract absolute paths or ones with .. in them + if name.startswith('/') or '..' in name.split('/'): + continue + + target = os.path.join(extract_dir, *name.split('/')) + target = progress_filter(name, target) + if not target: + continue + if name.endswith('/'): + # directory + ensure_directory(target) + else: + # file + ensure_directory(target) + data = zipfile_obj.read(info.filename) + with open(target, 'wb') as f: + f.write(data) + unix_attributes = info.external_attr >> 16 + if unix_attributes: + os.chmod(target, unix_attributes) + + +def _resolve_tar_file_or_dir(tar_obj, tar_member_obj): + """Resolve any links and extract link targets as normal files.""" + while tar_member_obj is not None and ( + tar_member_obj.islnk() or tar_member_obj.issym()): + linkpath = tar_member_obj.linkname + if tar_member_obj.issym(): + base = posixpath.dirname(tar_member_obj.name) + linkpath = posixpath.join(base, linkpath) + linkpath = posixpath.normpath(linkpath) + tar_member_obj = tar_obj._getmember(linkpath) + + is_file_or_dir = ( + tar_member_obj is not None and + (tar_member_obj.isfile() or tar_member_obj.isdir()) + ) + if is_file_or_dir: + return tar_member_obj + + raise LookupError('Got unknown file type') + + +def _iter_open_tar(tar_obj, extract_dir, progress_filter): + """Emit member-destination pairs from a tar archive.""" + # don't do any chowning! + tar_obj.chown = lambda *args: None + + with contextlib.closing(tar_obj): + for member in tar_obj: + name = member.name # don't extract absolute paths or ones with .. in them if name.startswith('/') or '..' in name.split('/'): continue - target = os.path.join(extract_dir, *name.split('/')) - target = progress_filter(name, target) - if not target: + prelim_dst = os.path.join(extract_dir, *name.split('/')) + + try: + member = _resolve_tar_file_or_dir(tar_obj, member) + except LookupError: continue - if name.endswith('/'): - # directory - ensure_directory(target) - else: - # file - ensure_directory(target) - data = z.read(info.filename) - with open(target, 'wb') as f: - f.write(data) - unix_attributes = info.external_attr >> 16 - if unix_attributes: - os.chmod(target, unix_attributes) + + final_dst = progress_filter(name, prelim_dst) + if not final_dst: + continue + + if final_dst.endswith(os.sep): + final_dst = final_dst[:-1] + + yield member, final_dst def unpack_tarfile(filename, extract_dir, progress_filter=default_filter): @@ -133,41 +192,22 @@ def unpack_tarfile(filename, extract_dir, progress_filter=default_filter): """ try: tarobj = tarfile.open(filename) - except tarfile.TarError: + except tarfile.TarError as e: raise UnrecognizedFormat( "%s is not a compressed or uncompressed tar file" % (filename,) - ) - with contextlib.closing(tarobj): - # don't do any chowning! - tarobj.chown = lambda *args: None - for member in tarobj: - name = member.name - # don't extract absolute paths or ones with .. in them - if not name.startswith('/') and '..' not in name.split('/'): - prelim_dst = os.path.join(extract_dir, *name.split('/')) + ) from e - # resolve any links and to extract the link targets as normal - # files - while member is not None and (member.islnk() or member.issym()): - linkpath = member.linkname - if member.issym(): - base = posixpath.dirname(member.name) - linkpath = posixpath.join(base, linkpath) - linkpath = posixpath.normpath(linkpath) - member = tarobj._getmember(linkpath) + for member, final_dst in _iter_open_tar( + tarobj, extract_dir, progress_filter, + ): + try: + # XXX Ugh + tarobj._extract_member(member, final_dst) + except tarfile.ExtractError: + # chown/chmod/mkfifo/mknode/makedev failed + pass - if member is not None and (member.isfile() or member.isdir()): - final_dst = progress_filter(name, prelim_dst) - if final_dst: - if final_dst.endswith(os.sep): - final_dst = final_dst[:-1] - try: - # XXX Ugh - tarobj._extract_member(member, final_dst) - except tarfile.ExtractError: - # chown/chmod/mkfifo/mknode/makedev failed - pass - return True + return True extraction_drivers = unpack_directory, unpack_zipfile, unpack_tarfile diff --git a/libs/common/setuptools/build_meta.py b/libs/common/setuptools/build_meta.py index eb9e815e..1fb4c3b1 100644 --- a/libs/common/setuptools/build_meta.py +++ b/libs/common/setuptools/build_meta.py @@ -28,26 +28,40 @@ Again, this is not a formal definition! Just a "taste" of the module. import io import os +import shlex import sys import tokenize import shutil import contextlib +import tempfile +import warnings +from pathlib import Path +from typing import Dict, Iterator, List, Optional, Union import setuptools import distutils -from setuptools.py31compat import TemporaryDirectory +from . import errors +from ._path import same_path +from ._reqs import parse_strings +from ._deprecation_warning import SetuptoolsDeprecationWarning +from distutils.util import strtobool -from pkg_resources import parse_requirements -from pkg_resources.py31compat import makedirs __all__ = ['get_requires_for_build_sdist', 'get_requires_for_build_wheel', 'prepare_metadata_for_build_wheel', 'build_wheel', 'build_sdist', + 'get_requires_for_build_editable', + 'prepare_metadata_for_build_editable', + 'build_editable', '__legacy__', 'SetupRequirementsError'] +SETUPTOOLS_ENABLE_FEATURES = os.getenv("SETUPTOOLS_ENABLE_FEATURES", "").lower() +LEGACY_EDITABLE = "legacy-editable" in SETUPTOOLS_ENABLE_FEATURES.replace("_", "-") + + class SetupRequirementsError(BaseException): def __init__(self, specifiers): self.specifiers = specifiers @@ -55,7 +69,7 @@ class SetupRequirementsError(BaseException): class Distribution(setuptools.dist.Distribution): def fetch_build_eggs(self, specifiers): - specifier_list = list(map(str, parse_requirements(specifiers))) + specifier_list = list(parse_strings(specifiers)) raise SetupRequirementsError(specifier_list) @@ -75,17 +89,20 @@ class Distribution(setuptools.dist.Distribution): distutils.core.Distribution = orig -def _to_str(s): +@contextlib.contextmanager +def no_install_setup_requires(): + """Temporarily disable installing setup_requires + + Under PEP 517, the backend reports build dependencies to the frontend, + and the frontend is responsible for ensuring they're installed. + So setuptools (acting as a backend) should not try to install them. """ - Convert a filename to a string (on Python 2, explicitly - a byte string, not Unicode) as distutils checks for the - exact type str. - """ - if sys.version_info[0] == 2 and not isinstance(s, str): - # Assume it's Unicode, as that's what the PEP says - # should be provided. - return s.encode(sys.getfilesystemencoding()) - return s + orig = setuptools._install_setup_requires + setuptools._install_setup_requires = lambda attrs: None + try: + yield + finally: + setuptools._install_setup_requires = orig def _get_immediate_subdirectories(a_dir): @@ -98,7 +115,12 @@ def _file_with_extension(directory, extension): f for f in os.listdir(directory) if f.endswith(extension) ) - file, = matching + try: + file, = matching + except ValueError: + raise ValueError( + 'No distribution was found. Ensure that `setup.py` ' + 'is not empty and that it calls `setup()`.') return file @@ -110,18 +132,189 @@ def _open_setup_script(setup_script): return getattr(tokenize, 'open', open)(setup_script) -class _BuildMetaBackend(object): +@contextlib.contextmanager +def suppress_known_deprecation(): + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', 'setup.py install is deprecated') + yield - def _fix_config(self, config_settings): - config_settings = config_settings or {} - config_settings.setdefault('--global-option', []) - return config_settings +_ConfigSettings = Optional[Dict[str, Union[str, List[str], None]]] +""" +Currently the user can run:: + + pip install -e . --config-settings key=value + python -m build -C--key=value -C key=value + +- pip will pass both key and value as strings and overwriting repeated keys + (pypa/pip#11059). +- build will accumulate values associated with repeated keys in a list. + It will also accept keys with no associated value. + This means that an option passed by build can be ``str | list[str] | None``. +- PEP 517 specifies that ``config_settings`` is an optional dict. +""" + + +class _ConfigSettingsTranslator: + """Translate ``config_settings`` into distutils-style command arguments. + Only a limited number of options is currently supported. + """ + # See pypa/setuptools#1928 pypa/setuptools#2491 + + def _get_config(self, key: str, config_settings: _ConfigSettings) -> List[str]: + """ + Get the value of a specific key in ``config_settings`` as a list of strings. + + >>> fn = _ConfigSettingsTranslator()._get_config + >>> fn("--global-option", None) + [] + >>> fn("--global-option", {}) + [] + >>> fn("--global-option", {'--global-option': 'foo'}) + ['foo'] + >>> fn("--global-option", {'--global-option': ['foo']}) + ['foo'] + >>> fn("--global-option", {'--global-option': 'foo'}) + ['foo'] + >>> fn("--global-option", {'--global-option': 'foo bar'}) + ['foo', 'bar'] + """ + cfg = config_settings or {} + opts = cfg.get(key) or [] + return shlex.split(opts) if isinstance(opts, str) else opts + + def _valid_global_options(self): + """Global options accepted by setuptools (e.g. quiet or verbose).""" + options = (opt[:2] for opt in setuptools.dist.Distribution.global_options) + return {flag for long_and_short in options for flag in long_and_short if flag} + + def _global_args(self, config_settings: _ConfigSettings) -> Iterator[str]: + """ + Let the user specify ``verbose`` or ``quiet`` + escape hatch via + ``--global-option``. + Note: ``-v``, ``-vv``, ``-vvv`` have similar effects in setuptools, + so we just have to cover the basic scenario ``-v``. + + >>> fn = _ConfigSettingsTranslator()._global_args + >>> list(fn(None)) + [] + >>> list(fn({"verbose": "False"})) + ['-q'] + >>> list(fn({"verbose": "1"})) + ['-v'] + >>> list(fn({"--verbose": None})) + ['-v'] + >>> list(fn({"verbose": "true", "--global-option": "-q --no-user-cfg"})) + ['-v', '-q', '--no-user-cfg'] + >>> list(fn({"--quiet": None})) + ['-q'] + """ + cfg = config_settings or {} + falsey = {"false", "no", "0", "off"} + if "verbose" in cfg or "--verbose" in cfg: + level = str(cfg.get("verbose") or cfg.get("--verbose") or "1") + yield ("-q" if level.lower() in falsey else "-v") + if "quiet" in cfg or "--quiet" in cfg: + level = str(cfg.get("quiet") or cfg.get("--quiet") or "1") + yield ("-v" if level.lower() in falsey else "-q") + + valid = self._valid_global_options() + args = self._get_config("--global-option", config_settings) + yield from (arg for arg in args if arg.strip("-") in valid) + + def __dist_info_args(self, config_settings: _ConfigSettings) -> Iterator[str]: + """ + The ``dist_info`` command accepts ``tag-date`` and ``tag-build``. + + .. warning:: + We cannot use this yet as it requires the ``sdist`` and ``bdist_wheel`` + commands run in ``build_sdist`` and ``build_wheel`` to re-use the egg-info + directory created in ``prepare_metadata_for_build_wheel``. + + >>> fn = _ConfigSettingsTranslator()._ConfigSettingsTranslator__dist_info_args + >>> list(fn(None)) + [] + >>> list(fn({"tag-date": "False"})) + ['--no-date'] + >>> list(fn({"tag-date": None})) + ['--no-date'] + >>> list(fn({"tag-date": "true", "tag-build": ".a"})) + ['--tag-date', '--tag-build', '.a'] + """ + cfg = config_settings or {} + if "tag-date" in cfg: + val = strtobool(str(cfg["tag-date"] or "false")) + yield ("--tag-date" if val else "--no-date") + if "tag-build" in cfg: + yield from ["--tag-build", str(cfg["tag-build"])] + + def _editable_args(self, config_settings: _ConfigSettings) -> Iterator[str]: + """ + The ``editable_wheel`` command accepts ``editable-mode=strict``. + + >>> fn = _ConfigSettingsTranslator()._editable_args + >>> list(fn(None)) + [] + >>> list(fn({"editable-mode": "strict"})) + ['--mode', 'strict'] + """ + cfg = config_settings or {} + mode = cfg.get("editable-mode") or cfg.get("editable_mode") + if not mode: + return + yield from ["--mode", str(mode)] + + def _arbitrary_args(self, config_settings: _ConfigSettings) -> Iterator[str]: + """ + Users may expect to pass arbitrary lists of arguments to a command + via "--global-option" (example provided in PEP 517 of a "escape hatch"). + + >>> fn = _ConfigSettingsTranslator()._arbitrary_args + >>> list(fn(None)) + [] + >>> list(fn({})) + [] + >>> list(fn({'--build-option': 'foo'})) + ['foo'] + >>> list(fn({'--build-option': ['foo']})) + ['foo'] + >>> list(fn({'--build-option': 'foo'})) + ['foo'] + >>> list(fn({'--build-option': 'foo bar'})) + ['foo', 'bar'] + >>> warnings.simplefilter('error', SetuptoolsDeprecationWarning) + >>> list(fn({'--global-option': 'foo'})) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + SetuptoolsDeprecationWarning: ...arguments given via `--global-option`... + """ + args = self._get_config("--global-option", config_settings) + global_opts = self._valid_global_options() + bad_args = [] + + for arg in args: + if arg.strip("-") not in global_opts: + bad_args.append(arg) + yield arg + + yield from self._get_config("--build-option", config_settings) + + if bad_args: + msg = f""" + The arguments {bad_args!r} were given via `--global-option`. + Please use `--build-option` instead, + `--global-option` is reserved to flags like `--verbose` or `--quiet`. + """ + warnings.warn(msg, SetuptoolsDeprecationWarning) + + +class _BuildMetaBackend(_ConfigSettingsTranslator): def _get_build_requires(self, config_settings, requirements): - config_settings = self._fix_config(config_settings) - - sys.argv = sys.argv[:1] + ['egg_info'] + \ - config_settings["--global-option"] + sys.argv = [ + *sys.argv[:1], + *self._global_args(config_settings), + "egg_info", + *self._arbitrary_args(config_settings), + ] try: with Distribution.patch(): self.run_setup() @@ -139,61 +332,73 @@ class _BuildMetaBackend(object): with _open_setup_script(__file__) as f: code = f.read().replace(r'\r\n', r'\n') - exec(compile(code, __file__, 'exec'), locals()) + exec(code, locals()) def get_requires_for_build_wheel(self, config_settings=None): - config_settings = self._fix_config(config_settings) return self._get_build_requires(config_settings, requirements=['wheel']) def get_requires_for_build_sdist(self, config_settings=None): - config_settings = self._fix_config(config_settings) return self._get_build_requires(config_settings, requirements=[]) + def _bubble_up_info_directory(self, metadata_directory: str, suffix: str) -> str: + """ + PEP 517 requires that the .dist-info directory be placed in the + metadata_directory. To comply, we MUST copy the directory to the root. + + Returns the basename of the info directory, e.g. `proj-0.0.0.dist-info`. + """ + info_dir = self._find_info_directory(metadata_directory, suffix) + if not same_path(info_dir.parent, metadata_directory): + shutil.move(str(info_dir), metadata_directory) + # PEP 517 allow other files and dirs to exist in metadata_directory + return info_dir.name + + def _find_info_directory(self, metadata_directory: str, suffix: str) -> Path: + for parent, dirs, _ in os.walk(metadata_directory): + candidates = [f for f in dirs if f.endswith(suffix)] + + if len(candidates) != 0 or len(dirs) != 1: + assert len(candidates) == 1, f"Multiple {suffix} directories found" + return Path(parent, candidates[0]) + + msg = f"No {suffix} directory found in {metadata_directory}" + raise errors.InternalError(msg) + def prepare_metadata_for_build_wheel(self, metadata_directory, config_settings=None): - sys.argv = sys.argv[:1] + ['dist_info', '--egg-base', - _to_str(metadata_directory)] - self.run_setup() + sys.argv = [ + *sys.argv[:1], + *self._global_args(config_settings), + "dist_info", + "--output-dir", metadata_directory, + "--keep-egg-info", + ] + with no_install_setup_requires(): + self.run_setup() - dist_info_directory = metadata_directory - while True: - dist_infos = [f for f in os.listdir(dist_info_directory) - if f.endswith('.dist-info')] - - if (len(dist_infos) == 0 and - len(_get_immediate_subdirectories(dist_info_directory)) == 1): - - dist_info_directory = os.path.join( - dist_info_directory, os.listdir(dist_info_directory)[0]) - continue - - assert len(dist_infos) == 1 - break - - # PEP 517 requires that the .dist-info directory be placed in the - # metadata_directory. To comply, we MUST copy the directory to the root - if dist_info_directory != metadata_directory: - shutil.move( - os.path.join(dist_info_directory, dist_infos[0]), - metadata_directory) - shutil.rmtree(dist_info_directory, ignore_errors=True) - - return dist_infos[0] + self._bubble_up_info_directory(metadata_directory, ".egg-info") + return self._bubble_up_info_directory(metadata_directory, ".dist-info") def _build_with_temp_dir(self, setup_command, result_extension, result_directory, config_settings): - config_settings = self._fix_config(config_settings) result_directory = os.path.abspath(result_directory) # Build in a temporary directory, then copy to the target. - makedirs(result_directory, exist_ok=True) - with TemporaryDirectory(dir=result_directory) as tmp_dist_dir: - sys.argv = (sys.argv[:1] + setup_command + - ['--dist-dir', tmp_dist_dir] + - config_settings["--global-option"]) - self.run_setup() + os.makedirs(result_directory, exist_ok=True) + temp_opts = {"prefix": ".tmp-", "dir": result_directory} + with tempfile.TemporaryDirectory(**temp_opts) as tmp_dist_dir: + sys.argv = [ + *sys.argv[:1], + *self._global_args(config_settings), + *setup_command, + "--dist-dir", tmp_dist_dir, + *self._arbitrary_args(config_settings), + ] + with no_install_setup_requires(): + self.run_setup() - result_basename = _file_with_extension(tmp_dist_dir, result_extension) + result_basename = _file_with_extension( + tmp_dist_dir, result_extension) result_path = os.path.join(result_directory, result_basename) if os.path.exists(result_path): # os.rename will fail overwriting on non-Unix. @@ -202,24 +407,61 @@ class _BuildMetaBackend(object): return result_basename - def build_wheel(self, wheel_directory, config_settings=None, metadata_directory=None): - return self._build_with_temp_dir(['bdist_wheel'], '.whl', - wheel_directory, config_settings) + with suppress_known_deprecation(): + return self._build_with_temp_dir(['bdist_wheel'], '.whl', + wheel_directory, config_settings) def build_sdist(self, sdist_directory, config_settings=None): return self._build_with_temp_dir(['sdist', '--formats', 'gztar'], '.tar.gz', sdist_directory, config_settings) + def _get_dist_info_dir(self, metadata_directory: Optional[str]) -> Optional[str]: + if not metadata_directory: + return None + dist_info_candidates = list(Path(metadata_directory).glob("*.dist-info")) + assert len(dist_info_candidates) <= 1 + return str(dist_info_candidates[0]) if dist_info_candidates else None + + if not LEGACY_EDITABLE: + + # PEP660 hooks: + # build_editable + # get_requires_for_build_editable + # prepare_metadata_for_build_editable + def build_editable( + self, wheel_directory, config_settings=None, metadata_directory=None + ): + # XXX can or should we hide our editable_wheel command normally? + info_dir = self._get_dist_info_dir(metadata_directory) + opts = ["--dist-info-dir", info_dir] if info_dir else [] + cmd = ["editable_wheel", *opts, *self._editable_args(config_settings)] + with suppress_known_deprecation(): + return self._build_with_temp_dir( + cmd, ".whl", wheel_directory, config_settings + ) + + def get_requires_for_build_editable(self, config_settings=None): + return self.get_requires_for_build_wheel(config_settings) + + def prepare_metadata_for_build_editable(self, metadata_directory, + config_settings=None): + return self.prepare_metadata_for_build_wheel( + metadata_directory, config_settings + ) + class _BuildMetaLegacyBackend(_BuildMetaBackend): """Compatibility backend for setuptools - This is a version of setuptools.build_meta that endeavors to maintain backwards - compatibility with pre-PEP 517 modes of invocation. It exists as a temporary - bridge between the old packaging mechanism and the new packaging mechanism, + This is a version of setuptools.build_meta that endeavors + to maintain backwards + compatibility with pre-PEP 517 modes of invocation. It + exists as a temporary + bridge between the old packaging mechanism and the new + packaging mechanism, and will eventually be removed. """ def run_setup(self, setup_script='setup.py'): @@ -250,6 +492,7 @@ class _BuildMetaLegacyBackend(_BuildMetaBackend): sys.path[:] = sys_path sys.argv[0] = sys_argv_0 + # The primary backend _BACKEND = _BuildMetaBackend() @@ -259,6 +502,11 @@ prepare_metadata_for_build_wheel = _BACKEND.prepare_metadata_for_build_wheel build_wheel = _BACKEND.build_wheel build_sdist = _BACKEND.build_sdist +if not LEGACY_EDITABLE: + get_requires_for_build_editable = _BACKEND.get_requires_for_build_editable + prepare_metadata_for_build_editable = _BACKEND.prepare_metadata_for_build_editable + build_editable = _BACKEND.build_editable + # The legacy backend __legacy__ = _BuildMetaLegacyBackend() diff --git a/libs/common/setuptools/cli-arm64.exe b/libs/common/setuptools/cli-arm64.exe new file mode 100644 index 00000000..7a87ce48 Binary files /dev/null and b/libs/common/setuptools/cli-arm64.exe differ diff --git a/libs/common/setuptools/command/__init__.py b/libs/common/setuptools/command/__init__.py index 743f5588..5acd7687 100644 --- a/libs/common/setuptools/command/__init__.py +++ b/libs/common/setuptools/command/__init__.py @@ -1,17 +1,12 @@ -__all__ = [ - 'alias', 'bdist_egg', 'bdist_rpm', 'build_ext', 'build_py', 'develop', - 'easy_install', 'egg_info', 'install', 'install_lib', 'rotate', 'saveopts', - 'sdist', 'setopt', 'test', 'install_egg_info', 'install_scripts', - 'bdist_wininst', 'upload_docs', 'build_clib', 'dist_info', -] - from distutils.command.bdist import bdist import sys -from setuptools.command import install_scripts - if 'egg' not in bdist.format_commands: - bdist.format_command['egg'] = ('bdist_egg', "Python .egg file") - bdist.format_commands.append('egg') + try: + bdist.format_commands['egg'] = ('bdist_egg', "Python .egg file") + except TypeError: + # For backward compatibility with older distutils (stdlib) + bdist.format_command['egg'] = ('bdist_egg', "Python .egg file") + bdist.format_commands.append('egg') del bdist, sys diff --git a/libs/common/setuptools/command/alias.py b/libs/common/setuptools/command/alias.py index 4532b1cc..452a9244 100644 --- a/libs/common/setuptools/command/alias.py +++ b/libs/common/setuptools/command/alias.py @@ -1,7 +1,5 @@ from distutils.errors import DistutilsOptionError -from setuptools.extern.six.moves import map - from setuptools.command.setopt import edit_config, option_base, config_file diff --git a/libs/common/setuptools/command/bdist_egg.py b/libs/common/setuptools/command/bdist_egg.py index 98470f17..11a1c6be 100644 --- a/libs/common/setuptools/command/bdist_egg.py +++ b/libs/common/setuptools/command/bdist_egg.py @@ -2,7 +2,6 @@ Build .egg distributions""" -from distutils.errors import DistutilsSetupError from distutils.dir_util import remove_tree, mkpath from distutils import log from types import CodeType @@ -12,24 +11,16 @@ import re import textwrap import marshal -from setuptools.extern import six - -from pkg_resources import get_build_platform, Distribution, ensure_directory -from pkg_resources import EntryPoint +from pkg_resources import get_build_platform, Distribution from setuptools.extension import Library from setuptools import Command +from .._path import ensure_directory -try: - # Python 2.7 or >=3.2 - from sysconfig import get_path, get_python_version +from sysconfig import get_path, get_python_version - def _get_purelib(): - return get_path("purelib") -except ImportError: - from distutils.sysconfig import get_python_lib, get_python_version - def _get_purelib(): - return get_python_lib(False) +def _get_purelib(): + return get_path("purelib") def strip_module(filename): @@ -54,10 +45,12 @@ def write_stub(resource, pyfile): _stub_template = textwrap.dedent(""" def __bootstrap__(): global __bootstrap__, __loader__, __file__ - import sys, pkg_resources, imp + import sys, pkg_resources, importlib.util __file__ = pkg_resources.resource_filename(__name__, %r) __loader__ = None; del __bootstrap__, __loader__ - imp.load_dynamic(__name__,__file__) + spec = importlib.util.spec_from_file_location(__name__,__file__) + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) __bootstrap__() """).lstrip() with open(pyfile, 'w') as f: @@ -158,7 +151,7 @@ class bdist_egg(Command): self.run_command(cmdname) return cmd - def run(self): + def run(self): # noqa: C901 # is too complex (14) # FIXME # Generate metadata first self.run_command("egg_info") # We run install_lib before install_data, because some data hacks @@ -273,43 +266,7 @@ class bdist_egg(Command): return analyze_egg(self.bdist_dir, self.stubs) def gen_header(self): - epm = EntryPoint.parse_map(self.distribution.entry_points or '') - ep = epm.get('setuptools.installation', {}).get('eggsecutable') - if ep is None: - return 'w' # not an eggsecutable, do it the usual way. - - if not ep.attrs or ep.extras: - raise DistutilsSetupError( - "eggsecutable entry point (%r) cannot have 'extras' " - "or refer to a module" % (ep,) - ) - - pyver = '{}.{}'.format(*sys.version_info) - pkg = ep.module_name - full = '.'.join(ep.attrs) - base = ep.attrs[0] - basename = os.path.basename(self.egg_output) - - header = ( - "#!/bin/sh\n" - 'if [ `basename $0` = "%(basename)s" ]\n' - 'then exec python%(pyver)s -c "' - "import sys, os; sys.path.insert(0, os.path.abspath('$0')); " - "from %(pkg)s import %(base)s; sys.exit(%(full)s())" - '" "$@"\n' - 'else\n' - ' echo $0 is not the correct name for this egg file.\n' - ' echo Please rename it back to %(basename)s and try again.\n' - ' exec false\n' - 'fi\n' - ) % locals() - - if not self.dry_run: - mkpath(os.path.dirname(self.egg_output), dry_run=self.dry_run) - f = open(self.egg_output, 'w') - f.write(header) - f.close() - return 'a' + return 'w' def copy_metadata_to(self, target_dir): "Copy metadata (egg info) to the target_dir" @@ -411,9 +368,7 @@ def scan_module(egg_dir, base, name, stubs): return True # Extension module pkg = base[len(egg_dir) + 1:].replace(os.sep, '.') module = pkg + (pkg and '.' or '') + os.path.splitext(name)[0] - if six.PY2: - skip = 8 # skip magic & date - elif sys.version_info < (3, 7): + if sys.version_info < (3, 7): skip = 12 # skip magic & date & file size else: skip = 16 # skip magic & reserved? & date & file size @@ -444,7 +399,7 @@ def iter_symbols(code): for name in code.co_names: yield name for const in code.co_consts: - if isinstance(const, six.string_types): + if isinstance(const, str): yield const elif isinstance(const, CodeType): for name in iter_symbols(const): diff --git a/libs/common/setuptools/command/bdist_rpm.py b/libs/common/setuptools/command/bdist_rpm.py index 70730927..98bf5dea 100644 --- a/libs/common/setuptools/command/bdist_rpm.py +++ b/libs/common/setuptools/command/bdist_rpm.py @@ -1,4 +1,7 @@ import distutils.command.bdist_rpm as orig +import warnings + +from setuptools import SetuptoolsDeprecationWarning class bdist_rpm(orig.bdist_rpm): @@ -8,36 +11,30 @@ class bdist_rpm(orig.bdist_rpm): 1. Run egg_info to ensure the name and version are properly calculated. 2. Always run 'install' using --single-version-externally-managed to disable eggs in RPM distributions. - 3. Replace dash with underscore in the version numbers for better RPM - compatibility. """ def run(self): + warnings.warn( + "bdist_rpm is deprecated and will be removed in a future " + "version. Use bdist_wheel (wheel packages) instead.", + SetuptoolsDeprecationWarning, + ) + # ensure distro name is up-to-date self.run_command('egg_info') orig.bdist_rpm.run(self) def _make_spec_file(self): - version = self.distribution.get_version() - rpmversion = version.replace('-', '_') spec = orig.bdist_rpm._make_spec_file(self) - line23 = '%define version ' + version - line24 = '%define version ' + rpmversion spec = [ line.replace( - "Source0: %{name}-%{version}.tar", - "Source0: %{name}-%{unmangled_version}.tar" - ).replace( "setup.py install ", "setup.py install --single-version-externally-managed " ).replace( "%setup", "%setup -n %{name}-%{unmangled_version}" - ).replace(line23, line24) + ) for line in spec ] - insert_loc = spec.index(line24) + 1 - unmangled_version = "%define unmangled_version " + version - spec.insert(insert_loc, unmangled_version) return spec diff --git a/libs/common/setuptools/command/bdist_wininst.py b/libs/common/setuptools/command/bdist_wininst.py deleted file mode 100644 index 073de97b..00000000 --- a/libs/common/setuptools/command/bdist_wininst.py +++ /dev/null @@ -1,21 +0,0 @@ -import distutils.command.bdist_wininst as orig - - -class bdist_wininst(orig.bdist_wininst): - def reinitialize_command(self, command, reinit_subcommands=0): - """ - Supplement reinitialize_command to work around - http://bugs.python.org/issue20819 - """ - cmd = self.distribution.reinitialize_command( - command, reinit_subcommands) - if command in ('install', 'install_lib'): - cmd.install_lib = None - return cmd - - def run(self): - self._is_running = True - try: - orig.bdist_wininst.run(self) - finally: - self._is_running = False diff --git a/libs/common/setuptools/command/build.py b/libs/common/setuptools/command/build.py new file mode 100644 index 00000000..fa3c99ef --- /dev/null +++ b/libs/common/setuptools/command/build.py @@ -0,0 +1,146 @@ +import sys +import warnings +from typing import TYPE_CHECKING, List, Dict +from distutils.command.build import build as _build + +from setuptools import SetuptoolsDeprecationWarning + +if sys.version_info >= (3, 8): + from typing import Protocol +elif TYPE_CHECKING: + from typing_extensions import Protocol +else: + from abc import ABC as Protocol + + +_ORIGINAL_SUBCOMMANDS = {"build_py", "build_clib", "build_ext", "build_scripts"} + + +class build(_build): + # copy to avoid sharing the object with parent class + sub_commands = _build.sub_commands[:] + + def get_sub_commands(self): + subcommands = {cmd[0] for cmd in _build.sub_commands} + if subcommands - _ORIGINAL_SUBCOMMANDS: + msg = """ + It seems that you are using `distutils.command.build` to add + new subcommands. Using `distutils` directly is considered deprecated, + please use `setuptools.command.build`. + """ + warnings.warn(msg, SetuptoolsDeprecationWarning) + self.sub_commands = _build.sub_commands + return super().get_sub_commands() + + +class SubCommand(Protocol): + """In order to support editable installations (see :pep:`660`) all + build subcommands **SHOULD** implement this protocol. They also **MUST** inherit + from ``setuptools.Command``. + + When creating an :pep:`editable wheel <660>`, ``setuptools`` will try to evaluate + custom ``build`` subcommands using the following procedure: + + 1. ``setuptools`` will set the ``editable_mode`` attribute to ``True`` + 2. ``setuptools`` will execute the ``run()`` command. + + .. important:: + Subcommands **SHOULD** take advantage of ``editable_mode=True`` to adequate + its behaviour or perform optimisations. + + For example, if a subcommand doesn't need to generate an extra file and + all it does is to copy a source file into the build directory, + ``run()`` **SHOULD** simply "early return". + + Similarly, if the subcommand creates files that would be placed alongside + Python files in the final distribution, during an editable install + the command **SHOULD** generate these files "in place" (i.e. write them to + the original source directory, instead of using the build directory). + Note that ``get_output_mapping()`` should reflect that and include mappings + for "in place" builds accordingly. + + 3. ``setuptools`` use any knowledge it can derive from the return values of + ``get_outputs()`` and ``get_output_mapping()`` to create an editable wheel. + When relevant ``setuptools`` **MAY** attempt to use file links based on the value + of ``get_output_mapping()``. Alternatively, ``setuptools`` **MAY** attempt to use + :doc:`import hooks ` to redirect any attempt to import + to the directory with the original source code and other files built in place. + + Please note that custom sub-commands **SHOULD NOT** rely on ``run()`` being + executed (or not) to provide correct return values for ``get_outputs()``, + ``get_output_mapping()`` or ``get_source_files()``. The ``get_*`` methods should + work independently of ``run()``. + """ + + editable_mode: bool = False + """Boolean flag that will be set to ``True`` when setuptools is used for an + editable installation (see :pep:`660`). + Implementations **SHOULD** explicitly set the default value of this attribute to + ``False``. + When subcommands run, they can use this flag to perform optimizations or change + their behaviour accordingly. + """ + + build_lib: str + """String representing the directory where the build artifacts should be stored, + e.g. ``build/lib``. + For example, if a distribution wants to provide a Python module named ``pkg.mod``, + then a corresponding file should be written to ``{build_lib}/package/module.py``. + A way of thinking about this is that the files saved under ``build_lib`` + would be eventually copied to one of the directories in :obj:`site.PREFIXES` + upon installation. + + A command that produces platform-independent files (e.g. compiling text templates + into Python functions), **CAN** initialize ``build_lib`` by copying its value from + the ``build_py`` command. On the other hand, a command that produces + platform-specific files **CAN** initialize ``build_lib`` by copying its value from + the ``build_ext`` command. In general this is done inside the ``finalize_options`` + method with the help of the ``set_undefined_options`` command:: + + def finalize_options(self): + self.set_undefined_options("build_py", ("build_lib", "build_lib")) + ... + """ + + def initialize_options(self): + """(Required by the original :class:`setuptools.Command` interface)""" + + def finalize_options(self): + """(Required by the original :class:`setuptools.Command` interface)""" + + def run(self): + """(Required by the original :class:`setuptools.Command` interface)""" + + def get_source_files(self) -> List[str]: + """ + Return a list of all files that are used by the command to create the expected + outputs. + For example, if your build command transpiles Java files into Python, you should + list here all the Java files. + The primary purpose of this function is to help populating the ``sdist`` + with all the files necessary to build the distribution. + All files should be strings relative to the project root directory. + """ + + def get_outputs(self) -> List[str]: + """ + Return a list of files intended for distribution as they would have been + produced by the build. + These files should be strings in the form of + ``"{build_lib}/destination/file/path"``. + + .. note:: + The return value of ``get_output()`` should include all files used as keys + in ``get_output_mapping()`` plus files that are generated during the build + and don't correspond to any source file already present in the project. + """ + + def get_output_mapping(self) -> Dict[str, str]: + """ + Return a mapping between destination files as they would be produced by the + build (dict keys) into the respective existing (source) files (dict values). + Existing (source) files should be represented as strings relative to the project + root directory. + Destination files should be strings in the form of + ``"{build_lib}/destination/file/path"``. + """ diff --git a/libs/common/setuptools/command/build_clib.py b/libs/common/setuptools/command/build_clib.py index 09caff6f..09483e69 100644 --- a/libs/common/setuptools/command/build_clib.py +++ b/libs/common/setuptools/command/build_clib.py @@ -25,10 +25,10 @@ class build_clib(orig.build_clib): sources = build_info.get('sources') if sources is None or not isinstance(sources, (list, tuple)): raise DistutilsSetupError( - "in 'libraries' option (library '%s'), " - "'sources' must be present and must be " - "a list of source filenames" % lib_name) - sources = list(sources) + "in 'libraries' option (library '%s'), " + "'sources' must be present and must be " + "a list of source filenames" % lib_name) + sources = sorted(list(sources)) log.info("building '%s' library", lib_name) @@ -38,9 +38,9 @@ class build_clib(orig.build_clib): obj_deps = build_info.get('obj_deps', dict()) if not isinstance(obj_deps, dict): raise DistutilsSetupError( - "in 'libraries' option (library '%s'), " - "'obj_deps' must be a dictionary of " - "type 'source: list'" % lib_name) + "in 'libraries' option (library '%s'), " + "'obj_deps' must be a dictionary of " + "type 'source: list'" % lib_name) dependencies = [] # Get the global dependencies that are specified by the '' key. @@ -48,9 +48,9 @@ class build_clib(orig.build_clib): global_deps = obj_deps.get('', list()) if not isinstance(global_deps, (list, tuple)): raise DistutilsSetupError( - "in 'libraries' option (library '%s'), " - "'obj_deps' must be a dictionary of " - "type 'source: list'" % lib_name) + "in 'libraries' option (library '%s'), " + "'obj_deps' must be a dictionary of " + "type 'source: list'" % lib_name) # Build the list to be used by newer_pairwise_group # each source will be auto-added to its dependencies. @@ -60,39 +60,42 @@ class build_clib(orig.build_clib): extra_deps = obj_deps.get(source, list()) if not isinstance(extra_deps, (list, tuple)): raise DistutilsSetupError( - "in 'libraries' option (library '%s'), " - "'obj_deps' must be a dictionary of " - "type 'source: list'" % lib_name) + "in 'libraries' option (library '%s'), " + "'obj_deps' must be a dictionary of " + "type 'source: list'" % lib_name) src_deps.extend(extra_deps) dependencies.append(src_deps) expected_objects = self.compiler.object_filenames( - sources, - output_dir=self.build_temp - ) + sources, + output_dir=self.build_temp, + ) - if newer_pairwise_group(dependencies, expected_objects) != ([], []): + if ( + newer_pairwise_group(dependencies, expected_objects) + != ([], []) + ): # First, compile the source code to object files in the library # directory. (This should probably change to putting object # files in a temporary build directory.) macros = build_info.get('macros') include_dirs = build_info.get('include_dirs') cflags = build_info.get('cflags') - objects = self.compiler.compile( - sources, - output_dir=self.build_temp, - macros=macros, - include_dirs=include_dirs, - extra_postargs=cflags, - debug=self.debug - ) + self.compiler.compile( + sources, + output_dir=self.build_temp, + macros=macros, + include_dirs=include_dirs, + extra_postargs=cflags, + debug=self.debug + ) # Now "link" the object files together into a static library. # (On Unix at least, this isn't really linking -- it just # builds an archive. Whatever.) self.compiler.create_static_lib( - expected_objects, - lib_name, - output_dir=self.build_clib, - debug=self.debug - ) + expected_objects, + lib_name, + output_dir=self.build_clib, + debug=self.debug + ) diff --git a/libs/common/setuptools/command/build_ext.py b/libs/common/setuptools/command/build_ext.py index 1b51e040..cbfe3ec1 100644 --- a/libs/common/setuptools/command/build_ext.py +++ b/libs/common/setuptools/command/build_ext.py @@ -1,22 +1,17 @@ import os import sys import itertools +from importlib.machinery import EXTENSION_SUFFIXES +from importlib.util import cache_from_source as _compiled_file_name +from typing import Dict, Iterator, List, Tuple + from distutils.command.build_ext import build_ext as _du_build_ext -from distutils.file_util import copy_file from distutils.ccompiler import new_compiler from distutils.sysconfig import customize_compiler, get_config_var -from distutils.errors import DistutilsError from distutils import log -from setuptools.extension import Library -from setuptools.extern import six - -if six.PY2: - import imp - - EXTENSION_SUFFIXES = [s for s, _, tp in imp.get_suffixes() if tp == imp.C_EXTENSION] -else: - from importlib.machinery import EXTENSION_SUFFIXES +from setuptools.errors import BaseError +from setuptools.extension import Extension, Library try: # Attempt to use Cython for building extensions, if available @@ -29,7 +24,7 @@ except ImportError: # make sure _config_vars is initialized get_config_var("LDSHARED") -from distutils.sysconfig import _config_vars as _CONFIG_VARS +from distutils.sysconfig import _config_vars as _CONFIG_VARS # noqa def _customize_compiler_for_shlib(compiler): @@ -65,7 +60,9 @@ elif os.name != 'nt': except ImportError: pass -if_dl = lambda s: s if have_rtld else '' + +def if_dl(s): + return s if have_rtld else '' def get_abi3_suffix(): @@ -78,6 +75,9 @@ def get_abi3_suffix(): class build_ext(_build_ext): + editable_mode: bool = False + inplace: bool = False + def run(self): """Build extensions in build directory, then copy if --inplace""" old_inplace, self.inplace = self.inplace, 0 @@ -86,41 +86,78 @@ class build_ext(_build_ext): if old_inplace: self.copy_extensions_to_source() + def _get_inplace_equivalent(self, build_py, ext: Extension) -> Tuple[str, str]: + fullname = self.get_ext_fullname(ext.name) + filename = self.get_ext_filename(fullname) + modpath = fullname.split('.') + package = '.'.join(modpath[:-1]) + package_dir = build_py.get_package_dir(package) + inplace_file = os.path.join(package_dir, os.path.basename(filename)) + regular_file = os.path.join(self.build_lib, filename) + return (inplace_file, regular_file) + def copy_extensions_to_source(self): build_py = self.get_finalized_command('build_py') for ext in self.extensions: - fullname = self.get_ext_fullname(ext.name) - filename = self.get_ext_filename(fullname) - modpath = fullname.split('.') - package = '.'.join(modpath[:-1]) - package_dir = build_py.get_package_dir(package) - dest_filename = os.path.join(package_dir, - os.path.basename(filename)) - src_filename = os.path.join(self.build_lib, filename) + inplace_file, regular_file = self._get_inplace_equivalent(build_py, ext) # Always copy, even if source is older than destination, to ensure # that the right extensions for the current Python/platform are # used. - copy_file( - src_filename, dest_filename, verbose=self.verbose, - dry_run=self.dry_run - ) + if os.path.exists(regular_file) or not ext.optional: + self.copy_file(regular_file, inplace_file, level=self.verbose) + if ext._needs_stub: - self.write_stub(package_dir or os.curdir, ext, True) + inplace_stub = self._get_equivalent_stub(ext, inplace_file) + self._write_stub_file(inplace_stub, ext, compile=True) + # Always compile stub and remove the original (leave the cache behind) + # (this behaviour was observed in previous iterations of the code) + + def _get_equivalent_stub(self, ext: Extension, output_file: str) -> str: + dir_ = os.path.dirname(output_file) + _, _, name = ext.name.rpartition(".") + return f"{os.path.join(dir_, name)}.py" + + def _get_output_mapping(self) -> Iterator[Tuple[str, str]]: + if not self.inplace: + return + + build_py = self.get_finalized_command('build_py') + opt = self.get_finalized_command('install_lib').optimize or "" + + for ext in self.extensions: + inplace_file, regular_file = self._get_inplace_equivalent(build_py, ext) + yield (regular_file, inplace_file) + + if ext._needs_stub: + # This version of `build_ext` always builds artifacts in another dir, + # when "inplace=True" is given it just copies them back. + # This is done in the `copy_extensions_to_source` function, which + # always compile stub files via `_compile_and_remove_stub`. + # At the end of the process, a `.pyc` stub file is created without the + # corresponding `.py`. + + inplace_stub = self._get_equivalent_stub(ext, inplace_file) + regular_stub = self._get_equivalent_stub(ext, regular_file) + inplace_cache = _compiled_file_name(inplace_stub, optimization=opt) + output_cache = _compiled_file_name(regular_stub, optimization=opt) + yield (output_cache, inplace_cache) def get_ext_filename(self, fullname): - filename = _build_ext.get_ext_filename(self, fullname) + so_ext = os.getenv('SETUPTOOLS_EXT_SUFFIX') + if so_ext: + filename = os.path.join(*fullname.split('.')) + so_ext + else: + filename = _build_ext.get_ext_filename(self, fullname) + so_ext = get_config_var('EXT_SUFFIX') + if fullname in self.ext_map: ext = self.ext_map[fullname] - use_abi3 = ( - not six.PY2 - and getattr(ext, 'py_limited_api') - and get_abi3_suffix() - ) + use_abi3 = getattr(ext, 'py_limited_api') and get_abi3_suffix() if use_abi3: - so_ext = get_config_var('EXT_SUFFIX') filename = filename[:-len(so_ext)] - filename = filename + get_abi3_suffix() + so_ext = get_abi3_suffix() + filename = filename + so_ext if isinstance(ext, Library): fn, ext = os.path.splitext(filename) return self.shlib_compiler.library_filename(fn, libtype) @@ -134,6 +171,7 @@ class build_ext(_build_ext): self.shlib_compiler = None self.shlibs = [] self.ext_map = {} + self.editable_mode = False def finalize_options(self): _build_ext.finalize_options(self) @@ -164,6 +202,9 @@ class build_ext(_build_ext): if ltd and use_stubs and os.curdir not in ext.runtime_library_dirs: ext.runtime_library_dirs.append(os.curdir) + if self.editable_mode: + self.inplace = True + def setup_shlib_compiler(self): compiler = self.shlib_compiler = new_compiler( compiler=self.compiler, dry_run=self.dry_run, force=self.force @@ -204,8 +245,8 @@ class build_ext(_build_ext): self.compiler = self.shlib_compiler _build_ext.build_extension(self, ext) if ext._needs_stub: - cmd = self.get_finalized_command('build_py').build_lib - self.write_stub(cmd, ext) + build_lib = self.get_finalized_command('build_py').build_lib + self.write_stub(build_lib, ext) finally: self.compiler = _compiler @@ -218,8 +259,15 @@ class build_ext(_build_ext): pkg = '.'.join(ext._full_name.split('.')[:-1] + ['']) return any(pkg + libname in libnames for libname in ext.libraries) - def get_outputs(self): - return _build_ext.get_outputs(self) + self.__get_stubs_outputs() + def get_outputs(self) -> List[str]: + if self.inplace: + return list(self.get_output_mapping().keys()) + return sorted(_build_ext.get_outputs(self) + self.__get_stubs_outputs()) + + def get_output_mapping(self) -> Dict[str, str]: + """See :class:`setuptools.commands.build.SubCommand`""" + mapping = self._get_output_mapping() + return dict(sorted(mapping, key=lambda x: x[0])) def __get_stubs_outputs(self): # assemble the base name for each extension that needs a stub @@ -239,19 +287,21 @@ class build_ext(_build_ext): yield '.pyo' def write_stub(self, output_dir, ext, compile=False): - log.info("writing stub loader for %s to %s", ext._full_name, - output_dir) - stub_file = (os.path.join(output_dir, *ext._full_name.split('.')) + - '.py') + stub_file = os.path.join(output_dir, *ext._full_name.split('.')) + '.py' + self._write_stub_file(stub_file, ext, compile) + + def _write_stub_file(self, stub_file: str, ext: Extension, compile=False): + log.info("writing stub loader for %s to %s", ext._full_name, stub_file) if compile and os.path.exists(stub_file): - raise DistutilsError(stub_file + " already exists! Please delete.") + raise BaseError(stub_file + " already exists! Please delete.") if not self.dry_run: f = open(stub_file, 'w') f.write( '\n'.join([ "def __bootstrap__():", " global __bootstrap__, __file__, __loader__", - " import sys, os, pkg_resources, imp" + if_dl(", dl"), + " import sys, os, pkg_resources, importlib.util" + + if_dl(", dl"), " __file__ = pkg_resources.resource_filename" "(__name__,%r)" % os.path.basename(ext._file_name), @@ -263,7 +313,10 @@ class build_ext(_build_ext): " try:", " os.chdir(os.path.dirname(__file__))", if_dl(" sys.setdlopenflags(dl.RTLD_NOW)"), - " imp.load_dynamic(__name__,__file__)", + " spec = importlib.util.spec_from_file_location(", + " __name__, __file__)", + " mod = importlib.util.module_from_spec(spec)", + " spec.loader.exec_module(mod)", " finally:", if_dl(" sys.setdlopenflags(old_flags)"), " os.chdir(old_dir)", @@ -273,16 +326,19 @@ class build_ext(_build_ext): ) f.close() if compile: - from distutils.util import byte_compile + self._compile_and_remove_stub(stub_file) - byte_compile([stub_file], optimize=0, + def _compile_and_remove_stub(self, stub_file: str): + from distutils.util import byte_compile + + byte_compile([stub_file], optimize=0, + force=True, dry_run=self.dry_run) + optimize = self.get_finalized_command('install_lib').optimize + if optimize > 0: + byte_compile([stub_file], optimize=optimize, force=True, dry_run=self.dry_run) - optimize = self.get_finalized_command('install_lib').optimize - if optimize > 0: - byte_compile([stub_file], optimize=optimize, - force=True, dry_run=self.dry_run) - if os.path.exists(stub_file) and not self.dry_run: - os.unlink(stub_file) + if os.path.exists(stub_file) and not self.dry_run: + os.unlink(stub_file) if use_stubs or os.name == 'nt': diff --git a/libs/common/setuptools/command/build_py.py b/libs/common/setuptools/command/build_py.py index b0314fd4..ec062742 100644 --- a/libs/common/setuptools/command/build_py.py +++ b/libs/common/setuptools/command/build_py.py @@ -1,3 +1,4 @@ +from functools import partial from glob import glob from distutils.util import convert_path import distutils.command.build_py as orig @@ -7,20 +8,20 @@ import textwrap import io import distutils.errors import itertools +import stat +import warnings +from pathlib import Path +from typing import Dict, Iterable, Iterator, List, Optional, Tuple -from setuptools.extern import six -from setuptools.extern.six.moves import map, filter, filterfalse - -try: - from setuptools.lib2to3_ex import Mixin2to3 -except ImportError: - - class Mixin2to3: - def run_2to3(self, files, doctests=True): - "do nothing" +from setuptools._deprecation_warning import SetuptoolsDeprecationWarning +from setuptools.extern.more_itertools import unique_everseen -class build_py(orig.build_py, Mixin2to3): +def make_writable(target): + os.chmod(target, os.stat(target).st_mode | stat.S_IWRITE) + + +class build_py(orig.build_py): """Enhanced 'build_py' command that includes data files with packages The data files are specified via a 'package_data' argument to 'setup()'. @@ -29,20 +30,29 @@ class build_py(orig.build_py, Mixin2to3): Also, this version of the 'build_py' command allows you to specify both 'py_modules' and 'packages' in the same setup operation. """ + editable_mode: bool = False + existing_egg_info_dir: Optional[str] = None #: Private API, internal use only. def finalize_options(self): orig.build_py.finalize_options(self) self.package_data = self.distribution.package_data - self.exclude_package_data = (self.distribution.exclude_package_data or - {}) + self.exclude_package_data = self.distribution.exclude_package_data or {} if 'data_files' in self.__dict__: del self.__dict__['data_files'] self.__updated_files = [] - self.__doctests_2to3 = [] + + def copy_file(self, infile, outfile, preserve_mode=1, preserve_times=1, + link=None, level=1): + # Overwrite base class to allow using links + if link: + infile = str(Path(infile).resolve()) + outfile = str(Path(outfile).resolve()) + return super().copy_file(infile, outfile, preserve_mode, preserve_times, + link, level) def run(self): """Build modules, packages, and copy data files to build directory""" - if not self.py_modules and not self.packages: + if not (self.py_modules or self.packages) or self.editable_mode: return if self.py_modules: @@ -52,10 +62,6 @@ class build_py(orig.build_py, Mixin2to3): self.build_packages() self.build_package_data() - self.run_2to3(self.__updated_files, False) - self.run_2to3(self.__updated_files, True) - self.run_2to3(self.__doctests_2to3, True) - # Only compile actual .py files, using our base class' idea of what our # output files are. self.byte_compile(orig.build_py.get_outputs(self, include_bytecode=0)) @@ -68,11 +74,7 @@ class build_py(orig.build_py, Mixin2to3): return orig.build_py.__getattr__(self, attr) def build_module(self, module, module_file, package): - if six.PY2 and isinstance(package, six.string_types): - # avoid errors on Python 2 when unicode is passed (#190) - package = package.split('.') - outfile, copied = orig.build_py.build_module(self, module, module_file, - package) + outfile, copied = orig.build_py.build_module(self, module, module_file, package) if copied: self.__updated_files.append(outfile) return outfile, copied @@ -82,6 +84,16 @@ class build_py(orig.build_py, Mixin2to3): self.analyze_manifest() return list(map(self._get_pkg_data_files, self.packages or ())) + def get_data_files_without_manifest(self): + """ + Generate list of ``(package,src_dir,build_dir,filenames)`` tuples, + but without triggering any attempt to analyze or build the manifest. + """ + # Prevent eventual errors from unset `manifest_files` + # (that would otherwise be set by `analyze_manifest`) + self.__dict__.setdefault('manifest_files', {}) + return list(map(self._get_pkg_data_files, self.packages or ())) + def _get_pkg_data_files(self, package): # Locate package source directory src_dir = self.get_package_dir(package) @@ -103,7 +115,7 @@ class build_py(orig.build_py, Mixin2to3): package, src_dir, ) - globs_expanded = map(glob, patterns) + globs_expanded = map(partial(glob, recursive=True), patterns) # flatten the expanded globs into an iterable of matches globs_matches = itertools.chain.from_iterable(globs_expanded) glob_files = filter(os.path.isfile, globs_matches) @@ -113,18 +125,41 @@ class build_py(orig.build_py, Mixin2to3): ) return self.exclude_data_files(package, src_dir, files) - def build_package_data(self): - """Copy data files into build directory""" + def get_outputs(self, include_bytecode=1) -> List[str]: + """See :class:`setuptools.commands.build.SubCommand`""" + if self.editable_mode: + return list(self.get_output_mapping().keys()) + return super().get_outputs(include_bytecode) + + def get_output_mapping(self) -> Dict[str, str]: + """See :class:`setuptools.commands.build.SubCommand`""" + mapping = itertools.chain( + self._get_package_data_output_mapping(), + self._get_module_mapping(), + ) + return dict(sorted(mapping, key=lambda x: x[0])) + + def _get_module_mapping(self) -> Iterator[Tuple[str, str]]: + """Iterate over all modules producing (dest, src) pairs.""" + for (package, module, module_file) in self.find_all_modules(): + package = package.split('.') + filename = self.get_module_outfile(self.build_lib, package, module) + yield (filename, module_file) + + def _get_package_data_output_mapping(self) -> Iterator[Tuple[str, str]]: + """Iterate over package data producing (dest, src) pairs.""" for package, src_dir, build_dir, filenames in self.data_files: for filename in filenames: target = os.path.join(build_dir, filename) - self.mkpath(os.path.dirname(target)) srcfile = os.path.join(src_dir, filename) - outf, copied = self.copy_file(srcfile, target) - srcfile = os.path.abspath(srcfile) - if (copied and - srcfile in self.distribution.convert_2to3_doctests): - self.__doctests_2to3.append(outf) + yield (target, srcfile) + + def build_package_data(self): + """Copy data files into build directory""" + for target, srcfile in self._get_package_data_output_mapping(): + self.mkpath(os.path.dirname(target)) + _outf, _copied = self.copy_file(srcfile, target) + make_writable(target) def analyze_manifest(self): self.manifest_files = mf = {} @@ -135,9 +170,21 @@ class build_py(orig.build_py, Mixin2to3): # Locate package source directory src_dirs[assert_relative(self.get_package_dir(package))] = package - self.run_command('egg_info') - ei_cmd = self.get_finalized_command('egg_info') - for path in ei_cmd.filelist.files: + if ( + getattr(self, 'existing_egg_info_dir', None) + and Path(self.existing_egg_info_dir, "SOURCES.txt").exists() + ): + egg_info_dir = self.existing_egg_info_dir + manifest = Path(egg_info_dir, "SOURCES.txt") + files = manifest.read_text(encoding="utf-8").splitlines() + else: + self.run_command('egg_info') + ei_cmd = self.get_finalized_command('egg_info') + egg_info_dir = ei_cmd.egg_info + files = ei_cmd.filelist.files + + check = _IncludePackageDataAbuse() + for path in self._filter_build_files(files, egg_info_dir): d, f = os.path.split(assert_relative(path)) prev = None oldf = f @@ -146,10 +193,34 @@ class build_py(orig.build_py, Mixin2to3): d, df = os.path.split(d) f = os.path.join(df, f) if d in src_dirs: - if path.endswith('.py') and f == oldf: - continue # it's a module, not data + if f == oldf: + if check.is_module(f): + continue # it's a module, not data + else: + importable = check.importable_subpackage(src_dirs[d], f) + if importable: + check.warn(importable) mf.setdefault(src_dirs[d], []).append(path) + def _filter_build_files(self, files: Iterable[str], egg_info: str) -> Iterator[str]: + """ + ``build_meta`` may try to create egg_info outside of the project directory, + and this can be problematic for certain plugins (reported in issue #3500). + + Extensions might also include between their sources files created on the + ``build_lib`` and ``build_temp`` directories. + + This function should filter this case of invalid files out. + """ + build = self.get_finalized_command("build") + build_dirs = (egg_info, self.build_lib, build.build_temp, build.build_base) + norm_dirs = [os.path.normpath(p) for p in build_dirs if p] + + for file in files: + norm_path = os.path.normpath(file) + if not os.path.isabs(file) or all(d not in norm_path for d in norm_dirs): + yield file + def get_data_files(self): pass # Lazily compute data files in _get_data_files() function. @@ -186,6 +257,8 @@ class build_py(orig.build_py, Mixin2to3): def initialize_options(self): self.packages_checked = {} orig.build_py.initialize_options(self) + self.editable_mode = False + self.existing_egg_info_dir = None def get_package_dir(self, package): res = orig.build_py.get_package_dir(self, package) @@ -201,20 +274,13 @@ class build_py(orig.build_py, Mixin2to3): package, src_dir, ) - match_groups = ( - fnmatch.filter(files, pattern) - for pattern in patterns - ) + match_groups = (fnmatch.filter(files, pattern) for pattern in patterns) # flatten the groups of matches into an iterable of matches matches = itertools.chain.from_iterable(match_groups) bad = set(matches) - keepers = ( - fn - for fn in files - if fn not in bad - ) + keepers = (fn for fn in files if fn not in bad) # ditch dupes - return list(_unique_everseen(keepers)) + return list(unique_everseen(keepers)) @staticmethod def _get_platform_patterns(spec, package, src_dir): @@ -235,36 +301,68 @@ class build_py(orig.build_py, Mixin2to3): ) -# from Python docs -def _unique_everseen(iterable, key=None): - "List unique elements, preserving order. Remember all elements ever seen." - # unique_everseen('AAAABBBCCDAABBB') --> A B C D - # unique_everseen('ABBCcAD', str.lower) --> A B C D - seen = set() - seen_add = seen.add - if key is None: - for element in filterfalse(seen.__contains__, iterable): - seen_add(element) - yield element - else: - for element in iterable: - k = key(element) - if k not in seen: - seen_add(k) - yield element - - def assert_relative(path): if not os.path.isabs(path): return path from distutils.errors import DistutilsSetupError - msg = textwrap.dedent(""" + msg = ( + textwrap.dedent( + """ Error: setup script specifies an absolute path: %s setup() arguments must *always* be /-separated paths relative to the setup.py directory, *never* absolute paths. - """).lstrip() % path + """ + ).lstrip() + % path + ) raise DistutilsSetupError(msg) + + +class _IncludePackageDataAbuse: + """Inform users that package or module is included as 'data file'""" + + MESSAGE = """\ + Installing {importable!r} as data is deprecated, please list it in `packages`. + !!\n\n + ############################ + # Package would be ignored # + ############################ + Python recognizes {importable!r} as an importable package, + but it is not listed in the `packages` configuration of setuptools. + + {importable!r} has been automatically added to the distribution only + because it may contain data files, but this behavior is likely to change + in future versions of setuptools (and therefore is considered deprecated). + + Please make sure that {importable!r} is included as a package by using + the `packages` configuration field or the proper discovery methods + (for example by using `find_namespace_packages(...)`/`find_namespace:` + instead of `find_packages(...)`/`find:`). + + You can read more about "package discovery" and "data files" on setuptools + documentation page. + \n\n!! + """ + + def __init__(self): + self._already_warned = set() + + def is_module(self, file): + return file.endswith(".py") and file[:-len(".py")].isidentifier() + + def importable_subpackage(self, parent, file): + pkg = Path(file).parent + parts = list(itertools.takewhile(str.isidentifier, pkg.parts)) + if parts: + return ".".join([parent, *parts]) + return None + + def warn(self, importable): + if importable not in self._already_warned: + msg = textwrap.dedent(self.MESSAGE).format(importable=importable) + warnings.warn(msg, SetuptoolsDeprecationWarning, stacklevel=2) + self._already_warned.add(importable) diff --git a/libs/common/setuptools/command/develop.py b/libs/common/setuptools/command/develop.py index b5619246..24fb0a7c 100644 --- a/libs/common/setuptools/command/develop.py +++ b/libs/common/setuptools/command/develop.py @@ -5,15 +5,11 @@ import os import glob import io -from setuptools.extern import six - import pkg_resources from setuptools.command.easy_install import easy_install from setuptools import namespaces import setuptools -__metaclass__ = type - class develop(namespaces.DevelopInstaller, easy_install): """Set up package for development""" @@ -67,7 +63,8 @@ class develop(namespaces.DevelopInstaller, easy_install): target = pkg_resources.normalize_path(self.egg_base) egg_path = pkg_resources.normalize_path( - os.path.join(self.install_dir, self.egg_path)) + os.path.join(self.install_dir, self.egg_path) + ) if egg_path != target: raise DistutilsOptionError( "--egg-path must be a relative path from the install" @@ -78,7 +75,7 @@ class develop(namespaces.DevelopInstaller, easy_install): self.dist = pkg_resources.Distribution( target, pkg_resources.PathMetadata(target, os.path.abspath(ei.egg_info)), - project_name=ei.egg_name + project_name=ei.egg_name, ) self.setup_path = self._resolve_setup_path( @@ -103,43 +100,19 @@ class develop(namespaces.DevelopInstaller, easy_install): if resolved != pkg_resources.normalize_path(os.curdir): raise DistutilsOptionError( "Can't get a consistent path to setup script from" - " installation directory", resolved, - pkg_resources.normalize_path(os.curdir)) + " installation directory", + resolved, + pkg_resources.normalize_path(os.curdir), + ) return path_to_setup def install_for_development(self): - if not six.PY2 and getattr(self.distribution, 'use_2to3', False): - # If we run 2to3 we can not do this inplace: + self.run_command('egg_info') - # Ensure metadata is up-to-date - self.reinitialize_command('build_py', inplace=0) - self.run_command('build_py') - bpy_cmd = self.get_finalized_command("build_py") - build_path = pkg_resources.normalize_path(bpy_cmd.build_lib) + # Build extensions in-place + self.reinitialize_command('build_ext', inplace=1) + self.run_command('build_ext') - # Build extensions - self.reinitialize_command('egg_info', egg_base=build_path) - self.run_command('egg_info') - - self.reinitialize_command('build_ext', inplace=0) - self.run_command('build_ext') - - # Fixup egg-link and easy-install.pth - ei_cmd = self.get_finalized_command("egg_info") - self.egg_path = build_path - self.dist.location = build_path - # XXX - self.dist._provider = pkg_resources.PathMetadata( - build_path, ei_cmd.egg_info) - else: - # Without 2to3 inplace works fine: - self.run_command('egg_info') - - # Build extensions in-place - self.reinitialize_command('build_ext', inplace=1) - self.run_command('build_ext') - - self.install_site_py() # ensure that target dir is site-safe if setuptools.bootstrap_install_from: self.easy_install(setuptools.bootstrap_install_from) setuptools.bootstrap_install_from = None @@ -161,8 +134,7 @@ class develop(namespaces.DevelopInstaller, easy_install): egg_link_file = open(self.egg_link) contents = [line.rstrip() for line in egg_link_file] egg_link_file.close() - if contents not in ([self.egg_path], - [self.egg_path, self.setup_path]): + if contents not in ([self.egg_path], [self.egg_path, self.setup_path]): log.warn("Link points to %s: uninstall aborted", contents) return if not self.dry_run: diff --git a/libs/common/setuptools/command/dist_info.py b/libs/common/setuptools/command/dist_info.py index c45258fa..0685c945 100644 --- a/libs/common/setuptools/command/dist_info.py +++ b/libs/common/setuptools/command/dist_info.py @@ -4,9 +4,18 @@ As defined in the wheel specification """ import os +import re +import shutil +import sys +import warnings +from contextlib import contextmanager +from inspect import cleandoc +from pathlib import Path from distutils.core import Command from distutils import log +from setuptools.extern import packaging +from setuptools._deprecation_warning import SetuptoolsDeprecationWarning class dist_info(Command): @@ -15,22 +24,119 @@ class dist_info(Command): user_options = [ ('egg-base=', 'e', "directory containing .egg-info directories" - " (default: top of the source tree)"), + " (default: top of the source tree)" + " DEPRECATED: use --output-dir."), + ('output-dir=', 'o', "directory inside of which the .dist-info will be" + "created (default: top of the source tree)"), + ('tag-date', 'd', "Add date stamp (e.g. 20050528) to version number"), + ('tag-build=', 'b', "Specify explicit tag to add to version number"), + ('no-date', 'D', "Don't include date stamp [default]"), + ('keep-egg-info', None, "*TRANSITIONAL* will be removed in the future"), ] + boolean_options = ['tag-date', 'keep-egg-info'] + negative_opt = {'no-date': 'tag-date'} + def initialize_options(self): self.egg_base = None + self.output_dir = None + self.name = None + self.dist_info_dir = None + self.tag_date = None + self.tag_build = None + self.keep_egg_info = False def finalize_options(self): - pass + if self.egg_base: + msg = "--egg-base is deprecated for dist_info command. Use --output-dir." + warnings.warn(msg, SetuptoolsDeprecationWarning) + self.output_dir = self.egg_base or self.output_dir + + dist = self.distribution + project_dir = dist.src_root or os.curdir + self.output_dir = Path(self.output_dir or project_dir) + + egg_info = self.reinitialize_command("egg_info") + egg_info.egg_base = str(self.output_dir) + + if self.tag_date: + egg_info.tag_date = self.tag_date + else: + self.tag_date = egg_info.tag_date + + if self.tag_build: + egg_info.tag_build = self.tag_build + else: + self.tag_build = egg_info.tag_build + + egg_info.finalize_options() + self.egg_info = egg_info + + name = _safe(dist.get_name()) + version = _version(dist.get_version()) + self.name = f"{name}-{version}" + self.dist_info_dir = os.path.join(self.output_dir, f"{self.name}.dist-info") + + @contextmanager + def _maybe_bkp_dir(self, dir_path: str, requires_bkp: bool): + if requires_bkp: + bkp_name = f"{dir_path}.__bkp__" + _rm(bkp_name, ignore_errors=True) + _copy(dir_path, bkp_name, dirs_exist_ok=True, symlinks=True) + try: + yield + finally: + _rm(dir_path, ignore_errors=True) + shutil.move(bkp_name, dir_path) + else: + yield def run(self): - egg_info = self.get_finalized_command('egg_info') - egg_info.egg_base = self.egg_base - egg_info.finalize_options() - egg_info.run() - dist_info_dir = egg_info.egg_info[:-len('.egg-info')] + '.dist-info' - log.info("creating '{}'".format(os.path.abspath(dist_info_dir))) + self.output_dir.mkdir(parents=True, exist_ok=True) + self.egg_info.run() + egg_info_dir = self.egg_info.egg_info + assert os.path.isdir(egg_info_dir), ".egg-info dir should have been created" + log.info("creating '{}'".format(os.path.abspath(self.dist_info_dir))) bdist_wheel = self.get_finalized_command('bdist_wheel') - bdist_wheel.egg2dist(egg_info.egg_info, dist_info_dir) + + # TODO: if bdist_wheel if merged into setuptools, just add "keep_egg_info" there + with self._maybe_bkp_dir(egg_info_dir, self.keep_egg_info): + bdist_wheel.egg2dist(egg_info_dir, self.dist_info_dir) + + +def _safe(component: str) -> str: + """Escape a component used to form a wheel name according to PEP 491""" + return re.sub(r"[^\w\d.]+", "_", component) + + +def _version(version: str) -> str: + """Convert an arbitrary string to a version string.""" + v = version.replace(' ', '.') + try: + return str(packaging.version.Version(v)).replace("-", "_") + except packaging.version.InvalidVersion: + msg = f"""Invalid version: {version!r}. + !!\n\n + ################### + # Invalid version # + ################### + {version!r} is not valid according to PEP 440.\n + Please make sure specify a valid version for your package. + Also note that future releases of setuptools may halt the build process + if an invalid version is given. + \n\n!! + """ + warnings.warn(cleandoc(msg)) + return _safe(v).strip("_") + + +def _rm(dir_name, **opts): + if os.path.isdir(dir_name): + shutil.rmtree(dir_name, **opts) + + +def _copy(src, dst, **opts): + if sys.version_info < (3, 8): + opts.pop("dirs_exist_ok", None) + shutil.copytree(src, dst, **opts) diff --git a/libs/common/setuptools/command/easy_install.py b/libs/common/setuptools/command/easy_install.py index 426301d6..444d3b33 100644 --- a/libs/common/setuptools/command/easy_install.py +++ b/libs/common/setuptools/command/easy_install.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """ Easy Install ------------ @@ -7,7 +6,7 @@ A tool for doing automatic download/extract/build of distutils-based Python packages. For detailed documentation, see the accompanying EasyInstall.txt file, or visit the `EasyInstall home page`__. -__ https://setuptools.readthedocs.io/en/latest/easy_install.html +__ https://setuptools.pypa.io/en/latest/deprecated/easy_install.html """ @@ -18,10 +17,10 @@ from distutils.errors import ( DistutilsArgError, DistutilsOptionError, DistutilsError, DistutilsPlatformError, ) -from distutils.command.install import INSTALL_SCHEMES, SCHEME_KEYS from distutils import log, dir_util from distutils.command.build_scripts import first_line_re from distutils.spawn import find_executable +from distutils.command import install import sys import os import zipimport @@ -39,18 +38,16 @@ import contextlib import subprocess import shlex import io +import configparser +import sysconfig -from sysconfig import get_config_vars, get_path +from sysconfig import get_path from setuptools import SetuptoolsDeprecationWarning -from setuptools.extern import six -from setuptools.extern.six.moves import configparser, map - from setuptools import Command from setuptools.sandbox import run_setup -from setuptools.py27compat import rmtree_safe from setuptools.command import setopt from setuptools.archive_util import unpack_archive from setuptools.package_index import ( @@ -59,21 +56,22 @@ from setuptools.package_index import ( from setuptools.command import bdist_egg, egg_info from setuptools.wheel import Wheel from pkg_resources import ( - yield_lines, normalize_path, resource_string, ensure_directory, + normalize_path, resource_string, get_distribution, find_distributions, Environment, Requirement, Distribution, PathMetadata, EggMetadata, WorkingSet, DistributionNotFound, VersionConflict, DEVELOP_DIST, ) -import pkg_resources.py31compat +import pkg_resources +from .._path import ensure_directory +from ..extern.jaraco.text import yield_lines -__metaclass__ = type # Turn on PEP440Warnings warnings.filterwarnings("default", category=pkg_resources.PEP440Warning) __all__ = [ - 'samefile', 'easy_install', 'PthDistributions', 'extract_wininst_cfg', - 'main', 'get_exe_prefixes', + 'easy_install', 'PthDistributions', 'extract_wininst_cfg', + 'get_exe_prefixes', ] @@ -81,47 +79,20 @@ def is_64bit(): return struct.calcsize("P") == 8 -def samefile(p1, p2): - """ - Determine if two paths reference the same file. - - Augments os.path.samefile to work on Windows and - suppresses errors if the path doesn't exist. - """ - both_exist = os.path.exists(p1) and os.path.exists(p2) - use_samefile = hasattr(os.path, 'samefile') and both_exist - if use_samefile: - return os.path.samefile(p1, p2) - norm_p1 = os.path.normpath(os.path.normcase(p1)) - norm_p2 = os.path.normpath(os.path.normcase(p2)) - return norm_p1 == norm_p2 +def _to_bytes(s): + return s.encode('utf8') -if six.PY2: - - def _to_bytes(s): - return s - - def isascii(s): - try: - six.text_type(s, 'ascii') - return True - except UnicodeError: - return False -else: - - def _to_bytes(s): - return s.encode('utf8') - - def isascii(s): - try: - s.encode('ascii') - return True - except UnicodeError: - return False +def isascii(s): + try: + s.encode('ascii') + return True + except UnicodeError: + return False -_one_liner = lambda text: textwrap.dedent(text).strip().replace('\n', '; ') +def _one_liner(text): + return textwrap.dedent(text).strip().replace('\n', '; ') class easy_install(Command): @@ -156,23 +127,26 @@ class easy_install(Command): "allow building eggs from local checkouts"), ('version', None, "print version information and exit"), ('no-find-links', None, - "Don't load find-links defined in packages being installed") + "Don't load find-links defined in packages being installed"), + ('user', None, "install in user site-package '%s'" % site.USER_SITE) ] boolean_options = [ 'zip-ok', 'multi-version', 'exclude-scripts', 'upgrade', 'always-copy', 'editable', - 'no-deps', 'local-snapshots-ok', 'version' + 'no-deps', 'local-snapshots-ok', 'version', + 'user' ] - if site.ENABLE_USER_SITE: - help_msg = "install in user site-package '%s'" % site.USER_SITE - user_options.append(('user', None, help_msg)) - boolean_options.append('user') - negative_opt = {'always-unzip': 'zip-ok'} create_index = PackageIndex def initialize_options(self): + warnings.warn( + "easy_install command is deprecated. " + "Use build and pip and other standards-based tools.", + EasyInstallDeprecationWarning, + ) + # the --user option seems to be an opt-in one, # so the default should be False. self.user = 0 @@ -195,12 +169,8 @@ class easy_install(Command): self.install_data = None self.install_base = None self.install_platbase = None - if site.ENABLE_USER_SITE: - self.install_userbase = site.USER_BASE - self.install_usersite = site.USER_SITE - else: - self.install_userbase = None - self.install_usersite = None + self.install_userbase = site.USER_BASE + self.install_usersite = site.USER_SITE self.no_find_links = None # Options not specifiable via command line @@ -208,7 +178,6 @@ class easy_install(Command): self.pth_file = self.always_copy_from = None self.site_dirs = None self.installed_projects = {} - self.sitepy_installed = False # Always read easy_install options, even if we are subclassed, or have # an independent instance created. This ensures that defaults will # always come from the standard configuration file(s)' "easy_install" @@ -247,30 +216,43 @@ class easy_install(Command): print(tmpl.format(**locals())) raise SystemExit() - def finalize_options(self): + def finalize_options(self): # noqa: C901 # is too complex (25) # FIXME self.version and self._render_version() py_version = sys.version.split()[0] - prefix, exec_prefix = get_config_vars('prefix', 'exec_prefix') - self.config_vars = { + self.config_vars = dict(sysconfig.get_config_vars()) + + self.config_vars.update({ 'dist_name': self.distribution.get_name(), 'dist_version': self.distribution.get_version(), 'dist_fullname': self.distribution.get_fullname(), 'py_version': py_version, - 'py_version_short': py_version[0:3], - 'py_version_nodot': py_version[0] + py_version[2], - 'sys_prefix': prefix, - 'prefix': prefix, - 'sys_exec_prefix': exec_prefix, - 'exec_prefix': exec_prefix, + 'py_version_short': f'{sys.version_info.major}.{sys.version_info.minor}', + 'py_version_nodot': f'{sys.version_info.major}{sys.version_info.minor}', + 'sys_prefix': self.config_vars['prefix'], + 'sys_exec_prefix': self.config_vars['exec_prefix'], # Only python 3.2+ has abiflags 'abiflags': getattr(sys, 'abiflags', ''), - } + 'platlibdir': getattr(sys, 'platlibdir', 'lib'), + }) + with contextlib.suppress(AttributeError): + # only for distutils outside stdlib + self.config_vars.update({ + 'implementation_lower': install._get_implementation().lower(), + 'implementation': install._get_implementation(), + }) - if site.ENABLE_USER_SITE: - self.config_vars['userbase'] = self.install_userbase - self.config_vars['usersite'] = self.install_usersite + # pypa/distutils#113 Python 3.9 compat + self.config_vars.setdefault( + 'py_version_nodot_plat', + getattr(sys, 'windir', '').replace('.', ''), + ) + + self.config_vars['userbase'] = self.install_userbase + self.config_vars['usersite'] = self.install_usersite + if self.user and not site.ENABLE_USER_SITE: + log.warn("WARNING: The user site-packages directory is disabled.") self._fix_install_dir_for_user_site() @@ -305,27 +287,14 @@ class easy_install(Command): self.script_dir = self.install_scripts # default --record from the install command self.set_undefined_options('install', ('record', 'record')) - # Should this be moved to the if statement below? It's not used - # elsewhere - normpath = map(normalize_path, sys.path) self.all_site_dirs = get_site_dirs() - if self.site_dirs is not None: - site_dirs = [ - os.path.expanduser(s.strip()) for s in - self.site_dirs.split(',') - ] - for d in site_dirs: - if not os.path.isdir(d): - log.warn("%s (in --site-dirs) does not exist", d) - elif normalize_path(d) not in normpath: - raise DistutilsOptionError( - d + " (in --site-dirs) is not on sys.path" - ) - else: - self.all_site_dirs.append(normalize_path(d)) + self.all_site_dirs.extend(self._process_site_dirs(self.site_dirs)) + if not self.editable: self.check_site_dir() - self.index_url = self.index_url or "https://pypi.org/simple/" + default_index = os.getenv("__EASYINSTALL_INDEX", "https://pypi.org/simple/") + # ^ Private API for testing purposes only + self.index_url = self.index_url or default_index self.shadow_path = self.all_site_dirs[:] for path_item in self.install_dir, normalize_path(self.script_dir): if path_item not in self.shadow_path: @@ -342,7 +311,7 @@ class easy_install(Command): self.local_index = Environment(self.shadow_path + sys.path) if self.find_links is not None: - if isinstance(self.find_links, six.string_types): + if isinstance(self.find_links, str): self.find_links = self.find_links.split() else: self.find_links = [] @@ -351,13 +320,7 @@ class easy_install(Command): if not self.no_find_links: self.package_index.add_find_links(self.find_links) self.set_undefined_options('install_lib', ('optimize', 'optimize')) - if not isinstance(self.optimize, int): - try: - self.optimize = int(self.optimize) - if not (0 <= self.optimize <= 2): - raise ValueError - except ValueError: - raise DistutilsOptionError("--optimize must be 0, 1, or 2") + self.optimize = self._validate_optimize(self.optimize) if self.editable and not self.build_directory: raise DistutilsArgError( @@ -369,11 +332,44 @@ class easy_install(Command): self.outputs = [] + @staticmethod + def _process_site_dirs(site_dirs): + if site_dirs is None: + return + + normpath = map(normalize_path, sys.path) + site_dirs = [ + os.path.expanduser(s.strip()) for s in + site_dirs.split(',') + ] + for d in site_dirs: + if not os.path.isdir(d): + log.warn("%s (in --site-dirs) does not exist", d) + elif normalize_path(d) not in normpath: + raise DistutilsOptionError( + d + " (in --site-dirs) is not on sys.path" + ) + else: + yield normalize_path(d) + + @staticmethod + def _validate_optimize(value): + try: + value = int(value) + if value not in range(3): + raise ValueError + except ValueError as e: + raise DistutilsOptionError( + "--optimize must be 0, 1, or 2" + ) from e + + return value + def _fix_install_dir_for_user_site(self): """ Fix the install_dir if "--user" was used. """ - if not self.user or not site.ENABLE_USER_SITE: + if not self.user: return self.create_home_path() @@ -381,7 +377,7 @@ class easy_install(Command): msg = "User base directory is not specified" raise DistutilsPlatformError(msg) self.install_base = self.install_platbase = self.install_userbase - scheme_name = os.name.replace('posix', 'unix') + '_user' + scheme_name = f'{os.name}_user' self.select_scheme(scheme_name) def _expand_attrs(self, attrs): @@ -414,8 +410,8 @@ class easy_install(Command): if show_deprecation: self.announce( "WARNING: The easy_install command is deprecated " - "and will be removed in a future version." - , log.WARN, + "and will be removed in a future version.", + log.WARN, ) if self.verbose != self.distribution.verbose: log.set_verbosity(self.verbose) @@ -453,12 +449,18 @@ class easy_install(Command): def warn_deprecated_options(self): pass - def check_site_dir(self): + def check_site_dir(self): # noqa: C901 # is too complex (12) # FIXME """Verify that self.install_dir is .pth-capable dir, if needed""" instdir = normalize_path(self.install_dir) pth_file = os.path.join(instdir, 'easy-install.pth') + if not os.path.exists(instdir): + try: + os.makedirs(instdir) + except (OSError, IOError): + self.cant_write_to_target() + # Is it a configured, PYTHONPATH, implicit, or explicit site dir? is_site_dir = instdir in self.all_site_dirs @@ -478,8 +480,9 @@ class easy_install(Command): self.cant_write_to_target() if not is_site_dir and not self.multi_version: - # Can't install non-multi to non-site dir - raise DistutilsError(self.no_default_version_msg()) + # Can't install non-multi to non-site dir with easy_install + pythonpath = os.environ.get('PYTHONPATH', '') + log.warn(self.__no_default_msg, self.install_dir, pythonpath) if is_site_dir: if self.pth_file is None: @@ -487,12 +490,8 @@ class easy_install(Command): else: self.pth_file = None - if instdir not in map(normalize_path, _pythonpath()): - # only PYTHONPATH dirs need a site.py, so pretend it's there - self.sitepy_installed = True - elif self.multi_version and not os.path.exists(pth_file): - self.sitepy_installed = True # don't need site.py in this case - self.pth_file = None # and don't create a .pth file + if self.multi_version and not os.path.exists(pth_file): + self.pth_file = None # don't create a .pth file self.install_dir = instdir __cant_write_msg = textwrap.dedent(""" @@ -507,13 +506,13 @@ class easy_install(Command): the distutils default setting) was: %s - """).lstrip() + """).lstrip() # noqa __not_exists_id = textwrap.dedent(""" This directory does not currently exist. Please create it and try again, or choose a different installation directory (using the -d or --install-dir option). - """).lstrip() + """).lstrip() # noqa __access_msg = textwrap.dedent(""" Perhaps your account does not have write access to this directory? If the @@ -526,10 +525,10 @@ class easy_install(Command): For information on other options, you may wish to consult the documentation at: - https://setuptools.readthedocs.io/en/latest/easy_install.html + https://setuptools.pypa.io/en/latest/deprecated/easy_install.html Please make the appropriate changes for your system and try again. - """).lstrip() + """).lstrip() # noqa def cant_write_to_target(self): msg = self.__cant_write_msg % (sys.exc_info()[1], self.install_dir,) @@ -557,7 +556,7 @@ class easy_install(Command): if ok_exists: os.unlink(ok_file) dirname = os.path.dirname(ok_file) - pkg_resources.py31compat.makedirs(dirname, exist_ok=True) + os.makedirs(dirname, exist_ok=True) f = open(pth_file, 'w') except (OSError, IOError): self.cant_write_to_target() @@ -646,12 +645,9 @@ class easy_install(Command): # cast to str as workaround for #709 and #710 and #712 yield str(tmpdir) finally: - os.path.exists(tmpdir) and rmtree(rmtree_safe(tmpdir)) + os.path.exists(tmpdir) and rmtree(tmpdir) def easy_install(self, spec, deps=False): - if not self.editable: - self.install_site_py() - with self._tmpdir() as tmpdir: if not isinstance(spec, Requirement): if URL_SCHEME(spec): @@ -721,15 +717,16 @@ class easy_install(Command): return dist def select_scheme(self, name): - """Sets the install directories by applying the install schemes.""" - # it's the caller's problem if they supply a bad name! - scheme = INSTALL_SCHEMES[name] - for key in SCHEME_KEYS: - attrname = 'install_' + key - if getattr(self, attrname) is None: - setattr(self, attrname, scheme[key]) + try: + install._select_scheme(self, name) + except AttributeError: + # stdlib distutils + install.install.select_scheme(self, name.replace('posix', 'unix')) - def process_distribution(self, requirement, dist, deps=True, *info): + # FIXME: 'easy_install.process_distribution' is too complex (12) + def process_distribution( # noqa: C901 + self, requirement, dist, deps=True, *info, + ): self.update_pth(dist) self.package_index.add(dist) if dist in self.local_index[dist.key]: @@ -758,9 +755,9 @@ class easy_install(Command): [requirement], self.local_index, self.easy_install ) except DistributionNotFound as e: - raise DistutilsError(str(e)) + raise DistutilsError(str(e)) from e except VersionConflict as e: - raise DistutilsError(e.report()) + raise DistutilsError(e.report()) from e if self.always_copy or self.always_copy_from: # Force all the relevant distros to be copied or activated for dist in distros: @@ -853,12 +850,19 @@ class easy_install(Command): def install_eggs(self, spec, dist_filename, tmpdir): # .egg dirs or files are already built, so just return them - if dist_filename.lower().endswith('.egg'): - return [self.install_egg(dist_filename, tmpdir)] - elif dist_filename.lower().endswith('.exe'): - return [self.install_exe(dist_filename, tmpdir)] - elif dist_filename.lower().endswith('.whl'): - return [self.install_wheel(dist_filename, tmpdir)] + installer_map = { + '.egg': self.install_egg, + '.exe': self.install_exe, + '.whl': self.install_wheel, + } + try: + install_dist = installer_map[ + dist_filename.lower()[-4:] + ] + except KeyError: + pass + else: + return [install_dist(dist_filename, tmpdir)] # Anything else, try to extract and build setup_base = tmpdir @@ -903,7 +907,8 @@ class easy_install(Command): metadata = EggMetadata(zipimport.zipimporter(egg_path)) return Distribution.from_filename(egg_path, metadata=metadata) - def install_egg(self, egg_path, tmpdir): + # FIXME: 'easy_install.install_egg' is too complex (11) + def install_egg(self, egg_path, tmpdir): # noqa: C901 destination = os.path.join( self.install_dir, os.path.basename(egg_path), @@ -913,7 +918,9 @@ class easy_install(Command): ensure_directory(destination) dist = self.egg_distribution(egg_path) - if not samefile(egg_path, destination): + if not ( + os.path.exists(destination) and os.path.samefile(egg_path, destination) + ): if os.path.isdir(destination) and not os.path.islink(destination): dir_util.remove_tree(destination, dry_run=self.dry_run) elif os.path.exists(destination): @@ -1002,7 +1009,8 @@ class easy_install(Command): # install the .egg return self.install_egg(egg_path, tmpdir) - def exe_to_egg(self, dist_filename, egg_tmp): + # FIXME: 'easy_install.exe_to_egg' is too complex (12) + def exe_to_egg(self, dist_filename, egg_tmp): # noqa: C901 """Extract a bdist_wininst to the directories an egg would use""" # Check for .pth file and set up prefix translations prefixes = get_exe_prefixes(dist_filename) @@ -1093,13 +1101,13 @@ class easy_install(Command): pkg_resources.require("%(name)s") # latest installed version pkg_resources.require("%(name)s==%(version)s") # this exact version pkg_resources.require("%(name)s>=%(version)s") # this version or higher - """).lstrip() + """).lstrip() # noqa __id_warning = textwrap.dedent(""" Note also that the installation directory must be on sys.path at runtime for this to work. (e.g. by being the application's script directory, by being on PYTHONPATH, or by being added to sys.path by your code.) - """) + """) # noqa def installation_report(self, req, dist, what="Installed"): """Helpful installation message for display to package users""" @@ -1124,7 +1132,7 @@ class easy_install(Command): %(python)s setup.py develop See the setuptools documentation for the "develop" command for more info. - """).lstrip() + """).lstrip() # noqa def report_editable(self, spec, setup_script): dirname = os.path.dirname(setup_script) @@ -1149,7 +1157,9 @@ class easy_install(Command): try: run_setup(setup_script, args) except SystemExit as v: - raise DistutilsError("Setup script exited with %s" % (v.args[0],)) + raise DistutilsError( + "Setup script exited with %s" % (v.args[0],) + ) from v def build_and_install(self, setup_script, setup_base): args = ['bdist_egg', '--dist-dir'] @@ -1192,22 +1202,24 @@ class easy_install(Command): for key, val in ei_opts.items(): if key not in fetch_directives: continue - fetch_options[key.replace('_', '-')] = val[1] + fetch_options[key] = val[1] # create a settings dictionary suitable for `edit_config` settings = dict(easy_install=fetch_options) cfg_filename = os.path.join(base, 'setup.cfg') setopt.edit_config(cfg_filename, settings) - def update_pth(self, dist): + def update_pth(self, dist): # noqa: C901 # is too complex (11) # FIXME if self.pth_file is None: return for d in self.pth_file[dist.key]: # drop old entries - if self.multi_version or d.location != dist.location: - log.info("Removing %s from easy-install.pth file", d) - self.pth_file.remove(d) - if d.location in self.shadow_path: - self.shadow_path.remove(d.location) + if not self.multi_version and d.location == dist.location: + continue + + log.info("Removing %s from easy-install.pth file", d) + self.pth_file.remove(d) + if d.location in self.shadow_path: + self.shadow_path.remove(d.location) if not self.multi_version: if dist.location in self.pth_file.paths: @@ -1221,19 +1233,21 @@ class easy_install(Command): if dist.location not in self.shadow_path: self.shadow_path.append(dist.location) - if not self.dry_run: + if self.dry_run: + return - self.pth_file.save() + self.pth_file.save() - if dist.key == 'setuptools': - # Ensure that setuptools itself never becomes unavailable! - # XXX should this check for latest version? - filename = os.path.join(self.install_dir, 'setuptools.pth') - if os.path.islink(filename): - os.unlink(filename) - f = open(filename, 'wt') - f.write(self.pth_file.make_relative(dist.location) + '\n') - f.close() + if dist.key != 'setuptools': + return + + # Ensure that setuptools itself never becomes unavailable! + # XXX should this check for latest version? + filename = os.path.join(self.install_dir, 'setuptools.pth') + if os.path.islink(filename): + os.unlink(filename) + with open(filename, 'wt') as f: + f.write(self.pth_file.make_relative(dist.location) + '\n') def unpack_progress(self, src, dst): # Progress filter for unpacking @@ -1304,53 +1318,18 @@ class easy_install(Command): * You can set up the installation directory to support ".pth" files by using one of the approaches described here: - https://setuptools.readthedocs.io/en/latest/easy_install.html#custom-installation-locations + https://setuptools.pypa.io/en/latest/deprecated/easy_install.html#custom-installation-locations - Please make the appropriate changes for your system and try again.""").lstrip() - - def no_default_version_msg(self): - template = self.__no_default_msg - return template % (self.install_dir, os.environ.get('PYTHONPATH', '')) - - def install_site_py(self): - """Make sure there's a site.py in the target dir, if needed""" - - if self.sitepy_installed: - return # already did it, or don't need to - - sitepy = os.path.join(self.install_dir, "site.py") - source = resource_string("setuptools", "site-patch.py") - source = source.decode('utf-8') - current = "" - - if os.path.exists(sitepy): - log.debug("Checking existing site.py in %s", self.install_dir) - with io.open(sitepy) as strm: - current = strm.read() - - if not current.startswith('def __boot():'): - raise DistutilsError( - "%s is not a setuptools-generated site.py; please" - " remove it." % sitepy - ) - - if current != source: - log.info("Creating %s", sitepy) - if not self.dry_run: - ensure_directory(sitepy) - with io.open(sitepy, 'w', encoding='utf-8') as strm: - strm.write(source) - self.byte_compile([sitepy]) - - self.sitepy_installed = True + Please make the appropriate changes for your system and try again. + """).strip() def create_home_path(self): """Create directories under ~.""" if not self.user: return home = convert_path(os.path.expanduser("~")) - for name, path in six.iteritems(self.config_vars): + for path in only_strs(self.config_vars.values()): if path.startswith(home) and not os.path.isdir(path): self.debug_print("os.makedirs('%s', 0o700)" % path) os.makedirs(path, 0o700) @@ -1372,7 +1351,7 @@ class easy_install(Command): if self.prefix: # Set default install_dir/scripts from --prefix - config_vars = config_vars.copy() + config_vars = dict(config_vars) config_vars['base'] = self.prefix scheme = self.INSTALL_SCHEMES.get(os.name, self.DEFAULT_SCHEME) for attr, val in scheme.items(): @@ -1409,58 +1388,63 @@ def get_site_dirs(): if sys.exec_prefix != sys.prefix: prefixes.append(sys.exec_prefix) for prefix in prefixes: - if prefix: - if sys.platform in ('os2emx', 'riscos'): - sitedirs.append(os.path.join(prefix, "Lib", "site-packages")) - elif os.sep == '/': - sitedirs.extend([ - os.path.join( - prefix, - "lib", - "python{}.{}".format(*sys.version_info), - "site-packages", - ), - os.path.join(prefix, "lib", "site-python"), - ]) - else: - sitedirs.extend([ + if not prefix: + continue + + if sys.platform in ('os2emx', 'riscos'): + sitedirs.append(os.path.join(prefix, "Lib", "site-packages")) + elif os.sep == '/': + sitedirs.extend([ + os.path.join( prefix, - os.path.join(prefix, "lib", "site-packages"), - ]) - if sys.platform == 'darwin': - # for framework builds *only* we add the standard Apple - # locations. Currently only per-user, but /Library and - # /Network/Library could be added too - if 'Python.framework' in prefix: - home = os.environ.get('HOME') - if home: - home_sp = os.path.join( - home, - 'Library', - 'Python', - '{}.{}'.format(*sys.version_info), - 'site-packages', - ) - sitedirs.append(home_sp) + "lib", + "python{}.{}".format(*sys.version_info), + "site-packages", + ), + os.path.join(prefix, "lib", "site-python"), + ]) + else: + sitedirs.extend([ + prefix, + os.path.join(prefix, "lib", "site-packages"), + ]) + if sys.platform != 'darwin': + continue + + # for framework builds *only* we add the standard Apple + # locations. Currently only per-user, but /Library and + # /Network/Library could be added too + if 'Python.framework' not in prefix: + continue + + home = os.environ.get('HOME') + if not home: + continue + + home_sp = os.path.join( + home, + 'Library', + 'Python', + '{}.{}'.format(*sys.version_info), + 'site-packages', + ) + sitedirs.append(home_sp) lib_paths = get_path('purelib'), get_path('platlib') - for site_lib in lib_paths: - if site_lib not in sitedirs: - sitedirs.append(site_lib) + + sitedirs.extend(s for s in lib_paths if s not in sitedirs) if site.ENABLE_USER_SITE: sitedirs.append(site.USER_SITE) - try: + with contextlib.suppress(AttributeError): sitedirs.extend(site.getsitepackages()) - except AttributeError: - pass sitedirs = list(map(normalize_path, sitedirs)) return sitedirs -def expand_paths(inputs): +def expand_paths(inputs): # noqa: C901 # is too complex (11) # FIXME """Yield sys.path directories that might contain "old-style" packages""" seen = {} @@ -1492,13 +1476,18 @@ def expand_paths(inputs): # Yield existing non-dupe, non-import directory lines from it for line in lines: - if not line.startswith("import"): - line = normalize_path(line.rstrip()) - if line not in seen: - seen[line] = 1 - if not os.path.isdir(line): - continue - yield line, os.listdir(line) + if line.startswith("import"): + continue + + line = normalize_path(line.rstrip()) + if line in seen: + continue + + seen[line] = 1 + if not os.path.isdir(line): + continue + + yield line, os.listdir(line) def extract_wininst_cfg(dist_filename): @@ -1531,7 +1520,7 @@ def extract_wininst_cfg(dist_filename): # Now the config is in bytes, but for RawConfigParser, it should # be text, so decode it. config = config.decode(sys.getfilesystemencoding()) - cfg.readfp(six.StringIO(config)) + cfg.read_file(io.StringIO(config)) except configparser.Error: return None if not cfg.has_section('metadata') or not cfg.has_section('Setup'): @@ -1566,9 +1555,7 @@ def get_exe_prefixes(exe_filename): if name.endswith('-nspkg.pth'): continue if parts[0].upper() in ('PURELIB', 'PLATLIB'): - contents = z.read(name) - if not six.PY2: - contents = contents.decode() + contents = z.read(name).decode() for pth in yield_lines(contents): pth = pth.strip().replace('\\', '/') if not pth.startswith('import'): @@ -1591,7 +1578,7 @@ class PthDistributions(Environment): self.sitedirs = list(map(normalize_path, sitedirs)) self.basedir = normalize_path(os.path.dirname(self.filename)) self._load() - Environment.__init__(self, [], None, None) + super().__init__([], None, None) for path in yield_lines(self.paths): list(map(self.add, find_distributions(path, True))) @@ -1664,14 +1651,14 @@ class PthDistributions(Environment): if new_path: self.paths.append(dist.location) self.dirty = True - Environment.add(self, dist) + super().add(dist) def remove(self, dist): """Remove `dist` from the distribution map""" while dist.location in self.paths: self.paths.remove(dist.location) self.dirty = True - Environment.remove(self, dist) + super().remove(dist) def make_relative(self, path): npath, last = os.path.split(normalize_path(path)) @@ -1732,7 +1719,8 @@ def auto_chmod(func, arg, exc): chmod(arg, stat.S_IWRITE) return func(arg) et, ev, _ = sys.exc_info() - six.reraise(et, (ev[0], ev[1] + (" %s %s" % (func, arg)))) + # TODO: This code doesn't make sense. What is it trying to do? + raise (ev[0], ev[1] + (" %s %s" % (func, arg))) def update_dist_caches(dist_path, fix_zipimporter_caches): @@ -2068,17 +2056,38 @@ class ScriptWriter: template = textwrap.dedent(r""" # EASY-INSTALL-ENTRY-SCRIPT: %(spec)r,%(group)r,%(name)r - __requires__ = %(spec)r import re import sys - from pkg_resources import load_entry_point + + # for compatibility with easy_install; see #2198 + __requires__ = %(spec)r + + try: + from importlib.metadata import distribution + except ImportError: + try: + from importlib_metadata import distribution + except ImportError: + from pkg_resources import load_entry_point + + + def importlib_load_entry_point(spec, group, name): + dist_name, _, _ = spec.partition('==') + matches = ( + entry_point + for entry_point in distribution(dist_name).entry_points + if entry_point.group == group and entry_point.name == name + ) + return next(matches).load() + + + globals().setdefault('load_entry_point', importlib_load_entry_point) + if __name__ == '__main__': sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0]) - sys.exit( - load_entry_point(%(spec)r, %(group)r, %(name)r)() - ) - """).lstrip() + sys.exit(load_entry_point(%(spec)r, %(group)r, %(name)r)()) + """).lstrip() command_spec_class = CommandSpec @@ -2093,7 +2102,8 @@ class ScriptWriter: @classmethod def get_script_header(cls, script_text, executable=None, wininst=False): # for backward compatibility - warnings.warn("Use get_header", EasyInstallDeprecationWarning, stacklevel=2) + warnings.warn( + "Use get_header", EasyInstallDeprecationWarning, stacklevel=2) if wininst: executable = "python.exe" return cls.get_header(script_text, executable) @@ -2195,7 +2205,7 @@ class WindowsScriptWriter(ScriptWriter): @classmethod def _adjust_header(cls, type_, orig_header): """ - Make sure 'pythonw' is used for gui and and 'python' is used for + Make sure 'pythonw' is used for gui and 'python' is used for console (regardless of what sys.executable is). """ pattern = 'pythonw.exe' @@ -2265,7 +2275,10 @@ def get_win_launcher(type): """ launcher_fn = '%s.exe' % type if is_64bit(): - launcher_fn = launcher_fn.replace(".", "-64.") + if get_platform() == "win-arm64": + launcher_fn = launcher_fn.replace(".", "-arm64.") + else: + launcher_fn = launcher_fn.replace(".", "-64.") else: launcher_fn = launcher_fn.replace(".", "-32.") return resource_string('setuptools', launcher_fn) @@ -2273,10 +2286,7 @@ def get_win_launcher(type): def load_launcher_manifest(name): manifest = pkg_resources.resource_string(__name__, 'launcher manifest.xml') - if six.PY2: - return manifest % vars() - else: - return manifest.decode('utf-8') % vars() + return manifest.decode('utf-8') % vars() def rmtree(path, ignore_errors=False, onerror=auto_chmod): @@ -2289,59 +2299,14 @@ def current_umask(): return tmp -def bootstrap(): - # This function is called when setuptools*.egg is run using /bin/sh - import setuptools +def only_strs(values): + """ + Exclude non-str values. Ref #3063. + """ + return filter(lambda val: isinstance(val, str), values) - argv0 = os.path.dirname(setuptools.__path__[0]) - sys.argv[0] = argv0 - sys.argv.append(argv0) - main() - - -def main(argv=None, **kw): - from setuptools import setup - from setuptools.dist import Distribution - - class DistributionWithoutHelpCommands(Distribution): - common_usage = "" - - def _show_help(self, *args, **kw): - with _patch_usage(): - Distribution._show_help(self, *args, **kw) - - if argv is None: - argv = sys.argv[1:] - - with _patch_usage(): - setup( - script_args=['-q', 'easy_install', '-v'] + argv, - script_name=sys.argv[0] or 'easy_install', - distclass=DistributionWithoutHelpCommands, - **kw - ) - - -@contextlib.contextmanager -def _patch_usage(): - import distutils.core - USAGE = textwrap.dedent(""" - usage: %(script)s [options] requirement_or_url ... - or: %(script)s --help - """).lstrip() - - def gen_usage(script_name): - return USAGE % dict( - script=os.path.basename(script_name), - ) - - saved = distutils.core.gen_usage - distutils.core.gen_usage = gen_usage - try: - yield - finally: - distutils.core.gen_usage = saved class EasyInstallDeprecationWarning(SetuptoolsDeprecationWarning): - """Class for warning about deprecations in EasyInstall in SetupTools. Not ignored by default, unlike DeprecationWarning.""" - + """ + Warning for EasyInstall deprecations, bypassing suppression. + """ diff --git a/libs/common/setuptools/command/editable_wheel.py b/libs/common/setuptools/command/editable_wheel.py new file mode 100644 index 00000000..d60cfbeb --- /dev/null +++ b/libs/common/setuptools/command/editable_wheel.py @@ -0,0 +1,844 @@ +""" +Create a wheel that, when installed, will make the source package 'editable' +(add it to the interpreter's path, including metadata) per PEP 660. Replaces +'setup.py develop'. + +.. note:: + One of the mechanisms briefly mentioned in PEP 660 to implement editable installs is + to create a separated directory inside ``build`` and use a .pth file to point to that + directory. In the context of this file such directory is referred as + *auxiliary build directory* or ``auxiliary_dir``. +""" + +import logging +import os +import re +import shutil +import sys +import traceback +import warnings +from contextlib import suppress +from enum import Enum +from inspect import cleandoc +from itertools import chain +from pathlib import Path +from tempfile import TemporaryDirectory +from typing import ( + TYPE_CHECKING, + Dict, + Iterable, + Iterator, + List, + Mapping, + Optional, + Tuple, + TypeVar, + Union, +) + +from setuptools import Command, SetuptoolsDeprecationWarning, errors, namespaces +from setuptools.command.build_py import build_py as build_py_cls +from setuptools.discovery import find_package_path +from setuptools.dist import Distribution + +if TYPE_CHECKING: + from wheel.wheelfile import WheelFile # noqa + +if sys.version_info >= (3, 8): + from typing import Protocol +elif TYPE_CHECKING: + from typing_extensions import Protocol +else: + from abc import ABC as Protocol + +_Path = Union[str, Path] +_P = TypeVar("_P", bound=_Path) +_logger = logging.getLogger(__name__) + + +class _EditableMode(Enum): + """ + Possible editable installation modes: + `lenient` (new files automatically added to the package - DEFAULT); + `strict` (requires a new installation when files are added/removed); or + `compat` (attempts to emulate `python setup.py develop` - DEPRECATED). + """ + + STRICT = "strict" + LENIENT = "lenient" + COMPAT = "compat" # TODO: Remove `compat` after Dec/2022. + + @classmethod + def convert(cls, mode: Optional[str]) -> "_EditableMode": + if not mode: + return _EditableMode.LENIENT # default + + _mode = mode.upper() + if _mode not in _EditableMode.__members__: + raise errors.OptionError(f"Invalid editable mode: {mode!r}. Try: 'strict'.") + + if _mode == "COMPAT": + msg = """ + The 'compat' editable mode is transitional and will be removed + in future versions of `setuptools`. + Please adapt your code accordingly to use either the 'strict' or the + 'lenient' modes. + + For more information, please check: + https://setuptools.pypa.io/en/latest/userguide/development_mode.html + """ + warnings.warn(msg, SetuptoolsDeprecationWarning) + + return _EditableMode[_mode] + + +_STRICT_WARNING = """ +New or renamed files may not be automatically picked up without a new installation. +""" + +_LENIENT_WARNING = """ +Options like `package-data`, `include/exclude-package-data` or +`packages.find.exclude/include` may have no effect. +""" + + +class editable_wheel(Command): + """Build 'editable' wheel for development. + (This command is reserved for internal use of setuptools). + """ + + description = "create a PEP 660 'editable' wheel" + + user_options = [ + ("dist-dir=", "d", "directory to put final built distributions in"), + ("dist-info-dir=", "I", "path to a pre-build .dist-info directory"), + ("mode=", None, cleandoc(_EditableMode.__doc__ or "")), + ] + + def initialize_options(self): + self.dist_dir = None + self.dist_info_dir = None + self.project_dir = None + self.mode = None + + def finalize_options(self): + dist = self.distribution + self.project_dir = dist.src_root or os.curdir + self.package_dir = dist.package_dir or {} + self.dist_dir = Path(self.dist_dir or os.path.join(self.project_dir, "dist")) + + def run(self): + try: + self.dist_dir.mkdir(exist_ok=True) + self._ensure_dist_info() + + # Add missing dist_info files + self.reinitialize_command("bdist_wheel") + bdist_wheel = self.get_finalized_command("bdist_wheel") + bdist_wheel.write_wheelfile(self.dist_info_dir) + + self._create_wheel_file(bdist_wheel) + except Exception as ex: + traceback.print_exc() + msg = """ + Support for editable installs via PEP 660 was recently introduced + in `setuptools`. If you are seeing this error, please report to: + + https://github.com/pypa/setuptools/issues + + Meanwhile you can try the legacy behavior by setting an + environment variable and trying to install again: + + SETUPTOOLS_ENABLE_FEATURES="legacy-editable" + """ + raise errors.InternalError(cleandoc(msg)) from ex + + def _ensure_dist_info(self): + if self.dist_info_dir is None: + dist_info = self.reinitialize_command("dist_info") + dist_info.output_dir = self.dist_dir + dist_info.ensure_finalized() + dist_info.run() + self.dist_info_dir = dist_info.dist_info_dir + else: + assert str(self.dist_info_dir).endswith(".dist-info") + assert Path(self.dist_info_dir, "METADATA").exists() + + def _install_namespaces(self, installation_dir, pth_prefix): + # XXX: Only required to support the deprecated namespace practice + dist = self.distribution + if not dist.namespace_packages: + return + + src_root = Path(self.project_dir, self.package_dir.get("", ".")).resolve() + installer = _NamespaceInstaller(dist, installation_dir, pth_prefix, src_root) + installer.install_namespaces() + + def _find_egg_info_dir(self) -> Optional[str]: + parent_dir = Path(self.dist_info_dir).parent if self.dist_info_dir else Path() + candidates = map(str, parent_dir.glob("*.egg-info")) + return next(candidates, None) + + def _configure_build( + self, name: str, unpacked_wheel: _Path, build_lib: _Path, tmp_dir: _Path + ): + """Configure commands to behave in the following ways: + + - Build commands can write to ``build_lib`` if they really want to... + (but this folder is expected to be ignored and modules are expected to live + in the project directory...) + - Binary extensions should be built in-place (editable_mode = True) + - Data/header/script files are not part of the "editable" specification + so they are written directly to the unpacked_wheel directory. + """ + # Non-editable files (data, headers, scripts) are written directly to the + # unpacked_wheel + + dist = self.distribution + wheel = str(unpacked_wheel) + build_lib = str(build_lib) + data = str(Path(unpacked_wheel, f"{name}.data", "data")) + headers = str(Path(unpacked_wheel, f"{name}.data", "headers")) + scripts = str(Path(unpacked_wheel, f"{name}.data", "scripts")) + + # egg-info may be generated again to create a manifest (used for package data) + egg_info = dist.reinitialize_command("egg_info", reinit_subcommands=True) + egg_info.egg_base = str(tmp_dir) + egg_info.ignore_egg_info_in_manifest = True + + build = dist.reinitialize_command("build", reinit_subcommands=True) + install = dist.reinitialize_command("install", reinit_subcommands=True) + + build.build_platlib = build.build_purelib = build.build_lib = build_lib + install.install_purelib = install.install_platlib = install.install_lib = wheel + install.install_scripts = build.build_scripts = scripts + install.install_headers = headers + install.install_data = data + + install_scripts = dist.get_command_obj("install_scripts") + install_scripts.no_ep = True + + build.build_temp = str(tmp_dir) + + build_py = dist.get_command_obj("build_py") + build_py.compile = False + build_py.existing_egg_info_dir = self._find_egg_info_dir() + + self._set_editable_mode() + + build.ensure_finalized() + install.ensure_finalized() + + def _set_editable_mode(self): + """Set the ``editable_mode`` flag in the build sub-commands""" + dist = self.distribution + build = dist.get_command_obj("build") + for cmd_name in build.get_sub_commands(): + cmd = dist.get_command_obj(cmd_name) + if hasattr(cmd, "editable_mode"): + cmd.editable_mode = True + elif hasattr(cmd, "inplace"): + cmd.inplace = True # backward compatibility with distutils + + def _collect_build_outputs(self) -> Tuple[List[str], Dict[str, str]]: + files: List[str] = [] + mapping: Dict[str, str] = {} + build = self.get_finalized_command("build") + + for cmd_name in build.get_sub_commands(): + cmd = self.get_finalized_command(cmd_name) + if hasattr(cmd, "get_outputs"): + files.extend(cmd.get_outputs() or []) + if hasattr(cmd, "get_output_mapping"): + mapping.update(cmd.get_output_mapping() or {}) + + return files, mapping + + def _run_build_commands( + self, dist_name: str, unpacked_wheel: _Path, build_lib: _Path, tmp_dir: _Path + ) -> Tuple[List[str], Dict[str, str]]: + self._configure_build(dist_name, unpacked_wheel, build_lib, tmp_dir) + self._run_build_subcommands() + files, mapping = self._collect_build_outputs() + self._run_install("headers") + self._run_install("scripts") + self._run_install("data") + return files, mapping + + def _run_build_subcommands(self): + """ + Issue #3501 indicates that some plugins/customizations might rely on: + + 1. ``build_py`` not running + 2. ``build_py`` always copying files to ``build_lib`` + + However both these assumptions may be false in editable_wheel. + This method implements a temporary workaround to support the ecosystem + while the implementations catch up. + """ + # TODO: Once plugins/customisations had the chance to catch up, replace + # `self._run_build_subcommands()` with `self.run_command("build")`. + # Also remove _safely_run, TestCustomBuildPy. Suggested date: Aug/2023. + build: Command = self.get_finalized_command("build") + for name in build.get_sub_commands(): + cmd = self.get_finalized_command(name) + if name == "build_py" and type(cmd) != build_py_cls: + self._safely_run(name) + else: + self.run_command(name) + + def _safely_run(self, cmd_name: str): + try: + return self.run_command(cmd_name) + except Exception: + msg = f"""{traceback.format_exc()}\n + If you are seeing this warning it is very likely that a setuptools + plugin or customization overrides the `{cmd_name}` command, without + taking into consideration how editable installs run build steps + starting from v64.0.0. + + Plugin authors and developers relying on custom build steps are encouraged + to update their `{cmd_name}` implementation considering the information in + https://setuptools.pypa.io/en/latest/userguide/extension.html + about editable installs. + + For the time being `setuptools` will silence this error and ignore + the faulty command, but this behaviour will change in future versions.\n + """ + warnings.warn(msg, SetuptoolsDeprecationWarning, stacklevel=2) + + def _create_wheel_file(self, bdist_wheel): + from wheel.wheelfile import WheelFile + + dist_info = self.get_finalized_command("dist_info") + dist_name = dist_info.name + tag = "-".join(bdist_wheel.get_tag()) + build_tag = "0.editable" # According to PEP 427 needs to start with digit + archive_name = f"{dist_name}-{build_tag}-{tag}.whl" + wheel_path = Path(self.dist_dir, archive_name) + if wheel_path.exists(): + wheel_path.unlink() + + unpacked_wheel = TemporaryDirectory(suffix=archive_name) + build_lib = TemporaryDirectory(suffix=".build-lib") + build_tmp = TemporaryDirectory(suffix=".build-temp") + + with unpacked_wheel as unpacked, build_lib as lib, build_tmp as tmp: + unpacked_dist_info = Path(unpacked, Path(self.dist_info_dir).name) + shutil.copytree(self.dist_info_dir, unpacked_dist_info) + self._install_namespaces(unpacked, dist_info.name) + files, mapping = self._run_build_commands(dist_name, unpacked, lib, tmp) + strategy = self._select_strategy(dist_name, tag, lib) + with strategy, WheelFile(wheel_path, "w") as wheel_obj: + strategy(wheel_obj, files, mapping) + wheel_obj.write_files(unpacked) + + return wheel_path + + def _run_install(self, category: str): + has_category = getattr(self.distribution, f"has_{category}", None) + if has_category and has_category(): + _logger.info(f"Installing {category} as non editable") + self.run_command(f"install_{category}") + + def _select_strategy( + self, + name: str, + tag: str, + build_lib: _Path, + ) -> "EditableStrategy": + """Decides which strategy to use to implement an editable installation.""" + build_name = f"__editable__.{name}-{tag}" + project_dir = Path(self.project_dir) + mode = _EditableMode.convert(self.mode) + + if mode is _EditableMode.STRICT: + auxiliary_dir = _empty_dir(Path(self.project_dir, "build", build_name)) + return _LinkTree(self.distribution, name, auxiliary_dir, build_lib) + + packages = _find_packages(self.distribution) + has_simple_layout = _simple_layout(packages, self.package_dir, project_dir) + is_compat_mode = mode is _EditableMode.COMPAT + if set(self.package_dir) == {""} and has_simple_layout or is_compat_mode: + # src-layout(ish) is relatively safe for a simple pth file + src_dir = self.package_dir.get("", ".") + return _StaticPth(self.distribution, name, [Path(project_dir, src_dir)]) + + # Use a MetaPathFinder to avoid adding accidental top-level packages/modules + return _TopLevelFinder(self.distribution, name) + + +class EditableStrategy(Protocol): + def __call__(self, wheel: "WheelFile", files: List[str], mapping: Dict[str, str]): + ... + + def __enter__(self): + ... + + def __exit__(self, _exc_type, _exc_value, _traceback): + ... + + +class _StaticPth: + def __init__(self, dist: Distribution, name: str, path_entries: List[Path]): + self.dist = dist + self.name = name + self.path_entries = path_entries + + def __call__(self, wheel: "WheelFile", files: List[str], mapping: Dict[str, str]): + entries = "\n".join((str(p.resolve()) for p in self.path_entries)) + contents = bytes(f"{entries}\n", "utf-8") + wheel.writestr(f"__editable__.{self.name}.pth", contents) + + def __enter__(self): + msg = f""" + Editable install will be performed using .pth file to extend `sys.path` with: + {list(map(os.fspath, self.path_entries))!r} + """ + _logger.warning(msg + _LENIENT_WARNING) + return self + + def __exit__(self, _exc_type, _exc_value, _traceback): + ... + + +class _LinkTree(_StaticPth): + """ + Creates a ``.pth`` file that points to a link tree in the ``auxiliary_dir``. + + This strategy will only link files (not dirs), so it can be implemented in + any OS, even if that means using hardlinks instead of symlinks. + + By collocating ``auxiliary_dir`` and the original source code, limitations + with hardlinks should be avoided. + """ + def __init__( + self, dist: Distribution, + name: str, + auxiliary_dir: _Path, + build_lib: _Path, + ): + self.auxiliary_dir = Path(auxiliary_dir) + self.build_lib = Path(build_lib).resolve() + self._file = dist.get_command_obj("build_py").copy_file + super().__init__(dist, name, [self.auxiliary_dir]) + + def __call__(self, wheel: "WheelFile", files: List[str], mapping: Dict[str, str]): + self._create_links(files, mapping) + super().__call__(wheel, files, mapping) + + def _normalize_output(self, file: str) -> Optional[str]: + # Files relative to build_lib will be normalized to None + with suppress(ValueError): + path = Path(file).resolve().relative_to(self.build_lib) + return str(path).replace(os.sep, '/') + return None + + def _create_file(self, relative_output: str, src_file: str, link=None): + dest = self.auxiliary_dir / relative_output + if not dest.parent.is_dir(): + dest.parent.mkdir(parents=True) + self._file(src_file, dest, link=link) + + def _create_links(self, outputs, output_mapping): + self.auxiliary_dir.mkdir(parents=True, exist_ok=True) + link_type = "sym" if _can_symlink_files(self.auxiliary_dir) else "hard" + mappings = { + self._normalize_output(k): v + for k, v in output_mapping.items() + } + mappings.pop(None, None) # remove files that are not relative to build_lib + + for output in outputs: + relative = self._normalize_output(output) + if relative and relative not in mappings: + self._create_file(relative, output) + + for relative, src in mappings.items(): + self._create_file(relative, src, link=link_type) + + def __enter__(self): + msg = "Strict editable install will be performed using a link tree.\n" + _logger.warning(msg + _STRICT_WARNING) + return self + + def __exit__(self, _exc_type, _exc_value, _traceback): + msg = f"""\n + Strict editable installation performed using the auxiliary directory: + {self.auxiliary_dir} + + Please be careful to not remove this directory, otherwise you might not be able + to import/use your package. + """ + warnings.warn(msg, InformationOnly) + + +class _TopLevelFinder: + def __init__(self, dist: Distribution, name: str): + self.dist = dist + self.name = name + + def __call__(self, wheel: "WheelFile", files: List[str], mapping: Dict[str, str]): + src_root = self.dist.src_root or os.curdir + top_level = chain(_find_packages(self.dist), _find_top_level_modules(self.dist)) + package_dir = self.dist.package_dir or {} + roots = _find_package_roots(top_level, package_dir, src_root) + + namespaces_: Dict[str, List[str]] = dict(chain( + _find_namespaces(self.dist.packages or [], roots), + ((ns, []) for ns in _find_virtual_namespaces(roots)), + )) + + name = f"__editable__.{self.name}.finder" + finder = _make_identifier(name) + content = bytes(_finder_template(name, roots, namespaces_), "utf-8") + wheel.writestr(f"{finder}.py", content) + + content = bytes(f"import {finder}; {finder}.install()", "utf-8") + wheel.writestr(f"__editable__.{self.name}.pth", content) + + def __enter__(self): + msg = "Editable install will be performed using a meta path finder.\n" + _logger.warning(msg + _LENIENT_WARNING) + return self + + def __exit__(self, _exc_type, _exc_value, _traceback): + msg = """\n + Please be careful with folders in your working directory with the same + name as your package as they may take precedence during imports. + """ + warnings.warn(msg, InformationOnly) + + +def _can_symlink_files(base_dir: Path) -> bool: + with TemporaryDirectory(dir=str(base_dir.resolve())) as tmp: + path1, path2 = Path(tmp, "file1.txt"), Path(tmp, "file2.txt") + path1.write_text("file1", encoding="utf-8") + with suppress(AttributeError, NotImplementedError, OSError): + os.symlink(path1, path2) + if path2.is_symlink() and path2.read_text(encoding="utf-8") == "file1": + return True + + try: + os.link(path1, path2) # Ensure hard links can be created + except Exception as ex: + msg = ( + "File system does not seem to support either symlinks or hard links. " + "Strict editable installs require one of them to be supported." + ) + raise LinksNotSupported(msg) from ex + return False + + +def _simple_layout( + packages: Iterable[str], package_dir: Dict[str, str], project_dir: Path +) -> bool: + """Return ``True`` if: + - all packages are contained by the same parent directory, **and** + - all packages become importable if the parent directory is added to ``sys.path``. + + >>> _simple_layout(['a'], {"": "src"}, "/tmp/myproj") + True + >>> _simple_layout(['a', 'a.b'], {"": "src"}, "/tmp/myproj") + True + >>> _simple_layout(['a', 'a.b'], {}, "/tmp/myproj") + True + >>> _simple_layout(['a', 'a.a1', 'a.a1.a2', 'b'], {"": "src"}, "/tmp/myproj") + True + >>> _simple_layout(['a', 'a.a1', 'a.a1.a2', 'b'], {"a": "a", "b": "b"}, ".") + True + >>> _simple_layout(['a', 'a.a1', 'a.a1.a2', 'b'], {"a": "_a", "b": "_b"}, ".") + False + >>> _simple_layout(['a', 'a.a1', 'a.a1.a2', 'b'], {"a": "_a"}, "/tmp/myproj") + False + >>> _simple_layout(['a', 'a.a1', 'a.a1.a2', 'b'], {"a.a1.a2": "_a2"}, ".") + False + >>> _simple_layout(['a', 'a.b'], {"": "src", "a.b": "_ab"}, "/tmp/myproj") + False + >>> # Special cases, no packages yet: + >>> _simple_layout([], {"": "src"}, "/tmp/myproj") + True + >>> _simple_layout([], {"a": "_a", "": "src"}, "/tmp/myproj") + False + """ + layout = { + pkg: find_package_path(pkg, package_dir, project_dir) + for pkg in packages + } + if not layout: + return set(package_dir) in ({}, {""}) + parent = os.path.commonpath([_parent_path(k, v) for k, v in layout.items()]) + return all( + _normalize_path(Path(parent, *key.split('.'))) == _normalize_path(value) + for key, value in layout.items() + ) + + +def _parent_path(pkg, pkg_path): + """Infer the parent path containing a package, that if added to ``sys.path`` would + allow importing that package. + When ``pkg`` is directly mapped into a directory with a different name, return its + own path. + >>> _parent_path("a", "src/a") + 'src' + >>> _parent_path("b", "src/c") + 'src/c' + """ + parent = pkg_path[:-len(pkg)] if pkg_path.endswith(pkg) else pkg_path + return parent.rstrip("/" + os.sep) + + +def _find_packages(dist: Distribution) -> Iterator[str]: + yield from iter(dist.packages or []) + + py_modules = dist.py_modules or [] + nested_modules = [mod for mod in py_modules if "." in mod] + if dist.ext_package: + yield dist.ext_package + else: + ext_modules = dist.ext_modules or [] + nested_modules += [x.name for x in ext_modules if "." in x.name] + + for module in nested_modules: + package, _, _ = module.rpartition(".") + yield package + + +def _find_top_level_modules(dist: Distribution) -> Iterator[str]: + py_modules = dist.py_modules or [] + yield from (mod for mod in py_modules if "." not in mod) + + if not dist.ext_package: + ext_modules = dist.ext_modules or [] + yield from (x.name for x in ext_modules if "." not in x.name) + + +def _find_package_roots( + packages: Iterable[str], + package_dir: Mapping[str, str], + src_root: _Path, +) -> Dict[str, str]: + pkg_roots: Dict[str, str] = { + pkg: _absolute_root(find_package_path(pkg, package_dir, src_root)) + for pkg in sorted(packages) + } + + return _remove_nested(pkg_roots) + + +def _absolute_root(path: _Path) -> str: + """Works for packages and top-level modules""" + path_ = Path(path) + parent = path_.parent + + if path_.exists(): + return str(path_.resolve()) + else: + return str(parent.resolve() / path_.name) + + +def _find_virtual_namespaces(pkg_roots: Dict[str, str]) -> Iterator[str]: + """By carefully designing ``package_dir``, it is possible to implement the logical + structure of PEP 420 in a package without the corresponding directories. + + Moreover a parent package can be purposefully/accidentally skipped in the discovery + phase (e.g. ``find_packages(include=["mypkg.*"])``, when ``mypkg.foo`` is included + by ``mypkg`` itself is not). + We consider this case to also be a virtual namespace (ignoring the original + directory) to emulate a non-editable installation. + + This function will try to find these kinds of namespaces. + """ + for pkg in pkg_roots: + if "." not in pkg: + continue + parts = pkg.split(".") + for i in range(len(parts) - 1, 0, -1): + partial_name = ".".join(parts[:i]) + path = Path(find_package_path(partial_name, pkg_roots, "")) + if not path.exists() or partial_name not in pkg_roots: + # partial_name not in pkg_roots ==> purposefully/accidentally skipped + yield partial_name + + +def _find_namespaces( + packages: List[str], pkg_roots: Dict[str, str] +) -> Iterator[Tuple[str, List[str]]]: + for pkg in packages: + path = find_package_path(pkg, pkg_roots, "") + if Path(path).exists() and not Path(path, "__init__.py").exists(): + yield (pkg, [path]) + + +def _remove_nested(pkg_roots: Dict[str, str]) -> Dict[str, str]: + output = dict(pkg_roots.copy()) + + for pkg, path in reversed(list(pkg_roots.items())): + if any( + pkg != other and _is_nested(pkg, path, other, other_path) + for other, other_path in pkg_roots.items() + ): + output.pop(pkg) + + return output + + +def _is_nested(pkg: str, pkg_path: str, parent: str, parent_path: str) -> bool: + """ + Return ``True`` if ``pkg`` is nested inside ``parent`` both logically and in the + file system. + >>> _is_nested("a.b", "path/a/b", "a", "path/a") + True + >>> _is_nested("a.b", "path/a/b", "a", "otherpath/a") + False + >>> _is_nested("a.b", "path/a/b", "c", "path/c") + False + >>> _is_nested("a.a", "path/a/a", "a", "path/a") + True + >>> _is_nested("b.a", "path/b/a", "a", "path/a") + False + """ + norm_pkg_path = _normalize_path(pkg_path) + rest = pkg.replace(parent, "", 1).strip(".").split(".") + return ( + pkg.startswith(parent) + and norm_pkg_path == _normalize_path(Path(parent_path, *rest)) + ) + + +def _normalize_path(filename: _Path) -> str: + """Normalize a file/dir name for comparison purposes""" + # See pkg_resources.normalize_path + file = os.path.abspath(filename) if sys.platform == 'cygwin' else filename + return os.path.normcase(os.path.realpath(os.path.normpath(file))) + + +def _empty_dir(dir_: _P) -> _P: + """Create a directory ensured to be empty. Existing files may be removed.""" + shutil.rmtree(dir_, ignore_errors=True) + os.makedirs(dir_) + return dir_ + + +def _make_identifier(name: str) -> str: + """Make a string safe to be used as Python identifier. + >>> _make_identifier("12abc") + '_12abc' + >>> _make_identifier("__editable__.myns.pkg-78.9.3_local") + '__editable___myns_pkg_78_9_3_local' + """ + safe = re.sub(r'\W|^(?=\d)', '_', name) + assert safe.isidentifier() + return safe + + +class _NamespaceInstaller(namespaces.Installer): + def __init__(self, distribution, installation_dir, editable_name, src_root): + self.distribution = distribution + self.src_root = src_root + self.installation_dir = installation_dir + self.editable_name = editable_name + self.outputs = [] + self.dry_run = False + + def _get_target(self): + """Installation target.""" + return os.path.join(self.installation_dir, self.editable_name) + + def _get_root(self): + """Where the modules/packages should be loaded from.""" + return repr(str(self.src_root)) + + +_FINDER_TEMPLATE = """\ +import sys +from importlib.machinery import ModuleSpec +from importlib.machinery import all_suffixes as module_suffixes +from importlib.util import spec_from_file_location +from itertools import chain +from pathlib import Path + +MAPPING = {mapping!r} +NAMESPACES = {namespaces!r} +PATH_PLACEHOLDER = {name!r} + ".__path_hook__" + + +class _EditableFinder: # MetaPathFinder + @classmethod + def find_spec(cls, fullname, path=None, target=None): + for pkg, pkg_path in reversed(list(MAPPING.items())): + if fullname == pkg or fullname.startswith(f"{{pkg}}."): + rest = fullname.replace(pkg, "", 1).strip(".").split(".") + return cls._find_spec(fullname, Path(pkg_path, *rest)) + + return None + + @classmethod + def _find_spec(cls, fullname, candidate_path): + init = candidate_path / "__init__.py" + candidates = (candidate_path.with_suffix(x) for x in module_suffixes()) + for candidate in chain([init], candidates): + if candidate.exists(): + return spec_from_file_location(fullname, candidate) + + +class _EditableNamespaceFinder: # PathEntryFinder + @classmethod + def _path_hook(cls, path): + if path == PATH_PLACEHOLDER: + return cls + raise ImportError + + @classmethod + def _paths(cls, fullname): + # Ensure __path__ is not empty for the spec to be considered a namespace. + return NAMESPACES[fullname] or MAPPING.get(fullname) or [PATH_PLACEHOLDER] + + @classmethod + def find_spec(cls, fullname, target=None): + if fullname in NAMESPACES: + spec = ModuleSpec(fullname, None, is_package=True) + spec.submodule_search_locations = cls._paths(fullname) + return spec + return None + + @classmethod + def find_module(cls, fullname): + return None + + +def install(): + if not any(finder == _EditableFinder for finder in sys.meta_path): + sys.meta_path.append(_EditableFinder) + + if not NAMESPACES: + return + + if not any(hook == _EditableNamespaceFinder._path_hook for hook in sys.path_hooks): + # PathEntryFinder is needed to create NamespaceSpec without private APIS + sys.path_hooks.append(_EditableNamespaceFinder._path_hook) + if PATH_PLACEHOLDER not in sys.path: + sys.path.append(PATH_PLACEHOLDER) # Used just to trigger the path hook +""" + + +def _finder_template( + name: str, mapping: Mapping[str, str], namespaces: Dict[str, List[str]] +) -> str: + """Create a string containing the code for the``MetaPathFinder`` and + ``PathEntryFinder``. + """ + mapping = dict(sorted(mapping.items(), key=lambda p: p[0])) + return _FINDER_TEMPLATE.format(name=name, mapping=mapping, namespaces=namespaces) + + +class InformationOnly(UserWarning): + """Currently there is no clear way of displaying messages to the users + that use the setuptools backend directly via ``pip``. + The only thing that might work is a warning, although it is not the + most appropriate tool for the job... + """ + + +class LinksNotSupported(errors.FileError): + """File system does not seem to support either symlinks or hard links.""" diff --git a/libs/common/setuptools/command/egg_info.py b/libs/common/setuptools/command/egg_info.py index a5c5a2fc..25888ed8 100644 --- a/libs/common/setuptools/command/egg_info.py +++ b/libs/common/setuptools/command/egg_info.py @@ -8,6 +8,7 @@ from distutils.util import convert_path from distutils import log import distutils.errors import distutils.filelist +import functools import os import re import sys @@ -16,8 +17,8 @@ import warnings import time import collections -from setuptools.extern import six -from setuptools.extern.six.moves import map +from .._importlib import metadata +from .. import _entry_points from setuptools import Command from setuptools.command.sdist import sdist @@ -25,15 +26,17 @@ from setuptools.command.sdist import walk_revctrl from setuptools.command.setopt import edit_config from setuptools.command import bdist_egg from pkg_resources import ( - parse_requirements, safe_name, parse_version, - safe_version, yield_lines, EntryPoint, iter_entry_points, to_filename) + Requirement, safe_name, parse_version, + safe_version, to_filename) import setuptools.unicode_utils as unicode_utils from setuptools.glob import glob from setuptools.extern import packaging +from setuptools.extern.jaraco.text import yield_lines from setuptools import SetuptoolsDeprecationWarning -def translate_pattern(glob): + +def translate_pattern(glob): # noqa: C901 # is too complex (14) # FIXME """ Translate a file path glob like '*.txt' in to a regular expression. This differs from fnmatch.translate which allows wildcards to match @@ -113,7 +116,7 @@ def translate_pattern(glob): pat += sep pat += r'\Z' - return re.compile(pat, flags=re.MULTILINE|re.DOTALL) + return re.compile(pat, flags=re.MULTILINE | re.DOTALL) class InfoCommon: @@ -125,14 +128,29 @@ class InfoCommon: return safe_name(self.distribution.get_name()) def tagged_version(self): - version = self.distribution.get_version() - # egg_info may be called more than once for a distribution, - # in which case the version string already contains all tags. - if self.vtags and version.endswith(self.vtags): - return safe_version(version) - return safe_version(version + self.vtags) + return safe_version(self._maybe_tag(self.distribution.get_version())) - def tags(self): + def _maybe_tag(self, version): + """ + egg_info may be called more than once for a distribution, + in which case the version string already contains all tags. + """ + return ( + version if self.vtags and self._already_tagged(version) + else version + self.vtags + ) + + def _already_tagged(self, version: str) -> bool: + # Depending on their format, tags may change with version normalization. + # So in addition the regular tags, we have to search for the normalized ones. + return version.endswith(self.vtags) or version.endswith(self._safe_tags()) + + def _safe_tags(self) -> str: + # To implement this we can rely on `safe_version` pretending to be version 0 + # followed by tags. Then we simply discard the starting 0 (fake version number) + return safe_version(f"0{self.vtags}")[1:] + + def tags(self) -> str: version = '' if self.tag_build: version += self.tag_build @@ -164,6 +182,7 @@ class egg_info(InfoCommon, Command): self.egg_info = None self.egg_version = None self.broken_egg_info = False + self.ignore_egg_info_in_manifest = False #################################### # allow the 'tag_svn_revision' to be detected and @@ -201,17 +220,13 @@ class egg_info(InfoCommon, Command): try: is_version = isinstance(parsed_version, packaging.version.Version) - spec = ( - "%s==%s" if is_version else "%s===%s" - ) - list( - parse_requirements(spec % (self.egg_name, self.egg_version)) - ) - except ValueError: + spec = "%s==%s" if is_version else "%s===%s" + Requirement(spec % (self.egg_name, self.egg_version)) + except ValueError as e: raise distutils.errors.DistutilsOptionError( "Invalid distribution name or version syntax: %s-%s" % (self.egg_name, self.egg_version) - ) + ) from e if self.egg_base is None: dirs = self.distribution.package_dir @@ -266,8 +281,7 @@ class egg_info(InfoCommon, Command): to the file. """ log.info("writing %s to %s", what, filename) - if not six.PY2: - data = data.encode("utf-8") + data = data.encode("utf-8") if not self.dry_run: f = open(filename, 'wb') f.write(data) @@ -282,10 +296,8 @@ class egg_info(InfoCommon, Command): def run(self): self.mkpath(self.egg_info) os.utime(self.egg_info, None) - installer = self.distribution.fetch_build_egg - for ep in iter_entry_points('egg_info.writers'): - ep.require(installer=installer) - writer = ep.resolve() + for ep in metadata.entry_points(group='egg_info.writers'): + writer = ep.load() writer(self, ep.name, os.path.join(self.egg_info, ep.name)) # Get rid of native_libs.txt if it was put there by older bdist_egg @@ -299,6 +311,7 @@ class egg_info(InfoCommon, Command): """Generate SOURCES.txt manifest file""" manifest_filename = os.path.join(self.egg_info, "SOURCES.txt") mm = manifest_maker(self.distribution) + mm.ignore_egg_info_dir = self.ignore_egg_info_in_manifest mm.manifest = manifest_filename mm.run() self.filelist = mm.filelist @@ -322,6 +335,10 @@ class egg_info(InfoCommon, Command): class FileList(_FileList): # Implementations of the various MANIFEST.in commands + def __init__(self, warn=None, debug_print=None, ignore_egg_info_dir=False): + super().__init__(warn, debug_print) + self.ignore_egg_info_dir = ignore_egg_info_dir + def process_template_line(self, line): # Parse the line: split it up, make sure the right number of words # is there, and return the relevant words. 'action' is always @@ -330,70 +347,74 @@ class FileList(_FileList): # patterns, (dir and patterns), or (dir_pattern). (action, patterns, dir, dir_pattern) = self._parse_template_line(line) + action_map = { + 'include': self.include, + 'exclude': self.exclude, + 'global-include': self.global_include, + 'global-exclude': self.global_exclude, + 'recursive-include': functools.partial( + self.recursive_include, dir, + ), + 'recursive-exclude': functools.partial( + self.recursive_exclude, dir, + ), + 'graft': self.graft, + 'prune': self.prune, + } + log_map = { + 'include': "warning: no files found matching '%s'", + 'exclude': ( + "warning: no previously-included files found " + "matching '%s'" + ), + 'global-include': ( + "warning: no files found matching '%s' " + "anywhere in distribution" + ), + 'global-exclude': ( + "warning: no previously-included files matching " + "'%s' found anywhere in distribution" + ), + 'recursive-include': ( + "warning: no files found matching '%s' " + "under directory '%s'" + ), + 'recursive-exclude': ( + "warning: no previously-included files matching " + "'%s' found under directory '%s'" + ), + 'graft': "warning: no directories found matching '%s'", + 'prune': "no previously-included directories found matching '%s'", + } + + try: + process_action = action_map[action] + except KeyError: + raise DistutilsInternalError( + "this cannot happen: invalid action '{action!s}'". + format(action=action), + ) + # OK, now we know that the action is valid and we have the # right number of words on the line for that action -- so we # can proceed with minimal error-checking. - if action == 'include': - self.debug_print("include " + ' '.join(patterns)) - for pattern in patterns: - if not self.include(pattern): - log.warn("warning: no files found matching '%s'", pattern) - elif action == 'exclude': - self.debug_print("exclude " + ' '.join(patterns)) - for pattern in patterns: - if not self.exclude(pattern): - log.warn(("warning: no previously-included files " - "found matching '%s'"), pattern) + action_is_recursive = action.startswith('recursive-') + if action in {'graft', 'prune'}: + patterns = [dir_pattern] + extra_log_args = (dir, ) if action_is_recursive else () + log_tmpl = log_map[action] - elif action == 'global-include': - self.debug_print("global-include " + ' '.join(patterns)) - for pattern in patterns: - if not self.global_include(pattern): - log.warn(("warning: no files found matching '%s' " - "anywhere in distribution"), pattern) - - elif action == 'global-exclude': - self.debug_print("global-exclude " + ' '.join(patterns)) - for pattern in patterns: - if not self.global_exclude(pattern): - log.warn(("warning: no previously-included files matching " - "'%s' found anywhere in distribution"), - pattern) - - elif action == 'recursive-include': - self.debug_print("recursive-include %s %s" % - (dir, ' '.join(patterns))) - for pattern in patterns: - if not self.recursive_include(dir, pattern): - log.warn(("warning: no files found matching '%s' " - "under directory '%s'"), - pattern, dir) - - elif action == 'recursive-exclude': - self.debug_print("recursive-exclude %s %s" % - (dir, ' '.join(patterns))) - for pattern in patterns: - if not self.recursive_exclude(dir, pattern): - log.warn(("warning: no previously-included files matching " - "'%s' found under directory '%s'"), - pattern, dir) - - elif action == 'graft': - self.debug_print("graft " + dir_pattern) - if not self.graft(dir_pattern): - log.warn("warning: no directories found matching '%s'", - dir_pattern) - - elif action == 'prune': - self.debug_print("prune " + dir_pattern) - if not self.prune(dir_pattern): - log.warn(("no previously-included directories found " - "matching '%s'"), dir_pattern) - - else: - raise DistutilsInternalError( - "this cannot happen: invalid action '%s'" % action) + self.debug_print( + ' '.join( + [action] + + ([dir] if action_is_recursive else []) + + patterns, + ) + ) + for pattern in patterns: + if not process_action(pattern): + log.warn(log_tmpl, pattern, *extra_log_args) def _remove_files(self, predicate): """ @@ -507,6 +528,10 @@ class FileList(_FileList): return False try: + # ignore egg-info paths + is_egg_info = ".egg-info" in u_path or b".egg-info" in utf8_path + if self.ignore_egg_info_dir and is_egg_info: + return False # accept is either way checks out if os.path.exists(u_path) or os.path.exists(utf8_path): return True @@ -523,17 +548,19 @@ class manifest_maker(sdist): self.prune = 1 self.manifest_only = 1 self.force_manifest = 1 + self.ignore_egg_info_dir = False def finalize_options(self): pass def run(self): - self.filelist = FileList() + self.filelist = FileList(ignore_egg_info_dir=self.ignore_egg_info_dir) if not os.path.exists(self.manifest): self.write_manifest() # it must exist so it'll get in the list self.add_defaults() if os.path.exists(self.template): self.read_template() + self.add_license_files() self.prune_file_list() self.filelist.sort() self.filelist.remove_duplicates() @@ -568,7 +595,6 @@ class manifest_maker(sdist): def add_defaults(self): sdist.add_defaults(self) - self.check_license() self.filelist.append(self.template) self.filelist.append(self.manifest) rcfiles = list(walk_revctrl()) @@ -585,6 +611,13 @@ class manifest_maker(sdist): ei_cmd = self.get_finalized_command('egg_info') self.filelist.graft(ei_cmd.egg_info) + def add_license_files(self): + license_files = self.distribution.metadata.license_files or [] + for lf in license_files: + log.info("adding license file '%s'", lf) + pass + self.filelist.extend(license_files) + def prune_file_list(self): build = self.get_finalized_command('build') base_dir = self.distribution.get_fullname() @@ -594,6 +627,27 @@ class manifest_maker(sdist): self.filelist.exclude_pattern(r'(^|' + sep + r')(RCS|CVS|\.svn)' + sep, is_regex=1) + def _safe_data_files(self, build_py): + """ + The parent class implementation of this method + (``sdist``) will try to include data files, which + might cause recursion problems when + ``include_package_data=True``. + + Therefore, avoid triggering any attempt of + analyzing/building the manifest again. + """ + if hasattr(build_py, 'get_data_files_without_manifest'): + return build_py.get_data_files_without_manifest() + + warnings.warn( + "Custom 'build_py' does not implement " + "'get_data_files_without_manifest'.\nPlease extend command classes" + " from setuptools instead of distutils.", + SetuptoolsDeprecationWarning + ) + return build_py.get_data_files() + def write_file(filename, contents): """Create a file with the specified name and write 'contents' (a @@ -637,14 +691,16 @@ def warn_depends_obsolete(cmd, basename, filename): def _write_requirements(stream, reqs): lines = yield_lines(reqs or ()) - append_cr = lambda line: line + '\n' + + def append_cr(line): + return line + '\n' lines = map(append_cr, lines) stream.writelines(lines) def write_requirements(cmd, basename, filename): dist = cmd.distribution - data = six.StringIO() + data = io.StringIO() _write_requirements(data, dist.install_requires) extras_require = dist.extras_require or {} for extra in sorted(extras_require): @@ -682,20 +738,9 @@ def write_arg(cmd, basename, filename, force=False): def write_entries(cmd, basename, filename): - ep = cmd.distribution.entry_points - - if isinstance(ep, six.string_types) or ep is None: - data = ep - elif ep is not None: - data = [] - for section, contents in sorted(ep.items()): - if not isinstance(contents, six.string_types): - contents = EntryPoint.parse_group(section, contents) - contents = '\n'.join(sorted(map(str, contents.values()))) - data.append('[%s]\n%s\n\n' % (section, contents)) - data = ''.join(data) - - cmd.write_or_delete_file('entry points', filename, data, True) + eps = _entry_points.load(cmd.distribution.entry_points) + defn = _entry_points.render(eps) + cmd.write_or_delete_file('entry points', filename, defn, True) def get_pkg_info_revision(): @@ -703,7 +748,8 @@ def get_pkg_info_revision(): Get a -r### off of PKG-INFO Version in case this is an sdist of a subversion revision. """ - warnings.warn("get_pkg_info_revision is deprecated.", EggInfoDeprecationWarning) + warnings.warn( + "get_pkg_info_revision is deprecated.", EggInfoDeprecationWarning) if os.path.exists('PKG-INFO'): with io.open('PKG-INFO') as f: for line in f: @@ -714,4 +760,4 @@ def get_pkg_info_revision(): class EggInfoDeprecationWarning(SetuptoolsDeprecationWarning): - """Class for warning about deprecations in eggInfo in setupTools. Not ignored by default, unlike DeprecationWarning.""" + """Deprecated behavior warning for EggInfo, bypassing suppression.""" diff --git a/libs/common/setuptools/command/install.py b/libs/common/setuptools/command/install.py index 72b9a3e4..55fdb124 100644 --- a/libs/common/setuptools/command/install.py +++ b/libs/common/setuptools/command/install.py @@ -30,6 +30,13 @@ class install(orig.install): _nc = dict(new_commands) def initialize_options(self): + + warnings.warn( + "setup.py install is deprecated. " + "Use build and pip and other standards-based tools.", + setuptools.SetuptoolsDeprecationWarning, + ) + orig.install.initialize_options(self) self.old_and_unmanageable = None self.single_version_externally_managed = None @@ -84,14 +91,21 @@ class install(orig.install): msg = "For best results, pass -X:Frames to enable call stack." warnings.warn(msg) return True - res = inspect.getouterframes(run_frame)[2] - caller, = res[:1] - info = inspect.getframeinfo(caller) - caller_module = caller.f_globals.get('__name__', '') - return ( - caller_module == 'distutils.dist' - and info.function == 'run_commands' - ) + + frames = inspect.getouterframes(run_frame) + for frame in frames[2:4]: + caller, = frame[:1] + info = inspect.getframeinfo(caller) + caller_module = caller.f_globals.get('__name__', '') + + if caller_module == "setuptools.dist" and info.function == "run_command": + # Starting from v61.0.0 setuptools overwrites dist.run_command + continue + + return ( + caller_module == 'distutils.dist' + and info.function == 'run_commands' + ) def do_egg_install(self): diff --git a/libs/common/setuptools/command/install_egg_info.py b/libs/common/setuptools/command/install_egg_info.py index edc4718b..65ede406 100644 --- a/libs/common/setuptools/command/install_egg_info.py +++ b/libs/common/setuptools/command/install_egg_info.py @@ -4,6 +4,7 @@ import os from setuptools import Command from setuptools import namespaces from setuptools.archive_util import unpack_archive +from .._path import ensure_directory import pkg_resources @@ -37,7 +38,7 @@ class install_egg_info(namespaces.Installer, Command): elif os.path.exists(self.target): self.execute(os.unlink, (self.target,), "Removing " + self.target) if not self.dry_run: - pkg_resources.ensure_directory(self.target) + ensure_directory(self.target) self.execute( self.copytree, (), "Copying %s to %s" % (self.source, self.target) ) diff --git a/libs/common/setuptools/command/install_lib.py b/libs/common/setuptools/command/install_lib.py index 07d65933..2e9d8757 100644 --- a/libs/common/setuptools/command/install_lib.py +++ b/libs/common/setuptools/command/install_lib.py @@ -77,7 +77,8 @@ class install_lib(orig.install_lib): if not hasattr(sys, 'implementation'): return - base = os.path.join('__pycache__', '__init__.' + sys.implementation.cache_tag) + base = os.path.join( + '__pycache__', '__init__.' + sys.implementation.cache_tag) yield base + '.pyc' yield base + '.pyo' yield base + '.opt-1.pyc' diff --git a/libs/common/setuptools/command/install_scripts.py b/libs/common/setuptools/command/install_scripts.py index 16234273..aeb0e424 100644 --- a/libs/common/setuptools/command/install_scripts.py +++ b/libs/common/setuptools/command/install_scripts.py @@ -1,9 +1,11 @@ from distutils import log import distutils.command.install_scripts as orig +from distutils.errors import DistutilsModuleError import os import sys -from pkg_resources import Distribution, PathMetadata, ensure_directory +from pkg_resources import Distribution, PathMetadata +from .._path import ensure_directory class install_scripts(orig.install_scripts): @@ -32,8 +34,11 @@ class install_scripts(orig.install_scripts): ) bs_cmd = self.get_finalized_command('build_scripts') exec_param = getattr(bs_cmd, 'executable', None) - bw_cmd = self.get_finalized_command("bdist_wininst") - is_wininst = getattr(bw_cmd, '_is_running', False) + try: + bw_cmd = self.get_finalized_command("bdist_wininst") + is_wininst = getattr(bw_cmd, '_is_running', False) + except (ImportError, DistutilsModuleError): + is_wininst = False writer = ei.ScriptWriter if is_wininst: exec_param = "python.exe" diff --git a/libs/common/setuptools/command/py36compat.py b/libs/common/setuptools/command/py36compat.py index 61063e75..343547a4 100644 --- a/libs/common/setuptools/command/py36compat.py +++ b/libs/common/setuptools/command/py36compat.py @@ -3,8 +3,6 @@ from glob import glob from distutils.util import convert_path from distutils.command import sdist -from setuptools.extern.six.moves import filter - class sdist_add_defaults: """ @@ -132,5 +130,5 @@ class sdist_add_defaults: if hasattr(sdist.sdist, '_add_defaults_standards'): # disable the functionality already available upstream - class sdist_add_defaults: + class sdist_add_defaults: # noqa pass diff --git a/libs/common/setuptools/command/rotate.py b/libs/common/setuptools/command/rotate.py index b89353f5..74795ba9 100644 --- a/libs/common/setuptools/command/rotate.py +++ b/libs/common/setuptools/command/rotate.py @@ -4,8 +4,6 @@ from distutils.errors import DistutilsOptionError import os import shutil -from setuptools.extern import six - from setuptools import Command @@ -36,9 +34,9 @@ class rotate(Command): raise DistutilsOptionError("Must specify number of files to keep") try: self.keep = int(self.keep) - except ValueError: - raise DistutilsOptionError("--keep must be an integer") - if isinstance(self.match, six.string_types): + except ValueError as e: + raise DistutilsOptionError("--keep must be an integer") from e + if isinstance(self.match, str): self.match = [ convert_path(p.strip()) for p in self.match.split(',') ] diff --git a/libs/common/setuptools/command/sdist.py b/libs/common/setuptools/command/sdist.py index 8c3438ea..4a8cde7e 100644 --- a/libs/common/setuptools/command/sdist.py +++ b/libs/common/setuptools/command/sdist.py @@ -4,19 +4,19 @@ import os import sys import io import contextlib - -from setuptools.extern import six, ordered_set +from itertools import chain from .py36compat import sdist_add_defaults -import pkg_resources +from .._importlib import metadata +from .build import _ORIGINAL_SUBCOMMANDS _default_revctrl = list def walk_revctrl(dirname=''): """Find all files under revision control""" - for ep in pkg_resources.iter_entry_points('setuptools.file_finders'): + for ep in metadata.entry_points(group='setuptools.file_finders'): for item in ep.load()(dirname): yield item @@ -33,6 +33,10 @@ class sdist(sdist_add_defaults, orig.sdist): ('dist-dir=', 'd', "directory to put the source distribution archive(s) in " "[default: dist]"), + ('owner=', 'u', + "Owner name used when creating a tar file [default: current user]"), + ('group=', 'g', + "Group name used when creating a tar file [default: current group]"), ] negative_opt = {} @@ -98,34 +102,12 @@ class sdist(sdist_add_defaults, orig.sdist): if orig_val is not NoValue: setattr(os, 'link', orig_val) - def __read_template_hack(self): - # This grody hack closes the template file (MANIFEST.in) if an - # exception occurs during read_template. - # Doing so prevents an error when easy_install attempts to delete the - # file. - try: - orig.sdist.read_template(self) - except Exception: - _, _, tb = sys.exc_info() - tb.tb_next.tb_frame.f_locals['template'].close() - raise - - # Beginning with Python 2.7.2, 3.1.4, and 3.2.1, this leaky file handle - # has been fixed, so only override the method if we're using an earlier - # Python. - has_leaky_handle = ( - sys.version_info < (2, 7, 2) - or (3, 0) <= sys.version_info < (3, 1, 4) - or (3, 2) <= sys.version_info < (3, 2, 1) - ) - if has_leaky_handle: - read_template = __read_template_hack + def add_defaults(self): + super().add_defaults() + self._add_defaults_build_sub_commands() def _add_defaults_optional(self): - if six.PY2: - sdist_add_defaults._add_defaults_optional(self) - else: - super()._add_defaults_optional() + super()._add_defaults_optional() if os.path.isfile('pyproject.toml'): self.filelist.append('pyproject.toml') @@ -136,14 +118,25 @@ class sdist(sdist_add_defaults, orig.sdist): self.filelist.extend(build_py.get_source_files()) self._add_data_files(self._safe_data_files(build_py)) + def _add_defaults_build_sub_commands(self): + build = self.get_finalized_command("build") + missing_cmds = set(build.get_sub_commands()) - _ORIGINAL_SUBCOMMANDS + # ^-- the original built-in sub-commands are already handled by default. + cmds = (self.get_finalized_command(c) for c in missing_cmds) + files = (c.get_source_files() for c in cmds if hasattr(c, "get_source_files")) + self.filelist.extend(chain.from_iterable(files)) + def _safe_data_files(self, build_py): """ - Extracting data_files from build_py is known to cause - infinite recursion errors when `include_package_data` - is enabled, so suppress it in that case. + Since the ``sdist`` class is also used to compute the MANIFEST + (via :obj:`setuptools.command.egg_info.manifest_maker`), + there might be recursion problems when trying to obtain the list of + data_files and ``include_package_data=True`` (which in turn depends on + the files included in the MANIFEST). + + To avoid that, ``manifest_maker`` should be able to overwrite this + method and avoid recursive attempts to build/analyze the MANIFEST. """ - if self.distribution.include_package_data: - return () return build_py.data_files def _add_data_files(self, data_files): @@ -158,10 +151,7 @@ class sdist(sdist_add_defaults, orig.sdist): def _add_defaults_data_files(self): try: - if six.PY2: - sdist_add_defaults._add_defaults_data_files(self) - else: - super()._add_defaults_data_files() + super()._add_defaults_data_files() except TypeError: log.warn("data_files contains unexpected objects") @@ -207,46 +197,14 @@ class sdist(sdist_add_defaults, orig.sdist): manifest = open(self.manifest, 'rb') for line in manifest: # The manifest must contain UTF-8. See #303. - if not six.PY2: - try: - line = line.decode('UTF-8') - except UnicodeDecodeError: - log.warn("%r not UTF-8 decodable -- skipping" % line) - continue + try: + line = line.decode('UTF-8') + except UnicodeDecodeError: + log.warn("%r not UTF-8 decodable -- skipping" % line) + continue # ignore comments and blank lines line = line.strip() if line.startswith('#') or not line: continue self.filelist.append(line) manifest.close() - - def check_license(self): - """Checks if license_file' or 'license_files' is configured and adds any - valid paths to 'self.filelist'. - """ - - files = ordered_set.OrderedSet() - - opts = self.distribution.get_option_dict('metadata') - - # ignore the source of the value - _, license_file = opts.get('license_file', (None, None)) - - if license_file is None: - log.debug("'license_file' option was not specified") - else: - files.add(license_file) - - try: - files.update(self.distribution.metadata.license_files) - except TypeError: - log.warn("warning: 'license_files' option is malformed") - - for f in files: - if not os.path.exists(f): - log.warn( - "warning: Failed to find the configured license file '%s'", - f) - files.remove(f) - - self.filelist.extend(files) diff --git a/libs/common/setuptools/command/setopt.py b/libs/common/setuptools/command/setopt.py index 7e57cc02..6358c045 100644 --- a/libs/common/setuptools/command/setopt.py +++ b/libs/common/setuptools/command/setopt.py @@ -3,8 +3,7 @@ from distutils import log from distutils.errors import DistutilsOptionError import distutils import os - -from setuptools.extern.six.moves import configparser +import configparser from setuptools import Command @@ -40,6 +39,7 @@ def edit_config(filename, settings, dry_run=False): """ log.debug("Reading configuration from %s", filename) opts = configparser.RawConfigParser() + opts.optionxform = lambda x: x opts.read([filename]) for section, options in settings.items(): if options is None: diff --git a/libs/common/setuptools/command/test.py b/libs/common/setuptools/command/test.py index f6470e9c..8dde513c 100644 --- a/libs/common/setuptools/command/test.py +++ b/libs/common/setuptools/command/test.py @@ -8,20 +8,22 @@ from distutils.errors import DistutilsError, DistutilsOptionError from distutils import log from unittest import TestLoader -from setuptools.extern import six -from setuptools.extern.six.moves import map, filter - -from pkg_resources import (resource_listdir, resource_exists, normalize_path, - working_set, _namespace_packages, evaluate_marker, - add_activation_listener, require, EntryPoint) +from pkg_resources import ( + resource_listdir, + resource_exists, + normalize_path, + working_set, + evaluate_marker, + add_activation_listener, + require, +) +from .._importlib import metadata from setuptools import Command -from .build_py import _unique_everseen - -__metaclass__ = type +from setuptools.extern.more_itertools import unique_everseen +from setuptools.extern.jaraco.functools import pass_none class ScanningLoader(TestLoader): - def __init__(self): TestLoader.__init__(self) self._visited = set() @@ -78,8 +80,11 @@ class test(Command): user_options = [ ('test-module=', 'm', "Run 'test_suite' in specified module"), - ('test-suite=', 's', - "Run single test, case or suite (e.g. 'module.test_suite')"), + ( + 'test-suite=', + 's', + "Run single test, case or suite (e.g. 'module.test_suite')", + ), ('test-runner=', 'r', "Test runner to use"), ] @@ -113,7 +118,7 @@ class test(Command): return list(self._test_args()) def _test_args(self): - if not self.test_suite and sys.version_info >= (2, 7): + if not self.test_suite: yield 'discover' if self.verbose: yield '--verbose' @@ -129,30 +134,11 @@ class test(Command): @contextlib.contextmanager def project_on_sys_path(self, include_dists=[]): - with_2to3 = not six.PY2 and getattr(self.distribution, 'use_2to3', False) + self.run_command('egg_info') - if with_2to3: - # If we run 2to3 we can not do this inplace: - - # Ensure metadata is up-to-date - self.reinitialize_command('build_py', inplace=0) - self.run_command('build_py') - bpy_cmd = self.get_finalized_command("build_py") - build_path = normalize_path(bpy_cmd.build_lib) - - # Build extensions - self.reinitialize_command('egg_info', egg_base=build_path) - self.run_command('egg_info') - - self.reinitialize_command('build_ext', inplace=0) - self.run_command('build_ext') - else: - # Without 2to3 inplace works fine: - self.run_command('egg_info') - - # Build extensions in-place - self.reinitialize_command('build_ext', inplace=1) - self.run_command('build_ext') + # Build extensions in-place + self.reinitialize_command('build_ext', inplace=1) + self.run_command('build_ext') ei_cmd = self.get_finalized_command("egg_info") @@ -187,7 +173,7 @@ class test(Command): orig_pythonpath = os.environ.get('PYTHONPATH', nothing) current_pythonpath = os.environ.get('PYTHONPATH', '') try: - prefix = os.pathsep.join(_unique_everseen(paths)) + prefix = os.pathsep.join(unique_everseen(paths)) to_join = filter(None, [prefix, current_pythonpath]) new_path = os.pathsep.join(to_join) if new_path: @@ -208,7 +194,8 @@ class test(Command): ir_d = dist.fetch_build_eggs(dist.install_requires) tr_d = dist.fetch_build_eggs(dist.tests_require or []) er_d = dist.fetch_build_eggs( - v for k, v in dist.extras_require.items() + v + for k, v in dist.extras_require.items() if k.startswith(':') and evaluate_marker(k[1:]) ) return itertools.chain(ir_d, tr_d, er_d) @@ -237,23 +224,10 @@ class test(Command): self.run_tests() def run_tests(self): - # Purge modules under test from sys.modules. The test loader will - # re-import them from the build location. Required when 2to3 is used - # with namespace packages. - if not six.PY2 and getattr(self.distribution, 'use_2to3', False): - module = self.test_suite.split('.')[0] - if module in _namespace_packages: - del_modules = [] - if module in sys.modules: - del_modules.append(module) - module += '.' - for name in sys.modules: - if name.startswith(module): - del_modules.append(name) - list(map(sys.modules.__delitem__, del_modules)) - test = unittest.main( - None, None, self._argv, + None, + None, + self._argv, testLoader=self._resolve_as_ep(self.test_loader), testRunner=self._resolve_as_ep(self.test_runner), exit=False, @@ -268,12 +242,10 @@ class test(Command): return ['unittest'] + self.test_args @staticmethod + @pass_none def _resolve_as_ep(val): """ Load the indicated attribute value, called, as a as if it were specified as an entry point. """ - if val is None: - return - parsed = EntryPoint.parse("x=" + val) - return parsed.resolve()() + return metadata.EntryPoint(value=val, name=None, group=None).load()() diff --git a/libs/common/setuptools/command/upload_docs.py b/libs/common/setuptools/command/upload_docs.py index 130a0cb6..63eb28c7 100644 --- a/libs/common/setuptools/command/upload_docs.py +++ b/libs/common/setuptools/command/upload_docs.py @@ -1,8 +1,7 @@ -# -*- coding: utf-8 -*- """upload_docs Implements a Distutils 'upload_docs' subcommand (upload documentation to -PyPI's pythonhosted.org). +sites other than PyPi such as devpi). """ from base64 import standard_b64encode @@ -15,17 +14,18 @@ import tempfile import shutil import itertools import functools +import http.client +import urllib.parse +import warnings -from setuptools.extern import six -from setuptools.extern.six.moves import http_client, urllib +from .._importlib import metadata +from .. import SetuptoolsDeprecationWarning -from pkg_resources import iter_entry_points from .upload import upload def _encode(s): - errors = 'strict' if six.PY2 else 'surrogateescape' - return s.encode('utf-8', errors) + return s.encode('utf-8', 'surrogateescape') class upload_docs(upload): @@ -33,7 +33,7 @@ class upload_docs(upload): # supported by Warehouse (and won't be). DEFAULT_REPOSITORY = 'https://pypi.python.org/pypi/' - description = 'Upload documentation to PyPI' + description = 'Upload documentation to sites other than PyPi such as devpi' user_options = [ ('repository=', 'r', @@ -45,9 +45,10 @@ class upload_docs(upload): boolean_options = upload.boolean_options def has_sphinx(self): - if self.upload_dir is None: - for ep in iter_entry_points('distutils.commands', 'build_sphinx'): - return True + return bool( + self.upload_dir is None + and metadata.entry_points(group='distutils.commands', name='build_sphinx') + ) sub_commands = [('build_sphinx', has_sphinx)] @@ -57,19 +58,20 @@ class upload_docs(upload): self.target_dir = None def finalize_options(self): + log.warn( + "Upload_docs command is deprecated. Use Read the Docs " + "(https://readthedocs.org) instead.") upload.finalize_options(self) if self.upload_dir is None: if self.has_sphinx(): build_sphinx = self.get_finalized_command('build_sphinx') - self.target_dir = build_sphinx.builder_target_dir + self.target_dir = dict(build_sphinx.builder_target_dirs)['html'] else: build = self.get_finalized_command('build') self.target_dir = os.path.join(build.build_base, 'docs') else: self.ensure_dirname('upload_dir') self.target_dir = self.upload_dir - if 'pypi.python.org' in self.repository: - log.warn("Upload_docs command is deprecated. Use RTD instead.") self.announce('Using upload directory %s' % self.target_dir) def create_zipfile(self, filename): @@ -89,6 +91,12 @@ class upload_docs(upload): zip_file.close() def run(self): + warnings.warn( + "upload_docs is deprecated and will be removed in a future " + "version. Use tools like httpie or curl instead.", + SetuptoolsDeprecationWarning, + ) + # Run sub commands for cmd_name in self.get_sub_commands(): self.run_command(cmd_name) @@ -127,8 +135,8 @@ class upload_docs(upload): """ Build up the MIME payload for the POST data """ - boundary = b'--------------GHSKFJDLGDS7543FJKLFHRE75642756743254' - sep_boundary = b'\n--' + boundary + boundary = '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254' + sep_boundary = b'\n--' + boundary.encode('ascii') end_boundary = sep_boundary + b'--' end_items = end_boundary, b"\n", builder = functools.partial( @@ -138,7 +146,7 @@ class upload_docs(upload): part_groups = map(builder, data.items()) parts = itertools.chain.from_iterable(part_groups) body_items = itertools.chain(parts, end_items) - content_type = 'multipart/form-data; boundary=%s' % boundary.decode('ascii') + content_type = 'multipart/form-data; boundary=%s' % boundary return b''.join(body_items), content_type def upload_file(self, filename): @@ -152,9 +160,7 @@ class upload_docs(upload): } # set up the authentication credentials = _encode(self.username + ':' + self.password) - credentials = standard_b64encode(credentials) - if not six.PY2: - credentials = credentials.decode('ascii') + credentials = standard_b64encode(credentials).decode('ascii') auth = "Basic " + credentials body, ct = self._build_multipart(data) @@ -169,9 +175,9 @@ class upload_docs(upload): urllib.parse.urlparse(self.repository) assert not params and not query and not fragments if schema == 'http': - conn = http_client.HTTPConnection(netloc) + conn = http.client.HTTPConnection(netloc) elif schema == 'https': - conn = http_client.HTTPSConnection(netloc) + conn = http.client.HTTPSConnection(netloc) else: raise AssertionError("unsupported schema " + schema) diff --git a/libs/common/setuptools/config/__init__.py b/libs/common/setuptools/config/__init__.py new file mode 100644 index 00000000..1a5153ad --- /dev/null +++ b/libs/common/setuptools/config/__init__.py @@ -0,0 +1,35 @@ +"""For backward compatibility, expose main functions from +``setuptools.config.setupcfg`` +""" +import warnings +from functools import wraps +from textwrap import dedent +from typing import Callable, TypeVar, cast + +from .._deprecation_warning import SetuptoolsDeprecationWarning +from . import setupcfg + +Fn = TypeVar("Fn", bound=Callable) + +__all__ = ('parse_configuration', 'read_configuration') + + +def _deprecation_notice(fn: Fn) -> Fn: + @wraps(fn) + def _wrapper(*args, **kwargs): + msg = f"""\ + As setuptools moves its configuration towards `pyproject.toml`, + `{__name__}.{fn.__name__}` became deprecated. + + For the time being, you can use the `{setupcfg.__name__}` module + to access a backward compatible API, but this module is provisional + and might be removed in the future. + """ + warnings.warn(dedent(msg), SetuptoolsDeprecationWarning, stacklevel=2) + return fn(*args, **kwargs) + + return cast(Fn, _wrapper) + + +read_configuration = _deprecation_notice(setupcfg.read_configuration) +parse_configuration = _deprecation_notice(setupcfg.parse_configuration) diff --git a/libs/common/setuptools/config/_apply_pyprojecttoml.py b/libs/common/setuptools/config/_apply_pyprojecttoml.py new file mode 100644 index 00000000..8af55616 --- /dev/null +++ b/libs/common/setuptools/config/_apply_pyprojecttoml.py @@ -0,0 +1,377 @@ +"""Translation layer between pyproject config and setuptools distribution and +metadata objects. + +The distribution and metadata objects are modeled after (an old version of) +core metadata, therefore configs in the format specified for ``pyproject.toml`` +need to be processed before being applied. + +**PRIVATE MODULE**: API reserved for setuptools internal usage only. +""" +import logging +import os +import warnings +from collections.abc import Mapping +from email.headerregistry import Address +from functools import partial, reduce +from itertools import chain +from types import MappingProxyType +from typing import (TYPE_CHECKING, Any, Callable, Dict, List, Optional, Set, Tuple, + Type, Union) + +from setuptools._deprecation_warning import SetuptoolsDeprecationWarning + +if TYPE_CHECKING: + from setuptools._importlib import metadata # noqa + from setuptools.dist import Distribution # noqa + +EMPTY: Mapping = MappingProxyType({}) # Immutable dict-like +_Path = Union[os.PathLike, str] +_DictOrStr = Union[dict, str] +_CorrespFn = Callable[["Distribution", Any, _Path], None] +_Correspondence = Union[str, _CorrespFn] + +_logger = logging.getLogger(__name__) + + +def apply(dist: "Distribution", config: dict, filename: _Path) -> "Distribution": + """Apply configuration dict read with :func:`read_configuration`""" + + if not config: + return dist # short-circuit unrelated pyproject.toml file + + root_dir = os.path.dirname(filename) or "." + + _apply_project_table(dist, config, root_dir) + _apply_tool_table(dist, config, filename) + + current_directory = os.getcwd() + os.chdir(root_dir) + try: + dist._finalize_requires() + dist._finalize_license_files() + finally: + os.chdir(current_directory) + + return dist + + +def _apply_project_table(dist: "Distribution", config: dict, root_dir: _Path): + project_table = config.get("project", {}).copy() + if not project_table: + return # short-circuit + + _handle_missing_dynamic(dist, project_table) + _unify_entry_points(project_table) + + for field, value in project_table.items(): + norm_key = json_compatible_key(field) + corresp = PYPROJECT_CORRESPONDENCE.get(norm_key, norm_key) + if callable(corresp): + corresp(dist, value, root_dir) + else: + _set_config(dist, corresp, value) + + +def _apply_tool_table(dist: "Distribution", config: dict, filename: _Path): + tool_table = config.get("tool", {}).get("setuptools", {}) + if not tool_table: + return # short-circuit + + for field, value in tool_table.items(): + norm_key = json_compatible_key(field) + + if norm_key in TOOL_TABLE_DEPRECATIONS: + suggestion = TOOL_TABLE_DEPRECATIONS[norm_key] + msg = f"The parameter `{norm_key}` is deprecated, {suggestion}" + warnings.warn(msg, SetuptoolsDeprecationWarning) + + norm_key = TOOL_TABLE_RENAMES.get(norm_key, norm_key) + _set_config(dist, norm_key, value) + + _copy_command_options(config, dist, filename) + + +def _handle_missing_dynamic(dist: "Distribution", project_table: dict): + """Be temporarily forgiving with ``dynamic`` fields not listed in ``dynamic``""" + # TODO: Set fields back to `None` once the feature stabilizes + dynamic = set(project_table.get("dynamic", [])) + for field, getter in _PREVIOUSLY_DEFINED.items(): + if not (field in project_table or field in dynamic): + value = getter(dist) + if value: + msg = _WouldIgnoreField.message(field, value) + warnings.warn(msg, _WouldIgnoreField) + + +def json_compatible_key(key: str) -> str: + """As defined in :pep:`566#json-compatible-metadata`""" + return key.lower().replace("-", "_") + + +def _set_config(dist: "Distribution", field: str, value: Any): + setter = getattr(dist.metadata, f"set_{field}", None) + if setter: + setter(value) + elif hasattr(dist.metadata, field) or field in SETUPTOOLS_PATCHES: + setattr(dist.metadata, field, value) + else: + setattr(dist, field, value) + + +_CONTENT_TYPES = { + ".md": "text/markdown", + ".rst": "text/x-rst", + ".txt": "text/plain", +} + + +def _guess_content_type(file: str) -> Optional[str]: + _, ext = os.path.splitext(file.lower()) + if not ext: + return None + + if ext in _CONTENT_TYPES: + return _CONTENT_TYPES[ext] + + valid = ", ".join(f"{k} ({v})" for k, v in _CONTENT_TYPES.items()) + msg = f"only the following file extensions are recognized: {valid}." + raise ValueError(f"Undefined content type for {file}, {msg}") + + +def _long_description(dist: "Distribution", val: _DictOrStr, root_dir: _Path): + from setuptools.config import expand + + if isinstance(val, str): + text = expand.read_files(val, root_dir) + ctype = _guess_content_type(val) + else: + text = val.get("text") or expand.read_files(val.get("file", []), root_dir) + ctype = val["content-type"] + + _set_config(dist, "long_description", text) + if ctype: + _set_config(dist, "long_description_content_type", ctype) + + +def _license(dist: "Distribution", val: dict, root_dir: _Path): + from setuptools.config import expand + + if "file" in val: + _set_config(dist, "license", expand.read_files([val["file"]], root_dir)) + else: + _set_config(dist, "license", val["text"]) + + +def _people(dist: "Distribution", val: List[dict], _root_dir: _Path, kind: str): + field = [] + email_field = [] + for person in val: + if "name" not in person: + email_field.append(person["email"]) + elif "email" not in person: + field.append(person["name"]) + else: + addr = Address(display_name=person["name"], addr_spec=person["email"]) + email_field.append(str(addr)) + + if field: + _set_config(dist, kind, ", ".join(field)) + if email_field: + _set_config(dist, f"{kind}_email", ", ".join(email_field)) + + +def _project_urls(dist: "Distribution", val: dict, _root_dir): + _set_config(dist, "project_urls", val) + + +def _python_requires(dist: "Distribution", val: dict, _root_dir): + from setuptools.extern.packaging.specifiers import SpecifierSet + + _set_config(dist, "python_requires", SpecifierSet(val)) + + +def _dependencies(dist: "Distribution", val: list, _root_dir): + if getattr(dist, "install_requires", []): + msg = "`install_requires` overwritten in `pyproject.toml` (dependencies)" + warnings.warn(msg) + _set_config(dist, "install_requires", val) + + +def _optional_dependencies(dist: "Distribution", val: dict, _root_dir): + existing = getattr(dist, "extras_require", {}) + _set_config(dist, "extras_require", {**existing, **val}) + + +def _unify_entry_points(project_table: dict): + project = project_table + entry_points = project.pop("entry-points", project.pop("entry_points", {})) + renaming = {"scripts": "console_scripts", "gui_scripts": "gui_scripts"} + for key, value in list(project.items()): # eager to allow modifications + norm_key = json_compatible_key(key) + if norm_key in renaming and value: + entry_points[renaming[norm_key]] = project.pop(key) + + if entry_points: + project["entry-points"] = { + name: [f"{k} = {v}" for k, v in group.items()] + for name, group in entry_points.items() + } + + +def _copy_command_options(pyproject: dict, dist: "Distribution", filename: _Path): + tool_table = pyproject.get("tool", {}) + cmdclass = tool_table.get("setuptools", {}).get("cmdclass", {}) + valid_options = _valid_command_options(cmdclass) + + cmd_opts = dist.command_options + for cmd, config in pyproject.get("tool", {}).get("distutils", {}).items(): + cmd = json_compatible_key(cmd) + valid = valid_options.get(cmd, set()) + cmd_opts.setdefault(cmd, {}) + for key, value in config.items(): + key = json_compatible_key(key) + cmd_opts[cmd][key] = (str(filename), value) + if key not in valid: + # To avoid removing options that are specified dynamically we + # just log a warn... + _logger.warning(f"Command option {cmd}.{key} is not defined") + + +def _valid_command_options(cmdclass: Mapping = EMPTY) -> Dict[str, Set[str]]: + from .._importlib import metadata + from setuptools.dist import Distribution + + valid_options = {"global": _normalise_cmd_options(Distribution.global_options)} + + unloaded_entry_points = metadata.entry_points(group='distutils.commands') + loaded_entry_points = (_load_ep(ep) for ep in unloaded_entry_points) + entry_points = (ep for ep in loaded_entry_points if ep) + for cmd, cmd_class in chain(entry_points, cmdclass.items()): + opts = valid_options.get(cmd, set()) + opts = opts | _normalise_cmd_options(getattr(cmd_class, "user_options", [])) + valid_options[cmd] = opts + + return valid_options + + +def _load_ep(ep: "metadata.EntryPoint") -> Optional[Tuple[str, Type]]: + # Ignore all the errors + try: + return (ep.name, ep.load()) + except Exception as ex: + msg = f"{ex.__class__.__name__} while trying to load entry-point {ep.name}" + _logger.warning(f"{msg}: {ex}") + return None + + +def _normalise_cmd_option_key(name: str) -> str: + return json_compatible_key(name).strip("_=") + + +def _normalise_cmd_options(desc: List[Tuple[str, Optional[str], str]]) -> Set[str]: + return {_normalise_cmd_option_key(fancy_option[0]) for fancy_option in desc} + + +def _attrgetter(attr): + """ + Similar to ``operator.attrgetter`` but returns None if ``attr`` is not found + >>> from types import SimpleNamespace + >>> obj = SimpleNamespace(a=42, b=SimpleNamespace(c=13)) + >>> _attrgetter("a")(obj) + 42 + >>> _attrgetter("b.c")(obj) + 13 + >>> _attrgetter("d")(obj) is None + True + """ + return partial(reduce, lambda acc, x: getattr(acc, x, None), attr.split(".")) + + +def _some_attrgetter(*items): + """ + Return the first "truth-y" attribute or None + >>> from types import SimpleNamespace + >>> obj = SimpleNamespace(a=42, b=SimpleNamespace(c=13)) + >>> _some_attrgetter("d", "a", "b.c")(obj) + 42 + >>> _some_attrgetter("d", "e", "b.c", "a")(obj) + 13 + >>> _some_attrgetter("d", "e", "f")(obj) is None + True + """ + def _acessor(obj): + values = (_attrgetter(i)(obj) for i in items) + return next((i for i in values if i is not None), None) + return _acessor + + +PYPROJECT_CORRESPONDENCE: Dict[str, _Correspondence] = { + "readme": _long_description, + "license": _license, + "authors": partial(_people, kind="author"), + "maintainers": partial(_people, kind="maintainer"), + "urls": _project_urls, + "dependencies": _dependencies, + "optional_dependencies": _optional_dependencies, + "requires_python": _python_requires, +} + +TOOL_TABLE_RENAMES = {"script_files": "scripts"} +TOOL_TABLE_DEPRECATIONS = { + "namespace_packages": "consider using implicit namespaces instead (PEP 420)." +} + +SETUPTOOLS_PATCHES = {"long_description_content_type", "project_urls", + "provides_extras", "license_file", "license_files"} + +_PREVIOUSLY_DEFINED = { + "name": _attrgetter("metadata.name"), + "version": _attrgetter("metadata.version"), + "description": _attrgetter("metadata.description"), + "readme": _attrgetter("metadata.long_description"), + "requires-python": _some_attrgetter("python_requires", "metadata.python_requires"), + "license": _attrgetter("metadata.license"), + "authors": _some_attrgetter("metadata.author", "metadata.author_email"), + "maintainers": _some_attrgetter("metadata.maintainer", "metadata.maintainer_email"), + "keywords": _attrgetter("metadata.keywords"), + "classifiers": _attrgetter("metadata.classifiers"), + "urls": _attrgetter("metadata.project_urls"), + "entry-points": _attrgetter("entry_points"), + "dependencies": _some_attrgetter("_orig_install_requires", "install_requires"), + "optional-dependencies": _some_attrgetter("_orig_extras_require", "extras_require"), +} + + +class _WouldIgnoreField(UserWarning): + """Inform users that ``pyproject.toml`` would overwrite previous metadata.""" + + MESSAGE = """\ + {field!r} defined outside of `pyproject.toml` would be ignored. + !!\n\n + ########################################################################## + # configuration would be ignored/result in error due to `pyproject.toml` # + ########################################################################## + + The following seems to be defined outside of `pyproject.toml`: + + `{field} = {value!r}` + + According to the spec (see the link below), however, setuptools CANNOT + consider this value unless {field!r} is listed as `dynamic`. + + https://packaging.python.org/en/latest/specifications/declaring-project-metadata/ + + For the time being, `setuptools` will still consider the given value (as a + **transitional** measure), but please note that future releases of setuptools will + follow strictly the standard. + + To prevent this warning, you can list {field!r} under `dynamic` or alternatively + remove the `[project]` table from your file and rely entirely on other means of + configuration. + \n\n!! + """ + + @classmethod + def message(cls, field, value): + from inspect import cleandoc + return cleandoc(cls.MESSAGE.format(field=field, value=value)) diff --git a/libs/common/setuptools/config/_validate_pyproject/__init__.py b/libs/common/setuptools/config/_validate_pyproject/__init__.py new file mode 100644 index 00000000..dbe6cb4c --- /dev/null +++ b/libs/common/setuptools/config/_validate_pyproject/__init__.py @@ -0,0 +1,34 @@ +from functools import reduce +from typing import Any, Callable, Dict + +from . import formats +from .error_reporting import detailed_errors, ValidationError +from .extra_validations import EXTRA_VALIDATIONS +from .fastjsonschema_exceptions import JsonSchemaException, JsonSchemaValueException +from .fastjsonschema_validations import validate as _validate + +__all__ = [ + "validate", + "FORMAT_FUNCTIONS", + "EXTRA_VALIDATIONS", + "ValidationError", + "JsonSchemaException", + "JsonSchemaValueException", +] + + +FORMAT_FUNCTIONS: Dict[str, Callable[[str], bool]] = { + fn.__name__.replace("_", "-"): fn + for fn in formats.__dict__.values() + if callable(fn) and not fn.__name__.startswith("_") +} + + +def validate(data: Any) -> bool: + """Validate the given ``data`` object using JSON Schema + This function raises ``ValidationError`` if ``data`` is invalid. + """ + with detailed_errors(): + _validate(data, custom_formats=FORMAT_FUNCTIONS) + reduce(lambda acc, fn: fn(acc), EXTRA_VALIDATIONS, data) + return True diff --git a/libs/common/setuptools/config/_validate_pyproject/error_reporting.py b/libs/common/setuptools/config/_validate_pyproject/error_reporting.py new file mode 100644 index 00000000..f78e4838 --- /dev/null +++ b/libs/common/setuptools/config/_validate_pyproject/error_reporting.py @@ -0,0 +1,318 @@ +import io +import json +import logging +import os +import re +from contextlib import contextmanager +from textwrap import indent, wrap +from typing import Any, Dict, Iterator, List, Optional, Sequence, Union, cast + +from .fastjsonschema_exceptions import JsonSchemaValueException + +_logger = logging.getLogger(__name__) + +_MESSAGE_REPLACEMENTS = { + "must be named by propertyName definition": "keys must be named by", + "one of contains definition": "at least one item that matches", + " same as const definition:": "", + "only specified items": "only items matching the definition", +} + +_SKIP_DETAILS = ( + "must not be empty", + "is always invalid", + "must not be there", +) + +_NEED_DETAILS = {"anyOf", "oneOf", "anyOf", "contains", "propertyNames", "not", "items"} + +_CAMEL_CASE_SPLITTER = re.compile(r"\W+|([A-Z][^A-Z\W]*)") +_IDENTIFIER = re.compile(r"^[\w_]+$", re.I) + +_TOML_JARGON = { + "object": "table", + "property": "key", + "properties": "keys", + "property names": "keys", +} + + +class ValidationError(JsonSchemaValueException): + """Report violations of a given JSON schema. + + This class extends :exc:`~fastjsonschema.JsonSchemaValueException` + by adding the following properties: + + - ``summary``: an improved version of the ``JsonSchemaValueException`` error message + with only the necessary information) + + - ``details``: more contextual information about the error like the failing schema + itself and the value that violates the schema. + + Depending on the level of the verbosity of the ``logging`` configuration + the exception message will be only ``summary`` (default) or a combination of + ``summary`` and ``details`` (when the logging level is set to :obj:`logging.DEBUG`). + """ + + summary = "" + details = "" + _original_message = "" + + @classmethod + def _from_jsonschema(cls, ex: JsonSchemaValueException): + formatter = _ErrorFormatting(ex) + obj = cls(str(formatter), ex.value, formatter.name, ex.definition, ex.rule) + debug_code = os.getenv("JSONSCHEMA_DEBUG_CODE_GENERATION", "false").lower() + if debug_code != "false": # pragma: no cover + obj.__cause__, obj.__traceback__ = ex.__cause__, ex.__traceback__ + obj._original_message = ex.message + obj.summary = formatter.summary + obj.details = formatter.details + return obj + + +@contextmanager +def detailed_errors(): + try: + yield + except JsonSchemaValueException as ex: + raise ValidationError._from_jsonschema(ex) from None + + +class _ErrorFormatting: + def __init__(self, ex: JsonSchemaValueException): + self.ex = ex + self.name = f"`{self._simplify_name(ex.name)}`" + self._original_message = self.ex.message.replace(ex.name, self.name) + self._summary = "" + self._details = "" + + def __str__(self) -> str: + if _logger.getEffectiveLevel() <= logging.DEBUG and self.details: + return f"{self.summary}\n\n{self.details}" + + return self.summary + + @property + def summary(self) -> str: + if not self._summary: + self._summary = self._expand_summary() + + return self._summary + + @property + def details(self) -> str: + if not self._details: + self._details = self._expand_details() + + return self._details + + def _simplify_name(self, name): + x = len("data.") + return name[x:] if name.startswith("data.") else name + + def _expand_summary(self): + msg = self._original_message + + for bad, repl in _MESSAGE_REPLACEMENTS.items(): + msg = msg.replace(bad, repl) + + if any(substring in msg for substring in _SKIP_DETAILS): + return msg + + schema = self.ex.rule_definition + if self.ex.rule in _NEED_DETAILS and schema: + summary = _SummaryWriter(_TOML_JARGON) + return f"{msg}:\n\n{indent(summary(schema), ' ')}" + + return msg + + def _expand_details(self) -> str: + optional = [] + desc_lines = self.ex.definition.pop("$$description", []) + desc = self.ex.definition.pop("description", None) or " ".join(desc_lines) + if desc: + description = "\n".join( + wrap( + desc, + width=80, + initial_indent=" ", + subsequent_indent=" ", + break_long_words=False, + ) + ) + optional.append(f"DESCRIPTION:\n{description}") + schema = json.dumps(self.ex.definition, indent=4) + value = json.dumps(self.ex.value, indent=4) + defaults = [ + f"GIVEN VALUE:\n{indent(value, ' ')}", + f"OFFENDING RULE: {self.ex.rule!r}", + f"DEFINITION:\n{indent(schema, ' ')}", + ] + return "\n\n".join(optional + defaults) + + +class _SummaryWriter: + _IGNORE = {"description", "default", "title", "examples"} + + def __init__(self, jargon: Optional[Dict[str, str]] = None): + self.jargon: Dict[str, str] = jargon or {} + # Clarify confusing terms + self._terms = { + "anyOf": "at least one of the following", + "oneOf": "exactly one of the following", + "allOf": "all of the following", + "not": "(*NOT* the following)", + "prefixItems": f"{self._jargon('items')} (in order)", + "items": "items", + "contains": "contains at least one of", + "propertyNames": ( + f"non-predefined acceptable {self._jargon('property names')}" + ), + "patternProperties": f"{self._jargon('properties')} named via pattern", + "const": "predefined value", + "enum": "one of", + } + # Attributes that indicate that the definition is easy and can be done + # inline (e.g. string and number) + self._guess_inline_defs = [ + "enum", + "const", + "maxLength", + "minLength", + "pattern", + "format", + "minimum", + "maximum", + "exclusiveMinimum", + "exclusiveMaximum", + "multipleOf", + ] + + def _jargon(self, term: Union[str, List[str]]) -> Union[str, List[str]]: + if isinstance(term, list): + return [self.jargon.get(t, t) for t in term] + return self.jargon.get(term, term) + + def __call__( + self, + schema: Union[dict, List[dict]], + prefix: str = "", + *, + _path: Sequence[str] = (), + ) -> str: + if isinstance(schema, list): + return self._handle_list(schema, prefix, _path) + + filtered = self._filter_unecessary(schema, _path) + simple = self._handle_simple_dict(filtered, _path) + if simple: + return f"{prefix}{simple}" + + child_prefix = self._child_prefix(prefix, " ") + item_prefix = self._child_prefix(prefix, "- ") + indent = len(prefix) * " " + with io.StringIO() as buffer: + for i, (key, value) in enumerate(filtered.items()): + child_path = [*_path, key] + line_prefix = prefix if i == 0 else indent + buffer.write(f"{line_prefix}{self._label(child_path)}:") + # ^ just the first item should receive the complete prefix + if isinstance(value, dict): + filtered = self._filter_unecessary(value, child_path) + simple = self._handle_simple_dict(filtered, child_path) + buffer.write( + f" {simple}" + if simple + else f"\n{self(value, child_prefix, _path=child_path)}" + ) + elif isinstance(value, list) and ( + key != "type" or self._is_property(child_path) + ): + children = self._handle_list(value, item_prefix, child_path) + sep = " " if children.startswith("[") else "\n" + buffer.write(f"{sep}{children}") + else: + buffer.write(f" {self._value(value, child_path)}\n") + return buffer.getvalue() + + def _is_unecessary(self, path: Sequence[str]) -> bool: + if self._is_property(path) or not path: # empty path => instruction @ root + return False + key = path[-1] + return any(key.startswith(k) for k in "$_") or key in self._IGNORE + + def _filter_unecessary(self, schema: dict, path: Sequence[str]): + return { + key: value + for key, value in schema.items() + if not self._is_unecessary([*path, key]) + } + + def _handle_simple_dict(self, value: dict, path: Sequence[str]) -> Optional[str]: + inline = any(p in value for p in self._guess_inline_defs) + simple = not any(isinstance(v, (list, dict)) for v in value.values()) + if inline or simple: + return f"{{{', '.join(self._inline_attrs(value, path))}}}\n" + return None + + def _handle_list( + self, schemas: list, prefix: str = "", path: Sequence[str] = () + ) -> str: + if self._is_unecessary(path): + return "" + + repr_ = repr(schemas) + if all(not isinstance(e, (dict, list)) for e in schemas) and len(repr_) < 60: + return f"{repr_}\n" + + item_prefix = self._child_prefix(prefix, "- ") + return "".join( + self(v, item_prefix, _path=[*path, f"[{i}]"]) for i, v in enumerate(schemas) + ) + + def _is_property(self, path: Sequence[str]): + """Check if the given path can correspond to an arbitrarily named property""" + counter = 0 + for key in path[-2::-1]: + if key not in {"properties", "patternProperties"}: + break + counter += 1 + + # If the counter if even, the path correspond to a JSON Schema keyword + # otherwise it can be any arbitrary string naming a property + return counter % 2 == 1 + + def _label(self, path: Sequence[str]) -> str: + *parents, key = path + if not self._is_property(path): + norm_key = _separate_terms(key) + return self._terms.get(key) or " ".join(self._jargon(norm_key)) + + if parents[-1] == "patternProperties": + return f"(regex {key!r})" + return repr(key) # property name + + def _value(self, value: Any, path: Sequence[str]) -> str: + if path[-1] == "type" and not self._is_property(path): + type_ = self._jargon(value) + return ( + f"[{', '.join(type_)}]" if isinstance(value, list) else cast(str, type_) + ) + return repr(value) + + def _inline_attrs(self, schema: dict, path: Sequence[str]) -> Iterator[str]: + for key, value in schema.items(): + child_path = [*path, key] + yield f"{self._label(child_path)}: {self._value(value, child_path)}" + + def _child_prefix(self, parent_prefix: str, child_prefix: str) -> str: + return len(parent_prefix) * " " + child_prefix + + +def _separate_terms(word: str) -> List[str]: + """ + >>> _separate_terms("FooBar-foo") + ['foo', 'bar', 'foo'] + """ + return [w.lower() for w in _CAMEL_CASE_SPLITTER.split(word) if w] diff --git a/libs/common/setuptools/config/_validate_pyproject/extra_validations.py b/libs/common/setuptools/config/_validate_pyproject/extra_validations.py new file mode 100644 index 00000000..4130a421 --- /dev/null +++ b/libs/common/setuptools/config/_validate_pyproject/extra_validations.py @@ -0,0 +1,36 @@ +"""The purpose of this module is implement PEP 621 validations that are +difficult to express as a JSON Schema (or that are not supported by the current +JSON Schema library). +""" + +from typing import Mapping, TypeVar + +from .error_reporting import ValidationError + +T = TypeVar("T", bound=Mapping) + + +class RedefiningStaticFieldAsDynamic(ValidationError): + """According to PEP 621: + + Build back-ends MUST raise an error if the metadata specifies a field + statically as well as being listed in dynamic. + """ + + +def validate_project_dynamic(pyproject: T) -> T: + project_table = pyproject.get("project", {}) + dynamic = project_table.get("dynamic", []) + + for field in dynamic: + if field in project_table: + msg = f"You cannot provide a value for `project.{field}` and " + msg += "list it under `project.dynamic` at the same time" + name = f"data.project.{field}" + value = {field: project_table[field], "...": " # ...", "dynamic": dynamic} + raise RedefiningStaticFieldAsDynamic(msg, value, name, rule="PEP 621") + + return pyproject + + +EXTRA_VALIDATIONS = (validate_project_dynamic,) diff --git a/libs/common/setuptools/config/_validate_pyproject/fastjsonschema_exceptions.py b/libs/common/setuptools/config/_validate_pyproject/fastjsonschema_exceptions.py new file mode 100644 index 00000000..d2dddd6a --- /dev/null +++ b/libs/common/setuptools/config/_validate_pyproject/fastjsonschema_exceptions.py @@ -0,0 +1,51 @@ +import re + + +SPLIT_RE = re.compile(r'[\.\[\]]+') + + +class JsonSchemaException(ValueError): + """ + Base exception of ``fastjsonschema`` library. + """ + + +class JsonSchemaValueException(JsonSchemaException): + """ + Exception raised by validation function. Available properties: + + * ``message`` containing human-readable information what is wrong (e.g. ``data.property[index] must be smaller than or equal to 42``), + * invalid ``value`` (e.g. ``60``), + * ``name`` of a path in the data structure (e.g. ``data.property[index]``), + * ``path`` as an array in the data structure (e.g. ``['data', 'property', 'index']``), + * the whole ``definition`` which the ``value`` has to fulfil (e.g. ``{'type': 'number', 'maximum': 42}``), + * ``rule`` which the ``value`` is breaking (e.g. ``maximum``) + * and ``rule_definition`` (e.g. ``42``). + + .. versionchanged:: 2.14.0 + Added all extra properties. + """ + + def __init__(self, message, value=None, name=None, definition=None, rule=None): + super().__init__(message) + self.message = message + self.value = value + self.name = name + self.definition = definition + self.rule = rule + + @property + def path(self): + return [item for item in SPLIT_RE.split(self.name) if item != ''] + + @property + def rule_definition(self): + if not self.rule or not self.definition: + return None + return self.definition.get(self.rule) + + +class JsonSchemaDefinitionException(JsonSchemaException): + """ + Exception raised by generator of validation function. + """ diff --git a/libs/common/setuptools/config/_validate_pyproject/fastjsonschema_validations.py b/libs/common/setuptools/config/_validate_pyproject/fastjsonschema_validations.py new file mode 100644 index 00000000..ad5ee31e --- /dev/null +++ b/libs/common/setuptools/config/_validate_pyproject/fastjsonschema_validations.py @@ -0,0 +1,1035 @@ +# noqa +# type: ignore +# flake8: noqa +# pylint: skip-file +# mypy: ignore-errors +# yapf: disable +# pylama:skip=1 + + +# *** PLEASE DO NOT MODIFY DIRECTLY: Automatically generated code *** + + +VERSION = "2.15.3" +import re +from .fastjsonschema_exceptions import JsonSchemaValueException + + +REGEX_PATTERNS = { + '^.*$': re.compile('^.*$'), + '.+': re.compile('.+'), + '^.+$': re.compile('^.+$'), + 'idn-email_re_pattern': re.compile('^[^@]+@[^@]+\\.[^@]+\\Z') +} + +NoneType = type(None) + +def validate(data, custom_formats={}, name_prefix=None): + validate_https___packaging_python_org_en_latest_specifications_declaring_build_dependencies(data, custom_formats, (name_prefix or "data") + "") + return data + +def validate_https___packaging_python_org_en_latest_specifications_declaring_build_dependencies(data, custom_formats={}, name_prefix=None): + if not isinstance(data, (dict)): + raise JsonSchemaValueException("" + (name_prefix or "data") + " must be object", value=data, name="" + (name_prefix or "data") + "", definition={'$schema': 'http://json-schema.org/draft-07/schema', '$id': 'https://packaging.python.org/en/latest/specifications/declaring-build-dependencies/', 'title': 'Data structure for ``pyproject.toml`` files', '$$description': ['File format containing build-time configurations for the Python ecosystem. ', ':pep:`517` initially defined a build-system independent format for source trees', 'which was complemented by :pep:`518` to provide a way of specifying dependencies ', 'for building Python projects.', 'Please notice the ``project`` table (as initially defined in :pep:`621`) is not included', 'in this schema and should be considered separately.'], 'type': 'object', 'additionalProperties': False, 'properties': {'build-system': {'type': 'object', 'description': 'Table used to store build-related data', 'additionalProperties': False, 'properties': {'requires': {'type': 'array', '$$description': ['List of dependencies in the :pep:`508` format required to execute the build', 'system. Please notice that the resulting dependency graph', '**MUST NOT contain cycles**'], 'items': {'type': 'string'}}, 'build-backend': {'type': 'string', 'description': 'Python object that will be used to perform the build according to :pep:`517`', 'format': 'pep517-backend-reference'}, 'backend-path': {'type': 'array', '$$description': ['List of directories to be prepended to ``sys.path`` when loading the', 'back-end, and running its hooks'], 'items': {'type': 'string', '$comment': 'Should be a path (TODO: enforce it with format?)'}}}, 'required': ['requires']}, 'project': {'$schema': 'http://json-schema.org/draft-07/schema', '$id': 'https://packaging.python.org/en/latest/specifications/declaring-project-metadata/', 'title': 'Package metadata stored in the ``project`` table', '$$description': ['Data structure for the **project** table inside ``pyproject.toml``', '(as initially defined in :pep:`621`)'], 'type': 'object', 'properties': {'name': {'type': 'string', 'description': 'The name (primary identifier) of the project. MUST be statically defined.', 'format': 'pep508-identifier'}, 'version': {'type': 'string', 'description': 'The version of the project as supported by :pep:`440`.', 'format': 'pep440'}, 'description': {'type': 'string', '$$description': ['The `summary description of the project', '`_']}, 'readme': {'$$description': ['`Full/detailed description of the project in the form of a README', '`_', "with meaning similar to the one defined in `core metadata's Description", '`_'], 'oneOf': [{'type': 'string', '$$description': ['Relative path to a text file (UTF-8) containing the full description', 'of the project. If the file path ends in case-insensitive ``.md`` or', '``.rst`` suffixes, then the content-type is respectively', '``text/markdown`` or ``text/x-rst``']}, {'type': 'object', 'allOf': [{'anyOf': [{'properties': {'file': {'type': 'string', '$$description': ['Relative path to a text file containing the full description', 'of the project.']}}, 'required': ['file']}, {'properties': {'text': {'type': 'string', 'description': 'Full text describing the project.'}}, 'required': ['text']}]}, {'properties': {'content-type': {'type': 'string', '$$description': ['Content-type (:rfc:`1341`) of the full description', '(e.g. ``text/markdown``). The ``charset`` parameter is assumed', 'UTF-8 when not present.'], '$comment': 'TODO: add regex pattern or format?'}}, 'required': ['content-type']}]}]}, 'requires-python': {'type': 'string', 'format': 'pep508-versionspec', '$$description': ['`The Python version requirements of the project', '`_.']}, 'license': {'description': '`Project license `_.', 'oneOf': [{'properties': {'file': {'type': 'string', '$$description': ['Relative path to the file (UTF-8) which contains the license for the', 'project.']}}, 'required': ['file']}, {'properties': {'text': {'type': 'string', '$$description': ['The license of the project whose meaning is that of the', '`License field from the core metadata', '`_.']}}, 'required': ['text']}]}, 'authors': {'type': 'array', 'items': {'$ref': '#/definitions/author'}, '$$description': ["The people or organizations considered to be the 'authors' of the project.", 'The exact meaning is open to interpretation (e.g. original or primary authors,', 'current maintainers, or owners of the package).']}, 'maintainers': {'type': 'array', 'items': {'$ref': '#/definitions/author'}, '$$description': ["The people or organizations considered to be the 'maintainers' of the project.", 'Similarly to ``authors``, the exact meaning is open to interpretation.']}, 'keywords': {'type': 'array', 'items': {'type': 'string'}, 'description': 'List of keywords to assist searching for the distribution in a larger catalog.'}, 'classifiers': {'type': 'array', 'items': {'type': 'string', 'format': 'trove-classifier', 'description': '`PyPI classifier `_.'}, '$$description': ['`Trove classifiers `_', 'which apply to the project.']}, 'urls': {'type': 'object', 'description': 'URLs associated with the project in the form ``label => value``.', 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', 'format': 'url'}}}, 'scripts': {'$ref': '#/definitions/entry-point-group', '$$description': ['Instruct the installer to create command-line wrappers for the given', '`entry points `_.']}, 'gui-scripts': {'$ref': '#/definitions/entry-point-group', '$$description': ['Instruct the installer to create GUI wrappers for the given', '`entry points `_.', 'The difference between ``scripts`` and ``gui-scripts`` is only relevant in', 'Windows.']}, 'entry-points': {'$$description': ['Instruct the installer to expose the given modules/functions via', '``entry-point`` discovery mechanism (useful for plugins).', 'More information available in the `Python packaging guide', '`_.'], 'propertyNames': {'format': 'python-entrypoint-group'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'$ref': '#/definitions/entry-point-group'}}}, 'dependencies': {'type': 'array', 'description': 'Project (mandatory) dependencies.', 'items': {'$ref': '#/definitions/dependency'}}, 'optional-dependencies': {'type': 'object', 'description': 'Optional dependency for the project', 'propertyNames': {'format': 'pep508-identifier'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'array', 'items': {'$ref': '#/definitions/dependency'}}}}, 'dynamic': {'type': 'array', '$$description': ['Specifies which fields are intentionally unspecified and expected to be', 'dynamically provided by build tools'], 'items': {'enum': ['version', 'description', 'readme', 'requires-python', 'license', 'authors', 'maintainers', 'keywords', 'classifiers', 'urls', 'scripts', 'gui-scripts', 'entry-points', 'dependencies', 'optional-dependencies']}}}, 'required': ['name'], 'additionalProperties': False, 'if': {'not': {'required': ['dynamic'], 'properties': {'dynamic': {'contains': {'const': 'version'}, '$$description': ['version is listed in ``dynamic``']}}}, '$$comment': ['According to :pep:`621`:', ' If the core metadata specification lists a field as "Required", then', ' the metadata MUST specify the field statically or list it in dynamic', 'In turn, `core metadata`_ defines:', ' The required fields are: Metadata-Version, Name, Version.', ' All the other fields are optional.', 'Since ``Metadata-Version`` is defined by the build back-end, ``name`` and', '``version`` are the only mandatory information in ``pyproject.toml``.', '.. _core metadata: https://packaging.python.org/specifications/core-metadata/']}, 'then': {'required': ['version'], '$$description': ['version should be statically defined in the ``version`` field']}, 'definitions': {'author': {'$id': '#/definitions/author', 'title': 'Author or Maintainer', '$comment': 'https://www.python.org/dev/peps/pep-0621/#authors-maintainers', 'type': 'object', 'properties': {'name': {'type': 'string', '$$description': ['MUST be a valid email name, i.e. whatever can be put as a name, before an', 'email, in :rfc:`822`.']}, 'email': {'type': 'string', 'format': 'idn-email', 'description': 'MUST be a valid email address'}}}, 'entry-point-group': {'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}, 'dependency': {'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}}}, 'tool': {'type': 'object', 'properties': {'distutils': {'$schema': 'http://json-schema.org/draft-07/schema', '$id': 'https://docs.python.org/3/install/', 'title': '``tool.distutils`` table', '$$description': ['Originally, ``distutils`` allowed developers to configure arguments for', '``setup.py`` scripts via `distutils configuration files', '`_.', '``tool.distutils`` subtables could be used with the same purpose', '(NOT CURRENTLY IMPLEMENTED).'], 'type': 'object', 'properties': {'global': {'type': 'object', 'description': 'Global options applied to all ``distutils`` commands'}}, 'patternProperties': {'.+': {'type': 'object'}}, '$comment': 'TODO: Is there a practical way of making this schema more specific?'}, 'setuptools': {'$schema': 'http://json-schema.org/draft-07/schema', '$id': 'https://setuptools.pypa.io/en/latest/references/keywords.html', 'title': '``tool.setuptools`` table', '$$description': ['Please notice for the time being the ``setuptools`` project does not specify', 'a way of configuring builds via ``pyproject.toml``.', 'Therefore this schema should be taken just as a *"thought experiment"* on how', 'this *might be done*, by following the principles established in', '`ini2toml `_.', 'It considers only ``setuptools`` `parameters', '`_', 'that can currently be configured via ``setup.cfg`` and are not covered by :pep:`621`', 'but intentionally excludes ``dependency_links`` and ``setup_requires``.', 'NOTE: ``scripts`` was renamed to ``script-files`` to avoid confusion with', 'entry-point based scripts (defined in :pep:`621`).'], 'type': 'object', 'additionalProperties': False, 'properties': {'platforms': {'type': 'array', 'items': {'type': 'string'}}, 'provides': {'$$description': ['Package and virtual package names contained within this package', '**(not supported by pip)**'], 'type': 'array', 'items': {'type': 'string', 'format': 'pep508-identifier'}}, 'obsoletes': {'$$description': ['Packages which this package renders obsolete', '**(not supported by pip)**'], 'type': 'array', 'items': {'type': 'string', 'format': 'pep508-identifier'}}, 'zip-safe': {'description': 'Whether the project can be safely installed and run from a zip file.', 'type': 'boolean'}, 'script-files': {'description': 'Legacy way of defining scripts (entry-points are preferred).', 'type': 'array', 'items': {'type': 'string'}, '$comment': 'TODO: is this field deprecated/should be removed?'}, 'eager-resources': {'$$description': ['Resources that should be extracted together, if any of them is needed,', 'or if any C extensions included in the project are imported.'], 'type': 'array', 'items': {'type': 'string'}}, 'packages': {'$$description': ['Packages that should be included in the distribution.', 'It can be given either as a list of package identifiers', 'or as a ``dict``-like structure with a single key ``find``', 'which corresponds to a dynamic call to', '``setuptools.config.expand.find_packages`` function.', 'The ``find`` key is associated with a nested ``dict``-like structure that can', 'contain ``where``, ``include``, ``exclude`` and ``namespaces`` keys,', 'mimicking the keyword arguments of the associated function.'], 'oneOf': [{'title': 'Array of Python package identifiers', 'type': 'array', 'items': {'type': 'string', 'format': 'python-module-name'}}, {'$ref': '#/definitions/find-directive'}]}, 'package-dir': {'$$description': [':class:`dict`-like structure mapping from package names to directories where their', 'code can be found.', 'The empty string (as key) means that all packages are contained inside', 'the given directory will be included in the distribution.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'oneOf': [{'format': 'python-module-name'}, {'const': ''}]}, 'patternProperties': {'^.*$': {'type': 'string'}}}, 'package-data': {'$$description': ['Mapping from package names to lists of glob patterns.', 'Usually this option is not needed when using ``include-package-data = true``', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'oneOf': [{'format': 'python-module-name'}, {'const': '*'}]}, 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, 'include-package-data': {'$$description': ['Automatically include any data files inside the package directories', 'that are specified by ``MANIFEST.in``', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'boolean'}, 'exclude-package-data': {'$$description': ['Mapping from package names to lists of glob patterns that should be excluded', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'oneOf': [{'format': 'python-module-name'}, {'const': '*'}]}, 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, 'namespace-packages': {'type': 'array', 'items': {'type': 'string', 'format': 'python-module-name'}, '$comment': 'https://setuptools.pypa.io/en/latest/userguide/package_discovery.html'}, 'py-modules': {'description': 'Modules that setuptools will manipulate', 'type': 'array', 'items': {'type': 'string', 'format': 'python-module-name'}, '$comment': 'TODO: clarify the relationship with ``packages``'}, 'data-files': {'$$description': ['**DEPRECATED**: dict-like structure where each key represents a directory and', 'the value is a list of glob patterns that should be installed in them.', "Please notice this don't work with wheels. See `data files support", '`_'], 'type': 'object', 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, 'cmdclass': {'$$description': ['Mapping of distutils-style command names to ``setuptools.Command`` subclasses', 'which in turn should be represented by strings with a qualified class name', '(i.e., "dotted" form with module), e.g.::\n\n', ' cmdclass = {mycmd = "pkg.subpkg.module.CommandClass"}\n\n', 'The command class should be a directly defined at the top-level of the', 'containing module (no class nesting).'], 'type': 'object', 'patternProperties': {'^.*$': {'type': 'string', 'format': 'python-qualified-identifier'}}}, 'license-files': {'type': 'array', 'items': {'type': 'string'}, '$$description': ['PROVISIONAL: List of glob patterns for all license files being distributed.', '(might become standard with PEP 639).'], 'default': ['LICEN[CS]E*', ' COPYING*', ' NOTICE*', 'AUTHORS*'], '$comment': 'TODO: revise if PEP 639 is accepted. Probably ``project.license-files``?'}, 'dynamic': {'type': 'object', 'description': 'Instructions for loading :pep:`621`-related metadata dynamically', 'additionalProperties': False, 'properties': {'version': {'$$description': ['A version dynamically loaded via either the ``attr:`` or ``file:``', 'directives. Please make sure the given file or attribute respects :pep:`440`.'], 'oneOf': [{'$ref': '#/definitions/attr-directive'}, {'$ref': '#/definitions/file-directive'}]}, 'classifiers': {'$ref': '#/definitions/file-directive'}, 'description': {'$ref': '#/definitions/file-directive'}, 'dependencies': {'$ref': '#/definitions/file-directive'}, 'entry-points': {'$ref': '#/definitions/file-directive'}, 'optional-dependencies': {'type': 'object', 'propertyNames': {'format': 'python-identifier'}, 'additionalProperties': False, 'patternProperties': {'.+': {'$ref': '#/definitions/file-directive'}}}, 'readme': {'anyOf': [{'$ref': '#/definitions/file-directive'}, {'properties': {'content-type': {'type': 'string'}}}], 'required': ['file']}}}}, 'definitions': {'file-directive': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, 'attr-directive': {'title': "'attr:' directive", '$id': '#/definitions/attr-directive', '$$description': ['Value is read from a module attribute. Supports callables and iterables;', 'unsupported types are cast via ``str()``'], 'type': 'object', 'additionalProperties': False, 'properties': {'attr': {'type': 'string'}}, 'required': ['attr']}, 'find-directive': {'$id': '#/definitions/find-directive', 'title': "'find:' directive", 'type': 'object', 'additionalProperties': False, 'properties': {'find': {'type': 'object', '$$description': ['Dynamic `package discovery', '`_.'], 'additionalProperties': False, 'properties': {'where': {'description': 'Directories to be searched for packages (Unix-style relative path)', 'type': 'array', 'items': {'type': 'string'}}, 'exclude': {'type': 'array', '$$description': ['Exclude packages that match the values listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'include': {'type': 'array', '$$description': ['Restrict the found packages to just the ones listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'namespaces': {'type': 'boolean', '$$description': ['When ``True``, directories without a ``__init__.py`` file will also', 'be scanned for :pep:`420`-style implicit namespaces']}}}}}}}}}}, 'project': {'$schema': 'http://json-schema.org/draft-07/schema', '$id': 'https://packaging.python.org/en/latest/specifications/declaring-project-metadata/', 'title': 'Package metadata stored in the ``project`` table', '$$description': ['Data structure for the **project** table inside ``pyproject.toml``', '(as initially defined in :pep:`621`)'], 'type': 'object', 'properties': {'name': {'type': 'string', 'description': 'The name (primary identifier) of the project. MUST be statically defined.', 'format': 'pep508-identifier'}, 'version': {'type': 'string', 'description': 'The version of the project as supported by :pep:`440`.', 'format': 'pep440'}, 'description': {'type': 'string', '$$description': ['The `summary description of the project', '`_']}, 'readme': {'$$description': ['`Full/detailed description of the project in the form of a README', '`_', "with meaning similar to the one defined in `core metadata's Description", '`_'], 'oneOf': [{'type': 'string', '$$description': ['Relative path to a text file (UTF-8) containing the full description', 'of the project. If the file path ends in case-insensitive ``.md`` or', '``.rst`` suffixes, then the content-type is respectively', '``text/markdown`` or ``text/x-rst``']}, {'type': 'object', 'allOf': [{'anyOf': [{'properties': {'file': {'type': 'string', '$$description': ['Relative path to a text file containing the full description', 'of the project.']}}, 'required': ['file']}, {'properties': {'text': {'type': 'string', 'description': 'Full text describing the project.'}}, 'required': ['text']}]}, {'properties': {'content-type': {'type': 'string', '$$description': ['Content-type (:rfc:`1341`) of the full description', '(e.g. ``text/markdown``). The ``charset`` parameter is assumed', 'UTF-8 when not present.'], '$comment': 'TODO: add regex pattern or format?'}}, 'required': ['content-type']}]}]}, 'requires-python': {'type': 'string', 'format': 'pep508-versionspec', '$$description': ['`The Python version requirements of the project', '`_.']}, 'license': {'description': '`Project license `_.', 'oneOf': [{'properties': {'file': {'type': 'string', '$$description': ['Relative path to the file (UTF-8) which contains the license for the', 'project.']}}, 'required': ['file']}, {'properties': {'text': {'type': 'string', '$$description': ['The license of the project whose meaning is that of the', '`License field from the core metadata', '`_.']}}, 'required': ['text']}]}, 'authors': {'type': 'array', 'items': {'$ref': '#/definitions/author'}, '$$description': ["The people or organizations considered to be the 'authors' of the project.", 'The exact meaning is open to interpretation (e.g. original or primary authors,', 'current maintainers, or owners of the package).']}, 'maintainers': {'type': 'array', 'items': {'$ref': '#/definitions/author'}, '$$description': ["The people or organizations considered to be the 'maintainers' of the project.", 'Similarly to ``authors``, the exact meaning is open to interpretation.']}, 'keywords': {'type': 'array', 'items': {'type': 'string'}, 'description': 'List of keywords to assist searching for the distribution in a larger catalog.'}, 'classifiers': {'type': 'array', 'items': {'type': 'string', 'format': 'trove-classifier', 'description': '`PyPI classifier `_.'}, '$$description': ['`Trove classifiers `_', 'which apply to the project.']}, 'urls': {'type': 'object', 'description': 'URLs associated with the project in the form ``label => value``.', 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', 'format': 'url'}}}, 'scripts': {'$ref': '#/definitions/entry-point-group', '$$description': ['Instruct the installer to create command-line wrappers for the given', '`entry points `_.']}, 'gui-scripts': {'$ref': '#/definitions/entry-point-group', '$$description': ['Instruct the installer to create GUI wrappers for the given', '`entry points `_.', 'The difference between ``scripts`` and ``gui-scripts`` is only relevant in', 'Windows.']}, 'entry-points': {'$$description': ['Instruct the installer to expose the given modules/functions via', '``entry-point`` discovery mechanism (useful for plugins).', 'More information available in the `Python packaging guide', '`_.'], 'propertyNames': {'format': 'python-entrypoint-group'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'$ref': '#/definitions/entry-point-group'}}}, 'dependencies': {'type': 'array', 'description': 'Project (mandatory) dependencies.', 'items': {'$ref': '#/definitions/dependency'}}, 'optional-dependencies': {'type': 'object', 'description': 'Optional dependency for the project', 'propertyNames': {'format': 'pep508-identifier'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'array', 'items': {'$ref': '#/definitions/dependency'}}}}, 'dynamic': {'type': 'array', '$$description': ['Specifies which fields are intentionally unspecified and expected to be', 'dynamically provided by build tools'], 'items': {'enum': ['version', 'description', 'readme', 'requires-python', 'license', 'authors', 'maintainers', 'keywords', 'classifiers', 'urls', 'scripts', 'gui-scripts', 'entry-points', 'dependencies', 'optional-dependencies']}}}, 'required': ['name'], 'additionalProperties': False, 'if': {'not': {'required': ['dynamic'], 'properties': {'dynamic': {'contains': {'const': 'version'}, '$$description': ['version is listed in ``dynamic``']}}}, '$$comment': ['According to :pep:`621`:', ' If the core metadata specification lists a field as "Required", then', ' the metadata MUST specify the field statically or list it in dynamic', 'In turn, `core metadata`_ defines:', ' The required fields are: Metadata-Version, Name, Version.', ' All the other fields are optional.', 'Since ``Metadata-Version`` is defined by the build back-end, ``name`` and', '``version`` are the only mandatory information in ``pyproject.toml``.', '.. _core metadata: https://packaging.python.org/specifications/core-metadata/']}, 'then': {'required': ['version'], '$$description': ['version should be statically defined in the ``version`` field']}, 'definitions': {'author': {'$id': '#/definitions/author', 'title': 'Author or Maintainer', '$comment': 'https://www.python.org/dev/peps/pep-0621/#authors-maintainers', 'type': 'object', 'properties': {'name': {'type': 'string', '$$description': ['MUST be a valid email name, i.e. whatever can be put as a name, before an', 'email, in :rfc:`822`.']}, 'email': {'type': 'string', 'format': 'idn-email', 'description': 'MUST be a valid email address'}}}, 'entry-point-group': {'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}, 'dependency': {'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}}}}, rule='type') + data_is_dict = isinstance(data, dict) + if data_is_dict: + data_keys = set(data.keys()) + if "build-system" in data_keys: + data_keys.remove("build-system") + data__buildsystem = data["build-system"] + if not isinstance(data__buildsystem, (dict)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".build-system must be object", value=data__buildsystem, name="" + (name_prefix or "data") + ".build-system", definition={'type': 'object', 'description': 'Table used to store build-related data', 'additionalProperties': False, 'properties': {'requires': {'type': 'array', '$$description': ['List of dependencies in the :pep:`508` format required to execute the build', 'system. Please notice that the resulting dependency graph', '**MUST NOT contain cycles**'], 'items': {'type': 'string'}}, 'build-backend': {'type': 'string', 'description': 'Python object that will be used to perform the build according to :pep:`517`', 'format': 'pep517-backend-reference'}, 'backend-path': {'type': 'array', '$$description': ['List of directories to be prepended to ``sys.path`` when loading the', 'back-end, and running its hooks'], 'items': {'type': 'string', '$comment': 'Should be a path (TODO: enforce it with format?)'}}}, 'required': ['requires']}, rule='type') + data__buildsystem_is_dict = isinstance(data__buildsystem, dict) + if data__buildsystem_is_dict: + data__buildsystem_len = len(data__buildsystem) + if not all(prop in data__buildsystem for prop in ['requires']): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".build-system must contain ['requires'] properties", value=data__buildsystem, name="" + (name_prefix or "data") + ".build-system", definition={'type': 'object', 'description': 'Table used to store build-related data', 'additionalProperties': False, 'properties': {'requires': {'type': 'array', '$$description': ['List of dependencies in the :pep:`508` format required to execute the build', 'system. Please notice that the resulting dependency graph', '**MUST NOT contain cycles**'], 'items': {'type': 'string'}}, 'build-backend': {'type': 'string', 'description': 'Python object that will be used to perform the build according to :pep:`517`', 'format': 'pep517-backend-reference'}, 'backend-path': {'type': 'array', '$$description': ['List of directories to be prepended to ``sys.path`` when loading the', 'back-end, and running its hooks'], 'items': {'type': 'string', '$comment': 'Should be a path (TODO: enforce it with format?)'}}}, 'required': ['requires']}, rule='required') + data__buildsystem_keys = set(data__buildsystem.keys()) + if "requires" in data__buildsystem_keys: + data__buildsystem_keys.remove("requires") + data__buildsystem__requires = data__buildsystem["requires"] + if not isinstance(data__buildsystem__requires, (list, tuple)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".build-system.requires must be array", value=data__buildsystem__requires, name="" + (name_prefix or "data") + ".build-system.requires", definition={'type': 'array', '$$description': ['List of dependencies in the :pep:`508` format required to execute the build', 'system. Please notice that the resulting dependency graph', '**MUST NOT contain cycles**'], 'items': {'type': 'string'}}, rule='type') + data__buildsystem__requires_is_list = isinstance(data__buildsystem__requires, (list, tuple)) + if data__buildsystem__requires_is_list: + data__buildsystem__requires_len = len(data__buildsystem__requires) + for data__buildsystem__requires_x, data__buildsystem__requires_item in enumerate(data__buildsystem__requires): + if not isinstance(data__buildsystem__requires_item, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".build-system.requires[{data__buildsystem__requires_x}]".format(**locals()) + " must be string", value=data__buildsystem__requires_item, name="" + (name_prefix or "data") + ".build-system.requires[{data__buildsystem__requires_x}]".format(**locals()) + "", definition={'type': 'string'}, rule='type') + if "build-backend" in data__buildsystem_keys: + data__buildsystem_keys.remove("build-backend") + data__buildsystem__buildbackend = data__buildsystem["build-backend"] + if not isinstance(data__buildsystem__buildbackend, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".build-system.build-backend must be string", value=data__buildsystem__buildbackend, name="" + (name_prefix or "data") + ".build-system.build-backend", definition={'type': 'string', 'description': 'Python object that will be used to perform the build according to :pep:`517`', 'format': 'pep517-backend-reference'}, rule='type') + if isinstance(data__buildsystem__buildbackend, str): + if not custom_formats["pep517-backend-reference"](data__buildsystem__buildbackend): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".build-system.build-backend must be pep517-backend-reference", value=data__buildsystem__buildbackend, name="" + (name_prefix or "data") + ".build-system.build-backend", definition={'type': 'string', 'description': 'Python object that will be used to perform the build according to :pep:`517`', 'format': 'pep517-backend-reference'}, rule='format') + if "backend-path" in data__buildsystem_keys: + data__buildsystem_keys.remove("backend-path") + data__buildsystem__backendpath = data__buildsystem["backend-path"] + if not isinstance(data__buildsystem__backendpath, (list, tuple)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".build-system.backend-path must be array", value=data__buildsystem__backendpath, name="" + (name_prefix or "data") + ".build-system.backend-path", definition={'type': 'array', '$$description': ['List of directories to be prepended to ``sys.path`` when loading the', 'back-end, and running its hooks'], 'items': {'type': 'string', '$comment': 'Should be a path (TODO: enforce it with format?)'}}, rule='type') + data__buildsystem__backendpath_is_list = isinstance(data__buildsystem__backendpath, (list, tuple)) + if data__buildsystem__backendpath_is_list: + data__buildsystem__backendpath_len = len(data__buildsystem__backendpath) + for data__buildsystem__backendpath_x, data__buildsystem__backendpath_item in enumerate(data__buildsystem__backendpath): + if not isinstance(data__buildsystem__backendpath_item, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".build-system.backend-path[{data__buildsystem__backendpath_x}]".format(**locals()) + " must be string", value=data__buildsystem__backendpath_item, name="" + (name_prefix or "data") + ".build-system.backend-path[{data__buildsystem__backendpath_x}]".format(**locals()) + "", definition={'type': 'string', '$comment': 'Should be a path (TODO: enforce it with format?)'}, rule='type') + if data__buildsystem_keys: + raise JsonSchemaValueException("" + (name_prefix or "data") + ".build-system must not contain "+str(data__buildsystem_keys)+" properties", value=data__buildsystem, name="" + (name_prefix or "data") + ".build-system", definition={'type': 'object', 'description': 'Table used to store build-related data', 'additionalProperties': False, 'properties': {'requires': {'type': 'array', '$$description': ['List of dependencies in the :pep:`508` format required to execute the build', 'system. Please notice that the resulting dependency graph', '**MUST NOT contain cycles**'], 'items': {'type': 'string'}}, 'build-backend': {'type': 'string', 'description': 'Python object that will be used to perform the build according to :pep:`517`', 'format': 'pep517-backend-reference'}, 'backend-path': {'type': 'array', '$$description': ['List of directories to be prepended to ``sys.path`` when loading the', 'back-end, and running its hooks'], 'items': {'type': 'string', '$comment': 'Should be a path (TODO: enforce it with format?)'}}}, 'required': ['requires']}, rule='additionalProperties') + if "project" in data_keys: + data_keys.remove("project") + data__project = data["project"] + validate_https___packaging_python_org_en_latest_specifications_declaring_project_metadata(data__project, custom_formats, (name_prefix or "data") + ".project") + if "tool" in data_keys: + data_keys.remove("tool") + data__tool = data["tool"] + if not isinstance(data__tool, (dict)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".tool must be object", value=data__tool, name="" + (name_prefix or "data") + ".tool", definition={'type': 'object', 'properties': {'distutils': {'$schema': 'http://json-schema.org/draft-07/schema', '$id': 'https://docs.python.org/3/install/', 'title': '``tool.distutils`` table', '$$description': ['Originally, ``distutils`` allowed developers to configure arguments for', '``setup.py`` scripts via `distutils configuration files', '`_.', '``tool.distutils`` subtables could be used with the same purpose', '(NOT CURRENTLY IMPLEMENTED).'], 'type': 'object', 'properties': {'global': {'type': 'object', 'description': 'Global options applied to all ``distutils`` commands'}}, 'patternProperties': {'.+': {'type': 'object'}}, '$comment': 'TODO: Is there a practical way of making this schema more specific?'}, 'setuptools': {'$schema': 'http://json-schema.org/draft-07/schema', '$id': 'https://setuptools.pypa.io/en/latest/references/keywords.html', 'title': '``tool.setuptools`` table', '$$description': ['Please notice for the time being the ``setuptools`` project does not specify', 'a way of configuring builds via ``pyproject.toml``.', 'Therefore this schema should be taken just as a *"thought experiment"* on how', 'this *might be done*, by following the principles established in', '`ini2toml `_.', 'It considers only ``setuptools`` `parameters', '`_', 'that can currently be configured via ``setup.cfg`` and are not covered by :pep:`621`', 'but intentionally excludes ``dependency_links`` and ``setup_requires``.', 'NOTE: ``scripts`` was renamed to ``script-files`` to avoid confusion with', 'entry-point based scripts (defined in :pep:`621`).'], 'type': 'object', 'additionalProperties': False, 'properties': {'platforms': {'type': 'array', 'items': {'type': 'string'}}, 'provides': {'$$description': ['Package and virtual package names contained within this package', '**(not supported by pip)**'], 'type': 'array', 'items': {'type': 'string', 'format': 'pep508-identifier'}}, 'obsoletes': {'$$description': ['Packages which this package renders obsolete', '**(not supported by pip)**'], 'type': 'array', 'items': {'type': 'string', 'format': 'pep508-identifier'}}, 'zip-safe': {'description': 'Whether the project can be safely installed and run from a zip file.', 'type': 'boolean'}, 'script-files': {'description': 'Legacy way of defining scripts (entry-points are preferred).', 'type': 'array', 'items': {'type': 'string'}, '$comment': 'TODO: is this field deprecated/should be removed?'}, 'eager-resources': {'$$description': ['Resources that should be extracted together, if any of them is needed,', 'or if any C extensions included in the project are imported.'], 'type': 'array', 'items': {'type': 'string'}}, 'packages': {'$$description': ['Packages that should be included in the distribution.', 'It can be given either as a list of package identifiers', 'or as a ``dict``-like structure with a single key ``find``', 'which corresponds to a dynamic call to', '``setuptools.config.expand.find_packages`` function.', 'The ``find`` key is associated with a nested ``dict``-like structure that can', 'contain ``where``, ``include``, ``exclude`` and ``namespaces`` keys,', 'mimicking the keyword arguments of the associated function.'], 'oneOf': [{'title': 'Array of Python package identifiers', 'type': 'array', 'items': {'type': 'string', 'format': 'python-module-name'}}, {'$ref': '#/definitions/find-directive'}]}, 'package-dir': {'$$description': [':class:`dict`-like structure mapping from package names to directories where their', 'code can be found.', 'The empty string (as key) means that all packages are contained inside', 'the given directory will be included in the distribution.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'oneOf': [{'format': 'python-module-name'}, {'const': ''}]}, 'patternProperties': {'^.*$': {'type': 'string'}}}, 'package-data': {'$$description': ['Mapping from package names to lists of glob patterns.', 'Usually this option is not needed when using ``include-package-data = true``', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'oneOf': [{'format': 'python-module-name'}, {'const': '*'}]}, 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, 'include-package-data': {'$$description': ['Automatically include any data files inside the package directories', 'that are specified by ``MANIFEST.in``', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'boolean'}, 'exclude-package-data': {'$$description': ['Mapping from package names to lists of glob patterns that should be excluded', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'oneOf': [{'format': 'python-module-name'}, {'const': '*'}]}, 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, 'namespace-packages': {'type': 'array', 'items': {'type': 'string', 'format': 'python-module-name'}, '$comment': 'https://setuptools.pypa.io/en/latest/userguide/package_discovery.html'}, 'py-modules': {'description': 'Modules that setuptools will manipulate', 'type': 'array', 'items': {'type': 'string', 'format': 'python-module-name'}, '$comment': 'TODO: clarify the relationship with ``packages``'}, 'data-files': {'$$description': ['**DEPRECATED**: dict-like structure where each key represents a directory and', 'the value is a list of glob patterns that should be installed in them.', "Please notice this don't work with wheels. See `data files support", '`_'], 'type': 'object', 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, 'cmdclass': {'$$description': ['Mapping of distutils-style command names to ``setuptools.Command`` subclasses', 'which in turn should be represented by strings with a qualified class name', '(i.e., "dotted" form with module), e.g.::\n\n', ' cmdclass = {mycmd = "pkg.subpkg.module.CommandClass"}\n\n', 'The command class should be a directly defined at the top-level of the', 'containing module (no class nesting).'], 'type': 'object', 'patternProperties': {'^.*$': {'type': 'string', 'format': 'python-qualified-identifier'}}}, 'license-files': {'type': 'array', 'items': {'type': 'string'}, '$$description': ['PROVISIONAL: List of glob patterns for all license files being distributed.', '(might become standard with PEP 639).'], 'default': ['LICEN[CS]E*', ' COPYING*', ' NOTICE*', 'AUTHORS*'], '$comment': 'TODO: revise if PEP 639 is accepted. Probably ``project.license-files``?'}, 'dynamic': {'type': 'object', 'description': 'Instructions for loading :pep:`621`-related metadata dynamically', 'additionalProperties': False, 'properties': {'version': {'$$description': ['A version dynamically loaded via either the ``attr:`` or ``file:``', 'directives. Please make sure the given file or attribute respects :pep:`440`.'], 'oneOf': [{'$ref': '#/definitions/attr-directive'}, {'$ref': '#/definitions/file-directive'}]}, 'classifiers': {'$ref': '#/definitions/file-directive'}, 'description': {'$ref': '#/definitions/file-directive'}, 'dependencies': {'$ref': '#/definitions/file-directive'}, 'entry-points': {'$ref': '#/definitions/file-directive'}, 'optional-dependencies': {'type': 'object', 'propertyNames': {'format': 'python-identifier'}, 'additionalProperties': False, 'patternProperties': {'.+': {'$ref': '#/definitions/file-directive'}}}, 'readme': {'anyOf': [{'$ref': '#/definitions/file-directive'}, {'properties': {'content-type': {'type': 'string'}}}], 'required': ['file']}}}}, 'definitions': {'file-directive': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, 'attr-directive': {'title': "'attr:' directive", '$id': '#/definitions/attr-directive', '$$description': ['Value is read from a module attribute. Supports callables and iterables;', 'unsupported types are cast via ``str()``'], 'type': 'object', 'additionalProperties': False, 'properties': {'attr': {'type': 'string'}}, 'required': ['attr']}, 'find-directive': {'$id': '#/definitions/find-directive', 'title': "'find:' directive", 'type': 'object', 'additionalProperties': False, 'properties': {'find': {'type': 'object', '$$description': ['Dynamic `package discovery', '`_.'], 'additionalProperties': False, 'properties': {'where': {'description': 'Directories to be searched for packages (Unix-style relative path)', 'type': 'array', 'items': {'type': 'string'}}, 'exclude': {'type': 'array', '$$description': ['Exclude packages that match the values listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'include': {'type': 'array', '$$description': ['Restrict the found packages to just the ones listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'namespaces': {'type': 'boolean', '$$description': ['When ``True``, directories without a ``__init__.py`` file will also', 'be scanned for :pep:`420`-style implicit namespaces']}}}}}}}}}, rule='type') + data__tool_is_dict = isinstance(data__tool, dict) + if data__tool_is_dict: + data__tool_keys = set(data__tool.keys()) + if "distutils" in data__tool_keys: + data__tool_keys.remove("distutils") + data__tool__distutils = data__tool["distutils"] + validate_https___docs_python_org_3_install(data__tool__distutils, custom_formats, (name_prefix or "data") + ".tool.distutils") + if "setuptools" in data__tool_keys: + data__tool_keys.remove("setuptools") + data__tool__setuptools = data__tool["setuptools"] + validate_https___setuptools_pypa_io_en_latest_references_keywords_html(data__tool__setuptools, custom_formats, (name_prefix or "data") + ".tool.setuptools") + if data_keys: + raise JsonSchemaValueException("" + (name_prefix or "data") + " must not contain "+str(data_keys)+" properties", value=data, name="" + (name_prefix or "data") + "", definition={'$schema': 'http://json-schema.org/draft-07/schema', '$id': 'https://packaging.python.org/en/latest/specifications/declaring-build-dependencies/', 'title': 'Data structure for ``pyproject.toml`` files', '$$description': ['File format containing build-time configurations for the Python ecosystem. ', ':pep:`517` initially defined a build-system independent format for source trees', 'which was complemented by :pep:`518` to provide a way of specifying dependencies ', 'for building Python projects.', 'Please notice the ``project`` table (as initially defined in :pep:`621`) is not included', 'in this schema and should be considered separately.'], 'type': 'object', 'additionalProperties': False, 'properties': {'build-system': {'type': 'object', 'description': 'Table used to store build-related data', 'additionalProperties': False, 'properties': {'requires': {'type': 'array', '$$description': ['List of dependencies in the :pep:`508` format required to execute the build', 'system. Please notice that the resulting dependency graph', '**MUST NOT contain cycles**'], 'items': {'type': 'string'}}, 'build-backend': {'type': 'string', 'description': 'Python object that will be used to perform the build according to :pep:`517`', 'format': 'pep517-backend-reference'}, 'backend-path': {'type': 'array', '$$description': ['List of directories to be prepended to ``sys.path`` when loading the', 'back-end, and running its hooks'], 'items': {'type': 'string', '$comment': 'Should be a path (TODO: enforce it with format?)'}}}, 'required': ['requires']}, 'project': {'$schema': 'http://json-schema.org/draft-07/schema', '$id': 'https://packaging.python.org/en/latest/specifications/declaring-project-metadata/', 'title': 'Package metadata stored in the ``project`` table', '$$description': ['Data structure for the **project** table inside ``pyproject.toml``', '(as initially defined in :pep:`621`)'], 'type': 'object', 'properties': {'name': {'type': 'string', 'description': 'The name (primary identifier) of the project. MUST be statically defined.', 'format': 'pep508-identifier'}, 'version': {'type': 'string', 'description': 'The version of the project as supported by :pep:`440`.', 'format': 'pep440'}, 'description': {'type': 'string', '$$description': ['The `summary description of the project', '`_']}, 'readme': {'$$description': ['`Full/detailed description of the project in the form of a README', '`_', "with meaning similar to the one defined in `core metadata's Description", '`_'], 'oneOf': [{'type': 'string', '$$description': ['Relative path to a text file (UTF-8) containing the full description', 'of the project. If the file path ends in case-insensitive ``.md`` or', '``.rst`` suffixes, then the content-type is respectively', '``text/markdown`` or ``text/x-rst``']}, {'type': 'object', 'allOf': [{'anyOf': [{'properties': {'file': {'type': 'string', '$$description': ['Relative path to a text file containing the full description', 'of the project.']}}, 'required': ['file']}, {'properties': {'text': {'type': 'string', 'description': 'Full text describing the project.'}}, 'required': ['text']}]}, {'properties': {'content-type': {'type': 'string', '$$description': ['Content-type (:rfc:`1341`) of the full description', '(e.g. ``text/markdown``). The ``charset`` parameter is assumed', 'UTF-8 when not present.'], '$comment': 'TODO: add regex pattern or format?'}}, 'required': ['content-type']}]}]}, 'requires-python': {'type': 'string', 'format': 'pep508-versionspec', '$$description': ['`The Python version requirements of the project', '`_.']}, 'license': {'description': '`Project license `_.', 'oneOf': [{'properties': {'file': {'type': 'string', '$$description': ['Relative path to the file (UTF-8) which contains the license for the', 'project.']}}, 'required': ['file']}, {'properties': {'text': {'type': 'string', '$$description': ['The license of the project whose meaning is that of the', '`License field from the core metadata', '`_.']}}, 'required': ['text']}]}, 'authors': {'type': 'array', 'items': {'$ref': '#/definitions/author'}, '$$description': ["The people or organizations considered to be the 'authors' of the project.", 'The exact meaning is open to interpretation (e.g. original or primary authors,', 'current maintainers, or owners of the package).']}, 'maintainers': {'type': 'array', 'items': {'$ref': '#/definitions/author'}, '$$description': ["The people or organizations considered to be the 'maintainers' of the project.", 'Similarly to ``authors``, the exact meaning is open to interpretation.']}, 'keywords': {'type': 'array', 'items': {'type': 'string'}, 'description': 'List of keywords to assist searching for the distribution in a larger catalog.'}, 'classifiers': {'type': 'array', 'items': {'type': 'string', 'format': 'trove-classifier', 'description': '`PyPI classifier `_.'}, '$$description': ['`Trove classifiers `_', 'which apply to the project.']}, 'urls': {'type': 'object', 'description': 'URLs associated with the project in the form ``label => value``.', 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', 'format': 'url'}}}, 'scripts': {'$ref': '#/definitions/entry-point-group', '$$description': ['Instruct the installer to create command-line wrappers for the given', '`entry points `_.']}, 'gui-scripts': {'$ref': '#/definitions/entry-point-group', '$$description': ['Instruct the installer to create GUI wrappers for the given', '`entry points `_.', 'The difference between ``scripts`` and ``gui-scripts`` is only relevant in', 'Windows.']}, 'entry-points': {'$$description': ['Instruct the installer to expose the given modules/functions via', '``entry-point`` discovery mechanism (useful for plugins).', 'More information available in the `Python packaging guide', '`_.'], 'propertyNames': {'format': 'python-entrypoint-group'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'$ref': '#/definitions/entry-point-group'}}}, 'dependencies': {'type': 'array', 'description': 'Project (mandatory) dependencies.', 'items': {'$ref': '#/definitions/dependency'}}, 'optional-dependencies': {'type': 'object', 'description': 'Optional dependency for the project', 'propertyNames': {'format': 'pep508-identifier'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'array', 'items': {'$ref': '#/definitions/dependency'}}}}, 'dynamic': {'type': 'array', '$$description': ['Specifies which fields are intentionally unspecified and expected to be', 'dynamically provided by build tools'], 'items': {'enum': ['version', 'description', 'readme', 'requires-python', 'license', 'authors', 'maintainers', 'keywords', 'classifiers', 'urls', 'scripts', 'gui-scripts', 'entry-points', 'dependencies', 'optional-dependencies']}}}, 'required': ['name'], 'additionalProperties': False, 'if': {'not': {'required': ['dynamic'], 'properties': {'dynamic': {'contains': {'const': 'version'}, '$$description': ['version is listed in ``dynamic``']}}}, '$$comment': ['According to :pep:`621`:', ' If the core metadata specification lists a field as "Required", then', ' the metadata MUST specify the field statically or list it in dynamic', 'In turn, `core metadata`_ defines:', ' The required fields are: Metadata-Version, Name, Version.', ' All the other fields are optional.', 'Since ``Metadata-Version`` is defined by the build back-end, ``name`` and', '``version`` are the only mandatory information in ``pyproject.toml``.', '.. _core metadata: https://packaging.python.org/specifications/core-metadata/']}, 'then': {'required': ['version'], '$$description': ['version should be statically defined in the ``version`` field']}, 'definitions': {'author': {'$id': '#/definitions/author', 'title': 'Author or Maintainer', '$comment': 'https://www.python.org/dev/peps/pep-0621/#authors-maintainers', 'type': 'object', 'properties': {'name': {'type': 'string', '$$description': ['MUST be a valid email name, i.e. whatever can be put as a name, before an', 'email, in :rfc:`822`.']}, 'email': {'type': 'string', 'format': 'idn-email', 'description': 'MUST be a valid email address'}}}, 'entry-point-group': {'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}, 'dependency': {'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}}}, 'tool': {'type': 'object', 'properties': {'distutils': {'$schema': 'http://json-schema.org/draft-07/schema', '$id': 'https://docs.python.org/3/install/', 'title': '``tool.distutils`` table', '$$description': ['Originally, ``distutils`` allowed developers to configure arguments for', '``setup.py`` scripts via `distutils configuration files', '`_.', '``tool.distutils`` subtables could be used with the same purpose', '(NOT CURRENTLY IMPLEMENTED).'], 'type': 'object', 'properties': {'global': {'type': 'object', 'description': 'Global options applied to all ``distutils`` commands'}}, 'patternProperties': {'.+': {'type': 'object'}}, '$comment': 'TODO: Is there a practical way of making this schema more specific?'}, 'setuptools': {'$schema': 'http://json-schema.org/draft-07/schema', '$id': 'https://setuptools.pypa.io/en/latest/references/keywords.html', 'title': '``tool.setuptools`` table', '$$description': ['Please notice for the time being the ``setuptools`` project does not specify', 'a way of configuring builds via ``pyproject.toml``.', 'Therefore this schema should be taken just as a *"thought experiment"* on how', 'this *might be done*, by following the principles established in', '`ini2toml `_.', 'It considers only ``setuptools`` `parameters', '`_', 'that can currently be configured via ``setup.cfg`` and are not covered by :pep:`621`', 'but intentionally excludes ``dependency_links`` and ``setup_requires``.', 'NOTE: ``scripts`` was renamed to ``script-files`` to avoid confusion with', 'entry-point based scripts (defined in :pep:`621`).'], 'type': 'object', 'additionalProperties': False, 'properties': {'platforms': {'type': 'array', 'items': {'type': 'string'}}, 'provides': {'$$description': ['Package and virtual package names contained within this package', '**(not supported by pip)**'], 'type': 'array', 'items': {'type': 'string', 'format': 'pep508-identifier'}}, 'obsoletes': {'$$description': ['Packages which this package renders obsolete', '**(not supported by pip)**'], 'type': 'array', 'items': {'type': 'string', 'format': 'pep508-identifier'}}, 'zip-safe': {'description': 'Whether the project can be safely installed and run from a zip file.', 'type': 'boolean'}, 'script-files': {'description': 'Legacy way of defining scripts (entry-points are preferred).', 'type': 'array', 'items': {'type': 'string'}, '$comment': 'TODO: is this field deprecated/should be removed?'}, 'eager-resources': {'$$description': ['Resources that should be extracted together, if any of them is needed,', 'or if any C extensions included in the project are imported.'], 'type': 'array', 'items': {'type': 'string'}}, 'packages': {'$$description': ['Packages that should be included in the distribution.', 'It can be given either as a list of package identifiers', 'or as a ``dict``-like structure with a single key ``find``', 'which corresponds to a dynamic call to', '``setuptools.config.expand.find_packages`` function.', 'The ``find`` key is associated with a nested ``dict``-like structure that can', 'contain ``where``, ``include``, ``exclude`` and ``namespaces`` keys,', 'mimicking the keyword arguments of the associated function.'], 'oneOf': [{'title': 'Array of Python package identifiers', 'type': 'array', 'items': {'type': 'string', 'format': 'python-module-name'}}, {'$ref': '#/definitions/find-directive'}]}, 'package-dir': {'$$description': [':class:`dict`-like structure mapping from package names to directories where their', 'code can be found.', 'The empty string (as key) means that all packages are contained inside', 'the given directory will be included in the distribution.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'oneOf': [{'format': 'python-module-name'}, {'const': ''}]}, 'patternProperties': {'^.*$': {'type': 'string'}}}, 'package-data': {'$$description': ['Mapping from package names to lists of glob patterns.', 'Usually this option is not needed when using ``include-package-data = true``', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'oneOf': [{'format': 'python-module-name'}, {'const': '*'}]}, 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, 'include-package-data': {'$$description': ['Automatically include any data files inside the package directories', 'that are specified by ``MANIFEST.in``', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'boolean'}, 'exclude-package-data': {'$$description': ['Mapping from package names to lists of glob patterns that should be excluded', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'oneOf': [{'format': 'python-module-name'}, {'const': '*'}]}, 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, 'namespace-packages': {'type': 'array', 'items': {'type': 'string', 'format': 'python-module-name'}, '$comment': 'https://setuptools.pypa.io/en/latest/userguide/package_discovery.html'}, 'py-modules': {'description': 'Modules that setuptools will manipulate', 'type': 'array', 'items': {'type': 'string', 'format': 'python-module-name'}, '$comment': 'TODO: clarify the relationship with ``packages``'}, 'data-files': {'$$description': ['**DEPRECATED**: dict-like structure where each key represents a directory and', 'the value is a list of glob patterns that should be installed in them.', "Please notice this don't work with wheels. See `data files support", '`_'], 'type': 'object', 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, 'cmdclass': {'$$description': ['Mapping of distutils-style command names to ``setuptools.Command`` subclasses', 'which in turn should be represented by strings with a qualified class name', '(i.e., "dotted" form with module), e.g.::\n\n', ' cmdclass = {mycmd = "pkg.subpkg.module.CommandClass"}\n\n', 'The command class should be a directly defined at the top-level of the', 'containing module (no class nesting).'], 'type': 'object', 'patternProperties': {'^.*$': {'type': 'string', 'format': 'python-qualified-identifier'}}}, 'license-files': {'type': 'array', 'items': {'type': 'string'}, '$$description': ['PROVISIONAL: List of glob patterns for all license files being distributed.', '(might become standard with PEP 639).'], 'default': ['LICEN[CS]E*', ' COPYING*', ' NOTICE*', 'AUTHORS*'], '$comment': 'TODO: revise if PEP 639 is accepted. Probably ``project.license-files``?'}, 'dynamic': {'type': 'object', 'description': 'Instructions for loading :pep:`621`-related metadata dynamically', 'additionalProperties': False, 'properties': {'version': {'$$description': ['A version dynamically loaded via either the ``attr:`` or ``file:``', 'directives. Please make sure the given file or attribute respects :pep:`440`.'], 'oneOf': [{'$ref': '#/definitions/attr-directive'}, {'$ref': '#/definitions/file-directive'}]}, 'classifiers': {'$ref': '#/definitions/file-directive'}, 'description': {'$ref': '#/definitions/file-directive'}, 'dependencies': {'$ref': '#/definitions/file-directive'}, 'entry-points': {'$ref': '#/definitions/file-directive'}, 'optional-dependencies': {'type': 'object', 'propertyNames': {'format': 'python-identifier'}, 'additionalProperties': False, 'patternProperties': {'.+': {'$ref': '#/definitions/file-directive'}}}, 'readme': {'anyOf': [{'$ref': '#/definitions/file-directive'}, {'properties': {'content-type': {'type': 'string'}}}], 'required': ['file']}}}}, 'definitions': {'file-directive': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, 'attr-directive': {'title': "'attr:' directive", '$id': '#/definitions/attr-directive', '$$description': ['Value is read from a module attribute. Supports callables and iterables;', 'unsupported types are cast via ``str()``'], 'type': 'object', 'additionalProperties': False, 'properties': {'attr': {'type': 'string'}}, 'required': ['attr']}, 'find-directive': {'$id': '#/definitions/find-directive', 'title': "'find:' directive", 'type': 'object', 'additionalProperties': False, 'properties': {'find': {'type': 'object', '$$description': ['Dynamic `package discovery', '`_.'], 'additionalProperties': False, 'properties': {'where': {'description': 'Directories to be searched for packages (Unix-style relative path)', 'type': 'array', 'items': {'type': 'string'}}, 'exclude': {'type': 'array', '$$description': ['Exclude packages that match the values listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'include': {'type': 'array', '$$description': ['Restrict the found packages to just the ones listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'namespaces': {'type': 'boolean', '$$description': ['When ``True``, directories without a ``__init__.py`` file will also', 'be scanned for :pep:`420`-style implicit namespaces']}}}}}}}}}}, 'project': {'$schema': 'http://json-schema.org/draft-07/schema', '$id': 'https://packaging.python.org/en/latest/specifications/declaring-project-metadata/', 'title': 'Package metadata stored in the ``project`` table', '$$description': ['Data structure for the **project** table inside ``pyproject.toml``', '(as initially defined in :pep:`621`)'], 'type': 'object', 'properties': {'name': {'type': 'string', 'description': 'The name (primary identifier) of the project. MUST be statically defined.', 'format': 'pep508-identifier'}, 'version': {'type': 'string', 'description': 'The version of the project as supported by :pep:`440`.', 'format': 'pep440'}, 'description': {'type': 'string', '$$description': ['The `summary description of the project', '`_']}, 'readme': {'$$description': ['`Full/detailed description of the project in the form of a README', '`_', "with meaning similar to the one defined in `core metadata's Description", '`_'], 'oneOf': [{'type': 'string', '$$description': ['Relative path to a text file (UTF-8) containing the full description', 'of the project. If the file path ends in case-insensitive ``.md`` or', '``.rst`` suffixes, then the content-type is respectively', '``text/markdown`` or ``text/x-rst``']}, {'type': 'object', 'allOf': [{'anyOf': [{'properties': {'file': {'type': 'string', '$$description': ['Relative path to a text file containing the full description', 'of the project.']}}, 'required': ['file']}, {'properties': {'text': {'type': 'string', 'description': 'Full text describing the project.'}}, 'required': ['text']}]}, {'properties': {'content-type': {'type': 'string', '$$description': ['Content-type (:rfc:`1341`) of the full description', '(e.g. ``text/markdown``). The ``charset`` parameter is assumed', 'UTF-8 when not present.'], '$comment': 'TODO: add regex pattern or format?'}}, 'required': ['content-type']}]}]}, 'requires-python': {'type': 'string', 'format': 'pep508-versionspec', '$$description': ['`The Python version requirements of the project', '`_.']}, 'license': {'description': '`Project license `_.', 'oneOf': [{'properties': {'file': {'type': 'string', '$$description': ['Relative path to the file (UTF-8) which contains the license for the', 'project.']}}, 'required': ['file']}, {'properties': {'text': {'type': 'string', '$$description': ['The license of the project whose meaning is that of the', '`License field from the core metadata', '`_.']}}, 'required': ['text']}]}, 'authors': {'type': 'array', 'items': {'$ref': '#/definitions/author'}, '$$description': ["The people or organizations considered to be the 'authors' of the project.", 'The exact meaning is open to interpretation (e.g. original or primary authors,', 'current maintainers, or owners of the package).']}, 'maintainers': {'type': 'array', 'items': {'$ref': '#/definitions/author'}, '$$description': ["The people or organizations considered to be the 'maintainers' of the project.", 'Similarly to ``authors``, the exact meaning is open to interpretation.']}, 'keywords': {'type': 'array', 'items': {'type': 'string'}, 'description': 'List of keywords to assist searching for the distribution in a larger catalog.'}, 'classifiers': {'type': 'array', 'items': {'type': 'string', 'format': 'trove-classifier', 'description': '`PyPI classifier `_.'}, '$$description': ['`Trove classifiers `_', 'which apply to the project.']}, 'urls': {'type': 'object', 'description': 'URLs associated with the project in the form ``label => value``.', 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', 'format': 'url'}}}, 'scripts': {'$ref': '#/definitions/entry-point-group', '$$description': ['Instruct the installer to create command-line wrappers for the given', '`entry points `_.']}, 'gui-scripts': {'$ref': '#/definitions/entry-point-group', '$$description': ['Instruct the installer to create GUI wrappers for the given', '`entry points `_.', 'The difference between ``scripts`` and ``gui-scripts`` is only relevant in', 'Windows.']}, 'entry-points': {'$$description': ['Instruct the installer to expose the given modules/functions via', '``entry-point`` discovery mechanism (useful for plugins).', 'More information available in the `Python packaging guide', '`_.'], 'propertyNames': {'format': 'python-entrypoint-group'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'$ref': '#/definitions/entry-point-group'}}}, 'dependencies': {'type': 'array', 'description': 'Project (mandatory) dependencies.', 'items': {'$ref': '#/definitions/dependency'}}, 'optional-dependencies': {'type': 'object', 'description': 'Optional dependency for the project', 'propertyNames': {'format': 'pep508-identifier'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'array', 'items': {'$ref': '#/definitions/dependency'}}}}, 'dynamic': {'type': 'array', '$$description': ['Specifies which fields are intentionally unspecified and expected to be', 'dynamically provided by build tools'], 'items': {'enum': ['version', 'description', 'readme', 'requires-python', 'license', 'authors', 'maintainers', 'keywords', 'classifiers', 'urls', 'scripts', 'gui-scripts', 'entry-points', 'dependencies', 'optional-dependencies']}}}, 'required': ['name'], 'additionalProperties': False, 'if': {'not': {'required': ['dynamic'], 'properties': {'dynamic': {'contains': {'const': 'version'}, '$$description': ['version is listed in ``dynamic``']}}}, '$$comment': ['According to :pep:`621`:', ' If the core metadata specification lists a field as "Required", then', ' the metadata MUST specify the field statically or list it in dynamic', 'In turn, `core metadata`_ defines:', ' The required fields are: Metadata-Version, Name, Version.', ' All the other fields are optional.', 'Since ``Metadata-Version`` is defined by the build back-end, ``name`` and', '``version`` are the only mandatory information in ``pyproject.toml``.', '.. _core metadata: https://packaging.python.org/specifications/core-metadata/']}, 'then': {'required': ['version'], '$$description': ['version should be statically defined in the ``version`` field']}, 'definitions': {'author': {'$id': '#/definitions/author', 'title': 'Author or Maintainer', '$comment': 'https://www.python.org/dev/peps/pep-0621/#authors-maintainers', 'type': 'object', 'properties': {'name': {'type': 'string', '$$description': ['MUST be a valid email name, i.e. whatever can be put as a name, before an', 'email, in :rfc:`822`.']}, 'email': {'type': 'string', 'format': 'idn-email', 'description': 'MUST be a valid email address'}}}, 'entry-point-group': {'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}, 'dependency': {'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}}}}, rule='additionalProperties') + return data + +def validate_https___setuptools_pypa_io_en_latest_references_keywords_html(data, custom_formats={}, name_prefix=None): + if not isinstance(data, (dict)): + raise JsonSchemaValueException("" + (name_prefix or "data") + " must be object", value=data, name="" + (name_prefix or "data") + "", definition={'$schema': 'http://json-schema.org/draft-07/schema', '$id': 'https://setuptools.pypa.io/en/latest/references/keywords.html', 'title': '``tool.setuptools`` table', '$$description': ['Please notice for the time being the ``setuptools`` project does not specify', 'a way of configuring builds via ``pyproject.toml``.', 'Therefore this schema should be taken just as a *"thought experiment"* on how', 'this *might be done*, by following the principles established in', '`ini2toml `_.', 'It considers only ``setuptools`` `parameters', '`_', 'that can currently be configured via ``setup.cfg`` and are not covered by :pep:`621`', 'but intentionally excludes ``dependency_links`` and ``setup_requires``.', 'NOTE: ``scripts`` was renamed to ``script-files`` to avoid confusion with', 'entry-point based scripts (defined in :pep:`621`).'], 'type': 'object', 'additionalProperties': False, 'properties': {'platforms': {'type': 'array', 'items': {'type': 'string'}}, 'provides': {'$$description': ['Package and virtual package names contained within this package', '**(not supported by pip)**'], 'type': 'array', 'items': {'type': 'string', 'format': 'pep508-identifier'}}, 'obsoletes': {'$$description': ['Packages which this package renders obsolete', '**(not supported by pip)**'], 'type': 'array', 'items': {'type': 'string', 'format': 'pep508-identifier'}}, 'zip-safe': {'description': 'Whether the project can be safely installed and run from a zip file.', 'type': 'boolean'}, 'script-files': {'description': 'Legacy way of defining scripts (entry-points are preferred).', 'type': 'array', 'items': {'type': 'string'}, '$comment': 'TODO: is this field deprecated/should be removed?'}, 'eager-resources': {'$$description': ['Resources that should be extracted together, if any of them is needed,', 'or if any C extensions included in the project are imported.'], 'type': 'array', 'items': {'type': 'string'}}, 'packages': {'$$description': ['Packages that should be included in the distribution.', 'It can be given either as a list of package identifiers', 'or as a ``dict``-like structure with a single key ``find``', 'which corresponds to a dynamic call to', '``setuptools.config.expand.find_packages`` function.', 'The ``find`` key is associated with a nested ``dict``-like structure that can', 'contain ``where``, ``include``, ``exclude`` and ``namespaces`` keys,', 'mimicking the keyword arguments of the associated function.'], 'oneOf': [{'title': 'Array of Python package identifiers', 'type': 'array', 'items': {'type': 'string', 'format': 'python-module-name'}}, {'$id': '#/definitions/find-directive', 'title': "'find:' directive", 'type': 'object', 'additionalProperties': False, 'properties': {'find': {'type': 'object', '$$description': ['Dynamic `package discovery', '`_.'], 'additionalProperties': False, 'properties': {'where': {'description': 'Directories to be searched for packages (Unix-style relative path)', 'type': 'array', 'items': {'type': 'string'}}, 'exclude': {'type': 'array', '$$description': ['Exclude packages that match the values listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'include': {'type': 'array', '$$description': ['Restrict the found packages to just the ones listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'namespaces': {'type': 'boolean', '$$description': ['When ``True``, directories without a ``__init__.py`` file will also', 'be scanned for :pep:`420`-style implicit namespaces']}}}}}]}, 'package-dir': {'$$description': [':class:`dict`-like structure mapping from package names to directories where their', 'code can be found.', 'The empty string (as key) means that all packages are contained inside', 'the given directory will be included in the distribution.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'oneOf': [{'format': 'python-module-name'}, {'const': ''}]}, 'patternProperties': {'^.*$': {'type': 'string'}}}, 'package-data': {'$$description': ['Mapping from package names to lists of glob patterns.', 'Usually this option is not needed when using ``include-package-data = true``', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'oneOf': [{'format': 'python-module-name'}, {'const': '*'}]}, 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, 'include-package-data': {'$$description': ['Automatically include any data files inside the package directories', 'that are specified by ``MANIFEST.in``', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'boolean'}, 'exclude-package-data': {'$$description': ['Mapping from package names to lists of glob patterns that should be excluded', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'oneOf': [{'format': 'python-module-name'}, {'const': '*'}]}, 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, 'namespace-packages': {'type': 'array', 'items': {'type': 'string', 'format': 'python-module-name'}, '$comment': 'https://setuptools.pypa.io/en/latest/userguide/package_discovery.html'}, 'py-modules': {'description': 'Modules that setuptools will manipulate', 'type': 'array', 'items': {'type': 'string', 'format': 'python-module-name'}, '$comment': 'TODO: clarify the relationship with ``packages``'}, 'data-files': {'$$description': ['**DEPRECATED**: dict-like structure where each key represents a directory and', 'the value is a list of glob patterns that should be installed in them.', "Please notice this don't work with wheels. See `data files support", '`_'], 'type': 'object', 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, 'cmdclass': {'$$description': ['Mapping of distutils-style command names to ``setuptools.Command`` subclasses', 'which in turn should be represented by strings with a qualified class name', '(i.e., "dotted" form with module), e.g.::\n\n', ' cmdclass = {mycmd = "pkg.subpkg.module.CommandClass"}\n\n', 'The command class should be a directly defined at the top-level of the', 'containing module (no class nesting).'], 'type': 'object', 'patternProperties': {'^.*$': {'type': 'string', 'format': 'python-qualified-identifier'}}}, 'license-files': {'type': 'array', 'items': {'type': 'string'}, '$$description': ['PROVISIONAL: List of glob patterns for all license files being distributed.', '(might become standard with PEP 639).'], 'default': ['LICEN[CS]E*', ' COPYING*', ' NOTICE*', 'AUTHORS*'], '$comment': 'TODO: revise if PEP 639 is accepted. Probably ``project.license-files``?'}, 'dynamic': {'type': 'object', 'description': 'Instructions for loading :pep:`621`-related metadata dynamically', 'additionalProperties': False, 'properties': {'version': {'$$description': ['A version dynamically loaded via either the ``attr:`` or ``file:``', 'directives. Please make sure the given file or attribute respects :pep:`440`.'], 'oneOf': [{'title': "'attr:' directive", '$id': '#/definitions/attr-directive', '$$description': ['Value is read from a module attribute. Supports callables and iterables;', 'unsupported types are cast via ``str()``'], 'type': 'object', 'additionalProperties': False, 'properties': {'attr': {'type': 'string'}}, 'required': ['attr']}, {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}]}, 'classifiers': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, 'description': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, 'dependencies': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, 'entry-points': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, 'optional-dependencies': {'type': 'object', 'propertyNames': {'format': 'python-identifier'}, 'additionalProperties': False, 'patternProperties': {'.+': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}}}, 'readme': {'anyOf': [{'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, {'properties': {'content-type': {'type': 'string'}}}], 'required': ['file']}}}}, 'definitions': {'file-directive': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, 'attr-directive': {'title': "'attr:' directive", '$id': '#/definitions/attr-directive', '$$description': ['Value is read from a module attribute. Supports callables and iterables;', 'unsupported types are cast via ``str()``'], 'type': 'object', 'additionalProperties': False, 'properties': {'attr': {'type': 'string'}}, 'required': ['attr']}, 'find-directive': {'$id': '#/definitions/find-directive', 'title': "'find:' directive", 'type': 'object', 'additionalProperties': False, 'properties': {'find': {'type': 'object', '$$description': ['Dynamic `package discovery', '`_.'], 'additionalProperties': False, 'properties': {'where': {'description': 'Directories to be searched for packages (Unix-style relative path)', 'type': 'array', 'items': {'type': 'string'}}, 'exclude': {'type': 'array', '$$description': ['Exclude packages that match the values listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'include': {'type': 'array', '$$description': ['Restrict the found packages to just the ones listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'namespaces': {'type': 'boolean', '$$description': ['When ``True``, directories without a ``__init__.py`` file will also', 'be scanned for :pep:`420`-style implicit namespaces']}}}}}}}, rule='type') + data_is_dict = isinstance(data, dict) + if data_is_dict: + data_keys = set(data.keys()) + if "platforms" in data_keys: + data_keys.remove("platforms") + data__platforms = data["platforms"] + if not isinstance(data__platforms, (list, tuple)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".platforms must be array", value=data__platforms, name="" + (name_prefix or "data") + ".platforms", definition={'type': 'array', 'items': {'type': 'string'}}, rule='type') + data__platforms_is_list = isinstance(data__platforms, (list, tuple)) + if data__platforms_is_list: + data__platforms_len = len(data__platforms) + for data__platforms_x, data__platforms_item in enumerate(data__platforms): + if not isinstance(data__platforms_item, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".platforms[{data__platforms_x}]".format(**locals()) + " must be string", value=data__platforms_item, name="" + (name_prefix or "data") + ".platforms[{data__platforms_x}]".format(**locals()) + "", definition={'type': 'string'}, rule='type') + if "provides" in data_keys: + data_keys.remove("provides") + data__provides = data["provides"] + if not isinstance(data__provides, (list, tuple)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".provides must be array", value=data__provides, name="" + (name_prefix or "data") + ".provides", definition={'$$description': ['Package and virtual package names contained within this package', '**(not supported by pip)**'], 'type': 'array', 'items': {'type': 'string', 'format': 'pep508-identifier'}}, rule='type') + data__provides_is_list = isinstance(data__provides, (list, tuple)) + if data__provides_is_list: + data__provides_len = len(data__provides) + for data__provides_x, data__provides_item in enumerate(data__provides): + if not isinstance(data__provides_item, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".provides[{data__provides_x}]".format(**locals()) + " must be string", value=data__provides_item, name="" + (name_prefix or "data") + ".provides[{data__provides_x}]".format(**locals()) + "", definition={'type': 'string', 'format': 'pep508-identifier'}, rule='type') + if isinstance(data__provides_item, str): + if not custom_formats["pep508-identifier"](data__provides_item): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".provides[{data__provides_x}]".format(**locals()) + " must be pep508-identifier", value=data__provides_item, name="" + (name_prefix or "data") + ".provides[{data__provides_x}]".format(**locals()) + "", definition={'type': 'string', 'format': 'pep508-identifier'}, rule='format') + if "obsoletes" in data_keys: + data_keys.remove("obsoletes") + data__obsoletes = data["obsoletes"] + if not isinstance(data__obsoletes, (list, tuple)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".obsoletes must be array", value=data__obsoletes, name="" + (name_prefix or "data") + ".obsoletes", definition={'$$description': ['Packages which this package renders obsolete', '**(not supported by pip)**'], 'type': 'array', 'items': {'type': 'string', 'format': 'pep508-identifier'}}, rule='type') + data__obsoletes_is_list = isinstance(data__obsoletes, (list, tuple)) + if data__obsoletes_is_list: + data__obsoletes_len = len(data__obsoletes) + for data__obsoletes_x, data__obsoletes_item in enumerate(data__obsoletes): + if not isinstance(data__obsoletes_item, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".obsoletes[{data__obsoletes_x}]".format(**locals()) + " must be string", value=data__obsoletes_item, name="" + (name_prefix or "data") + ".obsoletes[{data__obsoletes_x}]".format(**locals()) + "", definition={'type': 'string', 'format': 'pep508-identifier'}, rule='type') + if isinstance(data__obsoletes_item, str): + if not custom_formats["pep508-identifier"](data__obsoletes_item): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".obsoletes[{data__obsoletes_x}]".format(**locals()) + " must be pep508-identifier", value=data__obsoletes_item, name="" + (name_prefix or "data") + ".obsoletes[{data__obsoletes_x}]".format(**locals()) + "", definition={'type': 'string', 'format': 'pep508-identifier'}, rule='format') + if "zip-safe" in data_keys: + data_keys.remove("zip-safe") + data__zipsafe = data["zip-safe"] + if not isinstance(data__zipsafe, (bool)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".zip-safe must be boolean", value=data__zipsafe, name="" + (name_prefix or "data") + ".zip-safe", definition={'description': 'Whether the project can be safely installed and run from a zip file.', 'type': 'boolean'}, rule='type') + if "script-files" in data_keys: + data_keys.remove("script-files") + data__scriptfiles = data["script-files"] + if not isinstance(data__scriptfiles, (list, tuple)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".script-files must be array", value=data__scriptfiles, name="" + (name_prefix or "data") + ".script-files", definition={'description': 'Legacy way of defining scripts (entry-points are preferred).', 'type': 'array', 'items': {'type': 'string'}, '$comment': 'TODO: is this field deprecated/should be removed?'}, rule='type') + data__scriptfiles_is_list = isinstance(data__scriptfiles, (list, tuple)) + if data__scriptfiles_is_list: + data__scriptfiles_len = len(data__scriptfiles) + for data__scriptfiles_x, data__scriptfiles_item in enumerate(data__scriptfiles): + if not isinstance(data__scriptfiles_item, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".script-files[{data__scriptfiles_x}]".format(**locals()) + " must be string", value=data__scriptfiles_item, name="" + (name_prefix or "data") + ".script-files[{data__scriptfiles_x}]".format(**locals()) + "", definition={'type': 'string'}, rule='type') + if "eager-resources" in data_keys: + data_keys.remove("eager-resources") + data__eagerresources = data["eager-resources"] + if not isinstance(data__eagerresources, (list, tuple)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".eager-resources must be array", value=data__eagerresources, name="" + (name_prefix or "data") + ".eager-resources", definition={'$$description': ['Resources that should be extracted together, if any of them is needed,', 'or if any C extensions included in the project are imported.'], 'type': 'array', 'items': {'type': 'string'}}, rule='type') + data__eagerresources_is_list = isinstance(data__eagerresources, (list, tuple)) + if data__eagerresources_is_list: + data__eagerresources_len = len(data__eagerresources) + for data__eagerresources_x, data__eagerresources_item in enumerate(data__eagerresources): + if not isinstance(data__eagerresources_item, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".eager-resources[{data__eagerresources_x}]".format(**locals()) + " must be string", value=data__eagerresources_item, name="" + (name_prefix or "data") + ".eager-resources[{data__eagerresources_x}]".format(**locals()) + "", definition={'type': 'string'}, rule='type') + if "packages" in data_keys: + data_keys.remove("packages") + data__packages = data["packages"] + data__packages_one_of_count1 = 0 + if data__packages_one_of_count1 < 2: + try: + if not isinstance(data__packages, (list, tuple)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".packages must be array", value=data__packages, name="" + (name_prefix or "data") + ".packages", definition={'title': 'Array of Python package identifiers', 'type': 'array', 'items': {'type': 'string', 'format': 'python-module-name'}}, rule='type') + data__packages_is_list = isinstance(data__packages, (list, tuple)) + if data__packages_is_list: + data__packages_len = len(data__packages) + for data__packages_x, data__packages_item in enumerate(data__packages): + if not isinstance(data__packages_item, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".packages[{data__packages_x}]".format(**locals()) + " must be string", value=data__packages_item, name="" + (name_prefix or "data") + ".packages[{data__packages_x}]".format(**locals()) + "", definition={'type': 'string', 'format': 'python-module-name'}, rule='type') + if isinstance(data__packages_item, str): + if not custom_formats["python-module-name"](data__packages_item): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".packages[{data__packages_x}]".format(**locals()) + " must be python-module-name", value=data__packages_item, name="" + (name_prefix or "data") + ".packages[{data__packages_x}]".format(**locals()) + "", definition={'type': 'string', 'format': 'python-module-name'}, rule='format') + data__packages_one_of_count1 += 1 + except JsonSchemaValueException: pass + if data__packages_one_of_count1 < 2: + try: + validate_https___setuptools_pypa_io_en_latest_references_keywords_html__definitions_find_directive(data__packages, custom_formats, (name_prefix or "data") + ".packages") + data__packages_one_of_count1 += 1 + except JsonSchemaValueException: pass + if data__packages_one_of_count1 != 1: + raise JsonSchemaValueException("" + (name_prefix or "data") + ".packages must be valid exactly by one definition" + (" (" + str(data__packages_one_of_count1) + " matches found)"), value=data__packages, name="" + (name_prefix or "data") + ".packages", definition={'$$description': ['Packages that should be included in the distribution.', 'It can be given either as a list of package identifiers', 'or as a ``dict``-like structure with a single key ``find``', 'which corresponds to a dynamic call to', '``setuptools.config.expand.find_packages`` function.', 'The ``find`` key is associated with a nested ``dict``-like structure that can', 'contain ``where``, ``include``, ``exclude`` and ``namespaces`` keys,', 'mimicking the keyword arguments of the associated function.'], 'oneOf': [{'title': 'Array of Python package identifiers', 'type': 'array', 'items': {'type': 'string', 'format': 'python-module-name'}}, {'$id': '#/definitions/find-directive', 'title': "'find:' directive", 'type': 'object', 'additionalProperties': False, 'properties': {'find': {'type': 'object', '$$description': ['Dynamic `package discovery', '`_.'], 'additionalProperties': False, 'properties': {'where': {'description': 'Directories to be searched for packages (Unix-style relative path)', 'type': 'array', 'items': {'type': 'string'}}, 'exclude': {'type': 'array', '$$description': ['Exclude packages that match the values listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'include': {'type': 'array', '$$description': ['Restrict the found packages to just the ones listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'namespaces': {'type': 'boolean', '$$description': ['When ``True``, directories without a ``__init__.py`` file will also', 'be scanned for :pep:`420`-style implicit namespaces']}}}}}]}, rule='oneOf') + if "package-dir" in data_keys: + data_keys.remove("package-dir") + data__packagedir = data["package-dir"] + if not isinstance(data__packagedir, (dict)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".package-dir must be object", value=data__packagedir, name="" + (name_prefix or "data") + ".package-dir", definition={'$$description': [':class:`dict`-like structure mapping from package names to directories where their', 'code can be found.', 'The empty string (as key) means that all packages are contained inside', 'the given directory will be included in the distribution.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'oneOf': [{'format': 'python-module-name'}, {'const': ''}]}, 'patternProperties': {'^.*$': {'type': 'string'}}}, rule='type') + data__packagedir_is_dict = isinstance(data__packagedir, dict) + if data__packagedir_is_dict: + data__packagedir_keys = set(data__packagedir.keys()) + for data__packagedir_key, data__packagedir_val in data__packagedir.items(): + if REGEX_PATTERNS['^.*$'].search(data__packagedir_key): + if data__packagedir_key in data__packagedir_keys: + data__packagedir_keys.remove(data__packagedir_key) + if not isinstance(data__packagedir_val, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".package-dir.{data__packagedir_key}".format(**locals()) + " must be string", value=data__packagedir_val, name="" + (name_prefix or "data") + ".package-dir.{data__packagedir_key}".format(**locals()) + "", definition={'type': 'string'}, rule='type') + if data__packagedir_keys: + raise JsonSchemaValueException("" + (name_prefix or "data") + ".package-dir must not contain "+str(data__packagedir_keys)+" properties", value=data__packagedir, name="" + (name_prefix or "data") + ".package-dir", definition={'$$description': [':class:`dict`-like structure mapping from package names to directories where their', 'code can be found.', 'The empty string (as key) means that all packages are contained inside', 'the given directory will be included in the distribution.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'oneOf': [{'format': 'python-module-name'}, {'const': ''}]}, 'patternProperties': {'^.*$': {'type': 'string'}}}, rule='additionalProperties') + data__packagedir_len = len(data__packagedir) + if data__packagedir_len != 0: + data__packagedir_property_names = True + for data__packagedir_key in data__packagedir: + try: + data__packagedir_key_one_of_count2 = 0 + if data__packagedir_key_one_of_count2 < 2: + try: + if isinstance(data__packagedir_key, str): + if not custom_formats["python-module-name"](data__packagedir_key): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".package-dir must be python-module-name", value=data__packagedir_key, name="" + (name_prefix or "data") + ".package-dir", definition={'format': 'python-module-name'}, rule='format') + data__packagedir_key_one_of_count2 += 1 + except JsonSchemaValueException: pass + if data__packagedir_key_one_of_count2 < 2: + try: + if data__packagedir_key != "": + raise JsonSchemaValueException("" + (name_prefix or "data") + ".package-dir must be same as const definition: ", value=data__packagedir_key, name="" + (name_prefix or "data") + ".package-dir", definition={'const': ''}, rule='const') + data__packagedir_key_one_of_count2 += 1 + except JsonSchemaValueException: pass + if data__packagedir_key_one_of_count2 != 1: + raise JsonSchemaValueException("" + (name_prefix or "data") + ".package-dir must be valid exactly by one definition" + (" (" + str(data__packagedir_key_one_of_count2) + " matches found)"), value=data__packagedir_key, name="" + (name_prefix or "data") + ".package-dir", definition={'oneOf': [{'format': 'python-module-name'}, {'const': ''}]}, rule='oneOf') + except JsonSchemaValueException: + data__packagedir_property_names = False + if not data__packagedir_property_names: + raise JsonSchemaValueException("" + (name_prefix or "data") + ".package-dir must be named by propertyName definition", value=data__packagedir, name="" + (name_prefix or "data") + ".package-dir", definition={'$$description': [':class:`dict`-like structure mapping from package names to directories where their', 'code can be found.', 'The empty string (as key) means that all packages are contained inside', 'the given directory will be included in the distribution.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'oneOf': [{'format': 'python-module-name'}, {'const': ''}]}, 'patternProperties': {'^.*$': {'type': 'string'}}}, rule='propertyNames') + if "package-data" in data_keys: + data_keys.remove("package-data") + data__packagedata = data["package-data"] + if not isinstance(data__packagedata, (dict)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".package-data must be object", value=data__packagedata, name="" + (name_prefix or "data") + ".package-data", definition={'$$description': ['Mapping from package names to lists of glob patterns.', 'Usually this option is not needed when using ``include-package-data = true``', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'oneOf': [{'format': 'python-module-name'}, {'const': '*'}]}, 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, rule='type') + data__packagedata_is_dict = isinstance(data__packagedata, dict) + if data__packagedata_is_dict: + data__packagedata_keys = set(data__packagedata.keys()) + for data__packagedata_key, data__packagedata_val in data__packagedata.items(): + if REGEX_PATTERNS['^.*$'].search(data__packagedata_key): + if data__packagedata_key in data__packagedata_keys: + data__packagedata_keys.remove(data__packagedata_key) + if not isinstance(data__packagedata_val, (list, tuple)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".package-data.{data__packagedata_key}".format(**locals()) + " must be array", value=data__packagedata_val, name="" + (name_prefix or "data") + ".package-data.{data__packagedata_key}".format(**locals()) + "", definition={'type': 'array', 'items': {'type': 'string'}}, rule='type') + data__packagedata_val_is_list = isinstance(data__packagedata_val, (list, tuple)) + if data__packagedata_val_is_list: + data__packagedata_val_len = len(data__packagedata_val) + for data__packagedata_val_x, data__packagedata_val_item in enumerate(data__packagedata_val): + if not isinstance(data__packagedata_val_item, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".package-data.{data__packagedata_key}[{data__packagedata_val_x}]".format(**locals()) + " must be string", value=data__packagedata_val_item, name="" + (name_prefix or "data") + ".package-data.{data__packagedata_key}[{data__packagedata_val_x}]".format(**locals()) + "", definition={'type': 'string'}, rule='type') + if data__packagedata_keys: + raise JsonSchemaValueException("" + (name_prefix or "data") + ".package-data must not contain "+str(data__packagedata_keys)+" properties", value=data__packagedata, name="" + (name_prefix or "data") + ".package-data", definition={'$$description': ['Mapping from package names to lists of glob patterns.', 'Usually this option is not needed when using ``include-package-data = true``', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'oneOf': [{'format': 'python-module-name'}, {'const': '*'}]}, 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, rule='additionalProperties') + data__packagedata_len = len(data__packagedata) + if data__packagedata_len != 0: + data__packagedata_property_names = True + for data__packagedata_key in data__packagedata: + try: + data__packagedata_key_one_of_count3 = 0 + if data__packagedata_key_one_of_count3 < 2: + try: + if isinstance(data__packagedata_key, str): + if not custom_formats["python-module-name"](data__packagedata_key): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".package-data must be python-module-name", value=data__packagedata_key, name="" + (name_prefix or "data") + ".package-data", definition={'format': 'python-module-name'}, rule='format') + data__packagedata_key_one_of_count3 += 1 + except JsonSchemaValueException: pass + if data__packagedata_key_one_of_count3 < 2: + try: + if data__packagedata_key != "*": + raise JsonSchemaValueException("" + (name_prefix or "data") + ".package-data must be same as const definition: *", value=data__packagedata_key, name="" + (name_prefix or "data") + ".package-data", definition={'const': '*'}, rule='const') + data__packagedata_key_one_of_count3 += 1 + except JsonSchemaValueException: pass + if data__packagedata_key_one_of_count3 != 1: + raise JsonSchemaValueException("" + (name_prefix or "data") + ".package-data must be valid exactly by one definition" + (" (" + str(data__packagedata_key_one_of_count3) + " matches found)"), value=data__packagedata_key, name="" + (name_prefix or "data") + ".package-data", definition={'oneOf': [{'format': 'python-module-name'}, {'const': '*'}]}, rule='oneOf') + except JsonSchemaValueException: + data__packagedata_property_names = False + if not data__packagedata_property_names: + raise JsonSchemaValueException("" + (name_prefix or "data") + ".package-data must be named by propertyName definition", value=data__packagedata, name="" + (name_prefix or "data") + ".package-data", definition={'$$description': ['Mapping from package names to lists of glob patterns.', 'Usually this option is not needed when using ``include-package-data = true``', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'oneOf': [{'format': 'python-module-name'}, {'const': '*'}]}, 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, rule='propertyNames') + if "include-package-data" in data_keys: + data_keys.remove("include-package-data") + data__includepackagedata = data["include-package-data"] + if not isinstance(data__includepackagedata, (bool)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".include-package-data must be boolean", value=data__includepackagedata, name="" + (name_prefix or "data") + ".include-package-data", definition={'$$description': ['Automatically include any data files inside the package directories', 'that are specified by ``MANIFEST.in``', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'boolean'}, rule='type') + if "exclude-package-data" in data_keys: + data_keys.remove("exclude-package-data") + data__excludepackagedata = data["exclude-package-data"] + if not isinstance(data__excludepackagedata, (dict)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".exclude-package-data must be object", value=data__excludepackagedata, name="" + (name_prefix or "data") + ".exclude-package-data", definition={'$$description': ['Mapping from package names to lists of glob patterns that should be excluded', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'oneOf': [{'format': 'python-module-name'}, {'const': '*'}]}, 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, rule='type') + data__excludepackagedata_is_dict = isinstance(data__excludepackagedata, dict) + if data__excludepackagedata_is_dict: + data__excludepackagedata_keys = set(data__excludepackagedata.keys()) + for data__excludepackagedata_key, data__excludepackagedata_val in data__excludepackagedata.items(): + if REGEX_PATTERNS['^.*$'].search(data__excludepackagedata_key): + if data__excludepackagedata_key in data__excludepackagedata_keys: + data__excludepackagedata_keys.remove(data__excludepackagedata_key) + if not isinstance(data__excludepackagedata_val, (list, tuple)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".exclude-package-data.{data__excludepackagedata_key}".format(**locals()) + " must be array", value=data__excludepackagedata_val, name="" + (name_prefix or "data") + ".exclude-package-data.{data__excludepackagedata_key}".format(**locals()) + "", definition={'type': 'array', 'items': {'type': 'string'}}, rule='type') + data__excludepackagedata_val_is_list = isinstance(data__excludepackagedata_val, (list, tuple)) + if data__excludepackagedata_val_is_list: + data__excludepackagedata_val_len = len(data__excludepackagedata_val) + for data__excludepackagedata_val_x, data__excludepackagedata_val_item in enumerate(data__excludepackagedata_val): + if not isinstance(data__excludepackagedata_val_item, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".exclude-package-data.{data__excludepackagedata_key}[{data__excludepackagedata_val_x}]".format(**locals()) + " must be string", value=data__excludepackagedata_val_item, name="" + (name_prefix or "data") + ".exclude-package-data.{data__excludepackagedata_key}[{data__excludepackagedata_val_x}]".format(**locals()) + "", definition={'type': 'string'}, rule='type') + if data__excludepackagedata_keys: + raise JsonSchemaValueException("" + (name_prefix or "data") + ".exclude-package-data must not contain "+str(data__excludepackagedata_keys)+" properties", value=data__excludepackagedata, name="" + (name_prefix or "data") + ".exclude-package-data", definition={'$$description': ['Mapping from package names to lists of glob patterns that should be excluded', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'oneOf': [{'format': 'python-module-name'}, {'const': '*'}]}, 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, rule='additionalProperties') + data__excludepackagedata_len = len(data__excludepackagedata) + if data__excludepackagedata_len != 0: + data__excludepackagedata_property_names = True + for data__excludepackagedata_key in data__excludepackagedata: + try: + data__excludepackagedata_key_one_of_count4 = 0 + if data__excludepackagedata_key_one_of_count4 < 2: + try: + if isinstance(data__excludepackagedata_key, str): + if not custom_formats["python-module-name"](data__excludepackagedata_key): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".exclude-package-data must be python-module-name", value=data__excludepackagedata_key, name="" + (name_prefix or "data") + ".exclude-package-data", definition={'format': 'python-module-name'}, rule='format') + data__excludepackagedata_key_one_of_count4 += 1 + except JsonSchemaValueException: pass + if data__excludepackagedata_key_one_of_count4 < 2: + try: + if data__excludepackagedata_key != "*": + raise JsonSchemaValueException("" + (name_prefix or "data") + ".exclude-package-data must be same as const definition: *", value=data__excludepackagedata_key, name="" + (name_prefix or "data") + ".exclude-package-data", definition={'const': '*'}, rule='const') + data__excludepackagedata_key_one_of_count4 += 1 + except JsonSchemaValueException: pass + if data__excludepackagedata_key_one_of_count4 != 1: + raise JsonSchemaValueException("" + (name_prefix or "data") + ".exclude-package-data must be valid exactly by one definition" + (" (" + str(data__excludepackagedata_key_one_of_count4) + " matches found)"), value=data__excludepackagedata_key, name="" + (name_prefix or "data") + ".exclude-package-data", definition={'oneOf': [{'format': 'python-module-name'}, {'const': '*'}]}, rule='oneOf') + except JsonSchemaValueException: + data__excludepackagedata_property_names = False + if not data__excludepackagedata_property_names: + raise JsonSchemaValueException("" + (name_prefix or "data") + ".exclude-package-data must be named by propertyName definition", value=data__excludepackagedata, name="" + (name_prefix or "data") + ".exclude-package-data", definition={'$$description': ['Mapping from package names to lists of glob patterns that should be excluded', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'oneOf': [{'format': 'python-module-name'}, {'const': '*'}]}, 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, rule='propertyNames') + if "namespace-packages" in data_keys: + data_keys.remove("namespace-packages") + data__namespacepackages = data["namespace-packages"] + if not isinstance(data__namespacepackages, (list, tuple)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".namespace-packages must be array", value=data__namespacepackages, name="" + (name_prefix or "data") + ".namespace-packages", definition={'type': 'array', 'items': {'type': 'string', 'format': 'python-module-name'}, '$comment': 'https://setuptools.pypa.io/en/latest/userguide/package_discovery.html'}, rule='type') + data__namespacepackages_is_list = isinstance(data__namespacepackages, (list, tuple)) + if data__namespacepackages_is_list: + data__namespacepackages_len = len(data__namespacepackages) + for data__namespacepackages_x, data__namespacepackages_item in enumerate(data__namespacepackages): + if not isinstance(data__namespacepackages_item, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".namespace-packages[{data__namespacepackages_x}]".format(**locals()) + " must be string", value=data__namespacepackages_item, name="" + (name_prefix or "data") + ".namespace-packages[{data__namespacepackages_x}]".format(**locals()) + "", definition={'type': 'string', 'format': 'python-module-name'}, rule='type') + if isinstance(data__namespacepackages_item, str): + if not custom_formats["python-module-name"](data__namespacepackages_item): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".namespace-packages[{data__namespacepackages_x}]".format(**locals()) + " must be python-module-name", value=data__namespacepackages_item, name="" + (name_prefix or "data") + ".namespace-packages[{data__namespacepackages_x}]".format(**locals()) + "", definition={'type': 'string', 'format': 'python-module-name'}, rule='format') + if "py-modules" in data_keys: + data_keys.remove("py-modules") + data__pymodules = data["py-modules"] + if not isinstance(data__pymodules, (list, tuple)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".py-modules must be array", value=data__pymodules, name="" + (name_prefix or "data") + ".py-modules", definition={'description': 'Modules that setuptools will manipulate', 'type': 'array', 'items': {'type': 'string', 'format': 'python-module-name'}, '$comment': 'TODO: clarify the relationship with ``packages``'}, rule='type') + data__pymodules_is_list = isinstance(data__pymodules, (list, tuple)) + if data__pymodules_is_list: + data__pymodules_len = len(data__pymodules) + for data__pymodules_x, data__pymodules_item in enumerate(data__pymodules): + if not isinstance(data__pymodules_item, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".py-modules[{data__pymodules_x}]".format(**locals()) + " must be string", value=data__pymodules_item, name="" + (name_prefix or "data") + ".py-modules[{data__pymodules_x}]".format(**locals()) + "", definition={'type': 'string', 'format': 'python-module-name'}, rule='type') + if isinstance(data__pymodules_item, str): + if not custom_formats["python-module-name"](data__pymodules_item): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".py-modules[{data__pymodules_x}]".format(**locals()) + " must be python-module-name", value=data__pymodules_item, name="" + (name_prefix or "data") + ".py-modules[{data__pymodules_x}]".format(**locals()) + "", definition={'type': 'string', 'format': 'python-module-name'}, rule='format') + if "data-files" in data_keys: + data_keys.remove("data-files") + data__datafiles = data["data-files"] + if not isinstance(data__datafiles, (dict)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".data-files must be object", value=data__datafiles, name="" + (name_prefix or "data") + ".data-files", definition={'$$description': ['**DEPRECATED**: dict-like structure where each key represents a directory and', 'the value is a list of glob patterns that should be installed in them.', "Please notice this don't work with wheels. See `data files support", '`_'], 'type': 'object', 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, rule='type') + data__datafiles_is_dict = isinstance(data__datafiles, dict) + if data__datafiles_is_dict: + data__datafiles_keys = set(data__datafiles.keys()) + for data__datafiles_key, data__datafiles_val in data__datafiles.items(): + if REGEX_PATTERNS['^.*$'].search(data__datafiles_key): + if data__datafiles_key in data__datafiles_keys: + data__datafiles_keys.remove(data__datafiles_key) + if not isinstance(data__datafiles_val, (list, tuple)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".data-files.{data__datafiles_key}".format(**locals()) + " must be array", value=data__datafiles_val, name="" + (name_prefix or "data") + ".data-files.{data__datafiles_key}".format(**locals()) + "", definition={'type': 'array', 'items': {'type': 'string'}}, rule='type') + data__datafiles_val_is_list = isinstance(data__datafiles_val, (list, tuple)) + if data__datafiles_val_is_list: + data__datafiles_val_len = len(data__datafiles_val) + for data__datafiles_val_x, data__datafiles_val_item in enumerate(data__datafiles_val): + if not isinstance(data__datafiles_val_item, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".data-files.{data__datafiles_key}[{data__datafiles_val_x}]".format(**locals()) + " must be string", value=data__datafiles_val_item, name="" + (name_prefix or "data") + ".data-files.{data__datafiles_key}[{data__datafiles_val_x}]".format(**locals()) + "", definition={'type': 'string'}, rule='type') + if "cmdclass" in data_keys: + data_keys.remove("cmdclass") + data__cmdclass = data["cmdclass"] + if not isinstance(data__cmdclass, (dict)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".cmdclass must be object", value=data__cmdclass, name="" + (name_prefix or "data") + ".cmdclass", definition={'$$description': ['Mapping of distutils-style command names to ``setuptools.Command`` subclasses', 'which in turn should be represented by strings with a qualified class name', '(i.e., "dotted" form with module), e.g.::\n\n', ' cmdclass = {mycmd = "pkg.subpkg.module.CommandClass"}\n\n', 'The command class should be a directly defined at the top-level of the', 'containing module (no class nesting).'], 'type': 'object', 'patternProperties': {'^.*$': {'type': 'string', 'format': 'python-qualified-identifier'}}}, rule='type') + data__cmdclass_is_dict = isinstance(data__cmdclass, dict) + if data__cmdclass_is_dict: + data__cmdclass_keys = set(data__cmdclass.keys()) + for data__cmdclass_key, data__cmdclass_val in data__cmdclass.items(): + if REGEX_PATTERNS['^.*$'].search(data__cmdclass_key): + if data__cmdclass_key in data__cmdclass_keys: + data__cmdclass_keys.remove(data__cmdclass_key) + if not isinstance(data__cmdclass_val, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".cmdclass.{data__cmdclass_key}".format(**locals()) + " must be string", value=data__cmdclass_val, name="" + (name_prefix or "data") + ".cmdclass.{data__cmdclass_key}".format(**locals()) + "", definition={'type': 'string', 'format': 'python-qualified-identifier'}, rule='type') + if isinstance(data__cmdclass_val, str): + if not custom_formats["python-qualified-identifier"](data__cmdclass_val): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".cmdclass.{data__cmdclass_key}".format(**locals()) + " must be python-qualified-identifier", value=data__cmdclass_val, name="" + (name_prefix or "data") + ".cmdclass.{data__cmdclass_key}".format(**locals()) + "", definition={'type': 'string', 'format': 'python-qualified-identifier'}, rule='format') + if "license-files" in data_keys: + data_keys.remove("license-files") + data__licensefiles = data["license-files"] + if not isinstance(data__licensefiles, (list, tuple)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".license-files must be array", value=data__licensefiles, name="" + (name_prefix or "data") + ".license-files", definition={'type': 'array', 'items': {'type': 'string'}, '$$description': ['PROVISIONAL: List of glob patterns for all license files being distributed.', '(might become standard with PEP 639).'], 'default': ['LICEN[CS]E*', ' COPYING*', ' NOTICE*', 'AUTHORS*'], '$comment': 'TODO: revise if PEP 639 is accepted. Probably ``project.license-files``?'}, rule='type') + data__licensefiles_is_list = isinstance(data__licensefiles, (list, tuple)) + if data__licensefiles_is_list: + data__licensefiles_len = len(data__licensefiles) + for data__licensefiles_x, data__licensefiles_item in enumerate(data__licensefiles): + if not isinstance(data__licensefiles_item, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".license-files[{data__licensefiles_x}]".format(**locals()) + " must be string", value=data__licensefiles_item, name="" + (name_prefix or "data") + ".license-files[{data__licensefiles_x}]".format(**locals()) + "", definition={'type': 'string'}, rule='type') + else: data["license-files"] = ['LICEN[CS]E*', ' COPYING*', ' NOTICE*', 'AUTHORS*'] + if "dynamic" in data_keys: + data_keys.remove("dynamic") + data__dynamic = data["dynamic"] + if not isinstance(data__dynamic, (dict)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".dynamic must be object", value=data__dynamic, name="" + (name_prefix or "data") + ".dynamic", definition={'type': 'object', 'description': 'Instructions for loading :pep:`621`-related metadata dynamically', 'additionalProperties': False, 'properties': {'version': {'$$description': ['A version dynamically loaded via either the ``attr:`` or ``file:``', 'directives. Please make sure the given file or attribute respects :pep:`440`.'], 'oneOf': [{'title': "'attr:' directive", '$id': '#/definitions/attr-directive', '$$description': ['Value is read from a module attribute. Supports callables and iterables;', 'unsupported types are cast via ``str()``'], 'type': 'object', 'additionalProperties': False, 'properties': {'attr': {'type': 'string'}}, 'required': ['attr']}, {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}]}, 'classifiers': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, 'description': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, 'dependencies': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, 'entry-points': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, 'optional-dependencies': {'type': 'object', 'propertyNames': {'format': 'python-identifier'}, 'additionalProperties': False, 'patternProperties': {'.+': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}}}, 'readme': {'anyOf': [{'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, {'properties': {'content-type': {'type': 'string'}}}], 'required': ['file']}}}, rule='type') + data__dynamic_is_dict = isinstance(data__dynamic, dict) + if data__dynamic_is_dict: + data__dynamic_keys = set(data__dynamic.keys()) + if "version" in data__dynamic_keys: + data__dynamic_keys.remove("version") + data__dynamic__version = data__dynamic["version"] + data__dynamic__version_one_of_count5 = 0 + if data__dynamic__version_one_of_count5 < 2: + try: + validate_https___setuptools_pypa_io_en_latest_references_keywords_html__definitions_attr_directive(data__dynamic__version, custom_formats, (name_prefix or "data") + ".dynamic.version") + data__dynamic__version_one_of_count5 += 1 + except JsonSchemaValueException: pass + if data__dynamic__version_one_of_count5 < 2: + try: + validate_https___setuptools_pypa_io_en_latest_references_keywords_html__definitions_file_directive(data__dynamic__version, custom_formats, (name_prefix or "data") + ".dynamic.version") + data__dynamic__version_one_of_count5 += 1 + except JsonSchemaValueException: pass + if data__dynamic__version_one_of_count5 != 1: + raise JsonSchemaValueException("" + (name_prefix or "data") + ".dynamic.version must be valid exactly by one definition" + (" (" + str(data__dynamic__version_one_of_count5) + " matches found)"), value=data__dynamic__version, name="" + (name_prefix or "data") + ".dynamic.version", definition={'$$description': ['A version dynamically loaded via either the ``attr:`` or ``file:``', 'directives. Please make sure the given file or attribute respects :pep:`440`.'], 'oneOf': [{'title': "'attr:' directive", '$id': '#/definitions/attr-directive', '$$description': ['Value is read from a module attribute. Supports callables and iterables;', 'unsupported types are cast via ``str()``'], 'type': 'object', 'additionalProperties': False, 'properties': {'attr': {'type': 'string'}}, 'required': ['attr']}, {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}]}, rule='oneOf') + if "classifiers" in data__dynamic_keys: + data__dynamic_keys.remove("classifiers") + data__dynamic__classifiers = data__dynamic["classifiers"] + validate_https___setuptools_pypa_io_en_latest_references_keywords_html__definitions_file_directive(data__dynamic__classifiers, custom_formats, (name_prefix or "data") + ".dynamic.classifiers") + if "description" in data__dynamic_keys: + data__dynamic_keys.remove("description") + data__dynamic__description = data__dynamic["description"] + validate_https___setuptools_pypa_io_en_latest_references_keywords_html__definitions_file_directive(data__dynamic__description, custom_formats, (name_prefix or "data") + ".dynamic.description") + if "dependencies" in data__dynamic_keys: + data__dynamic_keys.remove("dependencies") + data__dynamic__dependencies = data__dynamic["dependencies"] + validate_https___setuptools_pypa_io_en_latest_references_keywords_html__definitions_file_directive(data__dynamic__dependencies, custom_formats, (name_prefix or "data") + ".dynamic.dependencies") + if "entry-points" in data__dynamic_keys: + data__dynamic_keys.remove("entry-points") + data__dynamic__entrypoints = data__dynamic["entry-points"] + validate_https___setuptools_pypa_io_en_latest_references_keywords_html__definitions_file_directive(data__dynamic__entrypoints, custom_formats, (name_prefix or "data") + ".dynamic.entry-points") + if "optional-dependencies" in data__dynamic_keys: + data__dynamic_keys.remove("optional-dependencies") + data__dynamic__optionaldependencies = data__dynamic["optional-dependencies"] + if not isinstance(data__dynamic__optionaldependencies, (dict)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".dynamic.optional-dependencies must be object", value=data__dynamic__optionaldependencies, name="" + (name_prefix or "data") + ".dynamic.optional-dependencies", definition={'type': 'object', 'propertyNames': {'format': 'python-identifier'}, 'additionalProperties': False, 'patternProperties': {'.+': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}}}, rule='type') + data__dynamic__optionaldependencies_is_dict = isinstance(data__dynamic__optionaldependencies, dict) + if data__dynamic__optionaldependencies_is_dict: + data__dynamic__optionaldependencies_keys = set(data__dynamic__optionaldependencies.keys()) + for data__dynamic__optionaldependencies_key, data__dynamic__optionaldependencies_val in data__dynamic__optionaldependencies.items(): + if REGEX_PATTERNS['.+'].search(data__dynamic__optionaldependencies_key): + if data__dynamic__optionaldependencies_key in data__dynamic__optionaldependencies_keys: + data__dynamic__optionaldependencies_keys.remove(data__dynamic__optionaldependencies_key) + validate_https___setuptools_pypa_io_en_latest_references_keywords_html__definitions_file_directive(data__dynamic__optionaldependencies_val, custom_formats, (name_prefix or "data") + ".dynamic.optional-dependencies.{data__dynamic__optionaldependencies_key}") + if data__dynamic__optionaldependencies_keys: + raise JsonSchemaValueException("" + (name_prefix or "data") + ".dynamic.optional-dependencies must not contain "+str(data__dynamic__optionaldependencies_keys)+" properties", value=data__dynamic__optionaldependencies, name="" + (name_prefix or "data") + ".dynamic.optional-dependencies", definition={'type': 'object', 'propertyNames': {'format': 'python-identifier'}, 'additionalProperties': False, 'patternProperties': {'.+': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}}}, rule='additionalProperties') + data__dynamic__optionaldependencies_len = len(data__dynamic__optionaldependencies) + if data__dynamic__optionaldependencies_len != 0: + data__dynamic__optionaldependencies_property_names = True + for data__dynamic__optionaldependencies_key in data__dynamic__optionaldependencies: + try: + if isinstance(data__dynamic__optionaldependencies_key, str): + if not custom_formats["python-identifier"](data__dynamic__optionaldependencies_key): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".dynamic.optional-dependencies must be python-identifier", value=data__dynamic__optionaldependencies_key, name="" + (name_prefix or "data") + ".dynamic.optional-dependencies", definition={'format': 'python-identifier'}, rule='format') + except JsonSchemaValueException: + data__dynamic__optionaldependencies_property_names = False + if not data__dynamic__optionaldependencies_property_names: + raise JsonSchemaValueException("" + (name_prefix or "data") + ".dynamic.optional-dependencies must be named by propertyName definition", value=data__dynamic__optionaldependencies, name="" + (name_prefix or "data") + ".dynamic.optional-dependencies", definition={'type': 'object', 'propertyNames': {'format': 'python-identifier'}, 'additionalProperties': False, 'patternProperties': {'.+': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}}}, rule='propertyNames') + if "readme" in data__dynamic_keys: + data__dynamic_keys.remove("readme") + data__dynamic__readme = data__dynamic["readme"] + data__dynamic__readme_any_of_count6 = 0 + if not data__dynamic__readme_any_of_count6: + try: + validate_https___setuptools_pypa_io_en_latest_references_keywords_html__definitions_file_directive(data__dynamic__readme, custom_formats, (name_prefix or "data") + ".dynamic.readme") + data__dynamic__readme_any_of_count6 += 1 + except JsonSchemaValueException: pass + if not data__dynamic__readme_any_of_count6: + try: + data__dynamic__readme_is_dict = isinstance(data__dynamic__readme, dict) + if data__dynamic__readme_is_dict: + data__dynamic__readme_keys = set(data__dynamic__readme.keys()) + if "content-type" in data__dynamic__readme_keys: + data__dynamic__readme_keys.remove("content-type") + data__dynamic__readme__contenttype = data__dynamic__readme["content-type"] + if not isinstance(data__dynamic__readme__contenttype, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".dynamic.readme.content-type must be string", value=data__dynamic__readme__contenttype, name="" + (name_prefix or "data") + ".dynamic.readme.content-type", definition={'type': 'string'}, rule='type') + data__dynamic__readme_any_of_count6 += 1 + except JsonSchemaValueException: pass + if not data__dynamic__readme_any_of_count6: + raise JsonSchemaValueException("" + (name_prefix or "data") + ".dynamic.readme cannot be validated by any definition", value=data__dynamic__readme, name="" + (name_prefix or "data") + ".dynamic.readme", definition={'anyOf': [{'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, {'properties': {'content-type': {'type': 'string'}}}], 'required': ['file']}, rule='anyOf') + data__dynamic__readme_is_dict = isinstance(data__dynamic__readme, dict) + if data__dynamic__readme_is_dict: + data__dynamic__readme_len = len(data__dynamic__readme) + if not all(prop in data__dynamic__readme for prop in ['file']): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".dynamic.readme must contain ['file'] properties", value=data__dynamic__readme, name="" + (name_prefix or "data") + ".dynamic.readme", definition={'anyOf': [{'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, {'properties': {'content-type': {'type': 'string'}}}], 'required': ['file']}, rule='required') + if data__dynamic_keys: + raise JsonSchemaValueException("" + (name_prefix or "data") + ".dynamic must not contain "+str(data__dynamic_keys)+" properties", value=data__dynamic, name="" + (name_prefix or "data") + ".dynamic", definition={'type': 'object', 'description': 'Instructions for loading :pep:`621`-related metadata dynamically', 'additionalProperties': False, 'properties': {'version': {'$$description': ['A version dynamically loaded via either the ``attr:`` or ``file:``', 'directives. Please make sure the given file or attribute respects :pep:`440`.'], 'oneOf': [{'title': "'attr:' directive", '$id': '#/definitions/attr-directive', '$$description': ['Value is read from a module attribute. Supports callables and iterables;', 'unsupported types are cast via ``str()``'], 'type': 'object', 'additionalProperties': False, 'properties': {'attr': {'type': 'string'}}, 'required': ['attr']}, {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}]}, 'classifiers': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, 'description': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, 'dependencies': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, 'entry-points': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, 'optional-dependencies': {'type': 'object', 'propertyNames': {'format': 'python-identifier'}, 'additionalProperties': False, 'patternProperties': {'.+': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}}}, 'readme': {'anyOf': [{'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, {'properties': {'content-type': {'type': 'string'}}}], 'required': ['file']}}}, rule='additionalProperties') + if data_keys: + raise JsonSchemaValueException("" + (name_prefix or "data") + " must not contain "+str(data_keys)+" properties", value=data, name="" + (name_prefix or "data") + "", definition={'$schema': 'http://json-schema.org/draft-07/schema', '$id': 'https://setuptools.pypa.io/en/latest/references/keywords.html', 'title': '``tool.setuptools`` table', '$$description': ['Please notice for the time being the ``setuptools`` project does not specify', 'a way of configuring builds via ``pyproject.toml``.', 'Therefore this schema should be taken just as a *"thought experiment"* on how', 'this *might be done*, by following the principles established in', '`ini2toml `_.', 'It considers only ``setuptools`` `parameters', '`_', 'that can currently be configured via ``setup.cfg`` and are not covered by :pep:`621`', 'but intentionally excludes ``dependency_links`` and ``setup_requires``.', 'NOTE: ``scripts`` was renamed to ``script-files`` to avoid confusion with', 'entry-point based scripts (defined in :pep:`621`).'], 'type': 'object', 'additionalProperties': False, 'properties': {'platforms': {'type': 'array', 'items': {'type': 'string'}}, 'provides': {'$$description': ['Package and virtual package names contained within this package', '**(not supported by pip)**'], 'type': 'array', 'items': {'type': 'string', 'format': 'pep508-identifier'}}, 'obsoletes': {'$$description': ['Packages which this package renders obsolete', '**(not supported by pip)**'], 'type': 'array', 'items': {'type': 'string', 'format': 'pep508-identifier'}}, 'zip-safe': {'description': 'Whether the project can be safely installed and run from a zip file.', 'type': 'boolean'}, 'script-files': {'description': 'Legacy way of defining scripts (entry-points are preferred).', 'type': 'array', 'items': {'type': 'string'}, '$comment': 'TODO: is this field deprecated/should be removed?'}, 'eager-resources': {'$$description': ['Resources that should be extracted together, if any of them is needed,', 'or if any C extensions included in the project are imported.'], 'type': 'array', 'items': {'type': 'string'}}, 'packages': {'$$description': ['Packages that should be included in the distribution.', 'It can be given either as a list of package identifiers', 'or as a ``dict``-like structure with a single key ``find``', 'which corresponds to a dynamic call to', '``setuptools.config.expand.find_packages`` function.', 'The ``find`` key is associated with a nested ``dict``-like structure that can', 'contain ``where``, ``include``, ``exclude`` and ``namespaces`` keys,', 'mimicking the keyword arguments of the associated function.'], 'oneOf': [{'title': 'Array of Python package identifiers', 'type': 'array', 'items': {'type': 'string', 'format': 'python-module-name'}}, {'$id': '#/definitions/find-directive', 'title': "'find:' directive", 'type': 'object', 'additionalProperties': False, 'properties': {'find': {'type': 'object', '$$description': ['Dynamic `package discovery', '`_.'], 'additionalProperties': False, 'properties': {'where': {'description': 'Directories to be searched for packages (Unix-style relative path)', 'type': 'array', 'items': {'type': 'string'}}, 'exclude': {'type': 'array', '$$description': ['Exclude packages that match the values listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'include': {'type': 'array', '$$description': ['Restrict the found packages to just the ones listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'namespaces': {'type': 'boolean', '$$description': ['When ``True``, directories without a ``__init__.py`` file will also', 'be scanned for :pep:`420`-style implicit namespaces']}}}}}]}, 'package-dir': {'$$description': [':class:`dict`-like structure mapping from package names to directories where their', 'code can be found.', 'The empty string (as key) means that all packages are contained inside', 'the given directory will be included in the distribution.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'oneOf': [{'format': 'python-module-name'}, {'const': ''}]}, 'patternProperties': {'^.*$': {'type': 'string'}}}, 'package-data': {'$$description': ['Mapping from package names to lists of glob patterns.', 'Usually this option is not needed when using ``include-package-data = true``', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'oneOf': [{'format': 'python-module-name'}, {'const': '*'}]}, 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, 'include-package-data': {'$$description': ['Automatically include any data files inside the package directories', 'that are specified by ``MANIFEST.in``', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'boolean'}, 'exclude-package-data': {'$$description': ['Mapping from package names to lists of glob patterns that should be excluded', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'oneOf': [{'format': 'python-module-name'}, {'const': '*'}]}, 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, 'namespace-packages': {'type': 'array', 'items': {'type': 'string', 'format': 'python-module-name'}, '$comment': 'https://setuptools.pypa.io/en/latest/userguide/package_discovery.html'}, 'py-modules': {'description': 'Modules that setuptools will manipulate', 'type': 'array', 'items': {'type': 'string', 'format': 'python-module-name'}, '$comment': 'TODO: clarify the relationship with ``packages``'}, 'data-files': {'$$description': ['**DEPRECATED**: dict-like structure where each key represents a directory and', 'the value is a list of glob patterns that should be installed in them.', "Please notice this don't work with wheels. See `data files support", '`_'], 'type': 'object', 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, 'cmdclass': {'$$description': ['Mapping of distutils-style command names to ``setuptools.Command`` subclasses', 'which in turn should be represented by strings with a qualified class name', '(i.e., "dotted" form with module), e.g.::\n\n', ' cmdclass = {mycmd = "pkg.subpkg.module.CommandClass"}\n\n', 'The command class should be a directly defined at the top-level of the', 'containing module (no class nesting).'], 'type': 'object', 'patternProperties': {'^.*$': {'type': 'string', 'format': 'python-qualified-identifier'}}}, 'license-files': {'type': 'array', 'items': {'type': 'string'}, '$$description': ['PROVISIONAL: List of glob patterns for all license files being distributed.', '(might become standard with PEP 639).'], 'default': ['LICEN[CS]E*', ' COPYING*', ' NOTICE*', 'AUTHORS*'], '$comment': 'TODO: revise if PEP 639 is accepted. Probably ``project.license-files``?'}, 'dynamic': {'type': 'object', 'description': 'Instructions for loading :pep:`621`-related metadata dynamically', 'additionalProperties': False, 'properties': {'version': {'$$description': ['A version dynamically loaded via either the ``attr:`` or ``file:``', 'directives. Please make sure the given file or attribute respects :pep:`440`.'], 'oneOf': [{'title': "'attr:' directive", '$id': '#/definitions/attr-directive', '$$description': ['Value is read from a module attribute. Supports callables and iterables;', 'unsupported types are cast via ``str()``'], 'type': 'object', 'additionalProperties': False, 'properties': {'attr': {'type': 'string'}}, 'required': ['attr']}, {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}]}, 'classifiers': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, 'description': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, 'dependencies': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, 'entry-points': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, 'optional-dependencies': {'type': 'object', 'propertyNames': {'format': 'python-identifier'}, 'additionalProperties': False, 'patternProperties': {'.+': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}}}, 'readme': {'anyOf': [{'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, {'properties': {'content-type': {'type': 'string'}}}], 'required': ['file']}}}}, 'definitions': {'file-directive': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, 'attr-directive': {'title': "'attr:' directive", '$id': '#/definitions/attr-directive', '$$description': ['Value is read from a module attribute. Supports callables and iterables;', 'unsupported types are cast via ``str()``'], 'type': 'object', 'additionalProperties': False, 'properties': {'attr': {'type': 'string'}}, 'required': ['attr']}, 'find-directive': {'$id': '#/definitions/find-directive', 'title': "'find:' directive", 'type': 'object', 'additionalProperties': False, 'properties': {'find': {'type': 'object', '$$description': ['Dynamic `package discovery', '`_.'], 'additionalProperties': False, 'properties': {'where': {'description': 'Directories to be searched for packages (Unix-style relative path)', 'type': 'array', 'items': {'type': 'string'}}, 'exclude': {'type': 'array', '$$description': ['Exclude packages that match the values listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'include': {'type': 'array', '$$description': ['Restrict the found packages to just the ones listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'namespaces': {'type': 'boolean', '$$description': ['When ``True``, directories without a ``__init__.py`` file will also', 'be scanned for :pep:`420`-style implicit namespaces']}}}}}}}, rule='additionalProperties') + return data + +def validate_https___setuptools_pypa_io_en_latest_references_keywords_html__definitions_file_directive(data, custom_formats={}, name_prefix=None): + if not isinstance(data, (dict)): + raise JsonSchemaValueException("" + (name_prefix or "data") + " must be object", value=data, name="" + (name_prefix or "data") + "", definition={'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, rule='type') + data_is_dict = isinstance(data, dict) + if data_is_dict: + data_len = len(data) + if not all(prop in data for prop in ['file']): + raise JsonSchemaValueException("" + (name_prefix or "data") + " must contain ['file'] properties", value=data, name="" + (name_prefix or "data") + "", definition={'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, rule='required') + data_keys = set(data.keys()) + if "file" in data_keys: + data_keys.remove("file") + data__file = data["file"] + data__file_one_of_count7 = 0 + if data__file_one_of_count7 < 2: + try: + if not isinstance(data__file, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".file must be string", value=data__file, name="" + (name_prefix or "data") + ".file", definition={'type': 'string'}, rule='type') + data__file_one_of_count7 += 1 + except JsonSchemaValueException: pass + if data__file_one_of_count7 < 2: + try: + if not isinstance(data__file, (list, tuple)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".file must be array", value=data__file, name="" + (name_prefix or "data") + ".file", definition={'type': 'array', 'items': {'type': 'string'}}, rule='type') + data__file_is_list = isinstance(data__file, (list, tuple)) + if data__file_is_list: + data__file_len = len(data__file) + for data__file_x, data__file_item in enumerate(data__file): + if not isinstance(data__file_item, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".file[{data__file_x}]".format(**locals()) + " must be string", value=data__file_item, name="" + (name_prefix or "data") + ".file[{data__file_x}]".format(**locals()) + "", definition={'type': 'string'}, rule='type') + data__file_one_of_count7 += 1 + except JsonSchemaValueException: pass + if data__file_one_of_count7 != 1: + raise JsonSchemaValueException("" + (name_prefix or "data") + ".file must be valid exactly by one definition" + (" (" + str(data__file_one_of_count7) + " matches found)"), value=data__file, name="" + (name_prefix or "data") + ".file", definition={'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}, rule='oneOf') + if data_keys: + raise JsonSchemaValueException("" + (name_prefix or "data") + " must not contain "+str(data_keys)+" properties", value=data, name="" + (name_prefix or "data") + "", definition={'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, rule='additionalProperties') + return data + +def validate_https___setuptools_pypa_io_en_latest_references_keywords_html__definitions_attr_directive(data, custom_formats={}, name_prefix=None): + if not isinstance(data, (dict)): + raise JsonSchemaValueException("" + (name_prefix or "data") + " must be object", value=data, name="" + (name_prefix or "data") + "", definition={'title': "'attr:' directive", '$id': '#/definitions/attr-directive', '$$description': ['Value is read from a module attribute. Supports callables and iterables;', 'unsupported types are cast via ``str()``'], 'type': 'object', 'additionalProperties': False, 'properties': {'attr': {'type': 'string'}}, 'required': ['attr']}, rule='type') + data_is_dict = isinstance(data, dict) + if data_is_dict: + data_len = len(data) + if not all(prop in data for prop in ['attr']): + raise JsonSchemaValueException("" + (name_prefix or "data") + " must contain ['attr'] properties", value=data, name="" + (name_prefix or "data") + "", definition={'title': "'attr:' directive", '$id': '#/definitions/attr-directive', '$$description': ['Value is read from a module attribute. Supports callables and iterables;', 'unsupported types are cast via ``str()``'], 'type': 'object', 'additionalProperties': False, 'properties': {'attr': {'type': 'string'}}, 'required': ['attr']}, rule='required') + data_keys = set(data.keys()) + if "attr" in data_keys: + data_keys.remove("attr") + data__attr = data["attr"] + if not isinstance(data__attr, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".attr must be string", value=data__attr, name="" + (name_prefix or "data") + ".attr", definition={'type': 'string'}, rule='type') + if data_keys: + raise JsonSchemaValueException("" + (name_prefix or "data") + " must not contain "+str(data_keys)+" properties", value=data, name="" + (name_prefix or "data") + "", definition={'title': "'attr:' directive", '$id': '#/definitions/attr-directive', '$$description': ['Value is read from a module attribute. Supports callables and iterables;', 'unsupported types are cast via ``str()``'], 'type': 'object', 'additionalProperties': False, 'properties': {'attr': {'type': 'string'}}, 'required': ['attr']}, rule='additionalProperties') + return data + +def validate_https___setuptools_pypa_io_en_latest_references_keywords_html__definitions_find_directive(data, custom_formats={}, name_prefix=None): + if not isinstance(data, (dict)): + raise JsonSchemaValueException("" + (name_prefix or "data") + " must be object", value=data, name="" + (name_prefix or "data") + "", definition={'$id': '#/definitions/find-directive', 'title': "'find:' directive", 'type': 'object', 'additionalProperties': False, 'properties': {'find': {'type': 'object', '$$description': ['Dynamic `package discovery', '`_.'], 'additionalProperties': False, 'properties': {'where': {'description': 'Directories to be searched for packages (Unix-style relative path)', 'type': 'array', 'items': {'type': 'string'}}, 'exclude': {'type': 'array', '$$description': ['Exclude packages that match the values listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'include': {'type': 'array', '$$description': ['Restrict the found packages to just the ones listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'namespaces': {'type': 'boolean', '$$description': ['When ``True``, directories without a ``__init__.py`` file will also', 'be scanned for :pep:`420`-style implicit namespaces']}}}}}, rule='type') + data_is_dict = isinstance(data, dict) + if data_is_dict: + data_keys = set(data.keys()) + if "find" in data_keys: + data_keys.remove("find") + data__find = data["find"] + if not isinstance(data__find, (dict)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".find must be object", value=data__find, name="" + (name_prefix or "data") + ".find", definition={'type': 'object', '$$description': ['Dynamic `package discovery', '`_.'], 'additionalProperties': False, 'properties': {'where': {'description': 'Directories to be searched for packages (Unix-style relative path)', 'type': 'array', 'items': {'type': 'string'}}, 'exclude': {'type': 'array', '$$description': ['Exclude packages that match the values listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'include': {'type': 'array', '$$description': ['Restrict the found packages to just the ones listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'namespaces': {'type': 'boolean', '$$description': ['When ``True``, directories without a ``__init__.py`` file will also', 'be scanned for :pep:`420`-style implicit namespaces']}}}, rule='type') + data__find_is_dict = isinstance(data__find, dict) + if data__find_is_dict: + data__find_keys = set(data__find.keys()) + if "where" in data__find_keys: + data__find_keys.remove("where") + data__find__where = data__find["where"] + if not isinstance(data__find__where, (list, tuple)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".find.where must be array", value=data__find__where, name="" + (name_prefix or "data") + ".find.where", definition={'description': 'Directories to be searched for packages (Unix-style relative path)', 'type': 'array', 'items': {'type': 'string'}}, rule='type') + data__find__where_is_list = isinstance(data__find__where, (list, tuple)) + if data__find__where_is_list: + data__find__where_len = len(data__find__where) + for data__find__where_x, data__find__where_item in enumerate(data__find__where): + if not isinstance(data__find__where_item, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".find.where[{data__find__where_x}]".format(**locals()) + " must be string", value=data__find__where_item, name="" + (name_prefix or "data") + ".find.where[{data__find__where_x}]".format(**locals()) + "", definition={'type': 'string'}, rule='type') + if "exclude" in data__find_keys: + data__find_keys.remove("exclude") + data__find__exclude = data__find["exclude"] + if not isinstance(data__find__exclude, (list, tuple)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".find.exclude must be array", value=data__find__exclude, name="" + (name_prefix or "data") + ".find.exclude", definition={'type': 'array', '$$description': ['Exclude packages that match the values listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, rule='type') + data__find__exclude_is_list = isinstance(data__find__exclude, (list, tuple)) + if data__find__exclude_is_list: + data__find__exclude_len = len(data__find__exclude) + for data__find__exclude_x, data__find__exclude_item in enumerate(data__find__exclude): + if not isinstance(data__find__exclude_item, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".find.exclude[{data__find__exclude_x}]".format(**locals()) + " must be string", value=data__find__exclude_item, name="" + (name_prefix or "data") + ".find.exclude[{data__find__exclude_x}]".format(**locals()) + "", definition={'type': 'string'}, rule='type') + if "include" in data__find_keys: + data__find_keys.remove("include") + data__find__include = data__find["include"] + if not isinstance(data__find__include, (list, tuple)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".find.include must be array", value=data__find__include, name="" + (name_prefix or "data") + ".find.include", definition={'type': 'array', '$$description': ['Restrict the found packages to just the ones listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, rule='type') + data__find__include_is_list = isinstance(data__find__include, (list, tuple)) + if data__find__include_is_list: + data__find__include_len = len(data__find__include) + for data__find__include_x, data__find__include_item in enumerate(data__find__include): + if not isinstance(data__find__include_item, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".find.include[{data__find__include_x}]".format(**locals()) + " must be string", value=data__find__include_item, name="" + (name_prefix or "data") + ".find.include[{data__find__include_x}]".format(**locals()) + "", definition={'type': 'string'}, rule='type') + if "namespaces" in data__find_keys: + data__find_keys.remove("namespaces") + data__find__namespaces = data__find["namespaces"] + if not isinstance(data__find__namespaces, (bool)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".find.namespaces must be boolean", value=data__find__namespaces, name="" + (name_prefix or "data") + ".find.namespaces", definition={'type': 'boolean', '$$description': ['When ``True``, directories without a ``__init__.py`` file will also', 'be scanned for :pep:`420`-style implicit namespaces']}, rule='type') + if data__find_keys: + raise JsonSchemaValueException("" + (name_prefix or "data") + ".find must not contain "+str(data__find_keys)+" properties", value=data__find, name="" + (name_prefix or "data") + ".find", definition={'type': 'object', '$$description': ['Dynamic `package discovery', '`_.'], 'additionalProperties': False, 'properties': {'where': {'description': 'Directories to be searched for packages (Unix-style relative path)', 'type': 'array', 'items': {'type': 'string'}}, 'exclude': {'type': 'array', '$$description': ['Exclude packages that match the values listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'include': {'type': 'array', '$$description': ['Restrict the found packages to just the ones listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'namespaces': {'type': 'boolean', '$$description': ['When ``True``, directories without a ``__init__.py`` file will also', 'be scanned for :pep:`420`-style implicit namespaces']}}}, rule='additionalProperties') + if data_keys: + raise JsonSchemaValueException("" + (name_prefix or "data") + " must not contain "+str(data_keys)+" properties", value=data, name="" + (name_prefix or "data") + "", definition={'$id': '#/definitions/find-directive', 'title': "'find:' directive", 'type': 'object', 'additionalProperties': False, 'properties': {'find': {'type': 'object', '$$description': ['Dynamic `package discovery', '`_.'], 'additionalProperties': False, 'properties': {'where': {'description': 'Directories to be searched for packages (Unix-style relative path)', 'type': 'array', 'items': {'type': 'string'}}, 'exclude': {'type': 'array', '$$description': ['Exclude packages that match the values listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'include': {'type': 'array', '$$description': ['Restrict the found packages to just the ones listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'namespaces': {'type': 'boolean', '$$description': ['When ``True``, directories without a ``__init__.py`` file will also', 'be scanned for :pep:`420`-style implicit namespaces']}}}}}, rule='additionalProperties') + return data + +def validate_https___docs_python_org_3_install(data, custom_formats={}, name_prefix=None): + if not isinstance(data, (dict)): + raise JsonSchemaValueException("" + (name_prefix or "data") + " must be object", value=data, name="" + (name_prefix or "data") + "", definition={'$schema': 'http://json-schema.org/draft-07/schema', '$id': 'https://docs.python.org/3/install/', 'title': '``tool.distutils`` table', '$$description': ['Originally, ``distutils`` allowed developers to configure arguments for', '``setup.py`` scripts via `distutils configuration files', '`_.', '``tool.distutils`` subtables could be used with the same purpose', '(NOT CURRENTLY IMPLEMENTED).'], 'type': 'object', 'properties': {'global': {'type': 'object', 'description': 'Global options applied to all ``distutils`` commands'}}, 'patternProperties': {'.+': {'type': 'object'}}, '$comment': 'TODO: Is there a practical way of making this schema more specific?'}, rule='type') + data_is_dict = isinstance(data, dict) + if data_is_dict: + data_keys = set(data.keys()) + if "global" in data_keys: + data_keys.remove("global") + data__global = data["global"] + if not isinstance(data__global, (dict)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".global must be object", value=data__global, name="" + (name_prefix or "data") + ".global", definition={'type': 'object', 'description': 'Global options applied to all ``distutils`` commands'}, rule='type') + for data_key, data_val in data.items(): + if REGEX_PATTERNS['.+'].search(data_key): + if data_key in data_keys: + data_keys.remove(data_key) + if not isinstance(data_val, (dict)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".{data_key}".format(**locals()) + " must be object", value=data_val, name="" + (name_prefix or "data") + ".{data_key}".format(**locals()) + "", definition={'type': 'object'}, rule='type') + return data + +def validate_https___packaging_python_org_en_latest_specifications_declaring_project_metadata(data, custom_formats={}, name_prefix=None): + if not isinstance(data, (dict)): + raise JsonSchemaValueException("" + (name_prefix or "data") + " must be object", value=data, name="" + (name_prefix or "data") + "", definition={'$schema': 'http://json-schema.org/draft-07/schema', '$id': 'https://packaging.python.org/en/latest/specifications/declaring-project-metadata/', 'title': 'Package metadata stored in the ``project`` table', '$$description': ['Data structure for the **project** table inside ``pyproject.toml``', '(as initially defined in :pep:`621`)'], 'type': 'object', 'properties': {'name': {'type': 'string', 'description': 'The name (primary identifier) of the project. MUST be statically defined.', 'format': 'pep508-identifier'}, 'version': {'type': 'string', 'description': 'The version of the project as supported by :pep:`440`.', 'format': 'pep440'}, 'description': {'type': 'string', '$$description': ['The `summary description of the project', '`_']}, 'readme': {'$$description': ['`Full/detailed description of the project in the form of a README', '`_', "with meaning similar to the one defined in `core metadata's Description", '`_'], 'oneOf': [{'type': 'string', '$$description': ['Relative path to a text file (UTF-8) containing the full description', 'of the project. If the file path ends in case-insensitive ``.md`` or', '``.rst`` suffixes, then the content-type is respectively', '``text/markdown`` or ``text/x-rst``']}, {'type': 'object', 'allOf': [{'anyOf': [{'properties': {'file': {'type': 'string', '$$description': ['Relative path to a text file containing the full description', 'of the project.']}}, 'required': ['file']}, {'properties': {'text': {'type': 'string', 'description': 'Full text describing the project.'}}, 'required': ['text']}]}, {'properties': {'content-type': {'type': 'string', '$$description': ['Content-type (:rfc:`1341`) of the full description', '(e.g. ``text/markdown``). The ``charset`` parameter is assumed', 'UTF-8 when not present.'], '$comment': 'TODO: add regex pattern or format?'}}, 'required': ['content-type']}]}]}, 'requires-python': {'type': 'string', 'format': 'pep508-versionspec', '$$description': ['`The Python version requirements of the project', '`_.']}, 'license': {'description': '`Project license `_.', 'oneOf': [{'properties': {'file': {'type': 'string', '$$description': ['Relative path to the file (UTF-8) which contains the license for the', 'project.']}}, 'required': ['file']}, {'properties': {'text': {'type': 'string', '$$description': ['The license of the project whose meaning is that of the', '`License field from the core metadata', '`_.']}}, 'required': ['text']}]}, 'authors': {'type': 'array', 'items': {'$id': '#/definitions/author', 'title': 'Author or Maintainer', '$comment': 'https://www.python.org/dev/peps/pep-0621/#authors-maintainers', 'type': 'object', 'properties': {'name': {'type': 'string', '$$description': ['MUST be a valid email name, i.e. whatever can be put as a name, before an', 'email, in :rfc:`822`.']}, 'email': {'type': 'string', 'format': 'idn-email', 'description': 'MUST be a valid email address'}}}, '$$description': ["The people or organizations considered to be the 'authors' of the project.", 'The exact meaning is open to interpretation (e.g. original or primary authors,', 'current maintainers, or owners of the package).']}, 'maintainers': {'type': 'array', 'items': {'$id': '#/definitions/author', 'title': 'Author or Maintainer', '$comment': 'https://www.python.org/dev/peps/pep-0621/#authors-maintainers', 'type': 'object', 'properties': {'name': {'type': 'string', '$$description': ['MUST be a valid email name, i.e. whatever can be put as a name, before an', 'email, in :rfc:`822`.']}, 'email': {'type': 'string', 'format': 'idn-email', 'description': 'MUST be a valid email address'}}}, '$$description': ["The people or organizations considered to be the 'maintainers' of the project.", 'Similarly to ``authors``, the exact meaning is open to interpretation.']}, 'keywords': {'type': 'array', 'items': {'type': 'string'}, 'description': 'List of keywords to assist searching for the distribution in a larger catalog.'}, 'classifiers': {'type': 'array', 'items': {'type': 'string', 'format': 'trove-classifier', 'description': '`PyPI classifier `_.'}, '$$description': ['`Trove classifiers `_', 'which apply to the project.']}, 'urls': {'type': 'object', 'description': 'URLs associated with the project in the form ``label => value``.', 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', 'format': 'url'}}}, 'scripts': {'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}, 'gui-scripts': {'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}, 'entry-points': {'$$description': ['Instruct the installer to expose the given modules/functions via', '``entry-point`` discovery mechanism (useful for plugins).', 'More information available in the `Python packaging guide', '`_.'], 'propertyNames': {'format': 'python-entrypoint-group'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}}}, 'dependencies': {'type': 'array', 'description': 'Project (mandatory) dependencies.', 'items': {'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}}, 'optional-dependencies': {'type': 'object', 'description': 'Optional dependency for the project', 'propertyNames': {'format': 'pep508-identifier'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'array', 'items': {'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}}}}, 'dynamic': {'type': 'array', '$$description': ['Specifies which fields are intentionally unspecified and expected to be', 'dynamically provided by build tools'], 'items': {'enum': ['version', 'description', 'readme', 'requires-python', 'license', 'authors', 'maintainers', 'keywords', 'classifiers', 'urls', 'scripts', 'gui-scripts', 'entry-points', 'dependencies', 'optional-dependencies']}}}, 'required': ['name'], 'additionalProperties': False, 'if': {'not': {'required': ['dynamic'], 'properties': {'dynamic': {'contains': {'const': 'version'}, '$$description': ['version is listed in ``dynamic``']}}}, '$$comment': ['According to :pep:`621`:', ' If the core metadata specification lists a field as "Required", then', ' the metadata MUST specify the field statically or list it in dynamic', 'In turn, `core metadata`_ defines:', ' The required fields are: Metadata-Version, Name, Version.', ' All the other fields are optional.', 'Since ``Metadata-Version`` is defined by the build back-end, ``name`` and', '``version`` are the only mandatory information in ``pyproject.toml``.', '.. _core metadata: https://packaging.python.org/specifications/core-metadata/']}, 'then': {'required': ['version'], '$$description': ['version should be statically defined in the ``version`` field']}, 'definitions': {'author': {'$id': '#/definitions/author', 'title': 'Author or Maintainer', '$comment': 'https://www.python.org/dev/peps/pep-0621/#authors-maintainers', 'type': 'object', 'properties': {'name': {'type': 'string', '$$description': ['MUST be a valid email name, i.e. whatever can be put as a name, before an', 'email, in :rfc:`822`.']}, 'email': {'type': 'string', 'format': 'idn-email', 'description': 'MUST be a valid email address'}}}, 'entry-point-group': {'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}, 'dependency': {'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}}}, rule='type') + data_is_dict = isinstance(data, dict) + if data_is_dict: + data_len = len(data) + if not all(prop in data for prop in ['name']): + raise JsonSchemaValueException("" + (name_prefix or "data") + " must contain ['name'] properties", value=data, name="" + (name_prefix or "data") + "", definition={'$schema': 'http://json-schema.org/draft-07/schema', '$id': 'https://packaging.python.org/en/latest/specifications/declaring-project-metadata/', 'title': 'Package metadata stored in the ``project`` table', '$$description': ['Data structure for the **project** table inside ``pyproject.toml``', '(as initially defined in :pep:`621`)'], 'type': 'object', 'properties': {'name': {'type': 'string', 'description': 'The name (primary identifier) of the project. MUST be statically defined.', 'format': 'pep508-identifier'}, 'version': {'type': 'string', 'description': 'The version of the project as supported by :pep:`440`.', 'format': 'pep440'}, 'description': {'type': 'string', '$$description': ['The `summary description of the project', '`_']}, 'readme': {'$$description': ['`Full/detailed description of the project in the form of a README', '`_', "with meaning similar to the one defined in `core metadata's Description", '`_'], 'oneOf': [{'type': 'string', '$$description': ['Relative path to a text file (UTF-8) containing the full description', 'of the project. If the file path ends in case-insensitive ``.md`` or', '``.rst`` suffixes, then the content-type is respectively', '``text/markdown`` or ``text/x-rst``']}, {'type': 'object', 'allOf': [{'anyOf': [{'properties': {'file': {'type': 'string', '$$description': ['Relative path to a text file containing the full description', 'of the project.']}}, 'required': ['file']}, {'properties': {'text': {'type': 'string', 'description': 'Full text describing the project.'}}, 'required': ['text']}]}, {'properties': {'content-type': {'type': 'string', '$$description': ['Content-type (:rfc:`1341`) of the full description', '(e.g. ``text/markdown``). The ``charset`` parameter is assumed', 'UTF-8 when not present.'], '$comment': 'TODO: add regex pattern or format?'}}, 'required': ['content-type']}]}]}, 'requires-python': {'type': 'string', 'format': 'pep508-versionspec', '$$description': ['`The Python version requirements of the project', '`_.']}, 'license': {'description': '`Project license `_.', 'oneOf': [{'properties': {'file': {'type': 'string', '$$description': ['Relative path to the file (UTF-8) which contains the license for the', 'project.']}}, 'required': ['file']}, {'properties': {'text': {'type': 'string', '$$description': ['The license of the project whose meaning is that of the', '`License field from the core metadata', '`_.']}}, 'required': ['text']}]}, 'authors': {'type': 'array', 'items': {'$id': '#/definitions/author', 'title': 'Author or Maintainer', '$comment': 'https://www.python.org/dev/peps/pep-0621/#authors-maintainers', 'type': 'object', 'properties': {'name': {'type': 'string', '$$description': ['MUST be a valid email name, i.e. whatever can be put as a name, before an', 'email, in :rfc:`822`.']}, 'email': {'type': 'string', 'format': 'idn-email', 'description': 'MUST be a valid email address'}}}, '$$description': ["The people or organizations considered to be the 'authors' of the project.", 'The exact meaning is open to interpretation (e.g. original or primary authors,', 'current maintainers, or owners of the package).']}, 'maintainers': {'type': 'array', 'items': {'$id': '#/definitions/author', 'title': 'Author or Maintainer', '$comment': 'https://www.python.org/dev/peps/pep-0621/#authors-maintainers', 'type': 'object', 'properties': {'name': {'type': 'string', '$$description': ['MUST be a valid email name, i.e. whatever can be put as a name, before an', 'email, in :rfc:`822`.']}, 'email': {'type': 'string', 'format': 'idn-email', 'description': 'MUST be a valid email address'}}}, '$$description': ["The people or organizations considered to be the 'maintainers' of the project.", 'Similarly to ``authors``, the exact meaning is open to interpretation.']}, 'keywords': {'type': 'array', 'items': {'type': 'string'}, 'description': 'List of keywords to assist searching for the distribution in a larger catalog.'}, 'classifiers': {'type': 'array', 'items': {'type': 'string', 'format': 'trove-classifier', 'description': '`PyPI classifier `_.'}, '$$description': ['`Trove classifiers `_', 'which apply to the project.']}, 'urls': {'type': 'object', 'description': 'URLs associated with the project in the form ``label => value``.', 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', 'format': 'url'}}}, 'scripts': {'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}, 'gui-scripts': {'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}, 'entry-points': {'$$description': ['Instruct the installer to expose the given modules/functions via', '``entry-point`` discovery mechanism (useful for plugins).', 'More information available in the `Python packaging guide', '`_.'], 'propertyNames': {'format': 'python-entrypoint-group'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}}}, 'dependencies': {'type': 'array', 'description': 'Project (mandatory) dependencies.', 'items': {'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}}, 'optional-dependencies': {'type': 'object', 'description': 'Optional dependency for the project', 'propertyNames': {'format': 'pep508-identifier'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'array', 'items': {'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}}}}, 'dynamic': {'type': 'array', '$$description': ['Specifies which fields are intentionally unspecified and expected to be', 'dynamically provided by build tools'], 'items': {'enum': ['version', 'description', 'readme', 'requires-python', 'license', 'authors', 'maintainers', 'keywords', 'classifiers', 'urls', 'scripts', 'gui-scripts', 'entry-points', 'dependencies', 'optional-dependencies']}}}, 'required': ['name'], 'additionalProperties': False, 'if': {'not': {'required': ['dynamic'], 'properties': {'dynamic': {'contains': {'const': 'version'}, '$$description': ['version is listed in ``dynamic``']}}}, '$$comment': ['According to :pep:`621`:', ' If the core metadata specification lists a field as "Required", then', ' the metadata MUST specify the field statically or list it in dynamic', 'In turn, `core metadata`_ defines:', ' The required fields are: Metadata-Version, Name, Version.', ' All the other fields are optional.', 'Since ``Metadata-Version`` is defined by the build back-end, ``name`` and', '``version`` are the only mandatory information in ``pyproject.toml``.', '.. _core metadata: https://packaging.python.org/specifications/core-metadata/']}, 'then': {'required': ['version'], '$$description': ['version should be statically defined in the ``version`` field']}, 'definitions': {'author': {'$id': '#/definitions/author', 'title': 'Author or Maintainer', '$comment': 'https://www.python.org/dev/peps/pep-0621/#authors-maintainers', 'type': 'object', 'properties': {'name': {'type': 'string', '$$description': ['MUST be a valid email name, i.e. whatever can be put as a name, before an', 'email, in :rfc:`822`.']}, 'email': {'type': 'string', 'format': 'idn-email', 'description': 'MUST be a valid email address'}}}, 'entry-point-group': {'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}, 'dependency': {'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}}}, rule='required') + data_keys = set(data.keys()) + if "name" in data_keys: + data_keys.remove("name") + data__name = data["name"] + if not isinstance(data__name, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".name must be string", value=data__name, name="" + (name_prefix or "data") + ".name", definition={'type': 'string', 'description': 'The name (primary identifier) of the project. MUST be statically defined.', 'format': 'pep508-identifier'}, rule='type') + if isinstance(data__name, str): + if not custom_formats["pep508-identifier"](data__name): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".name must be pep508-identifier", value=data__name, name="" + (name_prefix or "data") + ".name", definition={'type': 'string', 'description': 'The name (primary identifier) of the project. MUST be statically defined.', 'format': 'pep508-identifier'}, rule='format') + if "version" in data_keys: + data_keys.remove("version") + data__version = data["version"] + if not isinstance(data__version, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".version must be string", value=data__version, name="" + (name_prefix or "data") + ".version", definition={'type': 'string', 'description': 'The version of the project as supported by :pep:`440`.', 'format': 'pep440'}, rule='type') + if isinstance(data__version, str): + if not custom_formats["pep440"](data__version): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".version must be pep440", value=data__version, name="" + (name_prefix or "data") + ".version", definition={'type': 'string', 'description': 'The version of the project as supported by :pep:`440`.', 'format': 'pep440'}, rule='format') + if "description" in data_keys: + data_keys.remove("description") + data__description = data["description"] + if not isinstance(data__description, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".description must be string", value=data__description, name="" + (name_prefix or "data") + ".description", definition={'type': 'string', '$$description': ['The `summary description of the project', '`_']}, rule='type') + if "readme" in data_keys: + data_keys.remove("readme") + data__readme = data["readme"] + data__readme_one_of_count8 = 0 + if data__readme_one_of_count8 < 2: + try: + if not isinstance(data__readme, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".readme must be string", value=data__readme, name="" + (name_prefix or "data") + ".readme", definition={'type': 'string', '$$description': ['Relative path to a text file (UTF-8) containing the full description', 'of the project. If the file path ends in case-insensitive ``.md`` or', '``.rst`` suffixes, then the content-type is respectively', '``text/markdown`` or ``text/x-rst``']}, rule='type') + data__readme_one_of_count8 += 1 + except JsonSchemaValueException: pass + if data__readme_one_of_count8 < 2: + try: + if not isinstance(data__readme, (dict)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".readme must be object", value=data__readme, name="" + (name_prefix or "data") + ".readme", definition={'type': 'object', 'allOf': [{'anyOf': [{'properties': {'file': {'type': 'string', '$$description': ['Relative path to a text file containing the full description', 'of the project.']}}, 'required': ['file']}, {'properties': {'text': {'type': 'string', 'description': 'Full text describing the project.'}}, 'required': ['text']}]}, {'properties': {'content-type': {'type': 'string', '$$description': ['Content-type (:rfc:`1341`) of the full description', '(e.g. ``text/markdown``). The ``charset`` parameter is assumed', 'UTF-8 when not present.'], '$comment': 'TODO: add regex pattern or format?'}}, 'required': ['content-type']}]}, rule='type') + data__readme_any_of_count9 = 0 + if not data__readme_any_of_count9: + try: + data__readme_is_dict = isinstance(data__readme, dict) + if data__readme_is_dict: + data__readme_len = len(data__readme) + if not all(prop in data__readme for prop in ['file']): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".readme must contain ['file'] properties", value=data__readme, name="" + (name_prefix or "data") + ".readme", definition={'properties': {'file': {'type': 'string', '$$description': ['Relative path to a text file containing the full description', 'of the project.']}}, 'required': ['file']}, rule='required') + data__readme_keys = set(data__readme.keys()) + if "file" in data__readme_keys: + data__readme_keys.remove("file") + data__readme__file = data__readme["file"] + if not isinstance(data__readme__file, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".readme.file must be string", value=data__readme__file, name="" + (name_prefix or "data") + ".readme.file", definition={'type': 'string', '$$description': ['Relative path to a text file containing the full description', 'of the project.']}, rule='type') + data__readme_any_of_count9 += 1 + except JsonSchemaValueException: pass + if not data__readme_any_of_count9: + try: + data__readme_is_dict = isinstance(data__readme, dict) + if data__readme_is_dict: + data__readme_len = len(data__readme) + if not all(prop in data__readme for prop in ['text']): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".readme must contain ['text'] properties", value=data__readme, name="" + (name_prefix or "data") + ".readme", definition={'properties': {'text': {'type': 'string', 'description': 'Full text describing the project.'}}, 'required': ['text']}, rule='required') + data__readme_keys = set(data__readme.keys()) + if "text" in data__readme_keys: + data__readme_keys.remove("text") + data__readme__text = data__readme["text"] + if not isinstance(data__readme__text, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".readme.text must be string", value=data__readme__text, name="" + (name_prefix or "data") + ".readme.text", definition={'type': 'string', 'description': 'Full text describing the project.'}, rule='type') + data__readme_any_of_count9 += 1 + except JsonSchemaValueException: pass + if not data__readme_any_of_count9: + raise JsonSchemaValueException("" + (name_prefix or "data") + ".readme cannot be validated by any definition", value=data__readme, name="" + (name_prefix or "data") + ".readme", definition={'anyOf': [{'properties': {'file': {'type': 'string', '$$description': ['Relative path to a text file containing the full description', 'of the project.']}}, 'required': ['file']}, {'properties': {'text': {'type': 'string', 'description': 'Full text describing the project.'}}, 'required': ['text']}]}, rule='anyOf') + data__readme_is_dict = isinstance(data__readme, dict) + if data__readme_is_dict: + data__readme_len = len(data__readme) + if not all(prop in data__readme for prop in ['content-type']): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".readme must contain ['content-type'] properties", value=data__readme, name="" + (name_prefix or "data") + ".readme", definition={'properties': {'content-type': {'type': 'string', '$$description': ['Content-type (:rfc:`1341`) of the full description', '(e.g. ``text/markdown``). The ``charset`` parameter is assumed', 'UTF-8 when not present.'], '$comment': 'TODO: add regex pattern or format?'}}, 'required': ['content-type']}, rule='required') + data__readme_keys = set(data__readme.keys()) + if "content-type" in data__readme_keys: + data__readme_keys.remove("content-type") + data__readme__contenttype = data__readme["content-type"] + if not isinstance(data__readme__contenttype, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".readme.content-type must be string", value=data__readme__contenttype, name="" + (name_prefix or "data") + ".readme.content-type", definition={'type': 'string', '$$description': ['Content-type (:rfc:`1341`) of the full description', '(e.g. ``text/markdown``). The ``charset`` parameter is assumed', 'UTF-8 when not present.'], '$comment': 'TODO: add regex pattern or format?'}, rule='type') + data__readme_one_of_count8 += 1 + except JsonSchemaValueException: pass + if data__readme_one_of_count8 != 1: + raise JsonSchemaValueException("" + (name_prefix or "data") + ".readme must be valid exactly by one definition" + (" (" + str(data__readme_one_of_count8) + " matches found)"), value=data__readme, name="" + (name_prefix or "data") + ".readme", definition={'$$description': ['`Full/detailed description of the project in the form of a README', '`_', "with meaning similar to the one defined in `core metadata's Description", '`_'], 'oneOf': [{'type': 'string', '$$description': ['Relative path to a text file (UTF-8) containing the full description', 'of the project. If the file path ends in case-insensitive ``.md`` or', '``.rst`` suffixes, then the content-type is respectively', '``text/markdown`` or ``text/x-rst``']}, {'type': 'object', 'allOf': [{'anyOf': [{'properties': {'file': {'type': 'string', '$$description': ['Relative path to a text file containing the full description', 'of the project.']}}, 'required': ['file']}, {'properties': {'text': {'type': 'string', 'description': 'Full text describing the project.'}}, 'required': ['text']}]}, {'properties': {'content-type': {'type': 'string', '$$description': ['Content-type (:rfc:`1341`) of the full description', '(e.g. ``text/markdown``). The ``charset`` parameter is assumed', 'UTF-8 when not present.'], '$comment': 'TODO: add regex pattern or format?'}}, 'required': ['content-type']}]}]}, rule='oneOf') + if "requires-python" in data_keys: + data_keys.remove("requires-python") + data__requirespython = data["requires-python"] + if not isinstance(data__requirespython, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".requires-python must be string", value=data__requirespython, name="" + (name_prefix or "data") + ".requires-python", definition={'type': 'string', 'format': 'pep508-versionspec', '$$description': ['`The Python version requirements of the project', '`_.']}, rule='type') + if isinstance(data__requirespython, str): + if not custom_formats["pep508-versionspec"](data__requirespython): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".requires-python must be pep508-versionspec", value=data__requirespython, name="" + (name_prefix or "data") + ".requires-python", definition={'type': 'string', 'format': 'pep508-versionspec', '$$description': ['`The Python version requirements of the project', '`_.']}, rule='format') + if "license" in data_keys: + data_keys.remove("license") + data__license = data["license"] + data__license_one_of_count10 = 0 + if data__license_one_of_count10 < 2: + try: + data__license_is_dict = isinstance(data__license, dict) + if data__license_is_dict: + data__license_len = len(data__license) + if not all(prop in data__license for prop in ['file']): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".license must contain ['file'] properties", value=data__license, name="" + (name_prefix or "data") + ".license", definition={'properties': {'file': {'type': 'string', '$$description': ['Relative path to the file (UTF-8) which contains the license for the', 'project.']}}, 'required': ['file']}, rule='required') + data__license_keys = set(data__license.keys()) + if "file" in data__license_keys: + data__license_keys.remove("file") + data__license__file = data__license["file"] + if not isinstance(data__license__file, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".license.file must be string", value=data__license__file, name="" + (name_prefix or "data") + ".license.file", definition={'type': 'string', '$$description': ['Relative path to the file (UTF-8) which contains the license for the', 'project.']}, rule='type') + data__license_one_of_count10 += 1 + except JsonSchemaValueException: pass + if data__license_one_of_count10 < 2: + try: + data__license_is_dict = isinstance(data__license, dict) + if data__license_is_dict: + data__license_len = len(data__license) + if not all(prop in data__license for prop in ['text']): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".license must contain ['text'] properties", value=data__license, name="" + (name_prefix or "data") + ".license", definition={'properties': {'text': {'type': 'string', '$$description': ['The license of the project whose meaning is that of the', '`License field from the core metadata', '`_.']}}, 'required': ['text']}, rule='required') + data__license_keys = set(data__license.keys()) + if "text" in data__license_keys: + data__license_keys.remove("text") + data__license__text = data__license["text"] + if not isinstance(data__license__text, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".license.text must be string", value=data__license__text, name="" + (name_prefix or "data") + ".license.text", definition={'type': 'string', '$$description': ['The license of the project whose meaning is that of the', '`License field from the core metadata', '`_.']}, rule='type') + data__license_one_of_count10 += 1 + except JsonSchemaValueException: pass + if data__license_one_of_count10 != 1: + raise JsonSchemaValueException("" + (name_prefix or "data") + ".license must be valid exactly by one definition" + (" (" + str(data__license_one_of_count10) + " matches found)"), value=data__license, name="" + (name_prefix or "data") + ".license", definition={'description': '`Project license `_.', 'oneOf': [{'properties': {'file': {'type': 'string', '$$description': ['Relative path to the file (UTF-8) which contains the license for the', 'project.']}}, 'required': ['file']}, {'properties': {'text': {'type': 'string', '$$description': ['The license of the project whose meaning is that of the', '`License field from the core metadata', '`_.']}}, 'required': ['text']}]}, rule='oneOf') + if "authors" in data_keys: + data_keys.remove("authors") + data__authors = data["authors"] + if not isinstance(data__authors, (list, tuple)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".authors must be array", value=data__authors, name="" + (name_prefix or "data") + ".authors", definition={'type': 'array', 'items': {'$id': '#/definitions/author', 'title': 'Author or Maintainer', '$comment': 'https://www.python.org/dev/peps/pep-0621/#authors-maintainers', 'type': 'object', 'properties': {'name': {'type': 'string', '$$description': ['MUST be a valid email name, i.e. whatever can be put as a name, before an', 'email, in :rfc:`822`.']}, 'email': {'type': 'string', 'format': 'idn-email', 'description': 'MUST be a valid email address'}}}, '$$description': ["The people or organizations considered to be the 'authors' of the project.", 'The exact meaning is open to interpretation (e.g. original or primary authors,', 'current maintainers, or owners of the package).']}, rule='type') + data__authors_is_list = isinstance(data__authors, (list, tuple)) + if data__authors_is_list: + data__authors_len = len(data__authors) + for data__authors_x, data__authors_item in enumerate(data__authors): + validate_https___packaging_python_org_en_latest_specifications_declaring_project_metadata___definitions_author(data__authors_item, custom_formats, (name_prefix or "data") + ".authors[{data__authors_x}]") + if "maintainers" in data_keys: + data_keys.remove("maintainers") + data__maintainers = data["maintainers"] + if not isinstance(data__maintainers, (list, tuple)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".maintainers must be array", value=data__maintainers, name="" + (name_prefix or "data") + ".maintainers", definition={'type': 'array', 'items': {'$id': '#/definitions/author', 'title': 'Author or Maintainer', '$comment': 'https://www.python.org/dev/peps/pep-0621/#authors-maintainers', 'type': 'object', 'properties': {'name': {'type': 'string', '$$description': ['MUST be a valid email name, i.e. whatever can be put as a name, before an', 'email, in :rfc:`822`.']}, 'email': {'type': 'string', 'format': 'idn-email', 'description': 'MUST be a valid email address'}}}, '$$description': ["The people or organizations considered to be the 'maintainers' of the project.", 'Similarly to ``authors``, the exact meaning is open to interpretation.']}, rule='type') + data__maintainers_is_list = isinstance(data__maintainers, (list, tuple)) + if data__maintainers_is_list: + data__maintainers_len = len(data__maintainers) + for data__maintainers_x, data__maintainers_item in enumerate(data__maintainers): + validate_https___packaging_python_org_en_latest_specifications_declaring_project_metadata___definitions_author(data__maintainers_item, custom_formats, (name_prefix or "data") + ".maintainers[{data__maintainers_x}]") + if "keywords" in data_keys: + data_keys.remove("keywords") + data__keywords = data["keywords"] + if not isinstance(data__keywords, (list, tuple)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".keywords must be array", value=data__keywords, name="" + (name_prefix or "data") + ".keywords", definition={'type': 'array', 'items': {'type': 'string'}, 'description': 'List of keywords to assist searching for the distribution in a larger catalog.'}, rule='type') + data__keywords_is_list = isinstance(data__keywords, (list, tuple)) + if data__keywords_is_list: + data__keywords_len = len(data__keywords) + for data__keywords_x, data__keywords_item in enumerate(data__keywords): + if not isinstance(data__keywords_item, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".keywords[{data__keywords_x}]".format(**locals()) + " must be string", value=data__keywords_item, name="" + (name_prefix or "data") + ".keywords[{data__keywords_x}]".format(**locals()) + "", definition={'type': 'string'}, rule='type') + if "classifiers" in data_keys: + data_keys.remove("classifiers") + data__classifiers = data["classifiers"] + if not isinstance(data__classifiers, (list, tuple)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".classifiers must be array", value=data__classifiers, name="" + (name_prefix or "data") + ".classifiers", definition={'type': 'array', 'items': {'type': 'string', 'format': 'trove-classifier', 'description': '`PyPI classifier `_.'}, '$$description': ['`Trove classifiers `_', 'which apply to the project.']}, rule='type') + data__classifiers_is_list = isinstance(data__classifiers, (list, tuple)) + if data__classifiers_is_list: + data__classifiers_len = len(data__classifiers) + for data__classifiers_x, data__classifiers_item in enumerate(data__classifiers): + if not isinstance(data__classifiers_item, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".classifiers[{data__classifiers_x}]".format(**locals()) + " must be string", value=data__classifiers_item, name="" + (name_prefix or "data") + ".classifiers[{data__classifiers_x}]".format(**locals()) + "", definition={'type': 'string', 'format': 'trove-classifier', 'description': '`PyPI classifier `_.'}, rule='type') + if isinstance(data__classifiers_item, str): + if not custom_formats["trove-classifier"](data__classifiers_item): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".classifiers[{data__classifiers_x}]".format(**locals()) + " must be trove-classifier", value=data__classifiers_item, name="" + (name_prefix or "data") + ".classifiers[{data__classifiers_x}]".format(**locals()) + "", definition={'type': 'string', 'format': 'trove-classifier', 'description': '`PyPI classifier `_.'}, rule='format') + if "urls" in data_keys: + data_keys.remove("urls") + data__urls = data["urls"] + if not isinstance(data__urls, (dict)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".urls must be object", value=data__urls, name="" + (name_prefix or "data") + ".urls", definition={'type': 'object', 'description': 'URLs associated with the project in the form ``label => value``.', 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', 'format': 'url'}}}, rule='type') + data__urls_is_dict = isinstance(data__urls, dict) + if data__urls_is_dict: + data__urls_keys = set(data__urls.keys()) + for data__urls_key, data__urls_val in data__urls.items(): + if REGEX_PATTERNS['^.+$'].search(data__urls_key): + if data__urls_key in data__urls_keys: + data__urls_keys.remove(data__urls_key) + if not isinstance(data__urls_val, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".urls.{data__urls_key}".format(**locals()) + " must be string", value=data__urls_val, name="" + (name_prefix or "data") + ".urls.{data__urls_key}".format(**locals()) + "", definition={'type': 'string', 'format': 'url'}, rule='type') + if isinstance(data__urls_val, str): + if not custom_formats["url"](data__urls_val): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".urls.{data__urls_key}".format(**locals()) + " must be url", value=data__urls_val, name="" + (name_prefix or "data") + ".urls.{data__urls_key}".format(**locals()) + "", definition={'type': 'string', 'format': 'url'}, rule='format') + if data__urls_keys: + raise JsonSchemaValueException("" + (name_prefix or "data") + ".urls must not contain "+str(data__urls_keys)+" properties", value=data__urls, name="" + (name_prefix or "data") + ".urls", definition={'type': 'object', 'description': 'URLs associated with the project in the form ``label => value``.', 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', 'format': 'url'}}}, rule='additionalProperties') + if "scripts" in data_keys: + data_keys.remove("scripts") + data__scripts = data["scripts"] + validate_https___packaging_python_org_en_latest_specifications_declaring_project_metadata___definitions_entry_point_group(data__scripts, custom_formats, (name_prefix or "data") + ".scripts") + if "gui-scripts" in data_keys: + data_keys.remove("gui-scripts") + data__guiscripts = data["gui-scripts"] + validate_https___packaging_python_org_en_latest_specifications_declaring_project_metadata___definitions_entry_point_group(data__guiscripts, custom_formats, (name_prefix or "data") + ".gui-scripts") + if "entry-points" in data_keys: + data_keys.remove("entry-points") + data__entrypoints = data["entry-points"] + data__entrypoints_is_dict = isinstance(data__entrypoints, dict) + if data__entrypoints_is_dict: + data__entrypoints_keys = set(data__entrypoints.keys()) + for data__entrypoints_key, data__entrypoints_val in data__entrypoints.items(): + if REGEX_PATTERNS['^.+$'].search(data__entrypoints_key): + if data__entrypoints_key in data__entrypoints_keys: + data__entrypoints_keys.remove(data__entrypoints_key) + validate_https___packaging_python_org_en_latest_specifications_declaring_project_metadata___definitions_entry_point_group(data__entrypoints_val, custom_formats, (name_prefix or "data") + ".entry-points.{data__entrypoints_key}") + if data__entrypoints_keys: + raise JsonSchemaValueException("" + (name_prefix or "data") + ".entry-points must not contain "+str(data__entrypoints_keys)+" properties", value=data__entrypoints, name="" + (name_prefix or "data") + ".entry-points", definition={'$$description': ['Instruct the installer to expose the given modules/functions via', '``entry-point`` discovery mechanism (useful for plugins).', 'More information available in the `Python packaging guide', '`_.'], 'propertyNames': {'format': 'python-entrypoint-group'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}}}, rule='additionalProperties') + data__entrypoints_len = len(data__entrypoints) + if data__entrypoints_len != 0: + data__entrypoints_property_names = True + for data__entrypoints_key in data__entrypoints: + try: + if isinstance(data__entrypoints_key, str): + if not custom_formats["python-entrypoint-group"](data__entrypoints_key): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".entry-points must be python-entrypoint-group", value=data__entrypoints_key, name="" + (name_prefix or "data") + ".entry-points", definition={'format': 'python-entrypoint-group'}, rule='format') + except JsonSchemaValueException: + data__entrypoints_property_names = False + if not data__entrypoints_property_names: + raise JsonSchemaValueException("" + (name_prefix or "data") + ".entry-points must be named by propertyName definition", value=data__entrypoints, name="" + (name_prefix or "data") + ".entry-points", definition={'$$description': ['Instruct the installer to expose the given modules/functions via', '``entry-point`` discovery mechanism (useful for plugins).', 'More information available in the `Python packaging guide', '`_.'], 'propertyNames': {'format': 'python-entrypoint-group'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}}}, rule='propertyNames') + if "dependencies" in data_keys: + data_keys.remove("dependencies") + data__dependencies = data["dependencies"] + if not isinstance(data__dependencies, (list, tuple)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".dependencies must be array", value=data__dependencies, name="" + (name_prefix or "data") + ".dependencies", definition={'type': 'array', 'description': 'Project (mandatory) dependencies.', 'items': {'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}}, rule='type') + data__dependencies_is_list = isinstance(data__dependencies, (list, tuple)) + if data__dependencies_is_list: + data__dependencies_len = len(data__dependencies) + for data__dependencies_x, data__dependencies_item in enumerate(data__dependencies): + validate_https___packaging_python_org_en_latest_specifications_declaring_project_metadata___definitions_dependency(data__dependencies_item, custom_formats, (name_prefix or "data") + ".dependencies[{data__dependencies_x}]") + if "optional-dependencies" in data_keys: + data_keys.remove("optional-dependencies") + data__optionaldependencies = data["optional-dependencies"] + if not isinstance(data__optionaldependencies, (dict)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".optional-dependencies must be object", value=data__optionaldependencies, name="" + (name_prefix or "data") + ".optional-dependencies", definition={'type': 'object', 'description': 'Optional dependency for the project', 'propertyNames': {'format': 'pep508-identifier'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'array', 'items': {'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}}}}, rule='type') + data__optionaldependencies_is_dict = isinstance(data__optionaldependencies, dict) + if data__optionaldependencies_is_dict: + data__optionaldependencies_keys = set(data__optionaldependencies.keys()) + for data__optionaldependencies_key, data__optionaldependencies_val in data__optionaldependencies.items(): + if REGEX_PATTERNS['^.+$'].search(data__optionaldependencies_key): + if data__optionaldependencies_key in data__optionaldependencies_keys: + data__optionaldependencies_keys.remove(data__optionaldependencies_key) + if not isinstance(data__optionaldependencies_val, (list, tuple)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".optional-dependencies.{data__optionaldependencies_key}".format(**locals()) + " must be array", value=data__optionaldependencies_val, name="" + (name_prefix or "data") + ".optional-dependencies.{data__optionaldependencies_key}".format(**locals()) + "", definition={'type': 'array', 'items': {'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}}, rule='type') + data__optionaldependencies_val_is_list = isinstance(data__optionaldependencies_val, (list, tuple)) + if data__optionaldependencies_val_is_list: + data__optionaldependencies_val_len = len(data__optionaldependencies_val) + for data__optionaldependencies_val_x, data__optionaldependencies_val_item in enumerate(data__optionaldependencies_val): + validate_https___packaging_python_org_en_latest_specifications_declaring_project_metadata___definitions_dependency(data__optionaldependencies_val_item, custom_formats, (name_prefix or "data") + ".optional-dependencies.{data__optionaldependencies_key}[{data__optionaldependencies_val_x}]") + if data__optionaldependencies_keys: + raise JsonSchemaValueException("" + (name_prefix or "data") + ".optional-dependencies must not contain "+str(data__optionaldependencies_keys)+" properties", value=data__optionaldependencies, name="" + (name_prefix or "data") + ".optional-dependencies", definition={'type': 'object', 'description': 'Optional dependency for the project', 'propertyNames': {'format': 'pep508-identifier'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'array', 'items': {'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}}}}, rule='additionalProperties') + data__optionaldependencies_len = len(data__optionaldependencies) + if data__optionaldependencies_len != 0: + data__optionaldependencies_property_names = True + for data__optionaldependencies_key in data__optionaldependencies: + try: + if isinstance(data__optionaldependencies_key, str): + if not custom_formats["pep508-identifier"](data__optionaldependencies_key): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".optional-dependencies must be pep508-identifier", value=data__optionaldependencies_key, name="" + (name_prefix or "data") + ".optional-dependencies", definition={'format': 'pep508-identifier'}, rule='format') + except JsonSchemaValueException: + data__optionaldependencies_property_names = False + if not data__optionaldependencies_property_names: + raise JsonSchemaValueException("" + (name_prefix or "data") + ".optional-dependencies must be named by propertyName definition", value=data__optionaldependencies, name="" + (name_prefix or "data") + ".optional-dependencies", definition={'type': 'object', 'description': 'Optional dependency for the project', 'propertyNames': {'format': 'pep508-identifier'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'array', 'items': {'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}}}}, rule='propertyNames') + if "dynamic" in data_keys: + data_keys.remove("dynamic") + data__dynamic = data["dynamic"] + if not isinstance(data__dynamic, (list, tuple)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".dynamic must be array", value=data__dynamic, name="" + (name_prefix or "data") + ".dynamic", definition={'type': 'array', '$$description': ['Specifies which fields are intentionally unspecified and expected to be', 'dynamically provided by build tools'], 'items': {'enum': ['version', 'description', 'readme', 'requires-python', 'license', 'authors', 'maintainers', 'keywords', 'classifiers', 'urls', 'scripts', 'gui-scripts', 'entry-points', 'dependencies', 'optional-dependencies']}}, rule='type') + data__dynamic_is_list = isinstance(data__dynamic, (list, tuple)) + if data__dynamic_is_list: + data__dynamic_len = len(data__dynamic) + for data__dynamic_x, data__dynamic_item in enumerate(data__dynamic): + if data__dynamic_item not in ['version', 'description', 'readme', 'requires-python', 'license', 'authors', 'maintainers', 'keywords', 'classifiers', 'urls', 'scripts', 'gui-scripts', 'entry-points', 'dependencies', 'optional-dependencies']: + raise JsonSchemaValueException("" + (name_prefix or "data") + ".dynamic[{data__dynamic_x}]".format(**locals()) + " must be one of ['version', 'description', 'readme', 'requires-python', 'license', 'authors', 'maintainers', 'keywords', 'classifiers', 'urls', 'scripts', 'gui-scripts', 'entry-points', 'dependencies', 'optional-dependencies']", value=data__dynamic_item, name="" + (name_prefix or "data") + ".dynamic[{data__dynamic_x}]".format(**locals()) + "", definition={'enum': ['version', 'description', 'readme', 'requires-python', 'license', 'authors', 'maintainers', 'keywords', 'classifiers', 'urls', 'scripts', 'gui-scripts', 'entry-points', 'dependencies', 'optional-dependencies']}, rule='enum') + if data_keys: + raise JsonSchemaValueException("" + (name_prefix or "data") + " must not contain "+str(data_keys)+" properties", value=data, name="" + (name_prefix or "data") + "", definition={'$schema': 'http://json-schema.org/draft-07/schema', '$id': 'https://packaging.python.org/en/latest/specifications/declaring-project-metadata/', 'title': 'Package metadata stored in the ``project`` table', '$$description': ['Data structure for the **project** table inside ``pyproject.toml``', '(as initially defined in :pep:`621`)'], 'type': 'object', 'properties': {'name': {'type': 'string', 'description': 'The name (primary identifier) of the project. MUST be statically defined.', 'format': 'pep508-identifier'}, 'version': {'type': 'string', 'description': 'The version of the project as supported by :pep:`440`.', 'format': 'pep440'}, 'description': {'type': 'string', '$$description': ['The `summary description of the project', '`_']}, 'readme': {'$$description': ['`Full/detailed description of the project in the form of a README', '`_', "with meaning similar to the one defined in `core metadata's Description", '`_'], 'oneOf': [{'type': 'string', '$$description': ['Relative path to a text file (UTF-8) containing the full description', 'of the project. If the file path ends in case-insensitive ``.md`` or', '``.rst`` suffixes, then the content-type is respectively', '``text/markdown`` or ``text/x-rst``']}, {'type': 'object', 'allOf': [{'anyOf': [{'properties': {'file': {'type': 'string', '$$description': ['Relative path to a text file containing the full description', 'of the project.']}}, 'required': ['file']}, {'properties': {'text': {'type': 'string', 'description': 'Full text describing the project.'}}, 'required': ['text']}]}, {'properties': {'content-type': {'type': 'string', '$$description': ['Content-type (:rfc:`1341`) of the full description', '(e.g. ``text/markdown``). The ``charset`` parameter is assumed', 'UTF-8 when not present.'], '$comment': 'TODO: add regex pattern or format?'}}, 'required': ['content-type']}]}]}, 'requires-python': {'type': 'string', 'format': 'pep508-versionspec', '$$description': ['`The Python version requirements of the project', '`_.']}, 'license': {'description': '`Project license `_.', 'oneOf': [{'properties': {'file': {'type': 'string', '$$description': ['Relative path to the file (UTF-8) which contains the license for the', 'project.']}}, 'required': ['file']}, {'properties': {'text': {'type': 'string', '$$description': ['The license of the project whose meaning is that of the', '`License field from the core metadata', '`_.']}}, 'required': ['text']}]}, 'authors': {'type': 'array', 'items': {'$id': '#/definitions/author', 'title': 'Author or Maintainer', '$comment': 'https://www.python.org/dev/peps/pep-0621/#authors-maintainers', 'type': 'object', 'properties': {'name': {'type': 'string', '$$description': ['MUST be a valid email name, i.e. whatever can be put as a name, before an', 'email, in :rfc:`822`.']}, 'email': {'type': 'string', 'format': 'idn-email', 'description': 'MUST be a valid email address'}}}, '$$description': ["The people or organizations considered to be the 'authors' of the project.", 'The exact meaning is open to interpretation (e.g. original or primary authors,', 'current maintainers, or owners of the package).']}, 'maintainers': {'type': 'array', 'items': {'$id': '#/definitions/author', 'title': 'Author or Maintainer', '$comment': 'https://www.python.org/dev/peps/pep-0621/#authors-maintainers', 'type': 'object', 'properties': {'name': {'type': 'string', '$$description': ['MUST be a valid email name, i.e. whatever can be put as a name, before an', 'email, in :rfc:`822`.']}, 'email': {'type': 'string', 'format': 'idn-email', 'description': 'MUST be a valid email address'}}}, '$$description': ["The people or organizations considered to be the 'maintainers' of the project.", 'Similarly to ``authors``, the exact meaning is open to interpretation.']}, 'keywords': {'type': 'array', 'items': {'type': 'string'}, 'description': 'List of keywords to assist searching for the distribution in a larger catalog.'}, 'classifiers': {'type': 'array', 'items': {'type': 'string', 'format': 'trove-classifier', 'description': '`PyPI classifier `_.'}, '$$description': ['`Trove classifiers `_', 'which apply to the project.']}, 'urls': {'type': 'object', 'description': 'URLs associated with the project in the form ``label => value``.', 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', 'format': 'url'}}}, 'scripts': {'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}, 'gui-scripts': {'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}, 'entry-points': {'$$description': ['Instruct the installer to expose the given modules/functions via', '``entry-point`` discovery mechanism (useful for plugins).', 'More information available in the `Python packaging guide', '`_.'], 'propertyNames': {'format': 'python-entrypoint-group'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}}}, 'dependencies': {'type': 'array', 'description': 'Project (mandatory) dependencies.', 'items': {'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}}, 'optional-dependencies': {'type': 'object', 'description': 'Optional dependency for the project', 'propertyNames': {'format': 'pep508-identifier'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'array', 'items': {'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}}}}, 'dynamic': {'type': 'array', '$$description': ['Specifies which fields are intentionally unspecified and expected to be', 'dynamically provided by build tools'], 'items': {'enum': ['version', 'description', 'readme', 'requires-python', 'license', 'authors', 'maintainers', 'keywords', 'classifiers', 'urls', 'scripts', 'gui-scripts', 'entry-points', 'dependencies', 'optional-dependencies']}}}, 'required': ['name'], 'additionalProperties': False, 'if': {'not': {'required': ['dynamic'], 'properties': {'dynamic': {'contains': {'const': 'version'}, '$$description': ['version is listed in ``dynamic``']}}}, '$$comment': ['According to :pep:`621`:', ' If the core metadata specification lists a field as "Required", then', ' the metadata MUST specify the field statically or list it in dynamic', 'In turn, `core metadata`_ defines:', ' The required fields are: Metadata-Version, Name, Version.', ' All the other fields are optional.', 'Since ``Metadata-Version`` is defined by the build back-end, ``name`` and', '``version`` are the only mandatory information in ``pyproject.toml``.', '.. _core metadata: https://packaging.python.org/specifications/core-metadata/']}, 'then': {'required': ['version'], '$$description': ['version should be statically defined in the ``version`` field']}, 'definitions': {'author': {'$id': '#/definitions/author', 'title': 'Author or Maintainer', '$comment': 'https://www.python.org/dev/peps/pep-0621/#authors-maintainers', 'type': 'object', 'properties': {'name': {'type': 'string', '$$description': ['MUST be a valid email name, i.e. whatever can be put as a name, before an', 'email, in :rfc:`822`.']}, 'email': {'type': 'string', 'format': 'idn-email', 'description': 'MUST be a valid email address'}}}, 'entry-point-group': {'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}, 'dependency': {'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}}}, rule='additionalProperties') + try: + try: + data_is_dict = isinstance(data, dict) + if data_is_dict: + data_len = len(data) + if not all(prop in data for prop in ['dynamic']): + raise JsonSchemaValueException("" + (name_prefix or "data") + " must contain ['dynamic'] properties", value=data, name="" + (name_prefix or "data") + "", definition={'required': ['dynamic'], 'properties': {'dynamic': {'contains': {'const': 'version'}, '$$description': ['version is listed in ``dynamic``']}}}, rule='required') + data_keys = set(data.keys()) + if "dynamic" in data_keys: + data_keys.remove("dynamic") + data__dynamic = data["dynamic"] + data__dynamic_is_list = isinstance(data__dynamic, (list, tuple)) + if data__dynamic_is_list: + data__dynamic_contains = False + for data__dynamic_key in data__dynamic: + try: + if data__dynamic_key != "version": + raise JsonSchemaValueException("" + (name_prefix or "data") + ".dynamic must be same as const definition: version", value=data__dynamic_key, name="" + (name_prefix or "data") + ".dynamic", definition={'const': 'version'}, rule='const') + data__dynamic_contains = True + break + except JsonSchemaValueException: pass + if not data__dynamic_contains: + raise JsonSchemaValueException("" + (name_prefix or "data") + ".dynamic must contain one of contains definition", value=data__dynamic, name="" + (name_prefix or "data") + ".dynamic", definition={'contains': {'const': 'version'}, '$$description': ['version is listed in ``dynamic``']}, rule='contains') + except JsonSchemaValueException: pass + else: + raise JsonSchemaValueException("" + (name_prefix or "data") + " must NOT match a disallowed definition", value=data, name="" + (name_prefix or "data") + "", definition={'not': {'required': ['dynamic'], 'properties': {'dynamic': {'contains': {'const': 'version'}, '$$description': ['version is listed in ``dynamic``']}}}, '$$comment': ['According to :pep:`621`:', ' If the core metadata specification lists a field as "Required", then', ' the metadata MUST specify the field statically or list it in dynamic', 'In turn, `core metadata`_ defines:', ' The required fields are: Metadata-Version, Name, Version.', ' All the other fields are optional.', 'Since ``Metadata-Version`` is defined by the build back-end, ``name`` and', '``version`` are the only mandatory information in ``pyproject.toml``.', '.. _core metadata: https://packaging.python.org/specifications/core-metadata/']}, rule='not') + except JsonSchemaValueException: + pass + else: + data_is_dict = isinstance(data, dict) + if data_is_dict: + data_len = len(data) + if not all(prop in data for prop in ['version']): + raise JsonSchemaValueException("" + (name_prefix or "data") + " must contain ['version'] properties", value=data, name="" + (name_prefix or "data") + "", definition={'required': ['version'], '$$description': ['version should be statically defined in the ``version`` field']}, rule='required') + return data + +def validate_https___packaging_python_org_en_latest_specifications_declaring_project_metadata___definitions_dependency(data, custom_formats={}, name_prefix=None): + if not isinstance(data, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + " must be string", value=data, name="" + (name_prefix or "data") + "", definition={'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}, rule='type') + if isinstance(data, str): + if not custom_formats["pep508"](data): + raise JsonSchemaValueException("" + (name_prefix or "data") + " must be pep508", value=data, name="" + (name_prefix or "data") + "", definition={'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}, rule='format') + return data + +def validate_https___packaging_python_org_en_latest_specifications_declaring_project_metadata___definitions_entry_point_group(data, custom_formats={}, name_prefix=None): + if not isinstance(data, (dict)): + raise JsonSchemaValueException("" + (name_prefix or "data") + " must be object", value=data, name="" + (name_prefix or "data") + "", definition={'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}, rule='type') + data_is_dict = isinstance(data, dict) + if data_is_dict: + data_keys = set(data.keys()) + for data_key, data_val in data.items(): + if REGEX_PATTERNS['^.+$'].search(data_key): + if data_key in data_keys: + data_keys.remove(data_key) + if not isinstance(data_val, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".{data_key}".format(**locals()) + " must be string", value=data_val, name="" + (name_prefix or "data") + ".{data_key}".format(**locals()) + "", definition={'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}, rule='type') + if isinstance(data_val, str): + if not custom_formats["python-entrypoint-reference"](data_val): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".{data_key}".format(**locals()) + " must be python-entrypoint-reference", value=data_val, name="" + (name_prefix or "data") + ".{data_key}".format(**locals()) + "", definition={'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}, rule='format') + if data_keys: + raise JsonSchemaValueException("" + (name_prefix or "data") + " must not contain "+str(data_keys)+" properties", value=data, name="" + (name_prefix or "data") + "", definition={'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}, rule='additionalProperties') + data_len = len(data) + if data_len != 0: + data_property_names = True + for data_key in data: + try: + if isinstance(data_key, str): + if not custom_formats["python-entrypoint-name"](data_key): + raise JsonSchemaValueException("" + (name_prefix or "data") + " must be python-entrypoint-name", value=data_key, name="" + (name_prefix or "data") + "", definition={'format': 'python-entrypoint-name'}, rule='format') + except JsonSchemaValueException: + data_property_names = False + if not data_property_names: + raise JsonSchemaValueException("" + (name_prefix or "data") + " must be named by propertyName definition", value=data, name="" + (name_prefix or "data") + "", definition={'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}, rule='propertyNames') + return data + +def validate_https___packaging_python_org_en_latest_specifications_declaring_project_metadata___definitions_author(data, custom_formats={}, name_prefix=None): + if not isinstance(data, (dict)): + raise JsonSchemaValueException("" + (name_prefix or "data") + " must be object", value=data, name="" + (name_prefix or "data") + "", definition={'$id': '#/definitions/author', 'title': 'Author or Maintainer', '$comment': 'https://www.python.org/dev/peps/pep-0621/#authors-maintainers', 'type': 'object', 'properties': {'name': {'type': 'string', '$$description': ['MUST be a valid email name, i.e. whatever can be put as a name, before an', 'email, in :rfc:`822`.']}, 'email': {'type': 'string', 'format': 'idn-email', 'description': 'MUST be a valid email address'}}}, rule='type') + data_is_dict = isinstance(data, dict) + if data_is_dict: + data_keys = set(data.keys()) + if "name" in data_keys: + data_keys.remove("name") + data__name = data["name"] + if not isinstance(data__name, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".name must be string", value=data__name, name="" + (name_prefix or "data") + ".name", definition={'type': 'string', '$$description': ['MUST be a valid email name, i.e. whatever can be put as a name, before an', 'email, in :rfc:`822`.']}, rule='type') + if "email" in data_keys: + data_keys.remove("email") + data__email = data["email"] + if not isinstance(data__email, (str)): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".email must be string", value=data__email, name="" + (name_prefix or "data") + ".email", definition={'type': 'string', 'format': 'idn-email', 'description': 'MUST be a valid email address'}, rule='type') + if isinstance(data__email, str): + if not REGEX_PATTERNS["idn-email_re_pattern"].match(data__email): + raise JsonSchemaValueException("" + (name_prefix or "data") + ".email must be idn-email", value=data__email, name="" + (name_prefix or "data") + ".email", definition={'type': 'string', 'format': 'idn-email', 'description': 'MUST be a valid email address'}, rule='format') + return data \ No newline at end of file diff --git a/libs/common/setuptools/config/_validate_pyproject/formats.py b/libs/common/setuptools/config/_validate_pyproject/formats.py new file mode 100644 index 00000000..638ac119 --- /dev/null +++ b/libs/common/setuptools/config/_validate_pyproject/formats.py @@ -0,0 +1,259 @@ +import logging +import os +import re +import string +import typing +from itertools import chain as _chain + +_logger = logging.getLogger(__name__) + +# ------------------------------------------------------------------------------------- +# PEP 440 + +VERSION_PATTERN = r""" + v? + (?: + (?:(?P[0-9]+)!)? # epoch + (?P[0-9]+(?:\.[0-9]+)*) # release segment + (?P
                                          # pre-release
+            [-_\.]?
+            (?P(a|b|c|rc|alpha|beta|pre|preview))
+            [-_\.]?
+            (?P[0-9]+)?
+        )?
+        (?P                                         # post release
+            (?:-(?P[0-9]+))
+            |
+            (?:
+                [-_\.]?
+                (?Ppost|rev|r)
+                [-_\.]?
+                (?P[0-9]+)?
+            )
+        )?
+        (?P                                          # dev release
+            [-_\.]?
+            (?Pdev)
+            [-_\.]?
+            (?P[0-9]+)?
+        )?
+    )
+    (?:\+(?P[a-z0-9]+(?:[-_\.][a-z0-9]+)*))?       # local version
+"""
+
+VERSION_REGEX = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.X | re.I)
+
+
+def pep440(version: str) -> bool:
+    return VERSION_REGEX.match(version) is not None
+
+
+# -------------------------------------------------------------------------------------
+# PEP 508
+
+PEP508_IDENTIFIER_PATTERN = r"([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])"
+PEP508_IDENTIFIER_REGEX = re.compile(f"^{PEP508_IDENTIFIER_PATTERN}$", re.I)
+
+
+def pep508_identifier(name: str) -> bool:
+    return PEP508_IDENTIFIER_REGEX.match(name) is not None
+
+
+try:
+    try:
+        from packaging import requirements as _req
+    except ImportError:  # pragma: no cover
+        # let's try setuptools vendored version
+        from setuptools._vendor.packaging import requirements as _req  # type: ignore
+
+    def pep508(value: str) -> bool:
+        try:
+            _req.Requirement(value)
+            return True
+        except _req.InvalidRequirement:
+            return False
+
+except ImportError:  # pragma: no cover
+    _logger.warning(
+        "Could not find an installation of `packaging`. Requirements, dependencies and "
+        "versions might not be validated. "
+        "To enforce validation, please install `packaging`."
+    )
+
+    def pep508(value: str) -> bool:
+        return True
+
+
+def pep508_versionspec(value: str) -> bool:
+    """Expression that can be used to specify/lock versions (including ranges)"""
+    if any(c in value for c in (";", "]", "@")):
+        # In PEP 508:
+        # conditional markers, extras and URL specs are not included in the
+        # versionspec
+        return False
+    # Let's pretend we have a dependency called `requirement` with the given
+    # version spec, then we can re-use the pep508 function for validation:
+    return pep508(f"requirement{value}")
+
+
+# -------------------------------------------------------------------------------------
+# PEP 517
+
+
+def pep517_backend_reference(value: str) -> bool:
+    module, _, obj = value.partition(":")
+    identifiers = (i.strip() for i in _chain(module.split("."), obj.split(".")))
+    return all(python_identifier(i) for i in identifiers if i)
+
+
+# -------------------------------------------------------------------------------------
+# Classifiers - PEP 301
+
+
+def _download_classifiers() -> str:
+    import ssl
+    from email.message import Message
+    from urllib.request import urlopen
+
+    url = "https://pypi.org/pypi?:action=list_classifiers"
+    context = ssl.create_default_context()
+    with urlopen(url, context=context) as response:
+        headers = Message()
+        headers["content_type"] = response.getheader("content-type", "text/plain")
+        return response.read().decode(headers.get_param("charset", "utf-8"))
+
+
+class _TroveClassifier:
+    """The ``trove_classifiers`` package is the official way of validating classifiers,
+    however this package might not be always available.
+    As a workaround we can still download a list from PyPI.
+    We also don't want to be over strict about it, so simply skipping silently is an
+    option (classifiers will be validated anyway during the upload to PyPI).
+    """
+
+    def __init__(self):
+        self.downloaded: typing.Union[None, False, typing.Set[str]] = None
+        self._skip_download = False
+        # None => not cached yet
+        # False => cache not available
+        self.__name__ = "trove_classifier"  # Emulate a public function
+
+    def _disable_download(self):
+        # This is a private API. Only setuptools has the consent of using it.
+        self._skip_download = True
+
+    def __call__(self, value: str) -> bool:
+        if self.downloaded is False or self._skip_download is True:
+            return True
+
+        if os.getenv("NO_NETWORK") or os.getenv("VALIDATE_PYPROJECT_NO_NETWORK"):
+            self.downloaded = False
+            msg = (
+                "Install ``trove-classifiers`` to ensure proper validation. "
+                "Skipping download of classifiers list from PyPI (NO_NETWORK)."
+            )
+            _logger.debug(msg)
+            return True
+
+        if self.downloaded is None:
+            msg = (
+                "Install ``trove-classifiers`` to ensure proper validation. "
+                "Meanwhile a list of classifiers will be downloaded from PyPI."
+            )
+            _logger.debug(msg)
+            try:
+                self.downloaded = set(_download_classifiers().splitlines())
+            except Exception:
+                self.downloaded = False
+                _logger.debug("Problem with download, skipping validation")
+                return True
+
+        return value in self.downloaded or value.lower().startswith("private ::")
+
+
+try:
+    from trove_classifiers import classifiers as _trove_classifiers
+
+    def trove_classifier(value: str) -> bool:
+        return value in _trove_classifiers or value.lower().startswith("private ::")
+
+except ImportError:  # pragma: no cover
+    trove_classifier = _TroveClassifier()
+
+
+# -------------------------------------------------------------------------------------
+# Non-PEP related
+
+
+def url(value: str) -> bool:
+    from urllib.parse import urlparse
+
+    try:
+        parts = urlparse(value)
+        if not parts.scheme:
+            _logger.warning(
+                "For maximum compatibility please make sure to include a "
+                "`scheme` prefix in your URL (e.g. 'http://'). "
+                f"Given value: {value}"
+            )
+            if not (value.startswith("/") or value.startswith("\\") or "@" in value):
+                parts = urlparse(f"http://{value}")
+
+        return bool(parts.scheme and parts.netloc)
+    except Exception:
+        return False
+
+
+# https://packaging.python.org/specifications/entry-points/
+ENTRYPOINT_PATTERN = r"[^\[\s=]([^=]*[^\s=])?"
+ENTRYPOINT_REGEX = re.compile(f"^{ENTRYPOINT_PATTERN}$", re.I)
+RECOMMEDED_ENTRYPOINT_PATTERN = r"[\w.-]+"
+RECOMMEDED_ENTRYPOINT_REGEX = re.compile(f"^{RECOMMEDED_ENTRYPOINT_PATTERN}$", re.I)
+ENTRYPOINT_GROUP_PATTERN = r"\w+(\.\w+)*"
+ENTRYPOINT_GROUP_REGEX = re.compile(f"^{ENTRYPOINT_GROUP_PATTERN}$", re.I)
+
+
+def python_identifier(value: str) -> bool:
+    return value.isidentifier()
+
+
+def python_qualified_identifier(value: str) -> bool:
+    if value.startswith(".") or value.endswith("."):
+        return False
+    return all(python_identifier(m) for m in value.split("."))
+
+
+def python_module_name(value: str) -> bool:
+    return python_qualified_identifier(value)
+
+
+def python_entrypoint_group(value: str) -> bool:
+    return ENTRYPOINT_GROUP_REGEX.match(value) is not None
+
+
+def python_entrypoint_name(value: str) -> bool:
+    if not ENTRYPOINT_REGEX.match(value):
+        return False
+    if not RECOMMEDED_ENTRYPOINT_REGEX.match(value):
+        msg = f"Entry point `{value}` does not follow recommended pattern: "
+        msg += RECOMMEDED_ENTRYPOINT_PATTERN
+        _logger.warning(msg)
+    return True
+
+
+def python_entrypoint_reference(value: str) -> bool:
+    module, _, rest = value.partition(":")
+    if "[" in rest:
+        obj, _, extras_ = rest.partition("[")
+        if extras_.strip()[-1] != "]":
+            return False
+        extras = (x.strip() for x in extras_.strip(string.whitespace + "[]").split(","))
+        if not all(pep508_identifier(e) for e in extras):
+            return False
+        _logger.warning(f"`{value}` - using extras for entry points is not recommended")
+    else:
+        obj = rest
+
+    module_parts = module.split(".")
+    identifiers = _chain(module_parts, obj.split(".")) if rest else module_parts
+    return all(python_identifier(i.strip()) for i in identifiers)
diff --git a/libs/common/setuptools/config/expand.py b/libs/common/setuptools/config/expand.py
new file mode 100644
index 00000000..c8db2c4b
--- /dev/null
+++ b/libs/common/setuptools/config/expand.py
@@ -0,0 +1,462 @@
+"""Utility functions to expand configuration directives or special values
+(such glob patterns).
+
+We can split the process of interpreting configuration files into 2 steps:
+
+1. The parsing the file contents from strings to value objects
+   that can be understand by Python (for example a string with a comma
+   separated list of keywords into an actual Python list of strings).
+
+2. The expansion (or post-processing) of these values according to the
+   semantics ``setuptools`` assign to them (for example a configuration field
+   with the ``file:`` directive should be expanded from a list of file paths to
+   a single string with the contents of those files concatenated)
+
+This module focus on the second step, and therefore allow sharing the expansion
+functions among several configuration file formats.
+
+**PRIVATE MODULE**: API reserved for setuptools internal usage only.
+"""
+import ast
+import importlib
+import io
+import os
+import pathlib
+import sys
+import warnings
+from glob import iglob
+from configparser import ConfigParser
+from importlib.machinery import ModuleSpec
+from itertools import chain
+from typing import (
+    TYPE_CHECKING,
+    Callable,
+    Dict,
+    Iterable,
+    Iterator,
+    List,
+    Mapping,
+    Optional,
+    Tuple,
+    TypeVar,
+    Union,
+    cast
+)
+from pathlib import Path
+from types import ModuleType
+
+from distutils.errors import DistutilsOptionError
+
+from .._path import same_path as _same_path
+
+if TYPE_CHECKING:
+    from setuptools.dist import Distribution  # noqa
+    from setuptools.discovery import ConfigDiscovery  # noqa
+    from distutils.dist import DistributionMetadata  # noqa
+
+chain_iter = chain.from_iterable
+_Path = Union[str, os.PathLike]
+_K = TypeVar("_K")
+_V = TypeVar("_V", covariant=True)
+
+
+class StaticModule:
+    """Proxy to a module object that avoids executing arbitrary code."""
+
+    def __init__(self, name: str, spec: ModuleSpec):
+        module = ast.parse(pathlib.Path(spec.origin).read_bytes())
+        vars(self).update(locals())
+        del self.self
+
+    def _find_assignments(self) -> Iterator[Tuple[ast.AST, ast.AST]]:
+        for statement in self.module.body:
+            if isinstance(statement, ast.Assign):
+                yield from ((target, statement.value) for target in statement.targets)
+            elif isinstance(statement, ast.AnnAssign) and statement.value:
+                yield (statement.target, statement.value)
+
+    def __getattr__(self, attr):
+        """Attempt to load an attribute "statically", via :func:`ast.literal_eval`."""
+        try:
+            return next(
+                ast.literal_eval(value)
+                for target, value in self._find_assignments()
+                if isinstance(target, ast.Name) and target.id == attr
+            )
+        except Exception as e:
+            raise AttributeError(f"{self.name} has no attribute {attr}") from e
+
+
+def glob_relative(
+    patterns: Iterable[str], root_dir: Optional[_Path] = None
+) -> List[str]:
+    """Expand the list of glob patterns, but preserving relative paths.
+
+    :param list[str] patterns: List of glob patterns
+    :param str root_dir: Path to which globs should be relative
+                         (current directory by default)
+    :rtype: list
+    """
+    glob_characters = {'*', '?', '[', ']', '{', '}'}
+    expanded_values = []
+    root_dir = root_dir or os.getcwd()
+    for value in patterns:
+
+        # Has globby characters?
+        if any(char in value for char in glob_characters):
+            # then expand the glob pattern while keeping paths *relative*:
+            glob_path = os.path.abspath(os.path.join(root_dir, value))
+            expanded_values.extend(sorted(
+                os.path.relpath(path, root_dir).replace(os.sep, "/")
+                for path in iglob(glob_path, recursive=True)))
+
+        else:
+            # take the value as-is
+            path = os.path.relpath(value, root_dir).replace(os.sep, "/")
+            expanded_values.append(path)
+
+    return expanded_values
+
+
+def read_files(filepaths: Union[str, bytes, Iterable[_Path]], root_dir=None) -> str:
+    """Return the content of the files concatenated using ``\n`` as str
+
+    This function is sandboxed and won't reach anything outside ``root_dir``
+
+    (By default ``root_dir`` is the current directory).
+    """
+    from setuptools.extern.more_itertools import always_iterable
+
+    root_dir = os.path.abspath(root_dir or os.getcwd())
+    _filepaths = (os.path.join(root_dir, path) for path in always_iterable(filepaths))
+    return '\n'.join(
+        _read_file(path)
+        for path in _filter_existing_files(_filepaths)
+        if _assert_local(path, root_dir)
+    )
+
+
+def _filter_existing_files(filepaths: Iterable[_Path]) -> Iterator[_Path]:
+    for path in filepaths:
+        if os.path.isfile(path):
+            yield path
+        else:
+            warnings.warn(f"File {path!r} cannot be found")
+
+
+def _read_file(filepath: Union[bytes, _Path]) -> str:
+    with io.open(filepath, encoding='utf-8') as f:
+        return f.read()
+
+
+def _assert_local(filepath: _Path, root_dir: str):
+    if Path(os.path.abspath(root_dir)) not in Path(os.path.abspath(filepath)).parents:
+        msg = f"Cannot access {filepath!r} (or anything outside {root_dir!r})"
+        raise DistutilsOptionError(msg)
+
+    return True
+
+
+def read_attr(
+    attr_desc: str,
+    package_dir: Optional[Mapping[str, str]] = None,
+    root_dir: Optional[_Path] = None
+):
+    """Reads the value of an attribute from a module.
+
+    This function will try to read the attributed statically first
+    (via :func:`ast.literal_eval`), and only evaluate the module if it fails.
+
+    Examples:
+        read_attr("package.attr")
+        read_attr("package.module.attr")
+
+    :param str attr_desc: Dot-separated string describing how to reach the
+        attribute (see examples above)
+    :param dict[str, str] package_dir: Mapping of package names to their
+        location in disk (represented by paths relative to ``root_dir``).
+    :param str root_dir: Path to directory containing all the packages in
+        ``package_dir`` (current directory by default).
+    :rtype: str
+    """
+    root_dir = root_dir or os.getcwd()
+    attrs_path = attr_desc.strip().split('.')
+    attr_name = attrs_path.pop()
+    module_name = '.'.join(attrs_path)
+    module_name = module_name or '__init__'
+    _parent_path, path, module_name = _find_module(module_name, package_dir, root_dir)
+    spec = _find_spec(module_name, path)
+
+    try:
+        return getattr(StaticModule(module_name, spec), attr_name)
+    except Exception:
+        # fallback to evaluate module
+        module = _load_spec(spec, module_name)
+        return getattr(module, attr_name)
+
+
+def _find_spec(module_name: str, module_path: Optional[_Path]) -> ModuleSpec:
+    spec = importlib.util.spec_from_file_location(module_name, module_path)
+    spec = spec or importlib.util.find_spec(module_name)
+
+    if spec is None:
+        raise ModuleNotFoundError(module_name)
+
+    return spec
+
+
+def _load_spec(spec: ModuleSpec, module_name: str) -> ModuleType:
+    name = getattr(spec, "__name__", module_name)
+    if name in sys.modules:
+        return sys.modules[name]
+    module = importlib.util.module_from_spec(spec)
+    sys.modules[name] = module  # cache (it also ensures `==` works on loaded items)
+    spec.loader.exec_module(module)  # type: ignore
+    return module
+
+
+def _find_module(
+    module_name: str, package_dir: Optional[Mapping[str, str]], root_dir: _Path
+) -> Tuple[_Path, Optional[str], str]:
+    """Given a module (that could normally be imported by ``module_name``
+    after the build is complete), find the path to the parent directory where
+    it is contained and the canonical name that could be used to import it
+    considering the ``package_dir`` in the build configuration and ``root_dir``
+    """
+    parent_path = root_dir
+    module_parts = module_name.split('.')
+    if package_dir:
+        if module_parts[0] in package_dir:
+            # A custom path was specified for the module we want to import
+            custom_path = package_dir[module_parts[0]]
+            parts = custom_path.rsplit('/', 1)
+            if len(parts) > 1:
+                parent_path = os.path.join(root_dir, parts[0])
+                parent_module = parts[1]
+            else:
+                parent_module = custom_path
+            module_name = ".".join([parent_module, *module_parts[1:]])
+        elif '' in package_dir:
+            # A custom parent directory was specified for all root modules
+            parent_path = os.path.join(root_dir, package_dir[''])
+
+    path_start = os.path.join(parent_path, *module_name.split("."))
+    candidates = chain(
+        (f"{path_start}.py", os.path.join(path_start, "__init__.py")),
+        iglob(f"{path_start}.*")
+    )
+    module_path = next((x for x in candidates if os.path.isfile(x)), None)
+    return parent_path, module_path, module_name
+
+
+def resolve_class(
+    qualified_class_name: str,
+    package_dir: Optional[Mapping[str, str]] = None,
+    root_dir: Optional[_Path] = None
+) -> Callable:
+    """Given a qualified class name, return the associated class object"""
+    root_dir = root_dir or os.getcwd()
+    idx = qualified_class_name.rfind('.')
+    class_name = qualified_class_name[idx + 1 :]
+    pkg_name = qualified_class_name[:idx]
+
+    _parent_path, path, module_name = _find_module(pkg_name, package_dir, root_dir)
+    module = _load_spec(_find_spec(module_name, path), module_name)
+    return getattr(module, class_name)
+
+
+def cmdclass(
+    values: Dict[str, str],
+    package_dir: Optional[Mapping[str, str]] = None,
+    root_dir: Optional[_Path] = None
+) -> Dict[str, Callable]:
+    """Given a dictionary mapping command names to strings for qualified class
+    names, apply :func:`resolve_class` to the dict values.
+    """
+    return {k: resolve_class(v, package_dir, root_dir) for k, v in values.items()}
+
+
+def find_packages(
+    *,
+    namespaces=True,
+    fill_package_dir: Optional[Dict[str, str]] = None,
+    root_dir: Optional[_Path] = None,
+    **kwargs
+) -> List[str]:
+    """Works similarly to :func:`setuptools.find_packages`, but with all
+    arguments given as keyword arguments. Moreover, ``where`` can be given
+    as a list (the results will be simply concatenated).
+
+    When the additional keyword argument ``namespaces`` is ``True``, it will
+    behave like :func:`setuptools.find_namespace_packages`` (i.e. include
+    implicit namespaces as per :pep:`420`).
+
+    The ``where`` argument will be considered relative to ``root_dir`` (or the current
+    working directory when ``root_dir`` is not given).
+
+    If the ``fill_package_dir`` argument is passed, this function will consider it as a
+    similar data structure to the ``package_dir`` configuration parameter add fill-in
+    any missing package location.
+
+    :rtype: list
+    """
+    from setuptools.discovery import construct_package_dir
+    from setuptools.extern.more_itertools import unique_everseen, always_iterable
+
+    if namespaces:
+        from setuptools.discovery import PEP420PackageFinder as PackageFinder
+    else:
+        from setuptools.discovery import PackageFinder  # type: ignore
+
+    root_dir = root_dir or os.curdir
+    where = kwargs.pop('where', ['.'])
+    packages: List[str] = []
+    fill_package_dir = {} if fill_package_dir is None else fill_package_dir
+    search = list(unique_everseen(always_iterable(where)))
+
+    if len(search) == 1 and all(not _same_path(search[0], x) for x in (".", root_dir)):
+        fill_package_dir.setdefault("", search[0])
+
+    for path in search:
+        package_path = _nest_path(root_dir, path)
+        pkgs = PackageFinder.find(package_path, **kwargs)
+        packages.extend(pkgs)
+        if pkgs and not (
+            fill_package_dir.get("") == path
+            or os.path.samefile(package_path, root_dir)
+        ):
+            fill_package_dir.update(construct_package_dir(pkgs, path))
+
+    return packages
+
+
+def _nest_path(parent: _Path, path: _Path) -> str:
+    path = parent if path in {".", ""} else os.path.join(parent, path)
+    return os.path.normpath(path)
+
+
+def version(value: Union[Callable, Iterable[Union[str, int]], str]) -> str:
+    """When getting the version directly from an attribute,
+    it should be normalised to string.
+    """
+    if callable(value):
+        value = value()
+
+    value = cast(Iterable[Union[str, int]], value)
+
+    if not isinstance(value, str):
+        if hasattr(value, '__iter__'):
+            value = '.'.join(map(str, value))
+        else:
+            value = '%s' % value
+
+    return value
+
+
+def canonic_package_data(package_data: dict) -> dict:
+    if "*" in package_data:
+        package_data[""] = package_data.pop("*")
+    return package_data
+
+
+def canonic_data_files(
+    data_files: Union[list, dict], root_dir: Optional[_Path] = None
+) -> List[Tuple[str, List[str]]]:
+    """For compatibility with ``setup.py``, ``data_files`` should be a list
+    of pairs instead of a dict.
+
+    This function also expands glob patterns.
+    """
+    if isinstance(data_files, list):
+        return data_files
+
+    return [
+        (dest, glob_relative(patterns, root_dir))
+        for dest, patterns in data_files.items()
+    ]
+
+
+def entry_points(text: str, text_source="entry-points") -> Dict[str, dict]:
+    """Given the contents of entry-points file,
+    process it into a 2-level dictionary (``dict[str, dict[str, str]]``).
+    The first level keys are entry-point groups, the second level keys are
+    entry-point names, and the second level values are references to objects
+    (that correspond to the entry-point value).
+    """
+    parser = ConfigParser(default_section=None, delimiters=("=",))  # type: ignore
+    parser.optionxform = str  # case sensitive
+    parser.read_string(text, text_source)
+    groups = {k: dict(v.items()) for k, v in parser.items()}
+    groups.pop(parser.default_section, None)
+    return groups
+
+
+class EnsurePackagesDiscovered:
+    """Some expand functions require all the packages to already be discovered before
+    they run, e.g. :func:`read_attr`, :func:`resolve_class`, :func:`cmdclass`.
+
+    Therefore in some cases we will need to run autodiscovery during the evaluation of
+    the configuration. However, it is better to postpone calling package discovery as
+    much as possible, because some parameters can influence it (e.g. ``package_dir``),
+    and those might not have been processed yet.
+    """
+
+    def __init__(self, distribution: "Distribution"):
+        self._dist = distribution
+        self._called = False
+
+    def __call__(self):
+        """Trigger the automatic package discovery, if it is still necessary."""
+        if not self._called:
+            self._called = True
+            self._dist.set_defaults(name=False)  # Skip name, we can still be parsing
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, _exc_type, _exc_value, _traceback):
+        if self._called:
+            self._dist.set_defaults.analyse_name()  # Now we can set a default name
+
+    def _get_package_dir(self) -> Mapping[str, str]:
+        self()
+        pkg_dir = self._dist.package_dir
+        return {} if pkg_dir is None else pkg_dir
+
+    @property
+    def package_dir(self) -> Mapping[str, str]:
+        """Proxy to ``package_dir`` that may trigger auto-discovery when used."""
+        return LazyMappingProxy(self._get_package_dir)
+
+
+class LazyMappingProxy(Mapping[_K, _V]):
+    """Mapping proxy that delays resolving the target object, until really needed.
+
+    >>> def obtain_mapping():
+    ...     print("Running expensive function!")
+    ...     return {"key": "value", "other key": "other value"}
+    >>> mapping = LazyMappingProxy(obtain_mapping)
+    >>> mapping["key"]
+    Running expensive function!
+    'value'
+    >>> mapping["other key"]
+    'other value'
+    """
+
+    def __init__(self, obtain_mapping_value: Callable[[], Mapping[_K, _V]]):
+        self._obtain = obtain_mapping_value
+        self._value: Optional[Mapping[_K, _V]] = None
+
+    def _target(self) -> Mapping[_K, _V]:
+        if self._value is None:
+            self._value = self._obtain()
+        return self._value
+
+    def __getitem__(self, key: _K) -> _V:
+        return self._target()[key]
+
+    def __len__(self) -> int:
+        return len(self._target())
+
+    def __iter__(self) -> Iterator[_K]:
+        return iter(self._target())
diff --git a/libs/common/setuptools/config/pyprojecttoml.py b/libs/common/setuptools/config/pyprojecttoml.py
new file mode 100644
index 00000000..d995f0bc
--- /dev/null
+++ b/libs/common/setuptools/config/pyprojecttoml.py
@@ -0,0 +1,493 @@
+"""
+Load setuptools configuration from ``pyproject.toml`` files.
+
+**PRIVATE MODULE**: API reserved for setuptools internal usage only.
+"""
+import logging
+import os
+import warnings
+from contextlib import contextmanager
+from functools import partial
+from typing import TYPE_CHECKING, Callable, Dict, Optional, Mapping, Union
+
+from setuptools.errors import FileError, OptionError
+
+from . import expand as _expand
+from ._apply_pyprojecttoml import apply as _apply
+from ._apply_pyprojecttoml import _PREVIOUSLY_DEFINED, _WouldIgnoreField
+
+if TYPE_CHECKING:
+    from setuptools.dist import Distribution  # noqa
+
+_Path = Union[str, os.PathLike]
+_logger = logging.getLogger(__name__)
+
+
+def load_file(filepath: _Path) -> dict:
+    from setuptools.extern import tomli  # type: ignore
+
+    with open(filepath, "rb") as file:
+        return tomli.load(file)
+
+
+def validate(config: dict, filepath: _Path) -> bool:
+    from . import _validate_pyproject as validator
+
+    trove_classifier = validator.FORMAT_FUNCTIONS.get("trove-classifier")
+    if hasattr(trove_classifier, "_disable_download"):
+        # Improve reproducibility by default. See issue 31 for validate-pyproject.
+        trove_classifier._disable_download()  # type: ignore
+
+    try:
+        return validator.validate(config)
+    except validator.ValidationError as ex:
+        summary = f"configuration error: {ex.summary}"
+        if ex.name.strip("`") != "project":
+            # Probably it is just a field missing/misnamed, not worthy the verbosity...
+            _logger.debug(summary)
+            _logger.debug(ex.details)
+
+        error = f"invalid pyproject.toml config: {ex.name}."
+        raise ValueError(f"{error}\n{summary}") from None
+
+
+def apply_configuration(
+    dist: "Distribution",
+    filepath: _Path,
+    ignore_option_errors=False,
+) -> "Distribution":
+    """Apply the configuration from a ``pyproject.toml`` file into an existing
+    distribution object.
+    """
+    config = read_configuration(filepath, True, ignore_option_errors, dist)
+    return _apply(dist, config, filepath)
+
+
+def read_configuration(
+    filepath: _Path,
+    expand=True,
+    ignore_option_errors=False,
+    dist: Optional["Distribution"] = None,
+):
+    """Read given configuration file and returns options from it as a dict.
+
+    :param str|unicode filepath: Path to configuration file in the ``pyproject.toml``
+        format.
+
+    :param bool expand: Whether to expand directives and other computed values
+        (i.e. post-process the given configuration)
+
+    :param bool ignore_option_errors: Whether to silently ignore
+        options, values of which could not be resolved (e.g. due to exceptions
+        in directives such as file:, attr:, etc.).
+        If False exceptions are propagated as expected.
+
+    :param Distribution|None: Distribution object to which the configuration refers.
+        If not given a dummy object will be created and discarded after the
+        configuration is read. This is used for auto-discovery of packages in the case
+        a dynamic configuration (e.g. ``attr`` or ``cmdclass``) is expanded.
+        When ``expand=False`` this object is simply ignored.
+
+    :rtype: dict
+    """
+    filepath = os.path.abspath(filepath)
+
+    if not os.path.isfile(filepath):
+        raise FileError(f"Configuration file {filepath!r} does not exist.")
+
+    asdict = load_file(filepath) or {}
+    project_table = asdict.get("project", {})
+    tool_table = asdict.get("tool", {})
+    setuptools_table = tool_table.get("setuptools", {})
+    if not asdict or not (project_table or setuptools_table):
+        return {}  # User is not using pyproject to configure setuptools
+
+    if setuptools_table:
+        # TODO: Remove the following once the feature stabilizes:
+        msg = "Support for `[tool.setuptools]` in `pyproject.toml` is still *beta*."
+        warnings.warn(msg, _BetaConfiguration)
+
+    # There is an overall sense in the community that making include_package_data=True
+    # the default would be an improvement.
+    # `ini2toml` backfills include_package_data=False when nothing is explicitly given,
+    # therefore setting a default here is backwards compatible.
+    orig_setuptools_table = setuptools_table.copy()
+    if dist and getattr(dist, "include_package_data") is not None:
+        setuptools_table.setdefault("include-package-data", dist.include_package_data)
+    else:
+        setuptools_table.setdefault("include-package-data", True)
+    # Persist changes:
+    asdict["tool"] = tool_table
+    tool_table["setuptools"] = setuptools_table
+
+    try:
+        # Don't complain about unrelated errors (e.g. tools not using the "tool" table)
+        subset = {"project": project_table, "tool": {"setuptools": setuptools_table}}
+        validate(subset, filepath)
+    except Exception as ex:
+        # TODO: Remove the following once the feature stabilizes:
+        if _skip_bad_config(project_table, orig_setuptools_table, dist):
+            return {}
+        # TODO: After the previous statement is removed the try/except can be replaced
+        # by the _ignore_errors context manager.
+        if ignore_option_errors:
+            _logger.debug(f"ignored error: {ex.__class__.__name__} - {ex}")
+        else:
+            raise  # re-raise exception
+
+    if expand:
+        root_dir = os.path.dirname(filepath)
+        return expand_configuration(asdict, root_dir, ignore_option_errors, dist)
+
+    return asdict
+
+
+def _skip_bad_config(
+    project_cfg: dict, setuptools_cfg: dict, dist: Optional["Distribution"]
+) -> bool:
+    """Be temporarily forgiving with invalid ``pyproject.toml``"""
+    # See pypa/setuptools#3199 and pypa/cibuildwheel#1064
+
+    if dist is None or (
+        dist.metadata.name is None
+        and dist.metadata.version is None
+        and dist.install_requires is None
+    ):
+        # It seems that the build is not getting any configuration from other places
+        return False
+
+    if setuptools_cfg:
+        # If `[tool.setuptools]` is set, then `pyproject.toml` config is intentional
+        return False
+
+    given_config = set(project_cfg.keys())
+    popular_subset = {"name", "version", "python_requires", "requires-python"}
+    if given_config <= popular_subset:
+        # It seems that the docs in cibuildtool has been inadvertently encouraging users
+        # to create `pyproject.toml` files that are not compliant with the standards.
+        # Let's be forgiving for the time being.
+        warnings.warn(_InvalidFile.message(), _InvalidFile, stacklevel=2)
+        return True
+
+    return False
+
+
+def expand_configuration(
+    config: dict,
+    root_dir: Optional[_Path] = None,
+    ignore_option_errors: bool = False,
+    dist: Optional["Distribution"] = None,
+) -> dict:
+    """Given a configuration with unresolved fields (e.g. dynamic, cmdclass, ...)
+    find their final values.
+
+    :param dict config: Dict containing the configuration for the distribution
+    :param str root_dir: Top-level directory for the distribution/project
+        (the same directory where ``pyproject.toml`` is place)
+    :param bool ignore_option_errors: see :func:`read_configuration`
+    :param Distribution|None: Distribution object to which the configuration refers.
+        If not given a dummy object will be created and discarded after the
+        configuration is read. Used in the case a dynamic configuration
+        (e.g. ``attr`` or ``cmdclass``).
+
+    :rtype: dict
+    """
+    return _ConfigExpander(config, root_dir, ignore_option_errors, dist).expand()
+
+
+class _ConfigExpander:
+    def __init__(
+        self,
+        config: dict,
+        root_dir: Optional[_Path] = None,
+        ignore_option_errors: bool = False,
+        dist: Optional["Distribution"] = None,
+    ):
+        self.config = config
+        self.root_dir = root_dir or os.getcwd()
+        self.project_cfg = config.get("project", {})
+        self.dynamic = self.project_cfg.get("dynamic", [])
+        self.setuptools_cfg = config.get("tool", {}).get("setuptools", {})
+        self.dynamic_cfg = self.setuptools_cfg.get("dynamic", {})
+        self.ignore_option_errors = ignore_option_errors
+        self._dist = dist
+
+    def _ensure_dist(self) -> "Distribution":
+        from setuptools.dist import Distribution
+
+        attrs = {"src_root": self.root_dir, "name": self.project_cfg.get("name", None)}
+        return self._dist or Distribution(attrs)
+
+    def _process_field(self, container: dict, field: str, fn: Callable):
+        if field in container:
+            with _ignore_errors(self.ignore_option_errors):
+                container[field] = fn(container[field])
+
+    def _canonic_package_data(self, field="package-data"):
+        package_data = self.setuptools_cfg.get(field, {})
+        return _expand.canonic_package_data(package_data)
+
+    def expand(self):
+        self._expand_packages()
+        self._canonic_package_data()
+        self._canonic_package_data("exclude-package-data")
+
+        # A distribution object is required for discovering the correct package_dir
+        dist = self._ensure_dist()
+        ctx = _EnsurePackagesDiscovered(dist, self.project_cfg, self.setuptools_cfg)
+        with ctx as ensure_discovered:
+            package_dir = ensure_discovered.package_dir
+            self._expand_data_files()
+            self._expand_cmdclass(package_dir)
+            self._expand_all_dynamic(dist, package_dir)
+
+        return self.config
+
+    def _expand_packages(self):
+        packages = self.setuptools_cfg.get("packages")
+        if packages is None or isinstance(packages, (list, tuple)):
+            return
+
+        find = packages.get("find")
+        if isinstance(find, dict):
+            find["root_dir"] = self.root_dir
+            find["fill_package_dir"] = self.setuptools_cfg.setdefault("package-dir", {})
+            with _ignore_errors(self.ignore_option_errors):
+                self.setuptools_cfg["packages"] = _expand.find_packages(**find)
+
+    def _expand_data_files(self):
+        data_files = partial(_expand.canonic_data_files, root_dir=self.root_dir)
+        self._process_field(self.setuptools_cfg, "data-files", data_files)
+
+    def _expand_cmdclass(self, package_dir: Mapping[str, str]):
+        root_dir = self.root_dir
+        cmdclass = partial(_expand.cmdclass, package_dir=package_dir, root_dir=root_dir)
+        self._process_field(self.setuptools_cfg, "cmdclass", cmdclass)
+
+    def _expand_all_dynamic(self, dist: "Distribution", package_dir: Mapping[str, str]):
+        special = (  # need special handling
+            "version",
+            "readme",
+            "entry-points",
+            "scripts",
+            "gui-scripts",
+            "classifiers",
+            "dependencies",
+            "optional-dependencies",
+        )
+        # `_obtain` functions are assumed to raise appropriate exceptions/warnings.
+        obtained_dynamic = {
+            field: self._obtain(dist, field, package_dir)
+            for field in self.dynamic
+            if field not in special
+        }
+        obtained_dynamic.update(
+            self._obtain_entry_points(dist, package_dir) or {},
+            version=self._obtain_version(dist, package_dir),
+            readme=self._obtain_readme(dist),
+            classifiers=self._obtain_classifiers(dist),
+            dependencies=self._obtain_dependencies(dist),
+            optional_dependencies=self._obtain_optional_dependencies(dist),
+        )
+        # `None` indicates there is nothing in `tool.setuptools.dynamic` but the value
+        # might have already been set by setup.py/extensions, so avoid overwriting.
+        updates = {k: v for k, v in obtained_dynamic.items() if v is not None}
+        self.project_cfg.update(updates)
+
+    def _ensure_previously_set(self, dist: "Distribution", field: str):
+        previous = _PREVIOUSLY_DEFINED[field](dist)
+        if previous is None and not self.ignore_option_errors:
+            msg = (
+                f"No configuration found for dynamic {field!r}.\n"
+                "Some dynamic fields need to be specified via `tool.setuptools.dynamic`"
+                "\nothers must be specified via the equivalent attribute in `setup.py`."
+            )
+            raise OptionError(msg)
+
+    def _expand_directive(
+        self, specifier: str, directive, package_dir: Mapping[str, str]
+    ):
+        with _ignore_errors(self.ignore_option_errors):
+            root_dir = self.root_dir
+            if "file" in directive:
+                return _expand.read_files(directive["file"], root_dir)
+            if "attr" in directive:
+                return _expand.read_attr(directive["attr"], package_dir, root_dir)
+            raise ValueError(f"invalid `{specifier}`: {directive!r}")
+        return None
+
+    def _obtain(self, dist: "Distribution", field: str, package_dir: Mapping[str, str]):
+        if field in self.dynamic_cfg:
+            return self._expand_directive(
+                f"tool.setuptools.dynamic.{field}",
+                self.dynamic_cfg[field],
+                package_dir,
+            )
+        self._ensure_previously_set(dist, field)
+        return None
+
+    def _obtain_version(self, dist: "Distribution", package_dir: Mapping[str, str]):
+        # Since plugins can set version, let's silently skip if it cannot be obtained
+        if "version" in self.dynamic and "version" in self.dynamic_cfg:
+            return _expand.version(self._obtain(dist, "version", package_dir))
+        return None
+
+    def _obtain_readme(self, dist: "Distribution") -> Optional[Dict[str, str]]:
+        if "readme" not in self.dynamic:
+            return None
+
+        dynamic_cfg = self.dynamic_cfg
+        if "readme" in dynamic_cfg:
+            return {
+                "text": self._obtain(dist, "readme", {}),
+                "content-type": dynamic_cfg["readme"].get("content-type", "text/x-rst"),
+            }
+
+        self._ensure_previously_set(dist, "readme")
+        return None
+
+    def _obtain_entry_points(
+        self, dist: "Distribution", package_dir: Mapping[str, str]
+    ) -> Optional[Dict[str, dict]]:
+        fields = ("entry-points", "scripts", "gui-scripts")
+        if not any(field in self.dynamic for field in fields):
+            return None
+
+        text = self._obtain(dist, "entry-points", package_dir)
+        if text is None:
+            return None
+
+        groups = _expand.entry_points(text)
+        expanded = {"entry-points": groups}
+
+        def _set_scripts(field: str, group: str):
+            if group in groups:
+                value = groups.pop(group)
+                if field not in self.dynamic:
+                    msg = _WouldIgnoreField.message(field, value)
+                    warnings.warn(msg, _WouldIgnoreField)
+                # TODO: Don't set field when support for pyproject.toml stabilizes
+                #       instead raise an error as specified in PEP 621
+                expanded[field] = value
+
+        _set_scripts("scripts", "console_scripts")
+        _set_scripts("gui-scripts", "gui_scripts")
+
+        return expanded
+
+    def _obtain_classifiers(self, dist: "Distribution"):
+        if "classifiers" in self.dynamic:
+            value = self._obtain(dist, "classifiers", {})
+            if value:
+                return value.splitlines()
+        return None
+
+    def _obtain_dependencies(self, dist: "Distribution"):
+        if "dependencies" in self.dynamic:
+            value = self._obtain(dist, "dependencies", {})
+            if value:
+                return _parse_requirements_list(value)
+        return None
+
+    def _obtain_optional_dependencies(self, dist: "Distribution"):
+        if "optional-dependencies" not in self.dynamic:
+            return None
+        if "optional-dependencies" in self.dynamic_cfg:
+            optional_dependencies_map = self.dynamic_cfg["optional-dependencies"]
+            assert isinstance(optional_dependencies_map, dict)
+            return {
+                group: _parse_requirements_list(self._expand_directive(
+                    f"tool.setuptools.dynamic.optional-dependencies.{group}",
+                    directive,
+                    {},
+                ))
+                for group, directive in optional_dependencies_map.items()
+            }
+        self._ensure_previously_set(dist, "optional-dependencies")
+        return None
+
+
+def _parse_requirements_list(value):
+    return [
+        line
+        for line in value.splitlines()
+        if line.strip() and not line.strip().startswith("#")
+    ]
+
+
+@contextmanager
+def _ignore_errors(ignore_option_errors: bool):
+    if not ignore_option_errors:
+        yield
+        return
+
+    try:
+        yield
+    except Exception as ex:
+        _logger.debug(f"ignored error: {ex.__class__.__name__} - {ex}")
+
+
+class _EnsurePackagesDiscovered(_expand.EnsurePackagesDiscovered):
+    def __init__(
+        self, distribution: "Distribution", project_cfg: dict, setuptools_cfg: dict
+    ):
+        super().__init__(distribution)
+        self._project_cfg = project_cfg
+        self._setuptools_cfg = setuptools_cfg
+
+    def __enter__(self):
+        """When entering the context, the values of ``packages``, ``py_modules`` and
+        ``package_dir`` that are missing in ``dist`` are copied from ``setuptools_cfg``.
+        """
+        dist, cfg = self._dist, self._setuptools_cfg
+        package_dir: Dict[str, str] = cfg.setdefault("package-dir", {})
+        package_dir.update(dist.package_dir or {})
+        dist.package_dir = package_dir  # needs to be the same object
+
+        dist.set_defaults._ignore_ext_modules()  # pyproject.toml-specific behaviour
+
+        # Set `name`, `py_modules` and `packages` in dist to short-circuit
+        # auto-discovery, but avoid overwriting empty lists purposefully set by users.
+        if dist.metadata.name is None:
+            dist.metadata.name = self._project_cfg.get("name")
+        if dist.py_modules is None:
+            dist.py_modules = cfg.get("py-modules")
+        if dist.packages is None:
+            dist.packages = cfg.get("packages")
+
+        return super().__enter__()
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        """When exiting the context, if values of ``packages``, ``py_modules`` and
+        ``package_dir`` are missing in ``setuptools_cfg``, copy from ``dist``.
+        """
+        # If anything was discovered set them back, so they count in the final config.
+        self._setuptools_cfg.setdefault("packages", self._dist.packages)
+        self._setuptools_cfg.setdefault("py-modules", self._dist.py_modules)
+        return super().__exit__(exc_type, exc_value, traceback)
+
+
+class _BetaConfiguration(UserWarning):
+    """Explicitly inform users that some `pyproject.toml` configuration is *beta*"""
+
+
+class _InvalidFile(UserWarning):
+    """The given `pyproject.toml` file is invalid and would be ignored.
+    !!\n\n
+    ############################
+    # Invalid `pyproject.toml` #
+    ############################
+
+    Any configurations in `pyproject.toml` will be ignored.
+    Please note that future releases of setuptools will halt the build process
+    if an invalid file is given.
+
+    To prevent setuptools from considering `pyproject.toml` please
+    DO NOT include the `[project]` or `[tool.setuptools]` tables in your file.
+    \n\n!!
+    """
+
+    @classmethod
+    def message(cls):
+        from inspect import cleandoc
+        return cleandoc(cls.__doc__)
diff --git a/libs/common/setuptools/config.py b/libs/common/setuptools/config/setupcfg.py
similarity index 56%
rename from libs/common/setuptools/config.py
rename to libs/common/setuptools/config/setupcfg.py
index 9b9a0c45..c2a974de 100644
--- a/libs/common/setuptools/config.py
+++ b/libs/common/setuptools/config/setupcfg.py
@@ -1,26 +1,47 @@
-from __future__ import absolute_import, unicode_literals
-import io
-import os
-import sys
+"""
+Load setuptools configuration from ``setup.cfg`` files.
 
-import warnings
+**API will be made private in the future**
+"""
+import os
+
+import contextlib
 import functools
+import warnings
 from collections import defaultdict
 from functools import partial
 from functools import wraps
-from importlib import import_module
+from typing import (TYPE_CHECKING, Callable, Any, Dict, Generic, Iterable, List,
+                    Optional, Tuple, TypeVar, Union)
 
 from distutils.errors import DistutilsOptionError, DistutilsFileError
-from setuptools.extern.packaging.version import LegacyVersion, parse
+from setuptools.extern.packaging.requirements import Requirement, InvalidRequirement
+from setuptools.extern.packaging.version import Version, InvalidVersion
 from setuptools.extern.packaging.specifiers import SpecifierSet
-from setuptools.extern.six import string_types, PY3
+from setuptools._deprecation_warning import SetuptoolsDeprecationWarning
 
+from . import expand
 
-__metaclass__ = type
+if TYPE_CHECKING:
+    from setuptools.dist import Distribution  # noqa
+    from distutils.dist import DistributionMetadata  # noqa
+
+_Path = Union[str, os.PathLike]
+SingleCommandOptions = Dict["str", Tuple["str", Any]]
+"""Dict that associate the name of the options of a particular command to a
+tuple. The first element of the tuple indicates the origin of the option value
+(e.g. the name of the configuration file where it was read from),
+while the second element of the tuple is the option value itself
+"""
+AllCommandOptions = Dict["str", SingleCommandOptions]  # cmd name => its options
+Target = TypeVar("Target", bound=Union["Distribution", "DistributionMetadata"])
 
 
 def read_configuration(
-        filepath, find_others=False, ignore_option_errors=False):
+    filepath: _Path,
+    find_others=False,
+    ignore_option_errors=False
+) -> dict:
     """Read given configuration file and returns options from it as a dict.
 
     :param str|unicode filepath: Path to configuration file
@@ -36,37 +57,53 @@ def read_configuration(
 
     :rtype: dict
     """
-    from setuptools.dist import Distribution, _Distribution
+    from setuptools.dist import Distribution
+
+    dist = Distribution()
+    filenames = dist.find_config_files() if find_others else []
+    handlers = _apply(dist, filepath, filenames, ignore_option_errors)
+    return configuration_to_dict(handlers)
+
+
+def apply_configuration(dist: "Distribution", filepath: _Path) -> "Distribution":
+    """Apply the configuration from a ``setup.cfg`` file into an existing
+    distribution object.
+    """
+    _apply(dist, filepath)
+    dist._finalize_requires()
+    return dist
+
+
+def _apply(
+    dist: "Distribution", filepath: _Path,
+    other_files: Iterable[_Path] = (),
+    ignore_option_errors: bool = False,
+) -> Tuple["ConfigHandler", ...]:
+    """Read configuration from ``filepath`` and applies to the ``dist`` object."""
+    from setuptools.dist import _Distribution
 
     filepath = os.path.abspath(filepath)
 
     if not os.path.isfile(filepath):
-        raise DistutilsFileError(
-            'Configuration file %s does not exist.' % filepath)
+        raise DistutilsFileError('Configuration file %s does not exist.' % filepath)
 
     current_directory = os.getcwd()
     os.chdir(os.path.dirname(filepath))
+    filenames = [*other_files, filepath]
 
     try:
-        dist = Distribution()
-
-        filenames = dist.find_config_files() if find_others else []
-        if filepath not in filenames:
-            filenames.append(filepath)
-
         _Distribution.parse_config_files(dist, filenames=filenames)
-
         handlers = parse_configuration(
-            dist, dist.command_options,
-            ignore_option_errors=ignore_option_errors)
-
+            dist, dist.command_options, ignore_option_errors=ignore_option_errors
+        )
+        dist._finalize_license_files()
     finally:
         os.chdir(current_directory)
 
-    return configuration_to_dict(handlers)
+    return handlers
 
 
-def _get_option(target_obj, key):
+def _get_option(target_obj: Target, key: str):
     """
     Given a target object and option key, get that option from
     the target object, either through a get_{key} method or
@@ -78,7 +115,7 @@ def _get_option(target_obj, key):
     return getter()
 
 
-def configuration_to_dict(handlers):
+def configuration_to_dict(handlers: Tuple["ConfigHandler", ...]) -> dict:
     """Returns configuration data gathered by given handlers as a dict.
 
     :param list[ConfigHandler] handlers: Handlers list,
@@ -86,7 +123,7 @@ def configuration_to_dict(handlers):
 
     :rtype: dict
     """
-    config_dict = defaultdict(dict)
+    config_dict: dict = defaultdict(dict)
 
     for handler in handlers:
         for option in handler.set_options:
@@ -97,7 +134,10 @@ def configuration_to_dict(handlers):
 
 
 def parse_configuration(
-        distribution, command_options, ignore_option_errors=False):
+    distribution: "Distribution",
+    command_options: AllCommandOptions,
+    ignore_option_errors=False
+) -> Tuple["ConfigMetadataHandler", "ConfigOptionsHandler"]:
     """Performs additional parsing of configuration options
     for a distribution.
 
@@ -111,36 +151,88 @@ def parse_configuration(
         If False exceptions are propagated as expected.
     :rtype: list
     """
-    options = ConfigOptionsHandler(
-        distribution, command_options, ignore_option_errors)
-    options.parse()
+    with expand.EnsurePackagesDiscovered(distribution) as ensure_discovered:
+        options = ConfigOptionsHandler(
+            distribution,
+            command_options,
+            ignore_option_errors,
+            ensure_discovered,
+        )
 
-    meta = ConfigMetadataHandler(
-        distribution.metadata, command_options, ignore_option_errors,
-        distribution.package_dir)
-    meta.parse()
+        options.parse()
+        if not distribution.package_dir:
+            distribution.package_dir = options.package_dir  # Filled by `find_packages`
+
+        meta = ConfigMetadataHandler(
+            distribution.metadata,
+            command_options,
+            ignore_option_errors,
+            ensure_discovered,
+            distribution.package_dir,
+            distribution.src_root,
+        )
+        meta.parse()
 
     return meta, options
 
 
-class ConfigHandler:
+def _warn_accidental_env_marker_misconfig(label: str, orig_value: str, parsed: list):
+    """Because users sometimes misinterpret this configuration:
+
+    [options.extras_require]
+    foo = bar;python_version<"4"
+
+    It looks like one requirement with an environment marker
+    but because there is no newline, it's parsed as two requirements
+    with a semicolon as separator.
+
+    Therefore, if:
+        * input string does not contain a newline AND
+        * parsed result contains two requirements AND
+        * parsing of the two parts from the result (";")
+        leads in a valid Requirement with a valid marker
+    a UserWarning is shown to inform the user about the possible problem.
+    """
+    if "\n" in orig_value or len(parsed) != 2:
+        return
+
+    with contextlib.suppress(InvalidRequirement):
+        original_requirements_str = ";".join(parsed)
+        req = Requirement(original_requirements_str)
+        if req.marker is not None:
+            msg = (
+                f"One of the parsed requirements in `{label}` "
+                f"looks like a valid environment marker: '{parsed[1]}'\n"
+                "Make sure that the config is correct and check "
+                "https://setuptools.pypa.io/en/latest/userguide/declarative_config.html#opt-2"  # noqa: E501
+            )
+            warnings.warn(msg, UserWarning)
+
+
+class ConfigHandler(Generic[Target]):
     """Handles metadata supplied in configuration files."""
 
-    section_prefix = None
+    section_prefix: str
     """Prefix for config sections handled by this handler.
     Must be provided by class heirs.
 
     """
 
-    aliases = {}
+    aliases: Dict[str, str] = {}
     """Options aliases.
     For compatibility with various packages. E.g.: d2to1 and pbr.
     Note: `-` in keys is replaced with `_` by config parser.
 
     """
 
-    def __init__(self, target_obj, options, ignore_option_errors=False):
-        sections = {}
+    def __init__(
+        self,
+        target_obj: Target,
+        options: AllCommandOptions,
+        ignore_option_errors,
+        ensure_discovered: expand.EnsurePackagesDiscovered,
+    ):
+        sections: AllCommandOptions = {}
 
         section_prefix = self.section_prefix
         for section_name, section_options in options.items():
@@ -153,13 +245,15 @@ class ConfigHandler:
         self.ignore_option_errors = ignore_option_errors
         self.target_obj = target_obj
         self.sections = sections
-        self.set_options = []
+        self.set_options: List[str] = []
+        self.ensure_discovered = ensure_discovered
 
     @property
     def parsers(self):
         """Metadata item name to parser function mapping."""
         raise NotImplementedError(
-            '%s must provide .parsers property' % self.__class__.__name__)
+            '%s must provide .parsers property' % self.__class__.__name__
+        )
 
     def __setitem__(self, option_name, value):
         unknown = tuple()
@@ -232,7 +326,8 @@ class ConfigHandler:
             key, sep, val = line.partition(separator)
             if sep != separator:
                 raise DistutilsOptionError(
-                    'Unable to parse option value to dict: %s' % value)
+                    'Unable to parse option value to dict: %s' % value
+                )
             result[key.strip()] = val.strip()
 
         return result
@@ -258,17 +353,20 @@ class ConfigHandler:
         :param key:
         :rtype: callable
         """
+
         def parser(value):
             exclude_directive = 'file:'
             if value.startswith(exclude_directive):
                 raise ValueError(
                     'Only strings are accepted for the {0} field, '
-                    'files are not accepted'.format(key))
+                    'files are not accepted'.format(key)
+                )
             return value
+
         return parser
 
     @classmethod
-    def _parse_file(cls, value):
+    def _parse_file(cls, value, root_dir: _Path):
         """Represents value as a string, allowing including text
         from nearest files using `file:` directive.
 
@@ -283,34 +381,17 @@ class ConfigHandler:
         """
         include_directive = 'file:'
 
-        if not isinstance(value, string_types):
+        if not isinstance(value, str):
             return value
 
         if not value.startswith(include_directive):
             return value
 
-        spec = value[len(include_directive):]
-        filepaths = (os.path.abspath(path.strip()) for path in spec.split(','))
-        return '\n'.join(
-            cls._read_file(path)
-            for path in filepaths
-            if (cls._assert_local(path) or True)
-            and os.path.isfile(path)
-        )
+        spec = value[len(include_directive) :]
+        filepaths = (path.strip() for path in spec.split(','))
+        return expand.read_files(filepaths, root_dir)
 
-    @staticmethod
-    def _assert_local(filepath):
-        if not filepath.startswith(os.getcwd()):
-            raise DistutilsOptionError(
-                '`file:` directive can not access %s' % filepath)
-
-    @staticmethod
-    def _read_file(filepath):
-        with io.open(filepath, encoding='utf-8') as f:
-            return f.read()
-
-    @classmethod
-    def _parse_attr(cls, value, package_dir=None):
+    def _parse_attr(self, value, package_dir, root_dir: _Path):
         """Represents value as a module attribute.
 
         Examples:
@@ -324,35 +405,11 @@ class ConfigHandler:
         if not value.startswith(attr_directive):
             return value
 
-        attrs_path = value.replace(attr_directive, '').strip().split('.')
-        attr_name = attrs_path.pop()
+        attr_desc = value.replace(attr_directive, '')
 
-        module_name = '.'.join(attrs_path)
-        module_name = module_name or '__init__'
-
-        parent_path = os.getcwd()
-        if package_dir:
-            if attrs_path[0] in package_dir:
-                # A custom path was specified for the module we want to import
-                custom_path = package_dir[attrs_path[0]]
-                parts = custom_path.rsplit('/', 1)
-                if len(parts) > 1:
-                    parent_path = os.path.join(os.getcwd(), parts[0])
-                    module_name = parts[1]
-                else:
-                    module_name = custom_path
-            elif '' in package_dir:
-                # A custom parent directory was specified for all root modules
-                parent_path = os.path.join(os.getcwd(), package_dir[''])
-        sys.path.insert(0, parent_path)
-        try:
-            module = import_module(module_name)
-            value = getattr(module, attr_name)
-
-        finally:
-            sys.path = sys.path[1:]
-
-        return value
+        # Make sure package_dir is populated correctly, so `attr:` directives can work
+        package_dir.update(self.ensure_discovered.package_dir)
+        return expand.read_attr(attr_desc, package_dir, root_dir)
 
     @classmethod
     def _get_parser_compound(cls, *parse_methods):
@@ -363,6 +420,7 @@ class ConfigHandler:
         :param parse_methods:
         :rtype: callable
         """
+
         def parse(value):
             parsed = value
 
@@ -374,33 +432,43 @@ class ConfigHandler:
         return parse
 
     @classmethod
-    def _parse_section_to_dict(cls, section_options, values_parser=None):
+    def _parse_section_to_dict_with_key(cls, section_options, values_parser):
         """Parses section options into a dictionary.
 
-        Optionally applies a given parser to values.
+        Applies a given parser to each option in a section.
 
         :param dict section_options:
-        :param callable values_parser:
+        :param callable values_parser: function with 2 args corresponding to key, value
         :rtype: dict
         """
         value = {}
-        values_parser = values_parser or (lambda val: val)
         for key, (_, val) in section_options.items():
-            value[key] = values_parser(val)
+            value[key] = values_parser(key, val)
         return value
 
+    @classmethod
+    def _parse_section_to_dict(cls, section_options, values_parser=None):
+        """Parses section options into a dictionary.
+
+        Optionally applies a given parser to each value.
+
+        :param dict section_options:
+        :param callable values_parser: function with 1 arg corresponding to option value
+        :rtype: dict
+        """
+        parser = (lambda _, v: values_parser(v)) if values_parser else (lambda _, v: v)
+        return cls._parse_section_to_dict_with_key(section_options, parser)
+
     def parse_section(self, section_options):
         """Parses configuration file section.
 
         :param dict section_options:
         """
         for (name, (_, value)) in section_options.items():
-            try:
+            with contextlib.suppress(KeyError):
+                # Keep silent for a new option may appear anytime.
                 self[name] = value
 
-            except KeyError:
-                pass  # Keep silent for a new option may appear anytime.
-
     def parse(self):
         """Parses configuration file items from one
         or more related sections.
@@ -412,26 +480,29 @@ class ConfigHandler:
             if section_name:  # [section.option] variant
                 method_postfix = '_%s' % section_name
 
-            section_parser_method = getattr(
+            section_parser_method: Optional[Callable] = getattr(
                 self,
                 # Dots in section names are translated into dunderscores.
                 ('parse_section%s' % method_postfix).replace('.', '__'),
-                None)
+                None,
+            )
 
             if section_parser_method is None:
                 raise DistutilsOptionError(
-                    'Unsupported distribution option section: [%s.%s]' % (
-                        self.section_prefix, section_name))
+                    'Unsupported distribution option section: [%s.%s]'
+                    % (self.section_prefix, section_name)
+                )
 
             section_parser_method(section_options)
 
     def _deprecated_config_handler(self, func, msg, warning_class):
-        """ this function will wrap around parameters that are deprecated
+        """this function will wrap around parameters that are deprecated
 
         :param msg: deprecation message
         :param warning_class: class of warning exception to be raised
         :param func: function to be wrapped around
         """
+
         @wraps(func)
         def config_handler(*args, **kwargs):
             warnings.warn(msg, warning_class)
@@ -440,7 +511,7 @@ class ConfigHandler:
         return config_handler
 
 
-class ConfigMetadataHandler(ConfigHandler):
+class ConfigMetadataHandler(ConfigHandler["DistributionMetadata"]):
 
     section_prefix = 'metadata'
 
@@ -457,17 +528,24 @@ class ConfigMetadataHandler(ConfigHandler):
 
     """
 
-    def __init__(self, target_obj, options, ignore_option_errors=False,
-                 package_dir=None):
-        super(ConfigMetadataHandler, self).__init__(target_obj, options,
-                                                    ignore_option_errors)
+    def __init__(
+        self,
+        target_obj: "DistributionMetadata",
+        options: AllCommandOptions,
+        ignore_option_errors: bool,
+        ensure_discovered: expand.EnsurePackagesDiscovered,
+        package_dir: Optional[dict] = None,
+        root_dir: _Path = os.curdir
+    ):
+        super().__init__(target_obj, options, ignore_option_errors, ensure_discovered)
         self.package_dir = package_dir
+        self.root_dir = root_dir
 
     @property
     def parsers(self):
         """Metadata item name to parser function mapping."""
         parse_list = self._parse_list
-        parse_file = self._parse_file
+        parse_file = partial(self._parse_file, root_dir=self.root_dir)
         parse_dict = self._parse_dict
         exclude_files_parser = self._exclude_files_parser
 
@@ -479,10 +557,17 @@ class ConfigMetadataHandler(ConfigHandler):
                 parse_list,
                 "The requires parameter is deprecated, please use "
                 "install_requires for runtime dependencies.",
-                DeprecationWarning),
+                SetuptoolsDeprecationWarning,
+            ),
             'obsoletes': parse_list,
             'classifiers': self._get_parser_compound(parse_file, parse_list),
             'license': exclude_files_parser('license'),
+            'license_file': self._deprecated_config_handler(
+                exclude_files_parser('license_file'),
+                "The license_file parameter is deprecated, "
+                "use license_files instead.",
+                SetuptoolsDeprecationWarning,
+            ),
             'license_files': parse_list,
             'description': parse_file,
             'long_description': parse_file,
@@ -497,13 +582,15 @@ class ConfigMetadataHandler(ConfigHandler):
         :rtype: str
 
         """
-        version = self._parse_file(value)
+        version = self._parse_file(value, self.root_dir)
 
         if version != value:
             version = version.strip()
             # Be strict about versions loaded from file because it's easy to
             # accidentally include newlines and other unintended content
-            if isinstance(parse(version), LegacyVersion):
+            try:
+                Version(version)
+            except InvalidVersion:
                 tmpl = (
                     'Version loaded from {value} does not '
                     'comply with PEP 440: {version}'
@@ -512,53 +599,76 @@ class ConfigMetadataHandler(ConfigHandler):
 
             return version
 
-        version = self._parse_attr(value, self.package_dir)
-
-        if callable(version):
-            version = version()
-
-        if not isinstance(version, string_types):
-            if hasattr(version, '__iter__'):
-                version = '.'.join(map(str, version))
-            else:
-                version = '%s' % version
-
-        return version
+        return expand.version(self._parse_attr(value, self.package_dir, self.root_dir))
 
 
-class ConfigOptionsHandler(ConfigHandler):
+class ConfigOptionsHandler(ConfigHandler["Distribution"]):
 
     section_prefix = 'options'
 
+    def __init__(
+        self,
+        target_obj: "Distribution",
+        options: AllCommandOptions,
+        ignore_option_errors: bool,
+        ensure_discovered: expand.EnsurePackagesDiscovered,
+    ):
+        super().__init__(target_obj, options, ignore_option_errors, ensure_discovered)
+        self.root_dir = target_obj.src_root
+        self.package_dir: Dict[str, str] = {}  # To be filled by `find_packages`
+
+    @classmethod
+    def _parse_list_semicolon(cls, value):
+        return cls._parse_list(value, separator=';')
+
+    def _parse_file_in_root(self, value):
+        return self._parse_file(value, root_dir=self.root_dir)
+
+    def _parse_requirements_list(self, label: str, value: str):
+        # Parse a requirements list, either by reading in a `file:`, or a list.
+        parsed = self._parse_list_semicolon(self._parse_file_in_root(value))
+        _warn_accidental_env_marker_misconfig(label, value, parsed)
+        # Filter it to only include lines that are not comments. `parse_list`
+        # will have stripped each line and filtered out empties.
+        return [line for line in parsed if not line.startswith("#")]
+
     @property
     def parsers(self):
         """Metadata item name to parser function mapping."""
         parse_list = self._parse_list
-        parse_list_semicolon = partial(self._parse_list, separator=';')
         parse_bool = self._parse_bool
         parse_dict = self._parse_dict
+        parse_cmdclass = self._parse_cmdclass
 
         return {
             'zip_safe': parse_bool,
-            'use_2to3': parse_bool,
             'include_package_data': parse_bool,
             'package_dir': parse_dict,
-            'use_2to3_fixers': parse_list,
-            'use_2to3_exclude_fixers': parse_list,
-            'convert_2to3_doctests': parse_list,
             'scripts': parse_list,
             'eager_resources': parse_list,
             'dependency_links': parse_list,
-            'namespace_packages': parse_list,
-            'install_requires': parse_list_semicolon,
-            'setup_requires': parse_list_semicolon,
-            'tests_require': parse_list_semicolon,
+            'namespace_packages': self._deprecated_config_handler(
+                parse_list,
+                "The namespace_packages parameter is deprecated, "
+                "consider using implicit namespaces instead (PEP 420).",
+                SetuptoolsDeprecationWarning,
+            ),
+            'install_requires': partial(
+                self._parse_requirements_list, "install_requires"
+            ),
+            'setup_requires': self._parse_list_semicolon,
+            'tests_require': self._parse_list_semicolon,
             'packages': self._parse_packages,
-            'entry_points': self._parse_file,
+            'entry_points': self._parse_file_in_root,
             'py_modules': parse_list,
             'python_requires': SpecifierSet,
+            'cmdclass': parse_cmdclass,
         }
 
+    def _parse_cmdclass(self, value):
+        package_dir = self.ensure_discovered.package_dir
+        return expand.cmdclass(self._parse_dict(value), package_dir, self.root_dir)
+
     def _parse_packages(self, value):
         """Parses `packages` option value.
 
@@ -571,21 +681,18 @@ class ConfigOptionsHandler(ConfigHandler):
         if trimmed_value not in find_directives:
             return self._parse_list(value)
 
-        findns = trimmed_value == find_directives[1]
-        if findns and not PY3:
-            raise DistutilsOptionError(
-                'find_namespace: directive is unsupported on Python < 3.3')
-
         # Read function arguments from a dedicated section.
         find_kwargs = self.parse_section_packages__find(
-            self.sections.get('packages.find', {}))
+            self.sections.get('packages.find', {})
+        )
 
-        if findns:
-            from setuptools import find_namespace_packages as find_packages
-        else:
-            from setuptools import find_packages
+        find_kwargs.update(
+            namespaces=(trimmed_value == find_directives[1]),
+            root_dir=self.root_dir,
+            fill_package_dir=self.package_dir,
+        )
 
-        return find_packages(**find_kwargs)
+        return expand.find_packages(**find_kwargs)
 
     def parse_section_packages__find(self, section_options):
         """Parses `packages.find` configuration file section.
@@ -594,13 +701,13 @@ class ConfigOptionsHandler(ConfigHandler):
 
         :param dict section_options:
         """
-        section_data = self._parse_section_to_dict(
-            section_options, self._parse_list)
+        section_data = self._parse_section_to_dict(section_options, self._parse_list)
 
         valid_keys = ['where', 'include', 'exclude']
 
         find_kwargs = dict(
-            [(k, v) for k, v in section_data.items() if k in valid_keys and v])
+            [(k, v) for k, v in section_data.items() if k in valid_keys and v]
+        )
 
         where = find_kwargs.get('where')
         if where is not None:
@@ -617,14 +724,8 @@ class ConfigOptionsHandler(ConfigHandler):
         self['entry_points'] = parsed
 
     def _parse_package_data(self, section_options):
-        parsed = self._parse_section_to_dict(section_options, self._parse_list)
-
-        root = parsed.get('*')
-        if root:
-            parsed[''] = root
-            del parsed['*']
-
-        return parsed
+        package_data = self._parse_section_to_dict(section_options, self._parse_list)
+        return expand.canonic_package_data(package_data)
 
     def parse_section_package_data(self, section_options):
         """Parses `package_data` configuration file section.
@@ -638,17 +739,19 @@ class ConfigOptionsHandler(ConfigHandler):
 
         :param dict section_options:
         """
-        self['exclude_package_data'] = self._parse_package_data(
-            section_options)
+        self['exclude_package_data'] = self._parse_package_data(section_options)
 
     def parse_section_extras_require(self, section_options):
         """Parses `extras_require` configuration file section.
 
         :param dict section_options:
         """
-        parse_list = partial(self._parse_list, separator=';')
-        self['extras_require'] = self._parse_section_to_dict(
-            section_options, parse_list)
+        parsed = self._parse_section_to_dict_with_key(
+            section_options,
+            lambda k, v: self._parse_requirements_list(f"extras_require[{k}]", v)
+        )
+
+        self['extras_require'] = parsed
 
     def parse_section_data_files(self, section_options):
         """Parses `data_files` configuration file section.
@@ -656,4 +759,4 @@ class ConfigOptionsHandler(ConfigHandler):
         :param dict section_options:
         """
         parsed = self._parse_section_to_dict(section_options, self._parse_list)
-        self['data_files'] = [(k, v) for k, v in parsed.items()]
+        self['data_files'] = expand.canonic_data_files(parsed, self.root_dir)
diff --git a/libs/common/setuptools/dep_util.py b/libs/common/setuptools/dep_util.py
index 2931c13e..521eb716 100644
--- a/libs/common/setuptools/dep_util.py
+++ b/libs/common/setuptools/dep_util.py
@@ -1,5 +1,6 @@
 from distutils.dep_util import newer_group
 
+
 # yes, this is was almost entirely copy-pasted from
 # 'newer_pairwise()', this is just another convenience
 # function.
@@ -10,7 +11,8 @@ def newer_pairwise_group(sources_groups, targets):
     of 'newer_group()'.
     """
     if len(sources_groups) != len(targets):
-        raise ValueError("'sources_group' and 'targets' must be the same length")
+        raise ValueError(
+            "'sources_group' and 'targets' must be the same length")
 
     # build a pair of lists (sources_groups, targets) where source is newer
     n_sources = []
diff --git a/libs/common/setuptools/depends.py b/libs/common/setuptools/depends.py
index a37675cb..adffd12d 100644
--- a/libs/common/setuptools/depends.py
+++ b/libs/common/setuptools/depends.py
@@ -1,12 +1,12 @@
 import sys
 import marshal
 import contextlib
-from distutils.version import StrictVersion
+import dis
 
-from .py33compat import Bytecode
+from setuptools.extern.packaging import version
 
-from .py27compat import find_module, PY_COMPILED, PY_FROZEN, PY_SOURCE
-from . import py27compat
+from ._imp import find_module, PY_COMPILED, PY_FROZEN, PY_SOURCE
+from . import _imp
 
 
 __all__ = [
@@ -22,7 +22,7 @@ class Require:
             attribute=None, format=None):
 
         if format is None and requested_version is not None:
-            format = StrictVersion
+            format = version.Version
 
         if format is not None:
             requested_version = format(requested_version)
@@ -41,7 +41,7 @@ class Require:
     def version_ok(self, version):
         """Is 'version' sufficiently up-to-date?"""
         return self.attribute is None or self.format is None or \
-            str(version) != "unknown" and version >= self.requested_version
+            str(version) != "unknown" and self.format(version) >= self.requested_version
 
     def get_version(self, paths=None, default="unknown"):
         """Get version number of installed module, 'None', or 'default'
@@ -79,7 +79,7 @@ class Require:
         version = self.get_version(paths)
         if version is None:
             return False
-        return self.version_ok(version)
+        return self.version_ok(str(version))
 
 
 def maybe_close(f):
@@ -111,12 +111,12 @@ def get_module_constant(module, symbol, default=-1, paths=None):
             f.read(8)  # skip magic & date
             code = marshal.load(f)
         elif kind == PY_FROZEN:
-            code = py27compat.get_frozen_object(module, paths)
+            code = _imp.get_frozen_object(module, paths)
         elif kind == PY_SOURCE:
             code = compile(f.read(), path, 'exec')
         else:
             # Not something we can parse; we'll have to import it.  :(
-            imported = py27compat.get_module(module, paths, info)
+            imported = _imp.get_module(module, paths, info)
             return getattr(imported, symbol, None)
 
     return extract_constant(code, symbol, default)
@@ -146,7 +146,7 @@ def extract_constant(code, symbol, default=-1):
 
     const = default
 
-    for byte_code in Bytecode(code):
+    for byte_code in dis.Bytecode(code):
         op = byte_code.opcode
         arg = byte_code.arg
 
diff --git a/libs/common/setuptools/discovery.py b/libs/common/setuptools/discovery.py
new file mode 100644
index 00000000..98fc2a7f
--- /dev/null
+++ b/libs/common/setuptools/discovery.py
@@ -0,0 +1,600 @@
+"""Automatic discovery of Python modules and packages (for inclusion in the
+distribution) and other config values.
+
+For the purposes of this module, the following nomenclature is used:
+
+- "src-layout": a directory representing a Python project that contains a "src"
+  folder. Everything under the "src" folder is meant to be included in the
+  distribution when packaging the project. Example::
+
+    .
+    ├── tox.ini
+    ├── pyproject.toml
+    └── src/
+        └── mypkg/
+            ├── __init__.py
+            ├── mymodule.py
+            └── my_data_file.txt
+
+- "flat-layout": a Python project that does not use "src-layout" but instead
+  have a directory under the project root for each package::
+
+    .
+    ├── tox.ini
+    ├── pyproject.toml
+    └── mypkg/
+        ├── __init__.py
+        ├── mymodule.py
+        └── my_data_file.txt
+
+- "single-module": a project that contains a single Python script direct under
+  the project root (no directory used)::
+
+    .
+    ├── tox.ini
+    ├── pyproject.toml
+    └── mymodule.py
+
+"""
+
+import itertools
+import os
+from fnmatch import fnmatchcase
+from glob import glob
+from pathlib import Path
+from typing import (
+    TYPE_CHECKING,
+    Callable,
+    Dict,
+    Iterable,
+    Iterator,
+    List,
+    Mapping,
+    Optional,
+    Tuple,
+    Union
+)
+
+import _distutils_hack.override  # noqa: F401
+
+from distutils import log
+from distutils.util import convert_path
+
+_Path = Union[str, os.PathLike]
+_Filter = Callable[[str], bool]
+StrIter = Iterator[str]
+
+chain_iter = itertools.chain.from_iterable
+
+if TYPE_CHECKING:
+    from setuptools import Distribution  # noqa
+
+
+def _valid_name(path: _Path) -> bool:
+    # Ignore invalid names that cannot be imported directly
+    return os.path.basename(path).isidentifier()
+
+
+class _Finder:
+    """Base class that exposes functionality for module/package finders"""
+
+    ALWAYS_EXCLUDE: Tuple[str, ...] = ()
+    DEFAULT_EXCLUDE: Tuple[str, ...] = ()
+
+    @classmethod
+    def find(
+        cls,
+        where: _Path = '.',
+        exclude: Iterable[str] = (),
+        include: Iterable[str] = ('*',)
+    ) -> List[str]:
+        """Return a list of all Python items (packages or modules, depending on
+        the finder implementation) found within directory 'where'.
+
+        'where' is the root directory which will be searched.
+        It should be supplied as a "cross-platform" (i.e. URL-style) path;
+        it will be converted to the appropriate local path syntax.
+
+        'exclude' is a sequence of names to exclude; '*' can be used
+        as a wildcard in the names.
+        When finding packages, 'foo.*' will exclude all subpackages of 'foo'
+        (but not 'foo' itself).
+
+        'include' is a sequence of names to include.
+        If it's specified, only the named items will be included.
+        If it's not specified, all found items will be included.
+        'include' can contain shell style wildcard patterns just like
+        'exclude'.
+        """
+
+        exclude = exclude or cls.DEFAULT_EXCLUDE
+        return list(
+            cls._find_iter(
+                convert_path(str(where)),
+                cls._build_filter(*cls.ALWAYS_EXCLUDE, *exclude),
+                cls._build_filter(*include),
+            )
+        )
+
+    @classmethod
+    def _find_iter(cls, where: _Path, exclude: _Filter, include: _Filter) -> StrIter:
+        raise NotImplementedError
+
+    @staticmethod
+    def _build_filter(*patterns: str) -> _Filter:
+        """
+        Given a list of patterns, return a callable that will be true only if
+        the input matches at least one of the patterns.
+        """
+        return lambda name: any(fnmatchcase(name, pat) for pat in patterns)
+
+
+class PackageFinder(_Finder):
+    """
+    Generate a list of all Python packages found within a directory
+    """
+
+    ALWAYS_EXCLUDE = ("ez_setup", "*__pycache__")
+
+    @classmethod
+    def _find_iter(cls, where: _Path, exclude: _Filter, include: _Filter) -> StrIter:
+        """
+        All the packages found in 'where' that pass the 'include' filter, but
+        not the 'exclude' filter.
+        """
+        for root, dirs, files in os.walk(str(where), followlinks=True):
+            # Copy dirs to iterate over it, then empty dirs.
+            all_dirs = dirs[:]
+            dirs[:] = []
+
+            for dir in all_dirs:
+                full_path = os.path.join(root, dir)
+                rel_path = os.path.relpath(full_path, where)
+                package = rel_path.replace(os.path.sep, '.')
+
+                # Skip directory trees that are not valid packages
+                if '.' in dir or not cls._looks_like_package(full_path, package):
+                    continue
+
+                # Should this package be included?
+                if include(package) and not exclude(package):
+                    yield package
+
+                # Keep searching subdirectories, as there may be more packages
+                # down there, even if the parent was excluded.
+                dirs.append(dir)
+
+    @staticmethod
+    def _looks_like_package(path: _Path, _package_name: str) -> bool:
+        """Does a directory look like a package?"""
+        return os.path.isfile(os.path.join(path, '__init__.py'))
+
+
+class PEP420PackageFinder(PackageFinder):
+    @staticmethod
+    def _looks_like_package(_path: _Path, _package_name: str) -> bool:
+        return True
+
+
+class ModuleFinder(_Finder):
+    """Find isolated Python modules.
+    This function will **not** recurse subdirectories.
+    """
+
+    @classmethod
+    def _find_iter(cls, where: _Path, exclude: _Filter, include: _Filter) -> StrIter:
+        for file in glob(os.path.join(where, "*.py")):
+            module, _ext = os.path.splitext(os.path.basename(file))
+
+            if not cls._looks_like_module(module):
+                continue
+
+            if include(module) and not exclude(module):
+                yield module
+
+    _looks_like_module = staticmethod(_valid_name)
+
+
+# We have to be extra careful in the case of flat layout to not include files
+# and directories not meant for distribution (e.g. tool-related)
+
+
+class FlatLayoutPackageFinder(PEP420PackageFinder):
+    _EXCLUDE = (
+        "ci",
+        "bin",
+        "doc",
+        "docs",
+        "documentation",
+        "manpages",
+        "news",
+        "changelog",
+        "test",
+        "tests",
+        "unit_test",
+        "unit_tests",
+        "example",
+        "examples",
+        "scripts",
+        "tools",
+        "util",
+        "utils",
+        "python",
+        "build",
+        "dist",
+        "venv",
+        "env",
+        "requirements",
+        # ---- Task runners / Build tools ----
+        "tasks",  # invoke
+        "fabfile",  # fabric
+        "site_scons",  # SCons
+        # ---- Other tools ----
+        "benchmark",
+        "benchmarks",
+        "exercise",
+        "exercises",
+        # ---- Hidden directories/Private packages ----
+        "[._]*",
+    )
+
+    DEFAULT_EXCLUDE = tuple(chain_iter((p, f"{p}.*") for p in _EXCLUDE))
+    """Reserved package names"""
+
+    @staticmethod
+    def _looks_like_package(_path: _Path, package_name: str) -> bool:
+        names = package_name.split('.')
+        # Consider PEP 561
+        root_pkg_is_valid = names[0].isidentifier() or names[0].endswith("-stubs")
+        return root_pkg_is_valid and all(name.isidentifier() for name in names[1:])
+
+
+class FlatLayoutModuleFinder(ModuleFinder):
+    DEFAULT_EXCLUDE = (
+        "setup",
+        "conftest",
+        "test",
+        "tests",
+        "example",
+        "examples",
+        "build",
+        # ---- Task runners ----
+        "toxfile",
+        "noxfile",
+        "pavement",
+        "dodo",
+        "tasks",
+        "fabfile",
+        # ---- Other tools ----
+        "[Ss][Cc]onstruct",  # SCons
+        "conanfile",  # Connan: C/C++ build tool
+        "manage",  # Django
+        "benchmark",
+        "benchmarks",
+        "exercise",
+        "exercises",
+        # ---- Hidden files/Private modules ----
+        "[._]*",
+    )
+    """Reserved top-level module names"""
+
+
+def _find_packages_within(root_pkg: str, pkg_dir: _Path) -> List[str]:
+    nested = PEP420PackageFinder.find(pkg_dir)
+    return [root_pkg] + [".".join((root_pkg, n)) for n in nested]
+
+
+class ConfigDiscovery:
+    """Fill-in metadata and options that can be automatically derived
+    (from other metadata/options, the file system or conventions)
+    """
+
+    def __init__(self, distribution: "Distribution"):
+        self.dist = distribution
+        self._called = False
+        self._disabled = False
+        self._skip_ext_modules = False
+
+    def _disable(self):
+        """Internal API to disable automatic discovery"""
+        self._disabled = True
+
+    def _ignore_ext_modules(self):
+        """Internal API to disregard ext_modules.
+
+        Normally auto-discovery would not be triggered if ``ext_modules`` are set
+        (this is done for backward compatibility with existing packages relying on
+        ``setup.py`` or ``setup.cfg``). However, ``setuptools`` can call this function
+        to ignore given ``ext_modules`` and proceed with the auto-discovery if
+        ``packages`` and ``py_modules`` are not given (e.g. when using pyproject.toml
+        metadata).
+        """
+        self._skip_ext_modules = True
+
+    @property
+    def _root_dir(self) -> _Path:
+        # The best is to wait until `src_root` is set in dist, before using _root_dir.
+        return self.dist.src_root or os.curdir
+
+    @property
+    def _package_dir(self) -> Dict[str, str]:
+        if self.dist.package_dir is None:
+            return {}
+        return self.dist.package_dir
+
+    def __call__(self, force=False, name=True, ignore_ext_modules=False):
+        """Automatically discover missing configuration fields
+        and modifies the given ``distribution`` object in-place.
+
+        Note that by default this will only have an effect the first time the
+        ``ConfigDiscovery`` object is called.
+
+        To repeatedly invoke automatic discovery (e.g. when the project
+        directory changes), please use ``force=True`` (or create a new
+        ``ConfigDiscovery`` instance).
+        """
+        if force is False and (self._called or self._disabled):
+            # Avoid overhead of multiple calls
+            return
+
+        self._analyse_package_layout(ignore_ext_modules)
+        if name:
+            self.analyse_name()  # depends on ``packages`` and ``py_modules``
+
+        self._called = True
+
+    def _explicitly_specified(self, ignore_ext_modules: bool) -> bool:
+        """``True`` if the user has specified some form of package/module listing"""
+        ignore_ext_modules = ignore_ext_modules or self._skip_ext_modules
+        ext_modules = not (self.dist.ext_modules is None or ignore_ext_modules)
+        return (
+            self.dist.packages is not None
+            or self.dist.py_modules is not None
+            or ext_modules
+            or hasattr(self.dist, "configuration") and self.dist.configuration
+            # ^ Some projects use numpy.distutils.misc_util.Configuration
+        )
+
+    def _analyse_package_layout(self, ignore_ext_modules: bool) -> bool:
+        if self._explicitly_specified(ignore_ext_modules):
+            # For backward compatibility, just try to find modules/packages
+            # when nothing is given
+            return True
+
+        log.debug(
+            "No `packages` or `py_modules` configuration, performing "
+            "automatic discovery."
+        )
+
+        return (
+            self._analyse_explicit_layout()
+            or self._analyse_src_layout()
+            # flat-layout is the trickiest for discovery so it should be last
+            or self._analyse_flat_layout()
+        )
+
+    def _analyse_explicit_layout(self) -> bool:
+        """The user can explicitly give a package layout via ``package_dir``"""
+        package_dir = self._package_dir.copy()  # don't modify directly
+        package_dir.pop("", None)  # This falls under the "src-layout" umbrella
+        root_dir = self._root_dir
+
+        if not package_dir:
+            return False
+
+        log.debug(f"`explicit-layout` detected -- analysing {package_dir}")
+        pkgs = chain_iter(
+            _find_packages_within(pkg, os.path.join(root_dir, parent_dir))
+            for pkg, parent_dir in package_dir.items()
+        )
+        self.dist.packages = list(pkgs)
+        log.debug(f"discovered packages -- {self.dist.packages}")
+        return True
+
+    def _analyse_src_layout(self) -> bool:
+        """Try to find all packages or modules under the ``src`` directory
+        (or anything pointed by ``package_dir[""]``).
+
+        The "src-layout" is relatively safe for automatic discovery.
+        We assume that everything within is meant to be included in the
+        distribution.
+
+        If ``package_dir[""]`` is not given, but the ``src`` directory exists,
+        this function will set ``package_dir[""] = "src"``.
+        """
+        package_dir = self._package_dir
+        src_dir = os.path.join(self._root_dir, package_dir.get("", "src"))
+        if not os.path.isdir(src_dir):
+            return False
+
+        log.debug(f"`src-layout` detected -- analysing {src_dir}")
+        package_dir.setdefault("", os.path.basename(src_dir))
+        self.dist.package_dir = package_dir  # persist eventual modifications
+        self.dist.packages = PEP420PackageFinder.find(src_dir)
+        self.dist.py_modules = ModuleFinder.find(src_dir)
+        log.debug(f"discovered packages -- {self.dist.packages}")
+        log.debug(f"discovered py_modules -- {self.dist.py_modules}")
+        return True
+
+    def _analyse_flat_layout(self) -> bool:
+        """Try to find all packages and modules under the project root.
+
+        Since the ``flat-layout`` is more dangerous in terms of accidentally including
+        extra files/directories, this function is more conservative and will raise an
+        error if multiple packages or modules are found.
+
+        This assumes that multi-package dists are uncommon and refuse to support that
+        use case in order to be able to prevent unintended errors.
+        """
+        log.debug(f"`flat-layout` detected -- analysing {self._root_dir}")
+        return self._analyse_flat_packages() or self._analyse_flat_modules()
+
+    def _analyse_flat_packages(self) -> bool:
+        self.dist.packages = FlatLayoutPackageFinder.find(self._root_dir)
+        top_level = remove_nested_packages(remove_stubs(self.dist.packages))
+        log.debug(f"discovered packages -- {self.dist.packages}")
+        self._ensure_no_accidental_inclusion(top_level, "packages")
+        return bool(top_level)
+
+    def _analyse_flat_modules(self) -> bool:
+        self.dist.py_modules = FlatLayoutModuleFinder.find(self._root_dir)
+        log.debug(f"discovered py_modules -- {self.dist.py_modules}")
+        self._ensure_no_accidental_inclusion(self.dist.py_modules, "modules")
+        return bool(self.dist.py_modules)
+
+    def _ensure_no_accidental_inclusion(self, detected: List[str], kind: str):
+        if len(detected) > 1:
+            from inspect import cleandoc
+
+            from setuptools.errors import PackageDiscoveryError
+
+            msg = f"""Multiple top-level {kind} discovered in a flat-layout: {detected}.
+
+            To avoid accidental inclusion of unwanted files or directories,
+            setuptools will not proceed with this build.
+
+            If you are trying to create a single distribution with multiple {kind}
+            on purpose, you should not rely on automatic discovery.
+            Instead, consider the following options:
+
+            1. set up custom discovery (`find` directive with `include` or `exclude`)
+            2. use a `src-layout`
+            3. explicitly set `py_modules` or `packages` with a list of names
+
+            To find more information, look for "package discovery" on setuptools docs.
+            """
+            raise PackageDiscoveryError(cleandoc(msg))
+
+    def analyse_name(self):
+        """The packages/modules are the essential contribution of the author.
+        Therefore the name of the distribution can be derived from them.
+        """
+        if self.dist.metadata.name or self.dist.name:
+            # get_name() is not reliable (can return "UNKNOWN")
+            return None
+
+        log.debug("No `name` configuration, performing automatic discovery")
+
+        name = (
+            self._find_name_single_package_or_module()
+            or self._find_name_from_packages()
+        )
+        if name:
+            self.dist.metadata.name = name
+
+    def _find_name_single_package_or_module(self) -> Optional[str]:
+        """Exactly one module or package"""
+        for field in ('packages', 'py_modules'):
+            items = getattr(self.dist, field, None) or []
+            if items and len(items) == 1:
+                log.debug(f"Single module/package detected, name: {items[0]}")
+                return items[0]
+
+        return None
+
+    def _find_name_from_packages(self) -> Optional[str]:
+        """Try to find the root package that is not a PEP 420 namespace"""
+        if not self.dist.packages:
+            return None
+
+        packages = remove_stubs(sorted(self.dist.packages, key=len))
+        package_dir = self.dist.package_dir or {}
+
+        parent_pkg = find_parent_package(packages, package_dir, self._root_dir)
+        if parent_pkg:
+            log.debug(f"Common parent package detected, name: {parent_pkg}")
+            return parent_pkg
+
+        log.warn("No parent package detected, impossible to derive `name`")
+        return None
+
+
+def remove_nested_packages(packages: List[str]) -> List[str]:
+    """Remove nested packages from a list of packages.
+
+    >>> remove_nested_packages(["a", "a.b1", "a.b2", "a.b1.c1"])
+    ['a']
+    >>> remove_nested_packages(["a", "b", "c.d", "c.d.e.f", "g.h", "a.a1"])
+    ['a', 'b', 'c.d', 'g.h']
+    """
+    pkgs = sorted(packages, key=len)
+    top_level = pkgs[:]
+    size = len(pkgs)
+    for i, name in enumerate(reversed(pkgs)):
+        if any(name.startswith(f"{other}.") for other in top_level):
+            top_level.pop(size - i - 1)
+
+    return top_level
+
+
+def remove_stubs(packages: List[str]) -> List[str]:
+    """Remove type stubs (:pep:`561`) from a list of packages.
+
+    >>> remove_stubs(["a", "a.b", "a-stubs", "a-stubs.b.c", "b", "c-stubs"])
+    ['a', 'a.b', 'b']
+    """
+    return [pkg for pkg in packages if not pkg.split(".")[0].endswith("-stubs")]
+
+
+def find_parent_package(
+    packages: List[str], package_dir: Mapping[str, str], root_dir: _Path
+) -> Optional[str]:
+    """Find the parent package that is not a namespace."""
+    packages = sorted(packages, key=len)
+    common_ancestors = []
+    for i, name in enumerate(packages):
+        if not all(n.startswith(f"{name}.") for n in packages[i+1:]):
+            # Since packages are sorted by length, this condition is able
+            # to find a list of all common ancestors.
+            # When there is divergence (e.g. multiple root packages)
+            # the list will be empty
+            break
+        common_ancestors.append(name)
+
+    for name in common_ancestors:
+        pkg_path = find_package_path(name, package_dir, root_dir)
+        init = os.path.join(pkg_path, "__init__.py")
+        if os.path.isfile(init):
+            return name
+
+    return None
+
+
+def find_package_path(
+    name: str, package_dir: Mapping[str, str], root_dir: _Path
+) -> str:
+    """Given a package name, return the path where it should be found on
+    disk, considering the ``package_dir`` option.
+
+    >>> path = find_package_path("my.pkg", {"": "root/is/nested"}, ".")
+    >>> path.replace(os.sep, "/")
+    './root/is/nested/my/pkg'
+
+    >>> path = find_package_path("my.pkg", {"my": "root/is/nested"}, ".")
+    >>> path.replace(os.sep, "/")
+    './root/is/nested/pkg'
+
+    >>> path = find_package_path("my.pkg", {"my.pkg": "root/is/nested"}, ".")
+    >>> path.replace(os.sep, "/")
+    './root/is/nested'
+
+    >>> path = find_package_path("other.pkg", {"my.pkg": "root/is/nested"}, ".")
+    >>> path.replace(os.sep, "/")
+    './other/pkg'
+    """
+    parts = name.split(".")
+    for i in range(len(parts), 0, -1):
+        # Look backwards, the most specific package_dir first
+        partial_name = ".".join(parts[:i])
+        if partial_name in package_dir:
+            parent = package_dir[partial_name]
+            return os.path.join(root_dir, parent, *parts[i:])
+
+    parent = package_dir.get("") or ""
+    return os.path.join(root_dir, *parent.split("/"), *parts)
+
+
+def construct_package_dir(packages: List[str], package_path: _Path) -> Dict[str, str]:
+    parent_pkgs = remove_nested_packages(packages)
+    prefix = Path(package_path).parts
+    return {pkg: "/".join([*prefix, *pkg.split(".")]) for pkg in parent_pkgs}
diff --git a/libs/common/setuptools/dist.py b/libs/common/setuptools/dist.py
index f6453a08..1c71e5ee 100644
--- a/libs/common/setuptools/dist.py
+++ b/libs/common/setuptools/dist.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 __all__ = ['Distribution']
 
 import io
@@ -11,32 +10,44 @@ import distutils.log
 import distutils.core
 import distutils.cmd
 import distutils.dist
+import distutils.command
 from distutils.util import strtobool
 from distutils.debug import DEBUG
 from distutils.fancy_getopt import translate_longopt
+from glob import iglob
 import itertools
+import textwrap
+from typing import List, Optional, TYPE_CHECKING
+from pathlib import Path
 
 from collections import defaultdict
 from email import message_from_file
 
-from distutils.errors import (
-    DistutilsOptionError, DistutilsPlatformError, DistutilsSetupError,
-)
+from distutils.errors import DistutilsOptionError, DistutilsSetupError
 from distutils.util import rfc822_escape
-from distutils.version import StrictVersion
 
-from setuptools.extern import six
 from setuptools.extern import packaging
 from setuptools.extern import ordered_set
-from setuptools.extern.six.moves import map, filter, filterfalse
+from setuptools.extern.more_itertools import unique_everseen, partition
+
+from ._importlib import metadata
 
 from . import SetuptoolsDeprecationWarning
 
-from setuptools.depends import Require
+import setuptools
+import setuptools.command
 from setuptools import windows_support
 from setuptools.monkey import get_unpatched
-from setuptools.config import parse_configuration
+from setuptools.config import setupcfg, pyprojecttoml
+from setuptools.discovery import ConfigDiscovery
+
 import pkg_resources
+from setuptools.extern.packaging import version
+from . import _reqs
+from . import _entry_points
+
+if TYPE_CHECKING:
+    from email.message import Message
 
 __import__('setuptools.extern.packaging.specifiers')
 __import__('setuptools.extern.packaging.version')
@@ -49,133 +60,151 @@ def _get_unpatched(cls):
 
 def get_metadata_version(self):
     mv = getattr(self, 'metadata_version', None)
-
     if mv is None:
-        if self.long_description_content_type or self.provides_extras:
-            mv = StrictVersion('2.1')
-        elif (self.maintainer is not None or
-              self.maintainer_email is not None or
-              getattr(self, 'python_requires', None) is not None or
-              self.project_urls):
-            mv = StrictVersion('1.2')
-        elif (self.provides or self.requires or self.obsoletes or
-                self.classifiers or self.download_url):
-            mv = StrictVersion('1.1')
-        else:
-            mv = StrictVersion('1.0')
-
+        mv = version.Version('2.1')
         self.metadata_version = mv
-
     return mv
 
 
+def rfc822_unescape(content: str) -> str:
+    """Reverse RFC-822 escaping by removing leading whitespaces from content."""
+    lines = content.splitlines()
+    if len(lines) == 1:
+        return lines[0].lstrip()
+    return '\n'.join((lines[0].lstrip(), textwrap.dedent('\n'.join(lines[1:]))))
+
+
+def _read_field_from_msg(msg: "Message", field: str) -> Optional[str]:
+    """Read Message header field."""
+    value = msg[field]
+    if value == 'UNKNOWN':
+        return None
+    return value
+
+
+def _read_field_unescaped_from_msg(msg: "Message", field: str) -> Optional[str]:
+    """Read Message header field and apply rfc822_unescape."""
+    value = _read_field_from_msg(msg, field)
+    if value is None:
+        return value
+    return rfc822_unescape(value)
+
+
+def _read_list_from_msg(msg: "Message", field: str) -> Optional[List[str]]:
+    """Read Message header field and return all results as list."""
+    values = msg.get_all(field, None)
+    if values == []:
+        return None
+    return values
+
+
+def _read_payload_from_msg(msg: "Message") -> Optional[str]:
+    value = msg.get_payload().strip()
+    if value == 'UNKNOWN' or not value:
+        return None
+    return value
+
+
 def read_pkg_file(self, file):
     """Reads the metadata values from a file object."""
     msg = message_from_file(file)
 
-    def _read_field(name):
-        value = msg[name]
-        if value == 'UNKNOWN':
-            return None
-        return value
-
-    def _read_list(name):
-        values = msg.get_all(name, None)
-        if values == []:
-            return None
-        return values
-
-    self.metadata_version = StrictVersion(msg['metadata-version'])
-    self.name = _read_field('name')
-    self.version = _read_field('version')
-    self.description = _read_field('summary')
+    self.metadata_version = version.Version(msg['metadata-version'])
+    self.name = _read_field_from_msg(msg, 'name')
+    self.version = _read_field_from_msg(msg, 'version')
+    self.description = _read_field_from_msg(msg, 'summary')
     # we are filling author only.
-    self.author = _read_field('author')
+    self.author = _read_field_from_msg(msg, 'author')
     self.maintainer = None
-    self.author_email = _read_field('author-email')
+    self.author_email = _read_field_from_msg(msg, 'author-email')
     self.maintainer_email = None
-    self.url = _read_field('home-page')
-    self.license = _read_field('license')
+    self.url = _read_field_from_msg(msg, 'home-page')
+    self.download_url = _read_field_from_msg(msg, 'download-url')
+    self.license = _read_field_unescaped_from_msg(msg, 'license')
 
-    if 'download-url' in msg:
-        self.download_url = _read_field('download-url')
-    else:
-        self.download_url = None
-
-    self.long_description = _read_field('description')
-    self.description = _read_field('summary')
+    self.long_description = _read_field_unescaped_from_msg(msg, 'description')
+    if (
+        self.long_description is None and
+        self.metadata_version >= version.Version('2.1')
+    ):
+        self.long_description = _read_payload_from_msg(msg)
+    self.description = _read_field_from_msg(msg, 'summary')
 
     if 'keywords' in msg:
-        self.keywords = _read_field('keywords').split(',')
+        self.keywords = _read_field_from_msg(msg, 'keywords').split(',')
 
-    self.platforms = _read_list('platform')
-    self.classifiers = _read_list('classifier')
+    self.platforms = _read_list_from_msg(msg, 'platform')
+    self.classifiers = _read_list_from_msg(msg, 'classifier')
 
     # PEP 314 - these fields only exist in 1.1
-    if self.metadata_version == StrictVersion('1.1'):
-        self.requires = _read_list('requires')
-        self.provides = _read_list('provides')
-        self.obsoletes = _read_list('obsoletes')
+    if self.metadata_version == version.Version('1.1'):
+        self.requires = _read_list_from_msg(msg, 'requires')
+        self.provides = _read_list_from_msg(msg, 'provides')
+        self.obsoletes = _read_list_from_msg(msg, 'obsoletes')
     else:
         self.requires = None
         self.provides = None
         self.obsoletes = None
 
+    self.license_files = _read_list_from_msg(msg, 'license-file')
+
+
+def single_line(val):
+    """
+    Quick and dirty validation for Summary pypa/setuptools#1390.
+    """
+    if '\n' in val:
+        # TODO: Replace with `raise ValueError("newlines not allowed")`
+        # after reviewing #2893.
+        warnings.warn("newlines not allowed and will break in the future")
+        val = val.strip().split('\n')[0]
+    return val
+
 
 # Based on Python 3.5 version
-def write_pkg_file(self, file):
-    """Write the PKG-INFO format data to a file object.
-    """
+def write_pkg_file(self, file):  # noqa: C901  # is too complex (14)  # FIXME
+    """Write the PKG-INFO format data to a file object."""
     version = self.get_metadata_version()
 
-    if six.PY2:
-        def write_field(key, value):
-            file.write("%s: %s\n" % (key, self._encode_field(value)))
-    else:
-        def write_field(key, value):
-            file.write("%s: %s\n" % (key, value))
+    def write_field(key, value):
+        file.write("%s: %s\n" % (key, value))
 
     write_field('Metadata-Version', str(version))
     write_field('Name', self.get_name())
     write_field('Version', self.get_version())
-    write_field('Summary', self.get_description())
-    write_field('Home-page', self.get_url())
 
-    if version < StrictVersion('1.2'):
-        write_field('Author', self.get_contact())
-        write_field('Author-email', self.get_contact_email())
-    else:
-        optional_fields = (
-            ('Author', 'author'),
-            ('Author-email', 'author_email'),
-            ('Maintainer', 'maintainer'),
-            ('Maintainer-email', 'maintainer_email'),
-        )
+    summary = self.get_description()
+    if summary:
+        write_field('Summary', single_line(summary))
 
-        for field, attr in optional_fields:
-            attr_val = getattr(self, attr)
+    optional_fields = (
+        ('Home-page', 'url'),
+        ('Download-URL', 'download_url'),
+        ('Author', 'author'),
+        ('Author-email', 'author_email'),
+        ('Maintainer', 'maintainer'),
+        ('Maintainer-email', 'maintainer_email'),
+    )
 
-            if attr_val is not None:
-                write_field(field, attr_val)
+    for field, attr in optional_fields:
+        attr_val = getattr(self, attr, None)
+        if attr_val is not None:
+            write_field(field, attr_val)
+
+    license = self.get_license()
+    if license:
+        write_field('License', rfc822_escape(license))
 
-    write_field('License', self.get_license())
-    if self.download_url:
-        write_field('Download-URL', self.download_url)
     for project_url in self.project_urls.items():
-        write_field('Project-URL',  '%s, %s' % project_url)
-
-    long_desc = rfc822_escape(self.get_long_description())
-    write_field('Description', long_desc)
+        write_field('Project-URL', '%s, %s' % project_url)
 
     keywords = ','.join(self.get_keywords())
     if keywords:
         write_field('Keywords', keywords)
 
-    if version >= StrictVersion('1.2'):
-        for platform in self.get_platforms():
-            write_field('Platform', platform)
-    else:
-        self._write_list(file, 'Platform', self.get_platforms())
+    platforms = self.get_platforms() or []
+    for platform in platforms:
+        write_field('Platform', platform)
 
     self._write_list(file, 'Classifier', self.get_classifiers())
 
@@ -190,27 +219,31 @@ def write_pkg_file(self, file):
 
     # PEP 566
     if self.long_description_content_type:
-        write_field(
-            'Description-Content-Type',
-            self.long_description_content_type
-        )
+        write_field('Description-Content-Type', self.long_description_content_type)
     if self.provides_extras:
         for extra in self.provides_extras:
             write_field('Provides-Extra', extra)
 
+    self._write_list(file, 'License-File', self.license_files or [])
+
+    long_description = self.get_long_description()
+    if long_description:
+        file.write("\n%s" % long_description)
+        if not long_description.endswith("\n"):
+            file.write("\n")
+
 
 sequence = tuple, list
 
 
 def check_importable(dist, attr, value):
     try:
-        ep = pkg_resources.EntryPoint.parse('x=' + value)
+        ep = metadata.EntryPoint(value=value, name=None, group=None)
         assert not ep.extras
-    except (TypeError, ValueError, AttributeError, AssertionError):
+    except (TypeError, ValueError, AttributeError, AssertionError) as e:
         raise DistutilsSetupError(
-            "%r must be importable 'module:attrs' string (got %r)"
-            % (attr, value)
-        )
+            "%r must be importable 'module:attrs' string (got %r)" % (attr, value)
+        ) from e
 
 
 def assert_string_list(dist, attr, value):
@@ -221,10 +254,10 @@ def assert_string_list(dist, attr, value):
         assert isinstance(value, (list, tuple))
         # verify that elements of value are strings
         assert ''.join(value) != value
-    except (TypeError, ValueError, AttributeError, AssertionError):
+    except (TypeError, ValueError, AttributeError, AssertionError) as e:
         raise DistutilsSetupError(
             "%r must be a list of strings (got %r)" % (attr, value)
-        )
+        ) from e
 
 
 def check_nsp(dist, attr, value):
@@ -234,34 +267,41 @@ def check_nsp(dist, attr, value):
     for nsp in ns_packages:
         if not dist.has_contents_for(nsp):
             raise DistutilsSetupError(
-                "Distribution contains no modules or packages for " +
-                "namespace package %r" % nsp
+                "Distribution contains no modules or packages for "
+                + "namespace package %r" % nsp
             )
         parent, sep, child = nsp.rpartition('.')
         if parent and parent not in ns_packages:
             distutils.log.warn(
                 "WARNING: %r is declared as a package namespace, but %r"
-                " is not: please correct this in setup.py", nsp, parent
+                " is not: please correct this in setup.py",
+                nsp,
+                parent,
             )
+        msg = (
+            "The namespace_packages parameter is deprecated, "
+            "consider using implicit namespaces instead (PEP 420)."
+        )
+        warnings.warn(msg, SetuptoolsDeprecationWarning)
 
 
 def check_extras(dist, attr, value):
     """Verify that extras_require mapping is valid"""
     try:
         list(itertools.starmap(_check_extra, value.items()))
-    except (TypeError, ValueError, AttributeError):
+    except (TypeError, ValueError, AttributeError) as e:
         raise DistutilsSetupError(
             "'extras_require' must be a dictionary whose values are "
             "strings or lists of strings containing valid project/version "
             "requirement specifiers."
-        )
+        ) from e
 
 
 def _check_extra(extra, reqs):
     name, sep, marker = extra.partition(':')
     if marker and pkg_resources.invalid_marker(marker):
         raise DistutilsSetupError("Invalid environment marker: " + marker)
-    list(pkg_resources.parse_requirements(reqs))
+    list(_reqs.parse(reqs))
 
 
 def assert_bool(dist, attr, value):
@@ -271,10 +311,17 @@ def assert_bool(dist, attr, value):
         raise DistutilsSetupError(tmpl.format(attr=attr, value=value))
 
 
+def invalid_unless_false(dist, attr, value):
+    if not value:
+        warnings.warn(f"{attr} is ignored.", DistDeprecationWarning)
+        return
+    raise DistutilsSetupError(f"{attr} is invalid.")
+
+
 def check_requirements(dist, attr, value):
     """Verify that install_requires is a valid requirements list"""
     try:
-        list(pkg_resources.parse_requirements(value))
+        list(_reqs.parse(value))
         if isinstance(value, (dict, set)):
             raise TypeError("Unordered types are not allowed")
     except (TypeError, ValueError) as error:
@@ -282,31 +329,30 @@ def check_requirements(dist, attr, value):
             "{attr!r} must be a string or list of strings "
             "containing valid project/version requirement specifiers; {error}"
         )
-        raise DistutilsSetupError(tmpl.format(attr=attr, error=error))
+        raise DistutilsSetupError(tmpl.format(attr=attr, error=error)) from error
 
 
 def check_specifier(dist, attr, value):
     """Verify that value is a valid version specifier"""
     try:
         packaging.specifiers.SpecifierSet(value)
-    except packaging.specifiers.InvalidSpecifier as error:
+    except (packaging.specifiers.InvalidSpecifier, AttributeError) as error:
         tmpl = (
-            "{attr!r} must be a string "
-            "containing valid version specifiers; {error}"
+            "{attr!r} must be a string " "containing valid version specifiers; {error}"
         )
-        raise DistutilsSetupError(tmpl.format(attr=attr, error=error))
+        raise DistutilsSetupError(tmpl.format(attr=attr, error=error)) from error
 
 
 def check_entry_points(dist, attr, value):
     """Verify that entry_points map is parseable"""
     try:
-        pkg_resources.EntryPoint.parse_map(value)
-    except ValueError as e:
-        raise DistutilsSetupError(e)
+        _entry_points.load(value)
+    except Exception as e:
+        raise DistutilsSetupError(e) from e
 
 
 def check_test_suite(dist, attr, value):
-    if not isinstance(value, six.string_types):
+    if not isinstance(value, str):
         raise DistutilsSetupError("test_suite must be a string")
 
 
@@ -315,12 +361,12 @@ def check_package_data(dist, attr, value):
     if not isinstance(value, dict):
         raise DistutilsSetupError(
             "{!r} must be a dictionary mapping package names to lists of "
-            "string wildcard patterns".format(attr))
+            "string wildcard patterns".format(attr)
+        )
     for k, v in value.items():
-        if not isinstance(k, six.string_types):
+        if not isinstance(k, str):
             raise DistutilsSetupError(
-                "keys of {!r} dict must be strings (got {!r})"
-                .format(attr, k)
+                "keys of {!r} dict must be strings (got {!r})".format(attr, k)
             )
         assert_string_list(dist, 'values of {!r} dict'.format(attr), v)
 
@@ -330,7 +376,8 @@ def check_packages(dist, attr, value):
         if not re.match(r'\w+(\.\w+)*', pkgname):
             distutils.log.warn(
                 "WARNING: %r not a valid package name; please use only "
-                ".-separated package names in setup.py", pkgname
+                ".-separated package names in setup.py",
+                pkgname,
             )
 
 
@@ -338,7 +385,7 @@ _Distribution = get_unpatched(distutils.core.Distribution)
 
 
 class Distribution(_Distribution):
-    """Distribution with support for features, tests, and package data
+    """Distribution with support for tests and package data
 
     This is an enhanced version of 'distutils.dist.Distribution' that
     effectively adds the following new optional keyword arguments to 'setup()':
@@ -365,21 +412,6 @@ class Distribution(_Distribution):
         EasyInstall and requests one of your extras, the corresponding
         additional requirements will be installed if needed.
 
-     'features' **deprecated** -- a dictionary mapping option names to
-        'setuptools.Feature'
-        objects.  Features are a portion of the distribution that can be
-        included or excluded based on user options, inter-feature dependencies,
-        and availability on the current system.  Excluded features are omitted
-        from all setup commands, including source and binary distributions, so
-        you can create multiple distributions from the same source tree.
-        Feature names should be valid Python identifiers, except that they may
-        contain the '-' (minus) sign.  Features can be included or excluded
-        via the command line options '--with-X' and '--without-X', where 'X' is
-        the name of the feature.  Whether a feature is included by default, and
-        whether you are allowed to control this from the command line, is
-        determined by the Feature object.  See the 'Feature' class for more
-        information.
-
      'test_suite' -- the name of a test suite to run for the 'test' command.
         If the user runs 'python setup.py test', the package will be installed,
         and the named test suite will be run.  The format is the same as
@@ -401,15 +433,15 @@ class Distribution(_Distribution):
     for manipulating the distribution's contents.  For example, the 'include()'
     and 'exclude()' methods can be thought of as in-place add and subtract
     commands that add or remove packages, modules, extensions, and so on from
-    the distribution.  They are used by the feature subsystem to configure the
-    distribution for the included and excluded features.
+    the distribution.
     """
 
     _DISTUTILS_UNSUPPORTED_METADATA = {
-        'long_description_content_type': None,
+        'long_description_content_type': lambda: None,
         'project_urls': dict,
         'provides_extras': ordered_set.OrderedSet,
-        'license_files': ordered_set.OrderedSet,
+        'license_file': lambda: None,
+        'license_files': lambda: None,
     }
 
     _patched_dist = None
@@ -432,59 +464,90 @@ class Distribution(_Distribution):
         if not have_package_data:
             self.package_data = {}
         attrs = attrs or {}
-        if 'features' in attrs or 'require_features' in attrs:
-            Feature.warn_deprecated()
-        self.require_features = []
-        self.features = {}
         self.dist_files = []
         # Filter-out setuptools' specific options.
         self.src_root = attrs.pop("src_root", None)
         self.patch_missing_pkg_info(attrs)
         self.dependency_links = attrs.pop('dependency_links', [])
         self.setup_requires = attrs.pop('setup_requires', [])
-        for ep in pkg_resources.iter_entry_points('distutils.setup_keywords'):
+        for ep in metadata.entry_points(group='distutils.setup_keywords'):
             vars(self).setdefault(ep.name, None)
-        _Distribution.__init__(self, {
-            k: v for k, v in attrs.items()
-            if k not in self._DISTUTILS_UNSUPPORTED_METADATA
-        })
+        _Distribution.__init__(
+            self,
+            {
+                k: v
+                for k, v in attrs.items()
+                if k not in self._DISTUTILS_UNSUPPORTED_METADATA
+            },
+        )
 
-        # Fill-in missing metadata fields not supported by distutils.
-        # Note some fields may have been set by other tools (e.g. pbr)
-        # above; they are taken preferrentially to setup() arguments
+        # Save the original dependencies before they are processed into the egg format
+        self._orig_extras_require = {}
+        self._orig_install_requires = []
+        self._tmp_extras_require = defaultdict(ordered_set.OrderedSet)
+
+        self.set_defaults = ConfigDiscovery(self)
+
+        self._set_metadata_defaults(attrs)
+
+        self.metadata.version = self._normalize_version(
+            self._validate_version(self.metadata.version)
+        )
+        self._finalize_requires()
+
+    def _validate_metadata(self):
+        required = {"name"}
+        provided = {
+            key
+            for key in vars(self.metadata)
+            if getattr(self.metadata, key, None) is not None
+        }
+        missing = required - provided
+
+        if missing:
+            msg = f"Required package metadata is missing: {missing}"
+            raise DistutilsSetupError(msg)
+
+    def _set_metadata_defaults(self, attrs):
+        """
+        Fill-in missing metadata fields not supported by distutils.
+        Some fields may have been set by other tools (e.g. pbr).
+        Those fields (vars(self.metadata)) take precedence to
+        supplied attrs.
+        """
         for option, default in self._DISTUTILS_UNSUPPORTED_METADATA.items():
-            for source in self.metadata.__dict__, attrs:
-                if option in source:
-                    value = source[option]
-                    break
-            else:
-                value = default() if default else None
-            setattr(self.metadata, option, value)
+            vars(self.metadata).setdefault(option, attrs.get(option, default()))
 
-        if isinstance(self.metadata.version, numbers.Number):
+    @staticmethod
+    def _normalize_version(version):
+        if isinstance(version, setuptools.sic) or version is None:
+            return version
+
+        normalized = str(packaging.version.Version(version))
+        if version != normalized:
+            tmpl = "Normalizing '{version}' to '{normalized}'"
+            warnings.warn(tmpl.format(**locals()))
+            return normalized
+        return version
+
+    @staticmethod
+    def _validate_version(version):
+        if isinstance(version, numbers.Number):
             # Some people apparently take "version number" too literally :)
-            self.metadata.version = str(self.metadata.version)
+            version = str(version)
 
-        if self.metadata.version is not None:
+        if version is not None:
             try:
-                ver = packaging.version.Version(self.metadata.version)
-                normalized_version = str(ver)
-                if self.metadata.version != normalized_version:
-                    warnings.warn(
-                        "Normalizing '%s' to '%s'" % (
-                            self.metadata.version,
-                            normalized_version,
-                        )
-                    )
-                    self.metadata.version = normalized_version
+                packaging.version.Version(version)
             except (packaging.version.InvalidVersion, TypeError):
                 warnings.warn(
                     "The version specified (%r) is an invalid version, this "
                     "may not work as expected with newer versions of "
                     "setuptools, pip, and PyPI. Please see PEP 440 for more "
-                    "details." % self.metadata.version
+                    "details." % version
                 )
-        self._finalize_requires()
+                return setuptools.sic(version)
+        return version
 
     def _finalize_requires(self):
         """
@@ -495,6 +558,8 @@ class Distribution(_Distribution):
             self.metadata.python_requires = self.python_requires
 
         if getattr(self, 'extras_require', None):
+            # Save original before it is messed by _convert_extras_requirements
+            self._orig_extras_require = self._orig_extras_require or self.extras_require
             for extra in self.extras_require.keys():
                 # Since this gets called multiple times at points where the
                 # keys have become 'converted' extras, ensure that we are only
@@ -503,6 +568,10 @@ class Distribution(_Distribution):
                 if extra:
                     self.metadata.provides_extras.add(extra)
 
+        if getattr(self, 'install_requires', None) and not self._orig_install_requires:
+            # Save original before it is messed by _move_install_requirements_markers
+            self._orig_install_requires = self.install_requires
+
         self._convert_extras_requirements()
         self._move_install_requirements_markers()
 
@@ -513,11 +582,12 @@ class Distribution(_Distribution):
         `"extra:{marker}": ["barbazquux"]`.
         """
         spec_ext_reqs = getattr(self, 'extras_require', None) or {}
-        self._tmp_extras_require = defaultdict(list)
+        tmp = defaultdict(ordered_set.OrderedSet)
+        self._tmp_extras_require = getattr(self, '_tmp_extras_require', tmp)
         for section, v in spec_ext_reqs.items():
             # Do not strip empty sections.
             self._tmp_extras_require[section]
-            for r in pkg_resources.parse_requirements(v):
+            for r in _reqs.parse(v):
                 suffix = self._suffix_for(r)
                 self._tmp_extras_require[section + suffix].append(r)
 
@@ -543,15 +613,16 @@ class Distribution(_Distribution):
             return not req.marker
 
         spec_inst_reqs = getattr(self, 'install_requires', None) or ()
-        inst_reqs = list(pkg_resources.parse_requirements(spec_inst_reqs))
+        inst_reqs = list(_reqs.parse(spec_inst_reqs))
         simple_reqs = filter(is_simple_req, inst_reqs)
-        complex_reqs = filterfalse(is_simple_req, inst_reqs)
+        complex_reqs = itertools.filterfalse(is_simple_req, inst_reqs)
         self.install_requires = list(map(str, simple_reqs))
 
         for r in complex_reqs:
             self._tmp_extras_require[':' + str(r.marker)].append(r)
         self.extras_require = dict(
-            (k, [str(r) for r in map(self._clean_req, v)])
+            # list(dict.fromkeys(...))  ensures a list of unique strings
+            (k, list(dict.fromkeys(str(r) for r in map(self._clean_req, v))))
             for k, v in self._tmp_extras_require.items()
         )
 
@@ -562,23 +633,69 @@ class Distribution(_Distribution):
         req.marker = None
         return req
 
-    def _parse_config_files(self, filenames=None):
+    def _finalize_license_files(self):
+        """Compute names of all license files which should be included."""
+        license_files: Optional[List[str]] = self.metadata.license_files
+        patterns: List[str] = license_files if license_files else []
+
+        license_file: Optional[str] = self.metadata.license_file
+        if license_file and license_file not in patterns:
+            patterns.append(license_file)
+
+        if license_files is None and license_file is None:
+            # Default patterns match the ones wheel uses
+            # See https://wheel.readthedocs.io/en/stable/user_guide.html
+            # -> 'Including license files in the generated wheel file'
+            patterns = ('LICEN[CS]E*', 'COPYING*', 'NOTICE*', 'AUTHORS*')
+
+        self.metadata.license_files = list(
+            unique_everseen(self._expand_patterns(patterns))
+        )
+
+    @staticmethod
+    def _expand_patterns(patterns):
+        """
+        >>> list(Distribution._expand_patterns(['LICENSE']))
+        ['LICENSE']
+        >>> list(Distribution._expand_patterns(['setup.cfg', 'LIC*']))
+        ['setup.cfg', 'LICENSE']
+        """
+        return (
+            path
+            for pattern in patterns
+            for path in sorted(iglob(pattern))
+            if not path.endswith('~') and os.path.isfile(path)
+        )
+
+    # FIXME: 'Distribution._parse_config_files' is too complex (14)
+    def _parse_config_files(self, filenames=None):  # noqa: C901
         """
         Adapted from distutils.dist.Distribution.parse_config_files,
         this method provides the same functionality in subtly-improved
         ways.
         """
-        from setuptools.extern.six.moves.configparser import ConfigParser
+        from configparser import ConfigParser
 
         # Ignore install directory options if we have a venv
-        if not six.PY2 and sys.prefix != sys.base_prefix:
-            ignore_options = [
-                'install-base', 'install-platbase', 'install-lib',
-                'install-platlib', 'install-purelib', 'install-headers',
-                'install-scripts', 'install-data', 'prefix', 'exec-prefix',
-                'home', 'user', 'root']
-        else:
-            ignore_options = []
+        ignore_options = (
+            []
+            if sys.prefix == sys.base_prefix
+            else [
+                'install-base',
+                'install-platbase',
+                'install-lib',
+                'install-platlib',
+                'install-purelib',
+                'install-headers',
+                'install-scripts',
+                'install-data',
+                'prefix',
+                'exec-prefix',
+                'home',
+                'user',
+                'root',
+            ]
+        )
 
         ignore_options = frozenset(ignore_options)
 
@@ -589,62 +706,95 @@ class Distribution(_Distribution):
             self.announce("Distribution.parse_config_files():")
 
         parser = ConfigParser()
+        parser.optionxform = str
         for filename in filenames:
             with io.open(filename, encoding='utf-8') as reader:
                 if DEBUG:
                     self.announce("  reading {filename}".format(**locals()))
-                (parser.readfp if six.PY2 else parser.read_file)(reader)
+                parser.read_file(reader)
             for section in parser.sections():
                 options = parser.options(section)
                 opt_dict = self.get_option_dict(section)
 
                 for opt in options:
-                    if opt != '__name__' and opt not in ignore_options:
-                        val = self._try_str(parser.get(section, opt))
-                        opt = opt.replace('-', '_')
-                        opt_dict[opt] = (filename, val)
+                    if opt == '__name__' or opt in ignore_options:
+                        continue
+
+                    val = parser.get(section, opt)
+                    opt = self.warn_dash_deprecation(opt, section)
+                    opt = self.make_option_lowercase(opt, section)
+                    opt_dict[opt] = (filename, val)
 
             # Make the ConfigParser forget everything (so we retain
             # the original filenames that options come from)
             parser.__init__()
 
+        if 'global' not in self.command_options:
+            return
+
         # If there was a "global" section in the config file, use it
         # to set Distribution options.
 
-        if 'global' in self.command_options:
-            for (opt, (src, val)) in self.command_options['global'].items():
-                alias = self.negative_opt.get(opt)
-                try:
-                    if alias:
-                        setattr(self, alias, not strtobool(val))
-                    elif opt in ('verbose', 'dry_run'):  # ugh!
-                        setattr(self, opt, strtobool(val))
-                    else:
-                        setattr(self, opt, val)
-                except ValueError as msg:
-                    raise DistutilsOptionError(msg)
+        for (opt, (src, val)) in self.command_options['global'].items():
+            alias = self.negative_opt.get(opt)
+            if alias:
+                val = not strtobool(val)
+            elif opt in ('verbose', 'dry_run'):  # ugh!
+                val = strtobool(val)
 
-    @staticmethod
-    def _try_str(val):
-        """
-        On Python 2, much of distutils relies on string values being of
-        type 'str' (bytes) and not unicode text. If the value can be safely
-        encoded to bytes using the default encoding, prefer that.
+            try:
+                setattr(self, alias or opt, val)
+            except ValueError as e:
+                raise DistutilsOptionError(e) from e
 
-        Why the default encoding? Because that value can be implicitly
-        decoded back to text if needed.
+    def warn_dash_deprecation(self, opt, section):
+        if section in (
+            'options.extras_require',
+            'options.data_files',
+        ):
+            return opt
 
-        Ref #1653
-        """
-        if not six.PY2:
-            return val
+        underscore_opt = opt.replace('-', '_')
+        commands = list(itertools.chain(
+            distutils.command.__all__,
+            self._setuptools_commands(),
+        ))
+        if (
+            not section.startswith('options')
+            and section != 'metadata'
+            and section not in commands
+        ):
+            return underscore_opt
+
+        if '-' in opt:
+            warnings.warn(
+                "Usage of dash-separated '%s' will not be supported in future "
+                "versions. Please use the underscore name '%s' instead"
+                % (opt, underscore_opt)
+            )
+        return underscore_opt
+
+    def _setuptools_commands(self):
         try:
-            return val.encode()
-        except UnicodeEncodeError:
-            pass
-        return val
+            return metadata.distribution('setuptools').entry_points.names
+        except metadata.PackageNotFoundError:
+            # during bootstrapping, distribution doesn't exist
+            return []
 
-    def _set_command_options(self, command_obj, option_dict=None):
+    def make_option_lowercase(self, opt, section):
+        if section != 'metadata' or opt.islower():
+            return opt
+
+        lowercase_opt = opt.lower()
+        warnings.warn(
+            "Usage of uppercase key '%s' in '%s' will be deprecated in future "
+            "versions. Please use lowercase '%s' instead"
+            % (opt, section, lowercase_opt)
+        )
+        return lowercase_opt
+
+    # FIXME: 'Distribution._set_command_options' is too complex (14)
+    def _set_command_options(self, command_obj, option_dict=None):  # noqa: C901
         """
         Set the options for 'command_obj' from 'option_dict'.  Basically
         this means copying elements of a dictionary ('option_dict') to
@@ -664,11 +814,9 @@ class Distribution(_Distribution):
             self.announce("  setting options for '%s' command:" % command_name)
         for (option, (source, value)) in option_dict.items():
             if DEBUG:
-                self.announce("    %s = %s (from %s)" % (option, value,
-                                                         source))
+                self.announce("    %s = %s (from %s)" % (option, value, source))
             try:
-                bool_opts = [translate_longopt(o)
-                             for o in command_obj.boolean_options]
+                bool_opts = [translate_longopt(o) for o in command_obj.boolean_options]
             except AttributeError:
                 bool_opts = []
             try:
@@ -677,7 +825,7 @@ class Distribution(_Distribution):
                 neg_opt = {}
 
             try:
-                is_string = isinstance(value, six.string_types)
+                is_string = isinstance(value, str)
                 if option in neg_opt and is_string:
                     setattr(command_obj, neg_opt[option], not strtobool(value))
                 elif option in bool_opts and is_string:
@@ -687,36 +835,44 @@ class Distribution(_Distribution):
                 else:
                     raise DistutilsOptionError(
                         "error in %s: command '%s' has no such option '%s'"
-                        % (source, command_name, option))
-            except ValueError as msg:
-                raise DistutilsOptionError(msg)
+                        % (source, command_name, option)
+                    )
+            except ValueError as e:
+                raise DistutilsOptionError(e) from e
+
+    def _get_project_config_files(self, filenames):
+        """Add default file and split between INI and TOML"""
+        tomlfiles = []
+        standard_project_metadata = Path(self.src_root or os.curdir, "pyproject.toml")
+        if filenames is not None:
+            parts = partition(lambda f: Path(f).suffix == ".toml", filenames)
+            filenames = list(parts[0])  # 1st element => predicate is False
+            tomlfiles = list(parts[1])  # 2nd element => predicate is True
+        elif standard_project_metadata.exists():
+            tomlfiles = [standard_project_metadata]
+        return filenames, tomlfiles
 
     def parse_config_files(self, filenames=None, ignore_option_errors=False):
         """Parses configuration files from various levels
         and loads configuration.
-
         """
-        self._parse_config_files(filenames=filenames)
+        inifiles, tomlfiles = self._get_project_config_files(filenames)
+
+        self._parse_config_files(filenames=inifiles)
+
+        setupcfg.parse_configuration(
+            self, self.command_options, ignore_option_errors=ignore_option_errors
+        )
+        for filename in tomlfiles:
+            pyprojecttoml.apply_configuration(self, filename, ignore_option_errors)
 
-        parse_configuration(self, self.command_options,
-                            ignore_option_errors=ignore_option_errors)
         self._finalize_requires()
-
-    def parse_command_line(self):
-        """Process features after parsing command line options"""
-        result = _Distribution.parse_command_line(self)
-        if self.features:
-            self._finalize_features()
-        return result
-
-    def _feature_attrname(self, name):
-        """Convert feature name to corresponding option attribute name"""
-        return 'with_' + name.replace('-', '_')
+        self._finalize_license_files()
 
     def fetch_build_eggs(self, requires):
         """Resolve pre-setup requirements"""
         resolved_dists = pkg_resources.working_set.resolve(
-            pkg_resources.parse_requirements(requires),
+            _reqs.parse(requires),
             installer=self.fetch_build_egg,
             replace_conflicting=True,
         )
@@ -735,27 +891,33 @@ class Distribution(_Distribution):
 
         def by_order(hook):
             return getattr(hook, 'order', 0)
-        eps = map(lambda e: e.load(), pkg_resources.iter_entry_points(group))
-        for ep in sorted(eps, key=by_order):
+
+        defined = metadata.entry_points(group=group)
+        filtered = itertools.filterfalse(self._removed, defined)
+        loaded = map(lambda e: e.load(), filtered)
+        for ep in sorted(loaded, key=by_order):
             ep(self)
 
+    @staticmethod
+    def _removed(ep):
+        """
+        When removing an entry point, if metadata is loaded
+        from an older version of Setuptools, that removed
+        entry point will attempt to be loaded and will fail.
+        See #2765 for more details.
+        """
+        removed = {
+            # removed 2021-09-05
+            '2to3_doctests',
+        }
+        return ep.name in removed
+
     def _finalize_setup_keywords(self):
-        for ep in pkg_resources.iter_entry_points('distutils.setup_keywords'):
+        for ep in metadata.entry_points(group='distutils.setup_keywords'):
             value = getattr(self, ep.name, None)
             if value is not None:
-                ep.require(installer=self.fetch_build_egg)
                 ep.load()(self, ep.name, value)
 
-    def _finalize_2to3_doctests(self):
-        if getattr(self, 'convert_2to3_doctests', None):
-            # XXX may convert to set here when we can rely on set being builtin
-            self.convert_2to3_doctests = [
-                os.path.abspath(p)
-                for p in self.convert_2to3_doctests
-            ]
-        else:
-            self.convert_2to3_doctests = []
-
     def get_egg_cache_dir(self):
         egg_cache_dir = os.path.join(os.curdir, '.eggs')
         if not os.path.exists(egg_cache_dir):
@@ -763,10 +925,14 @@ class Distribution(_Distribution):
             windows_support.hide_file(egg_cache_dir)
             readme_txt_filename = os.path.join(egg_cache_dir, 'README.txt')
             with open(readme_txt_filename, 'w') as f:
-                f.write('This directory contains eggs that were downloaded '
-                        'by setuptools to build, test, and run plug-ins.\n\n')
-                f.write('This directory caches those eggs to prevent '
-                        'repeated downloads.\n\n')
+                f.write(
+                    'This directory contains eggs that were downloaded '
+                    'by setuptools to build, test, and run plug-ins.\n\n'
+                )
+                f.write(
+                    'This directory caches those eggs to prevent '
+                    'repeated downloads.\n\n'
+                )
                 f.write('However, it is safe to delete this directory.\n\n')
 
         return egg_cache_dir
@@ -774,103 +940,35 @@ class Distribution(_Distribution):
     def fetch_build_egg(self, req):
         """Fetch an egg needed for building"""
         from setuptools.installer import fetch_build_egg
+
         return fetch_build_egg(self, req)
 
-    def _finalize_feature_opts(self):
-        """Add --with-X/--without-X options based on optional features"""
-
-        if not self.features:
-            return
-
-        go = []
-        no = self.negative_opt.copy()
-
-        for name, feature in self.features.items():
-            self._set_feature(name, None)
-            feature.validate(self)
-
-            if feature.optional:
-                descr = feature.description
-                incdef = ' (default)'
-                excdef = ''
-                if not feature.include_by_default():
-                    excdef, incdef = incdef, excdef
-
-                new = (
-                    ('with-' + name, None, 'include ' + descr + incdef),
-                    ('without-' + name, None, 'exclude ' + descr + excdef),
-                )
-                go.extend(new)
-                no['without-' + name] = 'with-' + name
-
-        self.global_options = self.feature_options = go + self.global_options
-        self.negative_opt = self.feature_negopt = no
-
-    def _finalize_features(self):
-        """Add/remove features and resolve dependencies between them"""
-
-        # First, flag all the enabled items (and thus their dependencies)
-        for name, feature in self.features.items():
-            enabled = self.feature_is_included(name)
-            if enabled or (enabled is None and feature.include_by_default()):
-                feature.include_in(self)
-                self._set_feature(name, 1)
-
-        # Then disable the rest, so that off-by-default features don't
-        # get flagged as errors when they're required by an enabled feature
-        for name, feature in self.features.items():
-            if not self.feature_is_included(name):
-                feature.exclude_from(self)
-                self._set_feature(name, 0)
-
     def get_command_class(self, command):
         """Pluggable version of get_command_class()"""
         if command in self.cmdclass:
             return self.cmdclass[command]
 
-        eps = pkg_resources.iter_entry_points('distutils.commands', command)
+        eps = metadata.entry_points(group='distutils.commands', name=command)
         for ep in eps:
-            ep.require(installer=self.fetch_build_egg)
             self.cmdclass[command] = cmdclass = ep.load()
             return cmdclass
         else:
             return _Distribution.get_command_class(self, command)
 
     def print_commands(self):
-        for ep in pkg_resources.iter_entry_points('distutils.commands'):
+        for ep in metadata.entry_points(group='distutils.commands'):
             if ep.name not in self.cmdclass:
-                # don't require extras as the commands won't be invoked
-                cmdclass = ep.resolve()
+                cmdclass = ep.load()
                 self.cmdclass[ep.name] = cmdclass
         return _Distribution.print_commands(self)
 
     def get_command_list(self):
-        for ep in pkg_resources.iter_entry_points('distutils.commands'):
+        for ep in metadata.entry_points(group='distutils.commands'):
             if ep.name not in self.cmdclass:
-                # don't require extras as the commands won't be invoked
-                cmdclass = ep.resolve()
+                cmdclass = ep.load()
                 self.cmdclass[ep.name] = cmdclass
         return _Distribution.get_command_list(self)
 
-    def _set_feature(self, name, status):
-        """Set feature's inclusion status"""
-        setattr(self, self._feature_attrname(name), status)
-
-    def feature_is_included(self, name):
-        """Return 1 if feature is included, 0 if excluded, 'None' if unknown"""
-        return getattr(self, self._feature_attrname(name))
-
-    def include_feature(self, name):
-        """Request inclusion of feature named 'name'"""
-
-        if self.feature_is_included(name) == 0:
-            descr = self.features[name].description
-            raise DistutilsOptionError(
-                descr + " is required, but was excluded or is not available"
-            )
-        self.features[name].include_in(self)
-        self._set_feature(name, 1)
-
     def include(self, **attrs):
         """Add items to distribution that are named in keyword arguments
 
@@ -899,19 +997,18 @@ class Distribution(_Distribution):
         pfx = package + '.'
         if self.packages:
             self.packages = [
-                p for p in self.packages
-                if p != package and not p.startswith(pfx)
+                p for p in self.packages if p != package and not p.startswith(pfx)
             ]
 
         if self.py_modules:
             self.py_modules = [
-                p for p in self.py_modules
-                if p != package and not p.startswith(pfx)
+                p for p in self.py_modules if p != package and not p.startswith(pfx)
             ]
 
         if self.ext_modules:
             self.ext_modules = [
-                p for p in self.ext_modules
+                p
+                for p in self.ext_modules
                 if p.name != package and not p.name.startswith(pfx)
             ]
 
@@ -932,10 +1029,8 @@ class Distribution(_Distribution):
             )
         try:
             old = getattr(self, name)
-        except AttributeError:
-            raise DistutilsSetupError(
-                "%s: No such distribution setting" % name
-            )
+        except AttributeError as e:
+            raise DistutilsSetupError("%s: No such distribution setting" % name) from e
         if old is not None and not isinstance(old, sequence):
             raise DistutilsSetupError(
                 name + ": this setting cannot be changed via include/exclude"
@@ -947,15 +1042,11 @@ class Distribution(_Distribution):
         """Handle 'include()' for list/tuple attrs without a special handler"""
 
         if not isinstance(value, sequence):
-            raise DistutilsSetupError(
-                "%s: setting must be a list (%r)" % (name, value)
-            )
+            raise DistutilsSetupError("%s: setting must be a list (%r)" % (name, value))
         try:
             old = getattr(self, name)
-        except AttributeError:
-            raise DistutilsSetupError(
-                "%s: No such distribution setting" % name
-            )
+        except AttributeError as e:
+            raise DistutilsSetupError("%s: No such distribution setting" % name) from e
         if old is None:
             setattr(self, name, value)
         elif not isinstance(old, sequence):
@@ -1008,6 +1099,7 @@ class Distribution(_Distribution):
             src, alias = aliases[command]
             del aliases[command]  # ensure each alias can expand only once!
             import shlex
+
             args[:1] = shlex.split(alias, True)
             command = args[0]
 
@@ -1088,7 +1180,7 @@ class Distribution(_Distribution):
         """
         import sys
 
-        if six.PY2 or self.help_commands:
+        if self.help_commands:
             return _Distribution.handle_display_options(self, option_order)
 
         # Stdout may be StringIO (e.g. in tests)
@@ -1102,171 +1194,18 @@ class Distribution(_Distribution):
 
         # Print metadata in UTF-8 no matter the platform
         encoding = sys.stdout.encoding
-        errors = sys.stdout.errors
-        newline = sys.platform != 'win32' and '\n' or None
-        line_buffering = sys.stdout.line_buffering
-
-        sys.stdout = io.TextIOWrapper(
-            sys.stdout.detach(), 'utf-8', errors, newline, line_buffering)
+        sys.stdout.reconfigure(encoding='utf-8')
         try:
             return _Distribution.handle_display_options(self, option_order)
         finally:
-            sys.stdout = io.TextIOWrapper(
-                sys.stdout.detach(), encoding, errors, newline, line_buffering)
+            sys.stdout.reconfigure(encoding=encoding)
 
+    def run_command(self, command):
+        self.set_defaults()
+        # Postpone defaults until all explicit configuration is considered
+        # (setup() args, config files, command line and plugins)
 
-class Feature:
-    """
-    **deprecated** -- The `Feature` facility was never completely implemented
-    or supported, `has reported issues
-    `_ and will be removed in
-    a future version.
-
-    A subset of the distribution that can be excluded if unneeded/wanted
-
-    Features are created using these keyword arguments:
-
-      'description' -- a short, human readable description of the feature, to
-         be used in error messages, and option help messages.
-
-      'standard' -- if true, the feature is included by default if it is
-         available on the current system.  Otherwise, the feature is only
-         included if requested via a command line '--with-X' option, or if
-         another included feature requires it.  The default setting is 'False'.
-
-      'available' -- if true, the feature is available for installation on the
-         current system.  The default setting is 'True'.
-
-      'optional' -- if true, the feature's inclusion can be controlled from the
-         command line, using the '--with-X' or '--without-X' options.  If
-         false, the feature's inclusion status is determined automatically,
-         based on 'availabile', 'standard', and whether any other feature
-         requires it.  The default setting is 'True'.
-
-      'require_features' -- a string or sequence of strings naming features
-         that should also be included if this feature is included.  Defaults to
-         empty list.  May also contain 'Require' objects that should be
-         added/removed from the distribution.
-
-      'remove' -- a string or list of strings naming packages to be removed
-         from the distribution if this feature is *not* included.  If the
-         feature *is* included, this argument is ignored.  This argument exists
-         to support removing features that "crosscut" a distribution, such as
-         defining a 'tests' feature that removes all the 'tests' subpackages
-         provided by other features.  The default for this argument is an empty
-         list.  (Note: the named package(s) or modules must exist in the base
-         distribution when the 'setup()' function is initially called.)
-
-      other keywords -- any other keyword arguments are saved, and passed to
-         the distribution's 'include()' and 'exclude()' methods when the
-         feature is included or excluded, respectively.  So, for example, you
-         could pass 'packages=["a","b"]' to cause packages 'a' and 'b' to be
-         added or removed from the distribution as appropriate.
-
-    A feature must include at least one 'requires', 'remove', or other
-    keyword argument.  Otherwise, it can't affect the distribution in any way.
-    Note also that you can subclass 'Feature' to create your own specialized
-    feature types that modify the distribution in other ways when included or
-    excluded.  See the docstrings for the various methods here for more detail.
-    Aside from the methods, the only feature attributes that distributions look
-    at are 'description' and 'optional'.
-    """
-
-    @staticmethod
-    def warn_deprecated():
-        msg = (
-            "Features are deprecated and will be removed in a future "
-            "version. See https://github.com/pypa/setuptools/issues/65."
-        )
-        warnings.warn(msg, DistDeprecationWarning, stacklevel=3)
-
-    def __init__(
-            self, description, standard=False, available=True,
-            optional=True, require_features=(), remove=(), **extras):
-        self.warn_deprecated()
-
-        self.description = description
-        self.standard = standard
-        self.available = available
-        self.optional = optional
-        if isinstance(require_features, (str, Require)):
-            require_features = require_features,
-
-        self.require_features = [
-            r for r in require_features if isinstance(r, str)
-        ]
-        er = [r for r in require_features if not isinstance(r, str)]
-        if er:
-            extras['require_features'] = er
-
-        if isinstance(remove, str):
-            remove = remove,
-        self.remove = remove
-        self.extras = extras
-
-        if not remove and not require_features and not extras:
-            raise DistutilsSetupError(
-                "Feature %s: must define 'require_features', 'remove', or "
-                "at least one of 'packages', 'py_modules', etc."
-            )
-
-    def include_by_default(self):
-        """Should this feature be included by default?"""
-        return self.available and self.standard
-
-    def include_in(self, dist):
-        """Ensure feature and its requirements are included in distribution
-
-        You may override this in a subclass to perform additional operations on
-        the distribution.  Note that this method may be called more than once
-        per feature, and so should be idempotent.
-
-        """
-
-        if not self.available:
-            raise DistutilsPlatformError(
-                self.description + " is required, "
-                "but is not available on this platform"
-            )
-
-        dist.include(**self.extras)
-
-        for f in self.require_features:
-            dist.include_feature(f)
-
-    def exclude_from(self, dist):
-        """Ensure feature is excluded from distribution
-
-        You may override this in a subclass to perform additional operations on
-        the distribution.  This method will be called at most once per
-        feature, and only after all included features have been asked to
-        include themselves.
-        """
-
-        dist.exclude(**self.extras)
-
-        if self.remove:
-            for item in self.remove:
-                dist.exclude_package(item)
-
-    def validate(self, dist):
-        """Verify that feature makes sense in context of distribution
-
-        This method is called by the distribution just before it parses its
-        command line.  It checks to ensure that the 'remove' attribute, if any,
-        contains only valid package/module names that are present in the base
-        distribution when 'setup()' is called.  You may override it in a
-        subclass to perform any other required validation of the feature
-        against a target distribution.
-        """
-
-        for item in self.remove:
-            if not dist.has_contents_for(item):
-                raise DistutilsSetupError(
-                    "%s wants to be able to remove %s, but the distribution"
-                    " doesn't contain any packages or modules under %s"
-                    % (self.description, item, item)
-                )
+        super().run_command(command)
 
 
 class DistDeprecationWarning(SetuptoolsDeprecationWarning):
diff --git a/libs/common/setuptools/errors.py b/libs/common/setuptools/errors.py
index 2701747f..ec7fb3b6 100644
--- a/libs/common/setuptools/errors.py
+++ b/libs/common/setuptools/errors.py
@@ -3,10 +3,33 @@
 Provides exceptions used by setuptools modules.
 """
 
-from distutils.errors import DistutilsError
+from distutils import errors as _distutils_errors
 
 
-class RemovedCommandError(DistutilsError, RuntimeError):
+# Re-export errors from distutils to facilitate the migration to PEP632
+
+ByteCompileError = _distutils_errors.DistutilsByteCompileError
+CCompilerError = _distutils_errors.CCompilerError
+ClassError = _distutils_errors.DistutilsClassError
+CompileError = _distutils_errors.CompileError
+ExecError = _distutils_errors.DistutilsExecError
+FileError = _distutils_errors.DistutilsFileError
+InternalError = _distutils_errors.DistutilsInternalError
+LibError = _distutils_errors.LibError
+LinkError = _distutils_errors.LinkError
+ModuleError = _distutils_errors.DistutilsModuleError
+OptionError = _distutils_errors.DistutilsOptionError
+PlatformError = _distutils_errors.DistutilsPlatformError
+PreprocessError = _distutils_errors.PreprocessError
+SetupError = _distutils_errors.DistutilsSetupError
+TemplateError = _distutils_errors.DistutilsTemplateError
+UnknownFileError = _distutils_errors.UnknownFileError
+
+# The root error class in the hierarchy
+BaseError = _distutils_errors.DistutilsError
+
+
+class RemovedCommandError(BaseError, RuntimeError):
     """Error used for commands that have been removed in setuptools.
 
     Since ``setuptools`` is built on ``distutils``, simply removing a command
@@ -14,3 +37,22 @@ class RemovedCommandError(DistutilsError, RuntimeError):
     error is raised if a command exists in ``distutils`` but has been actively
     removed in ``setuptools``.
     """
+
+
+class PackageDiscoveryError(BaseError, RuntimeError):
+    """Impossible to perform automatic discovery of packages and/or modules.
+
+    The current project layout or given discovery options can lead to problems when
+    scanning the project directory.
+
+    Setuptools might also refuse to complete auto-discovery if an error prone condition
+    is detected (e.g. when a project is organised as a flat-layout but contains
+    multiple directories that can be taken as top-level packages inside a single
+    distribution [*]_). In these situations the users are encouraged to be explicit
+    about which packages to include or to make the discovery parameters more specific.
+
+    .. [*] Since multi-package distributions are uncommon it is very likely that the
+       developers did not intend for all the directories to be packaged, and are just
+       leaving auxiliary code in the repository top-level, such as maintenance-related
+       scripts.
+    """
diff --git a/libs/common/setuptools/extension.py b/libs/common/setuptools/extension.py
index 29468894..58c023f6 100644
--- a/libs/common/setuptools/extension.py
+++ b/libs/common/setuptools/extension.py
@@ -4,8 +4,6 @@ import distutils.core
 import distutils.errors
 import distutils.extension
 
-from setuptools.extern.six.moves import map
-
 from .monkey import get_unpatched
 
 
@@ -30,13 +28,106 @@ _Extension = get_unpatched(distutils.core.Extension)
 
 
 class Extension(_Extension):
-    """Extension that uses '.c' files in place of '.pyx' files"""
+    """
+    Describes a single extension module.
+
+    This means that all source files will be compiled into a single binary file
+    ``.`` (with ```` derived from ``name`` and
+    ```` defined by one of the values in
+    ``importlib.machinery.EXTENSION_SUFFIXES``).
+
+    In the case ``.pyx`` files are passed as ``sources and`` ``Cython`` is **not**
+    installed in the build environment, ``setuptools`` may also try to look for the
+    equivalent ``.cpp`` or ``.c`` files.
+
+    :arg str name:
+      the full name of the extension, including any packages -- ie.
+      *not* a filename or pathname, but Python dotted name
+
+    :arg list[str] sources:
+      list of source filenames, relative to the distribution root
+      (where the setup script lives), in Unix form (slash-separated)
+      for portability.  Source files may be C, C++, SWIG (.i),
+      platform-specific resource files, or whatever else is recognized
+      by the "build_ext" command as source for a Python extension.
+
+    :keyword list[str] include_dirs:
+      list of directories to search for C/C++ header files (in Unix
+      form for portability)
+
+    :keyword list[tuple[str, str|None]] define_macros:
+      list of macros to define; each macro is defined using a 2-tuple:
+      the first item corresponding to the name of the macro and the second
+      item either a string with its value or None to
+      define it without a particular value (equivalent of "#define
+      FOO" in source or -DFOO on Unix C compiler command line)
+
+    :keyword list[str] undef_macros:
+      list of macros to undefine explicitly
+
+    :keyword list[str] library_dirs:
+      list of directories to search for C/C++ libraries at link time
+
+    :keyword list[str] libraries:
+      list of library names (not filenames or paths) to link against
+
+    :keyword list[str] runtime_library_dirs:
+      list of directories to search for C/C++ libraries at run time
+      (for shared extensions, this is when the extension is loaded).
+      Setting this will cause an exception during build on Windows
+      platforms.
+
+    :keyword list[str] extra_objects:
+      list of extra files to link with (eg. object files not implied
+      by 'sources', static library that must be explicitly specified,
+      binary resource files, etc.)
+
+    :keyword list[str] extra_compile_args:
+      any extra platform- and compiler-specific information to use
+      when compiling the source files in 'sources'.  For platforms and
+      compilers where "command line" makes sense, this is typically a
+      list of command-line arguments, but for other platforms it could
+      be anything.
+
+    :keyword list[str] extra_link_args:
+      any extra platform- and compiler-specific information to use
+      when linking object files together to create the extension (or
+      to create a new static Python interpreter).  Similar
+      interpretation as for 'extra_compile_args'.
+
+    :keyword list[str] export_symbols:
+      list of symbols to be exported from a shared extension.  Not
+      used on all platforms, and not generally necessary for Python
+      extensions, which typically export exactly one symbol: "init" +
+      extension_name.
+
+    :keyword list[str] swig_opts:
+      any extra options to pass to SWIG if a source file has the .i
+      extension.
+
+    :keyword list[str] depends:
+      list of files that the extension depends on
+
+    :keyword str language:
+      extension language (i.e. "c", "c++", "objc"). Will be detected
+      from the source extensions if not provided.
+
+    :keyword bool optional:
+      specifies that a build failure in the extension should not abort the
+      build process, but simply not install the failing extension.
+
+    :keyword bool py_limited_api:
+      opt-in flag for the usage of :doc:`Python's limited API `.
+
+    :raises setuptools.errors.PlatformError: if 'runtime_library_dirs' is
+      specified on Windows. (since v63)
+    """
 
     def __init__(self, name, sources, *args, **kw):
         # The *args is needed for compatibility as calls may use positional
         # arguments. py_limited_api may be set only via keyword.
         self.py_limited_api = kw.pop("py_limited_api", False)
-        _Extension.__init__(self, name, sources, *args, **kw)
+        super().__init__(name, sources, *args, **kw)
 
     def _convert_pyx_sources_to_lang(self):
         """
diff --git a/libs/common/setuptools/extern/__init__.py b/libs/common/setuptools/extern/__init__.py
index e8c616f9..d3a6dc99 100644
--- a/libs/common/setuptools/extern/__init__.py
+++ b/libs/common/setuptools/extern/__init__.py
@@ -1,3 +1,4 @@
+import importlib.util
 import sys
 
 
@@ -20,17 +21,10 @@ class VendorImporter:
         yield self.vendor_pkg + '.'
         yield ''
 
-    def find_module(self, fullname, path=None):
-        """
-        Return self when fullname starts with root_name and the
-        target module is one vendored through this importer.
-        """
+    def _module_matches_namespace(self, fullname):
+        """Figure out if the target module is vendored."""
         root, base, target = fullname.partition(self.root_name + '.')
-        if root:
-            return
-        if not any(map(target.startswith, self.vendored_names)):
-            return
-        return self
+        return not root and any(map(target.startswith, self.vendored_names))
 
     def load_module(self, fullname):
         """
@@ -43,13 +37,6 @@ class VendorImporter:
                 __import__(extant)
                 mod = sys.modules[extant]
                 sys.modules[fullname] = mod
-                # mysterious hack:
-                # Remove the reference to the extant package/module
-                # on later Python versions to cause relative imports
-                # in the vendor package to resolve the same modules
-                # as those going through this importer.
-                if sys.version_info >= (3, ):
-                    del sys.modules[extant]
                 return mod
             except ImportError:
                 pass
@@ -61,6 +48,19 @@ class VendorImporter:
                 "distribution.".format(**locals())
             )
 
+    def create_module(self, spec):
+        return self.load_module(spec.name)
+
+    def exec_module(self, module):
+        pass
+
+    def find_spec(self, fullname, path=None, target=None):
+        """Return a module spec for vendored names."""
+        return (
+            importlib.util.spec_from_loader(fullname, self)
+            if self._module_matches_namespace(fullname) else None
+        )
+
     def install(self):
         """
         Install this importer into sys.meta_path if not already present.
@@ -69,5 +69,8 @@ class VendorImporter:
             sys.meta_path.append(self)
 
 
-names = 'six', 'packaging', 'pyparsing', 'ordered_set',
+names = (
+    'packaging', 'pyparsing', 'ordered_set', 'more_itertools', 'importlib_metadata',
+    'zipp', 'importlib_resources', 'jaraco', 'typing_extensions', 'tomli',
+)
 VendorImporter(__name__, names, 'setuptools._vendor').install()
diff --git a/libs/common/setuptools/glob.py b/libs/common/setuptools/glob.py
index 9d7cbc5d..87062b81 100644
--- a/libs/common/setuptools/glob.py
+++ b/libs/common/setuptools/glob.py
@@ -47,6 +47,8 @@ def iglob(pathname, recursive=False):
 
 def _iglob(pathname, recursive):
     dirname, basename = os.path.split(pathname)
+    glob_in_dir = glob2 if recursive and _isrecursive(basename) else glob1
+
     if not has_magic(pathname):
         if basename:
             if os.path.lexists(pathname):
@@ -56,13 +58,9 @@ def _iglob(pathname, recursive):
             if os.path.isdir(dirname):
                 yield pathname
         return
+
     if not dirname:
-        if recursive and _isrecursive(basename):
-            for x in glob2(dirname, basename):
-                yield x
-        else:
-            for x in glob1(dirname, basename):
-                yield x
+        yield from glob_in_dir(dirname, basename)
         return
     # `os.path.split()` returns the argument itself as a dirname if it is a
     # drive or UNC path.  Prevent an infinite recursion if a drive or UNC path
@@ -71,12 +69,7 @@ def _iglob(pathname, recursive):
         dirs = _iglob(dirname, recursive)
     else:
         dirs = [dirname]
-    if has_magic(basename):
-        if recursive and _isrecursive(basename):
-            glob_in_dir = glob2
-        else:
-            glob_in_dir = glob1
-    else:
+    if not has_magic(basename):
         glob_in_dir = glob0
     for dirname in dirs:
         for name in glob_in_dir(dirname, basename):
diff --git a/libs/common/setuptools/gui-arm64.exe b/libs/common/setuptools/gui-arm64.exe
new file mode 100644
index 00000000..5730f11d
Binary files /dev/null and b/libs/common/setuptools/gui-arm64.exe differ
diff --git a/libs/common/setuptools/installer.py b/libs/common/setuptools/installer.py
index 9f8be2ef..b7096df1 100644
--- a/libs/common/setuptools/installer.py
+++ b/libs/common/setuptools/installer.py
@@ -2,73 +2,34 @@ import glob
 import os
 import subprocess
 import sys
+import tempfile
+import warnings
 from distutils import log
 from distutils.errors import DistutilsError
 
 import pkg_resources
-from setuptools.command.easy_install import easy_install
-from setuptools.extern import six
 from setuptools.wheel import Wheel
-
-from .py31compat import TemporaryDirectory
+from ._deprecation_warning import SetuptoolsDeprecationWarning
 
 
 def _fixup_find_links(find_links):
     """Ensure find-links option end-up being a list of strings."""
-    if isinstance(find_links, six.string_types):
+    if isinstance(find_links, str):
         return find_links.split()
     assert isinstance(find_links, (tuple, list))
     return find_links
 
 
-def _legacy_fetch_build_egg(dist, req):
-    """Fetch an egg needed for building.
-
-    Legacy path using EasyInstall.
-    """
-    tmp_dist = dist.__class__({'script_args': ['easy_install']})
-    opts = tmp_dist.get_option_dict('easy_install')
-    opts.clear()
-    opts.update(
-        (k, v)
-        for k, v in dist.get_option_dict('easy_install').items()
-        if k in (
-            # don't use any other settings
-            'find_links', 'site_dirs', 'index_url',
-            'optimize', 'site_dirs', 'allow_hosts',
-        ))
-    if dist.dependency_links:
-        links = dist.dependency_links[:]
-        if 'find_links' in opts:
-            links = _fixup_find_links(opts['find_links'][1]) + links
-        opts['find_links'] = ('setup', links)
-    install_dir = dist.get_egg_cache_dir()
-    cmd = easy_install(
-        tmp_dist, args=["x"], install_dir=install_dir,
-        exclude_scripts=True,
-        always_copy=False, build_directory=None, editable=False,
-        upgrade=False, multi_version=True, no_report=True, user=False
-    )
-    cmd.ensure_finalized()
-    return cmd.easy_install(req)
-
-
-def fetch_build_egg(dist, req):
+def fetch_build_egg(dist, req):  # noqa: C901  # is too complex (16)  # FIXME
     """Fetch an egg needed for building.
 
     Use pip/wheel to fetch/build a wheel."""
-    # Check pip is available.
-    try:
-        pkg_resources.get_distribution('pip')
-    except pkg_resources.DistributionNotFound:
-        dist.announce(
-            'WARNING: The pip package is not available, falling back '
-            'to EasyInstall for handling setup_requires/test_requires; '
-            'this is deprecated and will be removed in a future version.'
-            , log.WARN
-        )
-        return _legacy_fetch_build_egg(dist, req)
-    # Warn if wheel is not.
+    warnings.warn(
+        "setuptools.installer is deprecated. Requirements should "
+        "be satisfied by a PEP 517 installer.",
+        SetuptoolsDeprecationWarning,
+    )
+    # Warn if wheel is not available
     try:
         pkg_resources.get_distribution('wheel')
     except pkg_resources.DistributionNotFound:
@@ -82,20 +43,17 @@ def fetch_build_egg(dist, req):
     if 'allow_hosts' in opts:
         raise DistutilsError('the `allow-hosts` option is not supported '
                              'when using pip to install requirements.')
-    if 'PIP_QUIET' in os.environ or 'PIP_VERBOSE' in os.environ:
-        quiet = False
-    else:
-        quiet = True
+    quiet = 'PIP_QUIET' not in os.environ and 'PIP_VERBOSE' not in os.environ
     if 'PIP_INDEX_URL' in os.environ:
         index_url = None
     elif 'index_url' in opts:
         index_url = opts['index_url'][1]
     else:
         index_url = None
-    if 'find_links' in opts:
-        find_links = _fixup_find_links(opts['find_links'][1])[:]
-    else:
-        find_links = []
+    find_links = (
+        _fixup_find_links(opts['find_links'][1])[:] if 'find_links' in opts
+        else []
+    )
     if dist.dependency_links:
         find_links.extend(dist.dependency_links)
     eggs_dir = os.path.realpath(dist.get_egg_cache_dir())
@@ -103,7 +61,7 @@ def fetch_build_egg(dist, req):
     for egg_dist in pkg_resources.find_distributions(eggs_dir):
         if egg_dist in req and environment.can_add(egg_dist):
             return egg_dist
-    with TemporaryDirectory() as tmpdir:
+    with tempfile.TemporaryDirectory() as tmpdir:
         cmd = [
             sys.executable, '-m', 'pip',
             '--disable-pip-version-check',
@@ -114,20 +72,16 @@ def fetch_build_egg(dist, req):
             cmd.append('--quiet')
         if index_url is not None:
             cmd.extend(('--index-url', index_url))
-        if find_links is not None:
-            for link in find_links:
-                cmd.extend(('--find-links', link))
+        for link in find_links or []:
+            cmd.extend(('--find-links', link))
         # If requirement is a PEP 508 direct URL, directly pass
         # the URL to pip, as `req @ url` does not work on the
         # command line.
-        if req.url:
-            cmd.append(req.url)
-        else:
-            cmd.append(str(req))
+        cmd.append(req.url or str(req))
         try:
             subprocess.check_call(cmd)
         except subprocess.CalledProcessError as e:
-            raise DistutilsError(str(e))
+            raise DistutilsError(str(e)) from e
         wheel = Wheel(glob.glob(os.path.join(tmpdir, '*.whl'))[0])
         dist_location = os.path.join(eggs_dir, wheel.egg_name())
         wheel.install_as_egg(dist_location)
diff --git a/libs/common/setuptools/launch.py b/libs/common/setuptools/launch.py
index 308283ea..0208fdf3 100644
--- a/libs/common/setuptools/launch.py
+++ b/libs/common/setuptools/launch.py
@@ -25,7 +25,8 @@ def run():
     sys.argv[:] = sys.argv[1:]
 
     open_ = getattr(tokenize, 'open', open)
-    script = open_(script_name).read()
+    with open_(script_name) as fid:
+        script = fid.read()
     norm_script = script.replace('\\r\\n', '\\n')
     code = compile(norm_script, script_name, 'exec')
     exec(code, namespace)
diff --git a/libs/common/setuptools/lib2to3_ex.py b/libs/common/setuptools/lib2to3_ex.py
deleted file mode 100644
index 4b1a73fe..00000000
--- a/libs/common/setuptools/lib2to3_ex.py
+++ /dev/null
@@ -1,62 +0,0 @@
-"""
-Customized Mixin2to3 support:
-
- - adds support for converting doctests
-
-
-This module raises an ImportError on Python 2.
-"""
-
-from distutils.util import Mixin2to3 as _Mixin2to3
-from distutils import log
-from lib2to3.refactor import RefactoringTool, get_fixers_from_package
-
-import setuptools
-
-
-class DistutilsRefactoringTool(RefactoringTool):
-    def log_error(self, msg, *args, **kw):
-        log.error(msg, *args)
-
-    def log_message(self, msg, *args):
-        log.info(msg, *args)
-
-    def log_debug(self, msg, *args):
-        log.debug(msg, *args)
-
-
-class Mixin2to3(_Mixin2to3):
-    def run_2to3(self, files, doctests=False):
-        # See of the distribution option has been set, otherwise check the
-        # setuptools default.
-        if self.distribution.use_2to3 is not True:
-            return
-        if not files:
-            return
-        log.info("Fixing " + " ".join(files))
-        self.__build_fixer_names()
-        self.__exclude_fixers()
-        if doctests:
-            if setuptools.run_2to3_on_doctests:
-                r = DistutilsRefactoringTool(self.fixer_names)
-                r.refactor(files, write=True, doctests_only=True)
-        else:
-            _Mixin2to3.run_2to3(self, files)
-
-    def __build_fixer_names(self):
-        if self.fixer_names:
-            return
-        self.fixer_names = []
-        for p in setuptools.lib2to3_fixer_packages:
-            self.fixer_names.extend(get_fixers_from_package(p))
-        if self.distribution.use_2to3_fixers is not None:
-            for p in self.distribution.use_2to3_fixers:
-                self.fixer_names.extend(get_fixers_from_package(p))
-
-    def __exclude_fixers(self):
-        excluded_fixers = getattr(self, 'exclude_fixers', [])
-        if self.distribution.use_2to3_exclude_fixers is not None:
-            excluded_fixers.extend(self.distribution.use_2to3_exclude_fixers)
-        for fixer_name in excluded_fixers:
-            if fixer_name in self.fixer_names:
-                self.fixer_names.remove(fixer_name)
diff --git a/libs/common/setuptools/logging.py b/libs/common/setuptools/logging.py
new file mode 100644
index 00000000..0653878f
--- /dev/null
+++ b/libs/common/setuptools/logging.py
@@ -0,0 +1,37 @@
+import sys
+import inspect
+import logging
+import distutils.log
+from . import monkey
+
+
+def _not_warning(record):
+    return record.levelno < logging.WARNING
+
+
+def configure():
+    """
+    Configure logging to emit warning and above to stderr
+    and everything else to stdout. This behavior is provided
+    for compatibility with distutils.log but may change in
+    the future.
+    """
+    err_handler = logging.StreamHandler()
+    err_handler.setLevel(logging.WARNING)
+    out_handler = logging.StreamHandler(sys.stdout)
+    out_handler.addFilter(_not_warning)
+    handlers = err_handler, out_handler
+    logging.basicConfig(
+        format="{message}", style='{', handlers=handlers, level=logging.DEBUG)
+    if inspect.ismodule(distutils.dist.log):
+        monkey.patch_func(set_threshold, distutils.log, 'set_threshold')
+        # For some reason `distutils.log` module is getting cached in `distutils.dist`
+        # and then loaded again when patched,
+        # implying: id(distutils.log) != id(distutils.dist.log).
+        # Make sure the same module object is used everywhere:
+        distutils.dist.log = distutils.log
+
+
+def set_threshold(level):
+    logging.root.setLevel(level*10)
+    return set_threshold.unpatched(level)
diff --git a/libs/common/setuptools/monkey.py b/libs/common/setuptools/monkey.py
index 3c77f8cf..77a7adcf 100644
--- a/libs/common/setuptools/monkey.py
+++ b/libs/common/setuptools/monkey.py
@@ -10,8 +10,6 @@ import functools
 from importlib import import_module
 import inspect
 
-from setuptools.extern import six
-
 import setuptools
 
 __all__ = []
@@ -37,7 +35,7 @@ def _get_mro(cls):
 
 def get_unpatched(item):
     lookup = (
-        get_unpatched_class if isinstance(item, six.class_types) else
+        get_unpatched_class if isinstance(item, type) else
         get_unpatched_function if isinstance(item, types.FunctionType) else
         lambda item: None
     )
@@ -73,8 +71,6 @@ def patch_all():
         distutils.filelist.findall = setuptools.findall
 
     needs_warehouse = (
-        sys.version_info < (2, 7, 13)
-        or
         (3, 4) < sys.version_info < (3, 4, 6)
         or
         (3, 5) < sys.version_info <= (3, 5, 3)
@@ -138,14 +134,14 @@ def patch_for_msvc_specialized_compiler():
     msvc = import_module('setuptools.msvc')
 
     if platform.system() != 'Windows':
-        # Compilers only availables on Microsoft Windows
+        # Compilers only available on Microsoft Windows
         return
 
     def patch_params(mod_name, func_name):
         """
         Prepare the parameters for patch_func to patch indicated function.
         """
-        repl_prefix = 'msvc9_' if 'msvc9' in mod_name else 'msvc14_'
+        repl_prefix = 'msvc14_'
         repl_name = repl_prefix + func_name.lstrip('_')
         repl = getattr(msvc, repl_name)
         mod = import_module(mod_name)
@@ -153,19 +149,9 @@ def patch_for_msvc_specialized_compiler():
             raise ImportError(func_name)
         return repl, mod, func_name
 
-    # Python 2.7 to 3.4
-    msvc9 = functools.partial(patch_params, 'distutils.msvc9compiler')
-
     # Python 3.5+
     msvc14 = functools.partial(patch_params, 'distutils._msvccompiler')
 
-    try:
-        # Patch distutils.msvc9compiler
-        patch_func(*msvc9('find_vcvarsall'))
-        patch_func(*msvc9('query_vcvarsall'))
-    except ImportError:
-        pass
-
     try:
         # Patch distutils._msvccompiler._get_vc_env
         patch_func(*msvc14('_get_vc_env'))
diff --git a/libs/common/setuptools/msvc.py b/libs/common/setuptools/msvc.py
index 2ffe1c81..5d4d7759 100644
--- a/libs/common/setuptools/msvc.py
+++ b/libs/common/setuptools/msvc.py
@@ -3,14 +3,6 @@ Improved support for Microsoft Visual C++ compilers.
 
 Known supported compilers:
 --------------------------
-Microsoft Visual C++ 9.0:
-    Microsoft Visual C++ Compiler for Python 2.7 (x86, amd64)
-    Microsoft Windows SDK 6.1 (x86, x64, ia64)
-    Microsoft Windows SDK 7.0 (x86, x64, ia64)
-
-Microsoft Visual C++ 10.0:
-    Microsoft Windows SDK 7.1 (x86, x64, ia64)
-
 Microsoft Visual C++ 14.X:
     Microsoft Visual C++ Build Tools 2015 (x86, x64, arm)
     Microsoft Visual Studio Build Tools 2017 (x86, x64, arm, arm64)
@@ -24,17 +16,18 @@ from io import open
 from os import listdir, pathsep
 from os.path import join, isfile, isdir, dirname
 import sys
+import contextlib
 import platform
 import itertools
+import subprocess
 import distutils.errors
 from setuptools.extern.packaging.version import LegacyVersion
-
-from setuptools.extern.six.moves import filterfalse
+from setuptools.extern.more_itertools import unique_everseen
 
 from .monkey import get_unpatched
 
 if platform.system() == 'Windows':
-    from setuptools.extern.six.moves import winreg
+    import winreg
     from os import environ
 else:
     # Mock winreg and environ so the module can be imported on this platform.
@@ -47,99 +40,155 @@ else:
 
     environ = dict()
 
-_msvc9_suppress_errors = (
-    # msvc9compiler isn't available on some platforms
-    ImportError,
 
-    # msvc9compiler raises DistutilsPlatformError in some
-    # environments. See #1118.
-    distutils.errors.DistutilsPlatformError,
-)
-
-try:
-    from distutils.msvc9compiler import Reg
-except _msvc9_suppress_errors:
-    pass
-
-
-def msvc9_find_vcvarsall(version):
-    """
-    Patched "distutils.msvc9compiler.find_vcvarsall" to use the standalone
-    compiler build for Python
-    (VCForPython / Microsoft Visual C++ Compiler for Python 2.7).
-
-    Fall back to original behavior when the standalone compiler is not
-    available.
-
-    Redirect the path of "vcvarsall.bat".
-
-    Parameters
-    ----------
-    version: float
-        Required Microsoft Visual C++ version.
-
-    Return
-    ------
-    str
-        vcvarsall.bat path
-    """
-    vc_base = r'Software\%sMicrosoft\DevDiv\VCForPython\%0.1f'
-    key = vc_base % ('', version)
+def _msvc14_find_vc2015():
+    """Python 3.8 "distutils/_msvccompiler.py" backport"""
     try:
-        # Per-user installs register the compiler path here
-        productdir = Reg.get_value(key, "installdir")
-    except KeyError:
+        key = winreg.OpenKey(
+            winreg.HKEY_LOCAL_MACHINE,
+            r"Software\Microsoft\VisualStudio\SxS\VC7",
+            0,
+            winreg.KEY_READ | winreg.KEY_WOW64_32KEY
+        )
+    except OSError:
+        return None, None
+
+    best_version = 0
+    best_dir = None
+    with key:
+        for i in itertools.count():
+            try:
+                v, vc_dir, vt = winreg.EnumValue(key, i)
+            except OSError:
+                break
+            if v and vt == winreg.REG_SZ and isdir(vc_dir):
+                try:
+                    version = int(float(v))
+                except (ValueError, TypeError):
+                    continue
+                if version >= 14 and version > best_version:
+                    best_version, best_dir = version, vc_dir
+    return best_version, best_dir
+
+
+def _msvc14_find_vc2017():
+    """Python 3.8 "distutils/_msvccompiler.py" backport
+
+    Returns "15, path" based on the result of invoking vswhere.exe
+    If no install is found, returns "None, None"
+
+    The version is returned to avoid unnecessarily changing the function
+    result. It may be ignored when the path is not None.
+
+    If vswhere.exe is not available, by definition, VS 2017 is not
+    installed.
+    """
+    root = environ.get("ProgramFiles(x86)") or environ.get("ProgramFiles")
+    if not root:
+        return None, None
+
+    try:
+        path = subprocess.check_output([
+            join(root, "Microsoft Visual Studio", "Installer", "vswhere.exe"),
+            "-latest",
+            "-prerelease",
+            "-requiresAny",
+            "-requires", "Microsoft.VisualStudio.Component.VC.Tools.x86.x64",
+            "-requires", "Microsoft.VisualStudio.Workload.WDExpress",
+            "-property", "installationPath",
+            "-products", "*",
+        ]).decode(encoding="mbcs", errors="strict").strip()
+    except (subprocess.CalledProcessError, OSError, UnicodeDecodeError):
+        return None, None
+
+    path = join(path, "VC", "Auxiliary", "Build")
+    if isdir(path):
+        return 15, path
+
+    return None, None
+
+
+PLAT_SPEC_TO_RUNTIME = {
+    'x86': 'x86',
+    'x86_amd64': 'x64',
+    'x86_arm': 'arm',
+    'x86_arm64': 'arm64'
+}
+
+
+def _msvc14_find_vcvarsall(plat_spec):
+    """Python 3.8 "distutils/_msvccompiler.py" backport"""
+    _, best_dir = _msvc14_find_vc2017()
+    vcruntime = None
+
+    if plat_spec in PLAT_SPEC_TO_RUNTIME:
+        vcruntime_plat = PLAT_SPEC_TO_RUNTIME[plat_spec]
+    else:
+        vcruntime_plat = 'x64' if 'amd64' in plat_spec else 'x86'
+
+    if best_dir:
+        vcredist = join(best_dir, "..", "..", "redist", "MSVC", "**",
+                        vcruntime_plat, "Microsoft.VC14*.CRT",
+                        "vcruntime140.dll")
         try:
-            # All-user installs on a 64-bit system register here
-            key = vc_base % ('Wow6432Node\\', version)
-            productdir = Reg.get_value(key, "installdir")
-        except KeyError:
-            productdir = None
+            import glob
+            vcruntime = glob.glob(vcredist, recursive=True)[-1]
+        except (ImportError, OSError, LookupError):
+            vcruntime = None
 
-    if productdir:
-        vcvarsall = join(productdir, "vcvarsall.bat")
-        if isfile(vcvarsall):
-            return vcvarsall
+    if not best_dir:
+        best_version, best_dir = _msvc14_find_vc2015()
+        if best_version:
+            vcruntime = join(best_dir, 'redist', vcruntime_plat,
+                             "Microsoft.VC140.CRT", "vcruntime140.dll")
 
-    return get_unpatched(msvc9_find_vcvarsall)(version)
+    if not best_dir:
+        return None, None
+
+    vcvarsall = join(best_dir, "vcvarsall.bat")
+    if not isfile(vcvarsall):
+        return None, None
+
+    if not vcruntime or not isfile(vcruntime):
+        vcruntime = None
+
+    return vcvarsall, vcruntime
 
 
-def msvc9_query_vcvarsall(ver, arch='x86', *args, **kwargs):
-    """
-    Patched "distutils.msvc9compiler.query_vcvarsall" for support extra
-    Microsoft Visual C++ 9.0 and 10.0 compilers.
+def _msvc14_get_vc_env(plat_spec):
+    """Python 3.8 "distutils/_msvccompiler.py" backport"""
+    if "DISTUTILS_USE_SDK" in environ:
+        return {
+            key.lower(): value
+            for key, value in environ.items()
+        }
 
-    Set environment without use of "vcvarsall.bat".
+    vcvarsall, vcruntime = _msvc14_find_vcvarsall(plat_spec)
+    if not vcvarsall:
+        raise distutils.errors.DistutilsPlatformError(
+            "Unable to find vcvarsall.bat"
+        )
 
-    Parameters
-    ----------
-    ver: float
-        Required Microsoft Visual C++ version.
-    arch: str
-        Target architecture.
-
-    Return
-    ------
-    dict
-        environment
-    """
-    # Try to get environment from vcvarsall.bat (Classical way)
     try:
-        orig = get_unpatched(msvc9_query_vcvarsall)
-        return orig(ver, arch, *args, **kwargs)
-    except distutils.errors.DistutilsPlatformError:
-        # Pass error if Vcvarsall.bat is missing
-        pass
-    except ValueError:
-        # Pass error if environment not set after executing vcvarsall.bat
-        pass
+        out = subprocess.check_output(
+            'cmd /u /c "{}" {} && set'.format(vcvarsall, plat_spec),
+            stderr=subprocess.STDOUT,
+        ).decode('utf-16le', errors='replace')
+    except subprocess.CalledProcessError as exc:
+        raise distutils.errors.DistutilsPlatformError(
+            "Error executing {}".format(exc.cmd)
+        ) from exc
 
-    # If error, try to set environment directly
-    try:
-        return EnvironmentInfo(arch, ver).return_env()
-    except distutils.errors.DistutilsPlatformError as exc:
-        _augment_exception(exc, ver, arch)
-        raise
+    env = {
+        key.lower(): value
+        for key, _, value in
+        (line.partition('=') for line in out.splitlines())
+        if key and value
+    }
+
+    if vcruntime:
+        env['py_vcruntime_redist'] = vcruntime
+    return env
 
 
 def msvc14_get_vc_env(plat_spec):
@@ -159,16 +208,10 @@ def msvc14_get_vc_env(plat_spec):
     dict
         environment
     """
-    # Try to get environment from vcvarsall.bat (Classical way)
-    try:
-        return get_unpatched(msvc14_get_vc_env)(plat_spec)
-    except distutils.errors.DistutilsPlatformError:
-        # Pass error Vcvarsall.bat is missing
-        pass
 
-    # If error, try to set environment directly
+    # Always use backport from CPython 3.8
     try:
-        return EnvironmentInfo(plat_spec, vc_min_ver=14.0).return_env()
+        return _msvc14_get_vc_env(plat_spec)
     except distutils.errors.DistutilsPlatformError as exc:
         _augment_exception(exc, 14.0)
         raise
@@ -197,7 +240,7 @@ def _augment_exception(exc, version, arch=''):
 
     if "vcvarsall" in message.lower() or "visual c" in message.lower():
         # Special error message if MSVC++ not installed
-        tmpl = 'Microsoft Visual C++ {version:0.1f} is required.'
+        tmpl = 'Microsoft Visual C++ {version:0.1f} or greater is required.'
         message = tmpl.format(**locals())
         msdownload = 'www.microsoft.com/download/details.aspx?id=%d'
         if version == 9.0:
@@ -217,8 +260,9 @@ def _augment_exception(exc, version, arch=''):
             message += msdownload % 8279
         elif version >= 14.0:
             # For VC++ 14.X Redirect user to latest Visual C++ Build Tools
-            message += (' Get it with "Build Tools for Visual Studio": '
-                        r'https://visualstudio.microsoft.com/downloads/')
+            message += (' Get it with "Microsoft C++ Build Tools": '
+                        r'https://visualstudio.microsoft.com'
+                        r'/visual-cpp-build-tools/')
 
     exc.args = (message, )
 
@@ -500,8 +544,10 @@ class RegistryInfo:
         """
         key_read = winreg.KEY_READ
         openkey = winreg.OpenKey
+        closekey = winreg.CloseKey
         ms = self.microsoft
         for hkey in self.HKEYS:
+            bkey = None
             try:
                 bkey = openkey(hkey, ms(key), 0, key_read)
             except (OSError, IOError):
@@ -516,6 +562,9 @@ class RegistryInfo:
                 return winreg.QueryValueEx(bkey, name)[0]
             except (OSError, IOError):
                 pass
+            finally:
+                if bkey:
+                    closekey(bkey)
 
 
 class SystemInfo:
@@ -544,7 +593,7 @@ class SystemInfo:
 
         # Except for VS15+, VC version is aligned with VS version
         self.vs_ver = self.vc_ver = (
-                vc_ver or self._find_latest_available_vs_ver())
+            vc_ver or self._find_latest_available_vs_ver())
 
     def _find_latest_available_vs_ver(self):
         """
@@ -577,27 +626,23 @@ class SystemInfo:
         ms = self.ri.microsoft
         vckeys = (self.ri.vc, self.ri.vc_for_python, self.ri.vs)
         vs_vers = []
-        for hkey in self.ri.HKEYS:
-            for key in vckeys:
-                try:
-                    bkey = winreg.OpenKey(hkey, ms(key), 0, winreg.KEY_READ)
-                except (OSError, IOError):
-                    continue
+        for hkey, key in itertools.product(self.ri.HKEYS, vckeys):
+            try:
+                bkey = winreg.OpenKey(hkey, ms(key), 0, winreg.KEY_READ)
+            except (OSError, IOError):
+                continue
+            with bkey:
                 subkeys, values, _ = winreg.QueryInfoKey(bkey)
                 for i in range(values):
-                    try:
+                    with contextlib.suppress(ValueError):
                         ver = float(winreg.EnumValue(bkey, i)[0])
                         if ver not in vs_vers:
                             vs_vers.append(ver)
-                    except ValueError:
-                        pass
                 for i in range(subkeys):
-                    try:
+                    with contextlib.suppress(ValueError):
                         ver = float(winreg.EnumKey(bkey, i))
                         if ver not in vs_vers:
                             vs_vers.append(ver)
-                    except ValueError:
-                        pass
         return sorted(vs_vers)
 
     def find_programdata_vs_vers(self):
@@ -777,8 +822,8 @@ class SystemInfo:
         """
         return self._use_last_dir_name(join(self.WindowsSdkDir, 'lib'))
 
-    @property
-    def WindowsSdkDir(self):
+    @property  # noqa: C901
+    def WindowsSdkDir(self):  # noqa: C901  # is too complex (12)  # FIXME
         """
         Microsoft Windows SDK directory.
 
@@ -1225,7 +1270,7 @@ class EnvironmentInfo:
             arch_subdir = self.pi.target_dir(x64=True)
             lib = join(self.si.WindowsSdkDir, 'lib')
             libver = self._sdk_subdir
-            return [join(lib, '%sum%s' % (libver , arch_subdir))]
+            return [join(lib, '%sum%s' % (libver, arch_subdir))]
 
     @property
     def OSIncludes(self):
@@ -1274,13 +1319,16 @@ class EnvironmentInfo:
             libpath += [
                 ref,
                 join(self.si.WindowsSdkDir, 'UnionMetadata'),
-                join(ref, 'Windows.Foundation.UniversalApiContract', '1.0.0.0'),
+                join(
+                    ref, 'Windows.Foundation.UniversalApiContract', '1.0.0.0'),
                 join(ref, 'Windows.Foundation.FoundationContract', '1.0.0.0'),
-                join(ref,'Windows.Networking.Connectivity.WwanContract',
-                     '1.0.0.0'),
-                join(self.si.WindowsSdkDir, 'ExtensionSDKs', 'Microsoft.VCLibs',
-                     '%0.1f' % self.vs_ver, 'References', 'CommonConfiguration',
-                     'neutral'),
+                join(
+                    ref, 'Windows.Networking.Connectivity.WwanContract',
+                    '1.0.0.0'),
+                join(
+                    self.si.WindowsSdkDir, 'ExtensionSDKs', 'Microsoft.VCLibs',
+                    '%0.1f' % self.vs_ver, 'References', 'CommonConfiguration',
+                    'neutral'),
             ]
         return libpath
 
@@ -1651,29 +1699,5 @@ class EnvironmentInfo:
         if not extant_paths:
             msg = "%s environment variable is empty" % name.upper()
             raise distutils.errors.DistutilsPlatformError(msg)
-        unique_paths = self._unique_everseen(extant_paths)
+        unique_paths = unique_everseen(extant_paths)
         return pathsep.join(unique_paths)
-
-    # from Python docs
-    @staticmethod
-    def _unique_everseen(iterable, key=None):
-        """
-        List unique elements, preserving order.
-        Remember all elements ever seen.
-
-        _unique_everseen('AAAABBBCCDAABBB') --> A B C D
-
-        _unique_everseen('ABBCcAD', str.lower) --> A B C D
-        """
-        seen = set()
-        seen_add = seen.add
-        if key is None:
-            for element in filterfalse(seen.__contains__, iterable):
-                seen_add(element)
-                yield element
-        else:
-            for element in iterable:
-                k = key(element)
-                if k not in seen:
-                    seen_add(k)
-                    yield element
diff --git a/libs/common/setuptools/namespaces.py b/libs/common/setuptools/namespaces.py
index dc16106d..44939e1c 100644
--- a/libs/common/setuptools/namespaces.py
+++ b/libs/common/setuptools/namespaces.py
@@ -2,8 +2,6 @@ import os
 from distutils import log
 import itertools
 
-from setuptools.extern.six.moves import map
-
 
 flatten = itertools.chain.from_iterable
 
@@ -47,13 +45,17 @@ class Installer:
         "p = os.path.join(%(root)s, *%(pth)r)",
         "importlib = has_mfs and __import__('importlib.util')",
         "has_mfs and __import__('importlib.machinery')",
-        "m = has_mfs and "
+        (
+            "m = has_mfs and "
             "sys.modules.setdefault(%(pkg)r, "
-                "importlib.util.module_from_spec("
-                    "importlib.machinery.PathFinder.find_spec(%(pkg)r, "
-                        "[os.path.dirname(p)])))",
-        "m = m or "
-            "sys.modules.setdefault(%(pkg)r, types.ModuleType(%(pkg)r))",
+            "importlib.util.module_from_spec("
+            "importlib.machinery.PathFinder.find_spec(%(pkg)r, "
+            "[os.path.dirname(p)])))"
+        ),
+        (
+            "m = m or "
+            "sys.modules.setdefault(%(pkg)r, types.ModuleType(%(pkg)r))"
+        ),
         "mp = (m or []) and m.__dict__.setdefault('__path__',[])",
         "(p not in mp) and mp.append(p)",
     )
@@ -68,8 +70,6 @@ class Installer:
         return "sys._getframe(1).f_locals['sitedir']"
 
     def _gen_nspkg_line(self, pkg):
-        # ensure pkg is not a unicode string under Python 2.7
-        pkg = str(pkg)
         pth = tuple(pkg.split('.'))
         root = self._get_root()
         tmpl_lines = self._nspkg_tmpl
diff --git a/libs/common/setuptools/package_index.py b/libs/common/setuptools/package_index.py
index f419d471..362e26f3 100644
--- a/libs/common/setuptools/package_index.py
+++ b/libs/common/setuptools/package_index.py
@@ -1,33 +1,46 @@
-"""PyPI and direct package downloading"""
+"""PyPI and direct package downloading."""
+
 import sys
 import os
 import re
+import io
 import shutil
 import socket
 import base64
 import hashlib
 import itertools
 import warnings
+import configparser
+import html
+import http.client
+import urllib.parse
+import urllib.request
+import urllib.error
 from functools import wraps
 
-from setuptools.extern import six
-from setuptools.extern.six.moves import urllib, http_client, configparser, map
-
 import setuptools
 from pkg_resources import (
-    CHECKOUT_DIST, Distribution, BINARY_DIST, normalize_path, SOURCE_DIST,
-    Environment, find_distributions, safe_name, safe_version,
-    to_filename, Requirement, DEVELOP_DIST, EGG_DIST,
+    CHECKOUT_DIST,
+    Distribution,
+    BINARY_DIST,
+    normalize_path,
+    SOURCE_DIST,
+    Environment,
+    find_distributions,
+    safe_name,
+    safe_version,
+    to_filename,
+    Requirement,
+    DEVELOP_DIST,
+    EGG_DIST,
+    parse_version,
 )
-from setuptools import ssl_support
 from distutils import log
 from distutils.errors import DistutilsError
 from fnmatch import translate
-from setuptools.py27compat import get_all_headers
-from setuptools.py33compat import unescape
 from setuptools.wheel import Wheel
+from setuptools.extern.more_itertools import unique_everseen
 
-__metaclass__ = type
 
 EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.+!]+)$')
 HREF = re.compile(r"""href\s*=\s*['"]?([^'"> ]+)""", re.I)
@@ -39,23 +52,27 @@ URL_SCHEME = re.compile('([-+.a-z0-9]{2,}):', re.I).match
 EXTENSIONS = ".tar.gz .tar.bz2 .tar .zip .tgz".split()
 
 __all__ = [
-    'PackageIndex', 'distros_for_url', 'parse_bdist_wininst',
+    'PackageIndex',
+    'distros_for_url',
+    'parse_bdist_wininst',
     'interpret_distro_name',
 ]
 
 _SOCKET_TIMEOUT = 15
 
 _tmpl = "setuptools/{setuptools.__version__} Python-urllib/{py_major}"
-user_agent = _tmpl.format(py_major='{}.{}'.format(*sys.version_info), setuptools=setuptools)
+user_agent = _tmpl.format(
+    py_major='{}.{}'.format(*sys.version_info), setuptools=setuptools
+)
 
 
 def parse_requirement_arg(spec):
     try:
         return Requirement.parse(spec)
-    except ValueError:
+    except ValueError as e:
         raise DistutilsError(
             "Not a URL, existing file, or requirement spec: %r" % (spec,)
-        )
+        ) from e
 
 
 def parse_bdist_wininst(name):
@@ -118,13 +135,15 @@ def distros_for_location(location, basename, metadata=None):
         wheel = Wheel(basename)
         if not wheel.is_compatible():
             return []
-        return [Distribution(
-            location=location,
-            project_name=wheel.project_name,
-            version=wheel.version,
-            # Increase priority over eggs.
-            precedence=EGG_DIST + 1,
-        )]
+        return [
+            Distribution(
+                location=location,
+                project_name=wheel.project_name,
+                version=wheel.version,
+                # Increase priority over eggs.
+                precedence=EGG_DIST + 1,
+            )
+        ]
     if basename.endswith('.exe'):
         win_base, py_ver, platform = parse_bdist_wininst(basename)
         if win_base is not None:
@@ -135,7 +154,7 @@ def distros_for_location(location, basename, metadata=None):
     #
     for ext in EXTENSIONS:
         if basename.endswith(ext):
-            basename = basename[:-len(ext)]
+            basename = basename[: -len(ext)]
             return interpret_distro_name(location, basename, metadata)
     return []  # no extension matched
 
@@ -148,8 +167,7 @@ def distros_for_filename(filename, metadata=None):
 
 
 def interpret_distro_name(
-        location, basename, metadata, py_version=None, precedence=SOURCE_DIST,
-        platform=None
+    location, basename, metadata, py_version=None, precedence=SOURCE_DIST, platform=None
 ):
     """Generate alternative interpretations of a source distro name
 
@@ -160,7 +178,7 @@ def interpret_distro_name(
     # Generate alternative interpretations of a source distro name
     # Because some packages are ambiguous as to name/versions split
     # e.g. "adns-python-1.1.0", "egenix-mx-commercial", etc.
-    # So, we generate each possible interepretation (e.g. "adns, python-1.1.0"
+    # So, we generate each possible interpretation (e.g. "adns, python-1.1.0"
     # "adns-python, 1.1.0", and "adns-python-1.1.0, no version").  In practice,
     # the spurious interpretations should be ignored, because in the event
     # there's also an "adns" package, the spurious "python-1.1.0" version will
@@ -176,31 +194,16 @@ def interpret_distro_name(
 
     for p in range(1, len(parts) + 1):
         yield Distribution(
-            location, metadata, '-'.join(parts[:p]), '-'.join(parts[p:]),
-            py_version=py_version, precedence=precedence,
-            platform=platform
+            location,
+            metadata,
+            '-'.join(parts[:p]),
+            '-'.join(parts[p:]),
+            py_version=py_version,
+            precedence=precedence,
+            platform=platform,
         )
 
 
-# From Python 2.7 docs
-def unique_everseen(iterable, key=None):
-    "List unique elements, preserving order. Remember all elements ever seen."
-    # unique_everseen('AAAABBBCCDAABBB') --> A B C D
-    # unique_everseen('ABBCcAD', str.lower) --> A B C D
-    seen = set()
-    seen_add = seen.add
-    if key is None:
-        for element in six.moves.filterfalse(seen.__contains__, iterable):
-            seen_add(element)
-            yield element
-    else:
-        for element in iterable:
-            k = key(element)
-            if k not in seen:
-                seen_add(k)
-                yield element
-
-
 def unique_values(func):
     """
     Wrap a function returning an iterable such that the resulting iterable
@@ -214,8 +217,10 @@ def unique_values(func):
     return wrapper
 
 
-REL = re.compile(r"""<([^>]*\srel\s*=\s*['"]?([^'">]+)[^>]*)>""", re.I)
-# this line is here to fix emacs' cruddy broken syntax highlighting
+REL = re.compile(r"""<([^>]*\srel\s{0,10}=\s{0,10}['"]?([^'" >]+)[^>]*)>""", re.I)
+"""
+Regex for an HTML tag with 'rel="val"' attributes.
+"""
 
 
 @unique_values
@@ -299,27 +304,33 @@ class PackageIndex(Environment):
     """A distribution index that scans web pages for download URLs"""
 
     def __init__(
-            self, index_url="https://pypi.org/simple/", hosts=('*',),
-            ca_bundle=None, verify_ssl=True, *args, **kw
+        self,
+        index_url="https://pypi.org/simple/",
+        hosts=('*',),
+        ca_bundle=None,
+        verify_ssl=True,
+        *args,
+        **kw
     ):
-        Environment.__init__(self, *args, **kw)
-        self.index_url = index_url + "/" [:not index_url.endswith('/')]
+        super().__init__(*args, **kw)
+        self.index_url = index_url + "/"[: not index_url.endswith('/')]
         self.scanned_urls = {}
         self.fetched_urls = {}
         self.package_pages = {}
         self.allows = re.compile('|'.join(map(translate, hosts))).match
         self.to_scan = []
-        use_ssl = (
-            verify_ssl
-            and ssl_support.is_available
-            and (ca_bundle or ssl_support.find_ca_bundle())
-        )
-        if use_ssl:
-            self.opener = ssl_support.opener_for(ca_bundle)
-        else:
-            self.opener = urllib.request.urlopen
+        self.opener = urllib.request.urlopen
 
-    def process_url(self, url, retrieve=False):
+    def add(self, dist):
+        # ignore invalid versions
+        try:
+            parse_version(dist.version)
+        except Exception:
+            return
+        return super().add(dist)
+
+    # FIXME: 'PackageIndex.process_url' is too complex (14)
+    def process_url(self, url, retrieve=False):  # noqa: C901
         """Evaluate a URL as a possible download, and maybe retrieve it"""
         if url in self.scanned_urls and not retrieve:
             return
@@ -348,6 +359,8 @@ class PackageIndex(Environment):
         f = self.open_url(url, tmpl % url)
         if f is None:
             return
+        if isinstance(f, urllib.error.HTTPError) and f.code == 401:
+            self.info("Authentication error: %s" % f.msg)
         self.fetched_urls[f.url] = True
         if 'html' not in f.headers.get('content-type', '').lower():
             f.close()  # not html, we can't process it
@@ -393,7 +406,8 @@ class PackageIndex(Environment):
             return True
         msg = (
             "\nNote: Bypassing %s (disallowed host; see "
-            "http://bit.ly/2hrImnY for details).\n")
+            "http://bit.ly/2hrImnY for details).\n"
+        )
         if fatal:
             raise DistutilsError(msg % url)
         else:
@@ -425,62 +439,63 @@ class PackageIndex(Environment):
             dist.precedence = SOURCE_DIST
             self.add(dist)
 
+    def _scan(self, link):
+        # Process a URL to see if it's for a package page
+        NO_MATCH_SENTINEL = None, None
+        if not link.startswith(self.index_url):
+            return NO_MATCH_SENTINEL
+
+        parts = list(map(urllib.parse.unquote, link[len(self.index_url) :].split('/')))
+        if len(parts) != 2 or '#' in parts[1]:
+            return NO_MATCH_SENTINEL
+
+        # it's a package page, sanitize and index it
+        pkg = safe_name(parts[0])
+        ver = safe_version(parts[1])
+        self.package_pages.setdefault(pkg.lower(), {})[link] = True
+        return to_filename(pkg), to_filename(ver)
+
     def process_index(self, url, page):
         """Process the contents of a PyPI page"""
 
-        def scan(link):
-            # Process a URL to see if it's for a package page
-            if link.startswith(self.index_url):
-                parts = list(map(
-                    urllib.parse.unquote, link[len(self.index_url):].split('/')
-                ))
-                if len(parts) == 2 and '#' not in parts[1]:
-                    # it's a package page, sanitize and index it
-                    pkg = safe_name(parts[0])
-                    ver = safe_version(parts[1])
-                    self.package_pages.setdefault(pkg.lower(), {})[link] = True
-                    return to_filename(pkg), to_filename(ver)
-            return None, None
-
         # process an index page into the package-page index
         for match in HREF.finditer(page):
             try:
-                scan(urllib.parse.urljoin(url, htmldecode(match.group(1))))
+                self._scan(urllib.parse.urljoin(url, htmldecode(match.group(1))))
             except ValueError:
                 pass
 
-        pkg, ver = scan(url)  # ensure this page is in the page index
-        if pkg:
-            # process individual package page
-            for new_url in find_external_links(url, page):
-                # Process the found URL
-                base, frag = egg_info_for_url(new_url)
-                if base.endswith('.py') and not frag:
-                    if ver:
-                        new_url += '#egg=%s-%s' % (pkg, ver)
-                    else:
-                        self.need_version_info(url)
-                self.scan_url(new_url)
-
-            return PYPI_MD5.sub(
-                lambda m: '%s' % m.group(1, 3, 2), page
-            )
-        else:
+        pkg, ver = self._scan(url)  # ensure this page is in the page index
+        if not pkg:
             return ""  # no sense double-scanning non-package pages
 
+        # process individual package page
+        for new_url in find_external_links(url, page):
+            # Process the found URL
+            base, frag = egg_info_for_url(new_url)
+            if base.endswith('.py') and not frag:
+                if ver:
+                    new_url += '#egg=%s-%s' % (pkg, ver)
+                else:
+                    self.need_version_info(url)
+            self.scan_url(new_url)
+
+        return PYPI_MD5.sub(
+            lambda m: '%s' % m.group(1, 3, 2), page
+        )
+
     def need_version_info(self, url):
         self.scan_all(
             "Page at %s links to .py file(s) without version info; an index "
-            "scan is required.", url
+            "scan is required.",
+            url,
         )
 
     def scan_all(self, msg=None, *args):
         if self.index_url not in self.fetched_urls:
             if msg:
                 self.warn(msg, *args)
-            self.info(
-                "Scanning index of all packages (this may take a while)"
-            )
+            self.info("Scanning index of all packages (this may take a while)")
         self.scan_url(self.index_url)
 
     def find_packages(self, requirement):
@@ -511,9 +526,7 @@ class PackageIndex(Environment):
         """
         checker is a ContentChecker
         """
-        checker.report(
-            self.debug,
-            "Validating %%s checksum for %s" % filename)
+        checker.report(self.debug, "Validating %%s checksum for %s" % filename)
         if not checker.is_valid():
             tfp.close()
             os.unlink(filename)
@@ -550,7 +563,8 @@ class PackageIndex(Environment):
         else:  # no distros seen for this name, might be misspelled
             meth, msg = (
                 self.warn,
-                "Couldn't find index page for %r (maybe misspelled?)")
+                "Couldn't find index page for %r (maybe misspelled?)",
+            )
         meth(msg, requirement.unsafe_name)
         self.scan_all()
 
@@ -588,9 +602,15 @@ class PackageIndex(Environment):
                 spec = parse_requirement_arg(spec)
         return getattr(self.fetch_distribution(spec, tmpdir), 'location', None)
 
-    def fetch_distribution(
-            self, requirement, tmpdir, force_scan=False, source=False,
-            develop_ok=False, local_index=None):
+    def fetch_distribution(  # noqa: C901  # is too complex (14)  # FIXME
+        self,
+        requirement,
+        tmpdir,
+        force_scan=False,
+        source=False,
+        develop_ok=False,
+        local_index=None,
+    ):
         """Obtain a distribution suitable for fulfilling `requirement`
 
         `requirement` must be a ``pkg_resources.Requirement`` instance.
@@ -622,15 +642,13 @@ class PackageIndex(Environment):
                 if dist.precedence == DEVELOP_DIST and not develop_ok:
                     if dist not in skipped:
                         self.warn(
-                            "Skipping development or system egg: %s", dist,
+                            "Skipping development or system egg: %s",
+                            dist,
                         )
                         skipped[dist] = 1
                     continue
 
-                test = (
-                    dist in req
-                    and (dist.precedence <= SOURCE_DIST or not source)
-                )
+                test = dist in req and (dist.precedence <= SOURCE_DIST or not source)
                 if test:
                     loc = self.download(dist.location, tmpdir)
                     dist.download_location = loc
@@ -679,10 +697,15 @@ class PackageIndex(Environment):
 
     def gen_setup(self, filename, fragment, tmpdir):
         match = EGG_FRAGMENT.match(fragment)
-        dists = match and [
-            d for d in
-            interpret_distro_name(filename, match.group(1), None) if d.version
-        ] or []
+        dists = (
+            match
+            and [
+                d
+                for d in interpret_distro_name(filename, match.group(1), None)
+                if d.version
+            ]
+            or []
+        )
 
         if len(dists) == 1:  # unambiguous ``#egg`` fragment
             basename = os.path.basename(filename)
@@ -690,8 +713,7 @@ class PackageIndex(Environment):
             # Make sure the file has been downloaded to the temp dir.
             if os.path.dirname(filename) != tmpdir:
                 dst = os.path.join(tmpdir, basename)
-                from setuptools.command.easy_install import samefile
-                if not samefile(filename, dst):
+                if not (os.path.exists(dst) and os.path.samefile(filename, dst)):
                     shutil.copy2(filename, dst)
                     filename = dst
 
@@ -700,8 +722,9 @@ class PackageIndex(Environment):
                     "from setuptools import setup\n"
                     "setup(name=%r, version=%r, py_modules=[%r])\n"
                     % (
-                        dists[0].project_name, dists[0].version,
-                        os.path.splitext(basename)[0]
+                        dists[0].project_name,
+                        dists[0].version,
+                        os.path.splitext(basename)[0],
                     )
                 )
             return filename
@@ -737,7 +760,7 @@ class PackageIndex(Environment):
             size = -1
             if "content-length" in headers:
                 # Some servers return multiple Content-Length headers :(
-                sizes = get_all_headers(headers, 'Content-Length')
+                sizes = headers.get_all('Content-Length')
                 size = max(map(int, sizes))
                 self.reporthook(url, filename, blocknum, bs, size)
             with open(filename, 'wb') as tfp:
@@ -759,40 +782,40 @@ class PackageIndex(Environment):
     def reporthook(self, url, filename, blocknum, blksize, size):
         pass  # no-op
 
-    def open_url(self, url, warning=None):
+    # FIXME:
+    def open_url(self, url, warning=None):  # noqa: C901  # is too complex (12)
         if url.startswith('file:'):
             return local_open(url)
         try:
             return open_with_auth(url, self.opener)
-        except (ValueError, http_client.InvalidURL) as v:
+        except (ValueError, http.client.InvalidURL) as v:
             msg = ' '.join([str(arg) for arg in v.args])
             if warning:
                 self.warn(warning, msg)
             else:
-                raise DistutilsError('%s %s' % (url, msg))
+                raise DistutilsError('%s %s' % (url, msg)) from v
         except urllib.error.HTTPError as v:
             return v
         except urllib.error.URLError as v:
             if warning:
                 self.warn(warning, v.reason)
             else:
-                raise DistutilsError("Download error for %s: %s"
-                                     % (url, v.reason))
-        except http_client.BadStatusLine as v:
+                raise DistutilsError(
+                    "Download error for %s: %s" % (url, v.reason)
+                ) from v
+        except http.client.BadStatusLine as v:
             if warning:
                 self.warn(warning, v.line)
             else:
                 raise DistutilsError(
                     '%s returned a bad status line. The server might be '
-                    'down, %s' %
-                    (url, v.line)
-                )
-        except (http_client.HTTPException, socket.error) as v:
+                    'down, %s' % (url, v.line)
+                ) from v
+        except (http.client.HTTPException, socket.error) as v:
             if warning:
                 self.warn(warning, v)
             else:
-                raise DistutilsError("Download error for %s: %s"
-                                     % (url, v))
+                raise DistutilsError("Download error for %s: %s" % (url, v)) from v
 
     def _download_url(self, scheme, url, tmpdir):
         # Determine download filename
@@ -897,10 +920,13 @@ class PackageIndex(Environment):
 
         if rev is not None:
             self.info("Checking out %s", rev)
-            os.system("git -C %s checkout --quiet %s" % (
-                filename,
-                rev,
-            ))
+            os.system(
+                "git -C %s checkout --quiet %s"
+                % (
+                    filename,
+                    rev,
+                )
+            )
 
         return filename
 
@@ -913,10 +939,13 @@ class PackageIndex(Environment):
 
         if rev is not None:
             self.info("Updating to %s", rev)
-            os.system("hg --cwd %s up -C -r %s -q" % (
-                filename,
-                rev,
-            ))
+            os.system(
+                "hg --cwd %s up -C -r %s -q"
+                % (
+                    filename,
+                    rev,
+                )
+            )
 
         return filename
 
@@ -937,7 +966,7 @@ entity_sub = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?').sub
 
 def decode_entity(match):
     what = match.group(0)
-    return unescape(what)
+    return html.unescape(what)
 
 
 def htmldecode(text):
@@ -969,8 +998,7 @@ def socket_timeout(timeout=15):
 
 def _encode_auth(auth):
     """
-    A function compatible with Python 2.3-3.3 that will encode
-    auth from a URL suitable for an HTTP header.
+    Encode auth from a URL suitable for an HTTP header.
     >>> str(_encode_auth('username%3Apassword'))
     'dXNlcm5hbWU6cGFzc3dvcmQ='
 
@@ -1012,7 +1040,7 @@ class PyPIConfig(configparser.RawConfigParser):
         Load from ~/.pypirc
         """
         defaults = dict.fromkeys(['username', 'password', 'repository'], '')
-        configparser.RawConfigParser.__init__(self, defaults)
+        super().__init__(defaults)
 
         rc = os.path.join(os.path.expanduser('~'), '.pypirc')
         if os.path.exists(rc):
@@ -1021,7 +1049,8 @@ class PyPIConfig(configparser.RawConfigParser):
     @property
     def creds_by_repository(self):
         sections_with_repositories = [
-            section for section in self.sections()
+            section
+            for section in self.sections()
             if self.get(section, 'repository').strip()
         ]
 
@@ -1050,10 +1079,10 @@ def open_with_auth(url, opener=urllib.request.urlopen):
     parsed = urllib.parse.urlparse(url)
     scheme, netloc, path, params, query, frag = parsed
 
-    # Double scheme does not raise on Mac OS X as revealed by a
+    # Double scheme does not raise on macOS as revealed by a
     # failing test. We would expect "nonnumeric port". Refs #20.
     if netloc.endswith(':'):
-        raise http_client.InvalidURL("nonnumeric port: ''")
+        raise http.client.InvalidURL("nonnumeric port: ''")
 
     if scheme in ('http', 'https'):
         auth, address = _splituser(netloc)
@@ -1092,7 +1121,8 @@ def open_with_auth(url, opener=urllib.request.urlopen):
 
 # copy of urllib.parse._splituser from Python 3.8
 def _splituser(host):
-    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
+    """splituser('user[:passwd]@host[:port]')
+    --> 'user[:passwd]', 'host[:port]'."""
     user, delim, host = host.rpartition('@')
     return (user if delim else None), host
 
@@ -1124,13 +1154,13 @@ def local_open(url):
             files.append('{name}'.format(name=f))
         else:
             tmpl = (
-                "{url}"
-                "{files}")
+                "{url}" "{files}"
+            )
             body = tmpl.format(url=url, files='\n'.join(files))
         status, message = 200, "OK"
     else:
         status, message, body = 404, "Path not found", "Not found"
 
     headers = {'content-type': 'text/html'}
-    body_stream = six.StringIO(body)
+    body_stream = io.StringIO(body)
     return urllib.error.HTTPError(url, status, message, headers, body_stream)
diff --git a/libs/common/setuptools/py27compat.py b/libs/common/setuptools/py27compat.py
deleted file mode 100644
index 1d57360f..00000000
--- a/libs/common/setuptools/py27compat.py
+++ /dev/null
@@ -1,60 +0,0 @@
-"""
-Compatibility Support for Python 2.7 and earlier
-"""
-
-import sys
-import platform
-
-from setuptools.extern import six
-
-
-def get_all_headers(message, key):
-    """
-    Given an HTTPMessage, return all headers matching a given key.
-    """
-    return message.get_all(key)
-
-
-if six.PY2:
-    def get_all_headers(message, key):
-        return message.getheaders(key)
-
-
-linux_py2_ascii = (
-    platform.system() == 'Linux' and
-    six.PY2
-)
-
-rmtree_safe = str if linux_py2_ascii else lambda x: x
-"""Workaround for http://bugs.python.org/issue24672"""
-
-
-try:
-    from ._imp import find_module, PY_COMPILED, PY_FROZEN, PY_SOURCE
-    from ._imp import get_frozen_object, get_module
-except ImportError:
-    import imp
-    from imp import PY_COMPILED, PY_FROZEN, PY_SOURCE  # noqa
-
-    def find_module(module, paths=None):
-        """Just like 'imp.find_module()', but with package support"""
-        parts = module.split('.')
-        while parts:
-            part = parts.pop(0)
-            f, path, (suffix, mode, kind) = info = imp.find_module(part, paths)
-
-            if kind == imp.PKG_DIRECTORY:
-                parts = parts or ['__init__']
-                paths = [path]
-
-            elif parts:
-                raise ImportError("Can't find %r in %s" % (parts, module))
-
-        return info
-
-    def get_frozen_object(module, paths):
-        return imp.get_frozen_object(module)
-
-    def get_module(module, paths, info):
-        imp.load_module(module, *info)
-        return sys.modules[module]
diff --git a/libs/common/setuptools/py31compat.py b/libs/common/setuptools/py31compat.py
deleted file mode 100644
index e1da7ee2..00000000
--- a/libs/common/setuptools/py31compat.py
+++ /dev/null
@@ -1,32 +0,0 @@
-__all__ = []
-
-__metaclass__ = type
-
-
-try:
-    # Python >=3.2
-    from tempfile import TemporaryDirectory
-except ImportError:
-    import shutil
-    import tempfile
-
-    class TemporaryDirectory:
-        """
-        Very simple temporary directory context manager.
-        Will try to delete afterward, but will also ignore OS and similar
-        errors on deletion.
-        """
-
-        def __init__(self, **kwargs):
-            self.name = None  # Handle mkdtemp raising an exception
-            self.name = tempfile.mkdtemp(**kwargs)
-
-        def __enter__(self):
-            return self.name
-
-        def __exit__(self, exctype, excvalue, exctrace):
-            try:
-                shutil.rmtree(self.name, True)
-            except OSError:  # removal errors are not the only possible
-                pass
-            self.name = None
diff --git a/libs/common/setuptools/py33compat.py b/libs/common/setuptools/py33compat.py
deleted file mode 100644
index cb694436..00000000
--- a/libs/common/setuptools/py33compat.py
+++ /dev/null
@@ -1,59 +0,0 @@
-import dis
-import array
-import collections
-
-try:
-    import html
-except ImportError:
-    html = None
-
-from setuptools.extern import six
-from setuptools.extern.six.moves import html_parser
-
-__metaclass__ = type
-
-OpArg = collections.namedtuple('OpArg', 'opcode arg')
-
-
-class Bytecode_compat:
-    def __init__(self, code):
-        self.code = code
-
-    def __iter__(self):
-        """Yield '(op,arg)' pair for each operation in code object 'code'"""
-
-        bytes = array.array('b', self.code.co_code)
-        eof = len(self.code.co_code)
-
-        ptr = 0
-        extended_arg = 0
-
-        while ptr < eof:
-
-            op = bytes[ptr]
-
-            if op >= dis.HAVE_ARGUMENT:
-
-                arg = bytes[ptr + 1] + bytes[ptr + 2] * 256 + extended_arg
-                ptr += 3
-
-                if op == dis.EXTENDED_ARG:
-                    long_type = six.integer_types[-1]
-                    extended_arg = arg * long_type(65536)
-                    continue
-
-            else:
-                arg = None
-                ptr += 1
-
-            yield OpArg(op, arg)
-
-
-Bytecode = getattr(dis, 'Bytecode', Bytecode_compat)
-
-
-unescape = getattr(html, 'unescape', None)
-if unescape is None:
-    # HTMLParser.unescape is deprecated since Python 3.4, and will be removed
-    # from 3.9.
-    unescape = html_parser.HTMLParser().unescape
diff --git a/libs/common/setuptools/sandbox.py b/libs/common/setuptools/sandbox.py
index 685f3f72..034fc80d 100644
--- a/libs/common/setuptools/sandbox.py
+++ b/libs/common/setuptools/sandbox.py
@@ -8,11 +8,11 @@ import re
 import contextlib
 import pickle
 import textwrap
+import builtins
 
-from setuptools.extern import six
-from setuptools.extern.six.moves import builtins, map
-
-import pkg_resources.py31compat
+import pkg_resources
+from distutils.errors import DistutilsError
+from pkg_resources import working_set
 
 if sys.platform.startswith('java'):
     import org.python.modules.posix.PosixModule as _os
@@ -23,12 +23,13 @@ try:
 except NameError:
     _file = None
 _open = open
-from distutils.errors import DistutilsError
-from pkg_resources import working_set
 
 
 __all__ = [
-    "AbstractSandbox", "DirectorySandbox", "SandboxViolation", "run_setup",
+    "AbstractSandbox",
+    "DirectorySandbox",
+    "SandboxViolation",
+    "run_setup",
 ]
 
 
@@ -70,7 +71,7 @@ def override_temp(replacement):
     """
     Monkey-patch tempfile.tempdir with replacement, ensuring it exists
     """
-    pkg_resources.py31compat.makedirs(replacement, exist_ok=True)
+    os.makedirs(replacement, exist_ok=True)
 
     saved = tempfile.tempdir
 
@@ -108,6 +109,7 @@ class UnpickleableException(Exception):
         except Exception:
             # get UnpickleableException inside the sandbox
             from setuptools.sandbox import UnpickleableException as cls
+
             return cls.dump(cls, cls(repr(exc)))
 
 
@@ -138,7 +140,7 @@ class ExceptionSaver:
             return
 
         type, exc = map(pickle.loads, self._saved)
-        six.reraise(type, exc, self._tb)
+        raise exc.with_traceback(self._tb)
 
 
 @contextlib.contextmanager
@@ -156,7 +158,8 @@ def save_modules():
     sys.modules.update(saved)
     # remove any modules imported since
     del_modules = (
-        mod_name for mod_name in sys.modules
+        mod_name
+        for mod_name in sys.modules
         if mod_name not in saved
         # exclude any encodings modules. See #285
         and not mod_name.startswith('encodings.')
@@ -185,8 +188,8 @@ def setup_context(setup_dir):
     temp_dir = os.path.join(setup_dir, 'temp')
     with save_pkg_resources_state():
         with save_modules():
-            hide_setuptools()
             with save_path():
+                hide_setuptools()
                 with save_argv():
                     with override_temp(temp_dir):
                         with pushd(setup_dir):
@@ -195,6 +198,15 @@ def setup_context(setup_dir):
                             yield
 
 
+_MODULES_TO_HIDE = {
+    'setuptools',
+    'distutils',
+    'pkg_resources',
+    'Cython',
+    '_distutils_hack',
+}
+
+
 def _needs_hiding(mod_name):
     """
     >>> _needs_hiding('setuptools')
@@ -212,8 +224,8 @@ def _needs_hiding(mod_name):
     >>> _needs_hiding('Cython')
     True
     """
-    pattern = re.compile(r'(setuptools|pkg_resources|distutils|Cython)(\.|$)')
-    return bool(pattern.match(mod_name))
+    base_module = mod_name.split('.', 1)[0]
+    return base_module in _MODULES_TO_HIDE
 
 
 def hide_setuptools():
@@ -223,6 +235,10 @@ def hide_setuptools():
     necessary to avoid issues such as #315 where setuptools upgrading itself
     would fail to find a function declared in the metadata.
     """
+    _distutils_hack = sys.modules.get('_distutils_hack', None)
+    if _distutils_hack is not None:
+        _distutils_hack.remove_shim()
+
     modules = filter(_needs_hiding, sys.modules)
     _clear_modules(modules)
 
@@ -238,15 +254,8 @@ def run_setup(setup_script, args):
             working_set.__init__()
             working_set.callbacks.append(lambda dist: dist.activate())
 
-            # __file__ should be a byte string on Python 2 (#712)
-            dunder_file = (
-                setup_script
-                if isinstance(setup_script, str) else
-                setup_script.encode(sys.getfilesystemencoding())
-            )
-
             with DirectorySandbox(setup_dir):
-                ns = dict(__file__=dunder_file, __name__='__main__')
+                ns = dict(__file__=setup_script, __name__='__main__')
                 _execfile(setup_script, ns)
         except SystemExit as v:
             if v.args and v.args[0]:
@@ -261,7 +270,8 @@ class AbstractSandbox:
 
     def __init__(self):
         self._attrs = [
-            name for name in dir(_os)
+            name
+            for name in dir(_os)
             if not name.startswith('_') and hasattr(self, name)
         ]
 
@@ -316,9 +326,25 @@ class AbstractSandbox:
         _file = _mk_single_path_wrapper('file', _file)
     _open = _mk_single_path_wrapper('open', _open)
     for name in [
-        "stat", "listdir", "chdir", "open", "chmod", "chown", "mkdir",
-        "remove", "unlink", "rmdir", "utime", "lchown", "chroot", "lstat",
-        "startfile", "mkfifo", "mknod", "pathconf", "access"
+        "stat",
+        "listdir",
+        "chdir",
+        "open",
+        "chmod",
+        "chown",
+        "mkdir",
+        "remove",
+        "unlink",
+        "rmdir",
+        "utime",
+        "lchown",
+        "chroot",
+        "lstat",
+        "startfile",
+        "mkfifo",
+        "mknod",
+        "pathconf",
+        "access",
     ]:
         if hasattr(_os, name):
             locals()[name] = _mk_single_path_wrapper(name)
@@ -369,12 +395,12 @@ class AbstractSandbox:
         """Called for path pairs like rename, link, and symlink operations"""
         return (
             self._remap_input(operation + '-from', src, *args, **kw),
-            self._remap_input(operation + '-to', dst, *args, **kw)
+            self._remap_input(operation + '-to', dst, *args, **kw),
         )
 
 
 if hasattr(os, 'devnull'):
-    _EXCEPTIONS = [os.devnull,]
+    _EXCEPTIONS = [os.devnull]
 else:
     _EXCEPTIONS = []
 
@@ -382,28 +408,38 @@ else:
 class DirectorySandbox(AbstractSandbox):
     """Restrict operations to a single subdirectory - pseudo-chroot"""
 
-    write_ops = dict.fromkeys([
-        "open", "chmod", "chown", "mkdir", "remove", "unlink", "rmdir",
-        "utime", "lchown", "chroot", "mkfifo", "mknod", "tempnam",
-    ])
+    write_ops = dict.fromkeys(
+        [
+            "open",
+            "chmod",
+            "chown",
+            "mkdir",
+            "remove",
+            "unlink",
+            "rmdir",
+            "utime",
+            "lchown",
+            "chroot",
+            "mkfifo",
+            "mknod",
+            "tempnam",
+        ]
+    )
 
-    _exception_patterns = [
-        # Allow lib2to3 to attempt to save a pickled grammar object (#121)
-        r'.*lib2to3.*\.pickle$',
-    ]
+    _exception_patterns = []
     "exempt writing to paths that match the pattern"
 
     def __init__(self, sandbox, exceptions=_EXCEPTIONS):
         self._sandbox = os.path.normcase(os.path.realpath(sandbox))
         self._prefix = os.path.join(self._sandbox, '')
         self._exceptions = [
-            os.path.normcase(os.path.realpath(path))
-            for path in exceptions
+            os.path.normcase(os.path.realpath(path)) for path in exceptions
         ]
         AbstractSandbox.__init__(self)
 
     def _violation(self, operation, *args, **kw):
         from setuptools.sandbox import SandboxViolation
+
         raise SandboxViolation(operation, args, kw)
 
     if _file:
@@ -436,12 +472,10 @@ class DirectorySandbox(AbstractSandbox):
 
     def _exempted(self, filepath):
         start_matches = (
-            filepath.startswith(exception)
-            for exception in self._exceptions
+            filepath.startswith(exception) for exception in self._exceptions
         )
         pattern_matches = (
-            re.match(pattern, filepath)
-            for pattern in self._exception_patterns
+            re.match(pattern, filepath) for pattern in self._exception_patterns
         )
         candidates = itertools.chain(start_matches, pattern_matches)
         return any(candidates)
@@ -466,15 +500,19 @@ class DirectorySandbox(AbstractSandbox):
 
 
 WRITE_FLAGS = functools.reduce(
-    operator.or_, [getattr(_os, a, 0) for a in
-        "O_WRONLY O_RDWR O_APPEND O_CREAT O_TRUNC O_TEMPORARY".split()]
+    operator.or_,
+    [
+        getattr(_os, a, 0)
+        for a in "O_WRONLY O_RDWR O_APPEND O_CREAT O_TRUNC O_TEMPORARY".split()
+    ],
 )
 
 
 class SandboxViolation(DistutilsError):
     """A setup script attempted to modify the filesystem outside the sandbox"""
 
-    tmpl = textwrap.dedent("""
+    tmpl = textwrap.dedent(
+        """
         SandboxViolation: {cmd}{args!r} {kwargs}
 
         The package setup script has attempted to modify files on your system
@@ -484,7 +522,8 @@ class SandboxViolation(DistutilsError):
         support alternate installation locations even if you run its setup
         script by hand.  Please inform the package's author and the EasyInstall
         maintainers to find out if a fix or workaround is available.
-        """).lstrip()
+        """
+    ).lstrip()
 
     def __str__(self):
         cmd, args, kwargs = self.args
diff --git a/libs/common/setuptools/site-patch.py b/libs/common/setuptools/site-patch.py
deleted file mode 100644
index 40b00de0..00000000
--- a/libs/common/setuptools/site-patch.py
+++ /dev/null
@@ -1,74 +0,0 @@
-def __boot():
-    import sys
-    import os
-    PYTHONPATH = os.environ.get('PYTHONPATH')
-    if PYTHONPATH is None or (sys.platform == 'win32' and not PYTHONPATH):
-        PYTHONPATH = []
-    else:
-        PYTHONPATH = PYTHONPATH.split(os.pathsep)
-
-    pic = getattr(sys, 'path_importer_cache', {})
-    stdpath = sys.path[len(PYTHONPATH):]
-    mydir = os.path.dirname(__file__)
-
-    for item in stdpath:
-        if item == mydir or not item:
-            continue  # skip if current dir. on Windows, or my own directory
-        importer = pic.get(item)
-        if importer is not None:
-            loader = importer.find_module('site')
-            if loader is not None:
-                # This should actually reload the current module
-                loader.load_module('site')
-                break
-        else:
-            try:
-                import imp  # Avoid import loop in Python 3
-                stream, path, descr = imp.find_module('site', [item])
-            except ImportError:
-                continue
-            if stream is None:
-                continue
-            try:
-                # This should actually reload the current module
-                imp.load_module('site', stream, path, descr)
-            finally:
-                stream.close()
-            break
-    else:
-        raise ImportError("Couldn't find the real 'site' module")
-
-    known_paths = dict([(makepath(item)[1], 1) for item in sys.path])  # 2.2 comp
-
-    oldpos = getattr(sys, '__egginsert', 0)  # save old insertion position
-    sys.__egginsert = 0  # and reset the current one
-
-    for item in PYTHONPATH:
-        addsitedir(item)
-
-    sys.__egginsert += oldpos  # restore effective old position
-
-    d, nd = makepath(stdpath[0])
-    insert_at = None
-    new_path = []
-
-    for item in sys.path:
-        p, np = makepath(item)
-
-        if np == nd and insert_at is None:
-            # We've hit the first 'system' path entry, so added entries go here
-            insert_at = len(new_path)
-
-        if np in known_paths or insert_at is None:
-            new_path.append(item)
-        else:
-            # new path after the insert point, back-insert it
-            new_path.insert(insert_at, item)
-            insert_at += 1
-
-    sys.path[:] = new_path
-
-
-if __name__ == 'site':
-    __boot()
-    del __boot
diff --git a/libs/common/setuptools/ssl_support.py b/libs/common/setuptools/ssl_support.py
deleted file mode 100644
index 226db694..00000000
--- a/libs/common/setuptools/ssl_support.py
+++ /dev/null
@@ -1,260 +0,0 @@
-import os
-import socket
-import atexit
-import re
-import functools
-
-from setuptools.extern.six.moves import urllib, http_client, map, filter
-
-from pkg_resources import ResolutionError, ExtractionError
-
-try:
-    import ssl
-except ImportError:
-    ssl = None
-
-__all__ = [
-    'VerifyingHTTPSHandler', 'find_ca_bundle', 'is_available', 'cert_paths',
-    'opener_for'
-]
-
-cert_paths = """
-/etc/pki/tls/certs/ca-bundle.crt
-/etc/ssl/certs/ca-certificates.crt
-/usr/share/ssl/certs/ca-bundle.crt
-/usr/local/share/certs/ca-root.crt
-/etc/ssl/cert.pem
-/System/Library/OpenSSL/certs/cert.pem
-/usr/local/share/certs/ca-root-nss.crt
-/etc/ssl/ca-bundle.pem
-""".strip().split()
-
-try:
-    HTTPSHandler = urllib.request.HTTPSHandler
-    HTTPSConnection = http_client.HTTPSConnection
-except AttributeError:
-    HTTPSHandler = HTTPSConnection = object
-
-is_available = ssl is not None and object not in (HTTPSHandler, HTTPSConnection)
-
-
-try:
-    from ssl import CertificateError, match_hostname
-except ImportError:
-    try:
-        from backports.ssl_match_hostname import CertificateError
-        from backports.ssl_match_hostname import match_hostname
-    except ImportError:
-        CertificateError = None
-        match_hostname = None
-
-if not CertificateError:
-
-    class CertificateError(ValueError):
-        pass
-
-
-if not match_hostname:
-
-    def _dnsname_match(dn, hostname, max_wildcards=1):
-        """Matching according to RFC 6125, section 6.4.3
-
-        https://tools.ietf.org/html/rfc6125#section-6.4.3
-        """
-        pats = []
-        if not dn:
-            return False
-
-        # Ported from python3-syntax:
-        # leftmost, *remainder = dn.split(r'.')
-        parts = dn.split(r'.')
-        leftmost = parts[0]
-        remainder = parts[1:]
-
-        wildcards = leftmost.count('*')
-        if wildcards > max_wildcards:
-            # Issue #17980: avoid denials of service by refusing more
-            # than one wildcard per fragment.  A survey of established
-            # policy among SSL implementations showed it to be a
-            # reasonable choice.
-            raise CertificateError(
-                "too many wildcards in certificate DNS name: " + repr(dn))
-
-        # speed up common case w/o wildcards
-        if not wildcards:
-            return dn.lower() == hostname.lower()
-
-        # RFC 6125, section 6.4.3, subitem 1.
-        # The client SHOULD NOT attempt to match a presented identifier in which
-        # the wildcard character comprises a label other than the left-most label.
-        if leftmost == '*':
-            # When '*' is a fragment by itself, it matches a non-empty dotless
-            # fragment.
-            pats.append('[^.]+')
-        elif leftmost.startswith('xn--') or hostname.startswith('xn--'):
-            # RFC 6125, section 6.4.3, subitem 3.
-            # The client SHOULD NOT attempt to match a presented identifier
-            # where the wildcard character is embedded within an A-label or
-            # U-label of an internationalized domain name.
-            pats.append(re.escape(leftmost))
-        else:
-            # Otherwise, '*' matches any dotless string, e.g. www*
-            pats.append(re.escape(leftmost).replace(r'\*', '[^.]*'))
-
-        # add the remaining fragments, ignore any wildcards
-        for frag in remainder:
-            pats.append(re.escape(frag))
-
-        pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
-        return pat.match(hostname)
-
-    def match_hostname(cert, hostname):
-        """Verify that *cert* (in decoded format as returned by
-        SSLSocket.getpeercert()) matches the *hostname*.  RFC 2818 and RFC 6125
-        rules are followed, but IP addresses are not accepted for *hostname*.
-
-        CertificateError is raised on failure. On success, the function
-        returns nothing.
-        """
-        if not cert:
-            raise ValueError("empty or no certificate")
-        dnsnames = []
-        san = cert.get('subjectAltName', ())
-        for key, value in san:
-            if key == 'DNS':
-                if _dnsname_match(value, hostname):
-                    return
-                dnsnames.append(value)
-        if not dnsnames:
-            # The subject is only checked when there is no dNSName entry
-            # in subjectAltName
-            for sub in cert.get('subject', ()):
-                for key, value in sub:
-                    # XXX according to RFC 2818, the most specific Common Name
-                    # must be used.
-                    if key == 'commonName':
-                        if _dnsname_match(value, hostname):
-                            return
-                        dnsnames.append(value)
-        if len(dnsnames) > 1:
-            raise CertificateError("hostname %r "
-                "doesn't match either of %s"
-                % (hostname, ', '.join(map(repr, dnsnames))))
-        elif len(dnsnames) == 1:
-            raise CertificateError("hostname %r "
-                "doesn't match %r"
-                % (hostname, dnsnames[0]))
-        else:
-            raise CertificateError("no appropriate commonName or "
-                "subjectAltName fields were found")
-
-
-class VerifyingHTTPSHandler(HTTPSHandler):
-    """Simple verifying handler: no auth, subclasses, timeouts, etc."""
-
-    def __init__(self, ca_bundle):
-        self.ca_bundle = ca_bundle
-        HTTPSHandler.__init__(self)
-
-    def https_open(self, req):
-        return self.do_open(
-            lambda host, **kw: VerifyingHTTPSConn(host, self.ca_bundle, **kw), req
-        )
-
-
-class VerifyingHTTPSConn(HTTPSConnection):
-    """Simple verifying connection: no auth, subclasses, timeouts, etc."""
-
-    def __init__(self, host, ca_bundle, **kw):
-        HTTPSConnection.__init__(self, host, **kw)
-        self.ca_bundle = ca_bundle
-
-    def connect(self):
-        sock = socket.create_connection(
-            (self.host, self.port), getattr(self, 'source_address', None)
-        )
-
-        # Handle the socket if a (proxy) tunnel is present
-        if hasattr(self, '_tunnel') and getattr(self, '_tunnel_host', None):
-            self.sock = sock
-            self._tunnel()
-            # http://bugs.python.org/issue7776: Python>=3.4.1 and >=2.7.7
-            # change self.host to mean the proxy server host when tunneling is
-            # being used. Adapt, since we are interested in the destination
-            # host for the match_hostname() comparison.
-            actual_host = self._tunnel_host
-        else:
-            actual_host = self.host
-
-        if hasattr(ssl, 'create_default_context'):
-            ctx = ssl.create_default_context(cafile=self.ca_bundle)
-            self.sock = ctx.wrap_socket(sock, server_hostname=actual_host)
-        else:
-            # This is for python < 2.7.9 and < 3.4?
-            self.sock = ssl.wrap_socket(
-                sock, cert_reqs=ssl.CERT_REQUIRED, ca_certs=self.ca_bundle
-            )
-        try:
-            match_hostname(self.sock.getpeercert(), actual_host)
-        except CertificateError:
-            self.sock.shutdown(socket.SHUT_RDWR)
-            self.sock.close()
-            raise
-
-
-def opener_for(ca_bundle=None):
-    """Get a urlopen() replacement that uses ca_bundle for verification"""
-    return urllib.request.build_opener(
-        VerifyingHTTPSHandler(ca_bundle or find_ca_bundle())
-    ).open
-
-
-# from jaraco.functools
-def once(func):
-    @functools.wraps(func)
-    def wrapper(*args, **kwargs):
-        if not hasattr(func, 'always_returns'):
-            func.always_returns = func(*args, **kwargs)
-        return func.always_returns
-    return wrapper
-
-
-@once
-def get_win_certfile():
-    try:
-        import wincertstore
-    except ImportError:
-        return None
-
-    class CertFile(wincertstore.CertFile):
-        def __init__(self):
-            super(CertFile, self).__init__()
-            atexit.register(self.close)
-
-        def close(self):
-            try:
-                super(CertFile, self).close()
-            except OSError:
-                pass
-
-    _wincerts = CertFile()
-    _wincerts.addstore('CA')
-    _wincerts.addstore('ROOT')
-    return _wincerts.name
-
-
-def find_ca_bundle():
-    """Return an existing CA bundle path, or None"""
-    extant_cert_paths = filter(os.path.isfile, cert_paths)
-    return (
-        get_win_certfile()
-        or next(extant_cert_paths, None)
-        or _certifi_where()
-    )
-
-
-def _certifi_where():
-    try:
-        return __import__('certifi').where()
-    except (ImportError, ResolutionError, ExtractionError):
-        pass
diff --git a/libs/common/setuptools/unicode_utils.py b/libs/common/setuptools/unicode_utils.py
index 7c63efd2..e84e65e3 100644
--- a/libs/common/setuptools/unicode_utils.py
+++ b/libs/common/setuptools/unicode_utils.py
@@ -1,12 +1,10 @@
 import unicodedata
 import sys
 
-from setuptools.extern import six
-
 
 # HFS Plus uses decomposed UTF-8
 def decompose(path):
-    if isinstance(path, six.text_type):
+    if isinstance(path, str):
         return unicodedata.normalize('NFD', path)
     try:
         path = path.decode('utf-8')
@@ -23,7 +21,7 @@ def filesys_decode(path):
     NONE when no expected encoding works
     """
 
-    if isinstance(path, six.text_type):
+    if isinstance(path, str):
         return path
 
     fs_enc = sys.getfilesystemencoding() or 'utf-8'
diff --git a/libs/common/setuptools/wheel.py b/libs/common/setuptools/wheel.py
index 025aaa82..527ed3b2 100644
--- a/libs/common/setuptools/wheel.py
+++ b/libs/common/setuptools/wheel.py
@@ -1,24 +1,22 @@
 """Wheels support."""
 
-from distutils.util import get_platform
-from distutils import log
 import email
 import itertools
 import os
 import posixpath
 import re
 import zipfile
+import contextlib
+
+from distutils.util import get_platform
 
 import pkg_resources
 import setuptools
 from pkg_resources import parse_version
 from setuptools.extern.packaging.tags import sys_tags
 from setuptools.extern.packaging.utils import canonicalize_name
-from setuptools.extern.six import PY3
 from setuptools.command.egg_info import write_requirements
-
-
-__metaclass__ = type
+from setuptools.archive_util import _unpack_zipfile_obj
 
 
 WHEEL_NAME = re.compile(
@@ -27,12 +25,8 @@ WHEEL_NAME = re.compile(
     )\.whl$""",
     re.VERBOSE).match
 
-NAMESPACE_PACKAGE_INIT = '''\
-try:
-    __import__('pkg_resources').declare_namespace(__name__)
-except ImportError:
-    __path__ = __import__('pkgutil').extend_path(__path__, __name__)
-'''
+NAMESPACE_PACKAGE_INIT = \
+    "__import__('pkg_resources').declare_namespace(__name__)\n"
 
 
 def unpack(src_dir, dst_dir):
@@ -57,6 +51,19 @@ def unpack(src_dir, dst_dir):
         os.rmdir(dirpath)
 
 
+@contextlib.contextmanager
+def disable_info_traces():
+    """
+    Temporarily disable info traces.
+    """
+    from distutils import log
+    saved = log.set_threshold(log.WARN)
+    try:
+        yield
+    finally:
+        log.set_threshold(saved)
+
+
 class Wheel:
 
     def __init__(self, filename):
@@ -77,7 +84,8 @@ class Wheel:
 
     def is_compatible(self):
         '''Is the wheel is compatible with the current platform?'''
-        supported_tags = set((t.interpreter, t.abi, t.platform) for t in sys_tags())
+        supported_tags = set(
+            (t.interpreter, t.abi, t.platform) for t in sys_tags())
         return next((True for t in self.tags() if t in supported_tags), False)
 
     def egg_name(self):
@@ -115,7 +123,7 @@ class Wheel:
     def _convert_metadata(zf, destination_eggdir, dist_info, egg_info):
         def get_metadata(name):
             with zf.open(posixpath.join(dist_info, name)) as fp:
-                value = fp.read().decode('utf-8') if PY3 else fp.read()
+                value = fp.read().decode('utf-8')
                 return email.parser.Parser().parsestr(value)
 
         wheel_metadata = get_metadata('WHEEL')
@@ -128,8 +136,7 @@ class Wheel:
             raise ValueError(
                 'unsupported wheel format version: %s' % wheel_version)
         # Extract to target directory.
-        os.mkdir(destination_eggdir)
-        zf.extractall(destination_eggdir)
+        _unpack_zipfile_obj(zf, destination_eggdir)
         # Convert metadata.
         dist_info = os.path.join(destination_eggdir, dist_info)
         dist = pkg_resources.Distribution.from_location(
@@ -143,13 +150,13 @@ class Wheel:
         def raw_req(req):
             req.marker = None
             return str(req)
-        install_requires = list(sorted(map(raw_req, dist.requires())))
+        install_requires = list(map(raw_req, dist.requires()))
         extras_require = {
-            extra: sorted(
+            extra: [
                 req
                 for req in map(raw_req, dist.requires((extra,)))
                 if req not in install_requires
-            )
+            ]
             for extra in dist.extras
         }
         os.rename(dist_info, egg_info)
@@ -163,17 +170,12 @@ class Wheel:
                 extras_require=extras_require,
             ),
         )
-        # Temporarily disable info traces.
-        log_threshold = log._global_log.threshold
-        log.set_threshold(log.WARN)
-        try:
+        with disable_info_traces():
             write_requirements(
                 setup_dist.get_command_obj('egg_info'),
                 None,
                 os.path.join(egg_info, 'requires.txt'),
             )
-        finally:
-            log.set_threshold(log_threshold)
 
     @staticmethod
     def _move_data_entries(destination_eggdir, dist_data):
diff --git a/libs/common/setuptools/windows_support.py b/libs/common/setuptools/windows_support.py
index cb977cff..1ca64fbb 100644
--- a/libs/common/setuptools/windows_support.py
+++ b/libs/common/setuptools/windows_support.py
@@ -1,5 +1,4 @@
 import platform
-import ctypes
 
 
 def windows_only(func):
@@ -17,6 +16,7 @@ def hide_file(path):
 
     `path` must be text.
     """
+    import ctypes
     __import__('ctypes.wintypes')
     SetFileAttributes = ctypes.windll.kernel32.SetFileAttributesW
     SetFileAttributes.argtypes = ctypes.wintypes.LPWSTR, ctypes.wintypes.DWORD
diff --git a/libs/common/stevedore/_cache.py b/libs/common/stevedore/_cache.py
new file mode 100644
index 00000000..63f61290
--- /dev/null
+++ b/libs/common/stevedore/_cache.py
@@ -0,0 +1,212 @@
+#  Licensed under the Apache License, Version 2.0 (the "License"); you may
+#  not use this file except in compliance with the License. You may obtain
+#  a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#  License for the specific language governing permissions and limitations
+#  under the License.
+
+"""Use a cache layer in front of entry point scanning."""
+
+import errno
+import glob
+import hashlib
+import itertools
+import json
+import logging
+import os
+import os.path
+import struct
+import sys
+
+try:
+    # For python 3.8 and later
+    import importlib.metadata as importlib_metadata
+except ImportError:
+    # For everyone else
+    import importlib_metadata
+
+
+log = logging.getLogger('stevedore._cache')
+
+
+def _get_cache_dir():
+    """Locate a platform-appropriate cache directory to use.
+
+    Does not ensure that the cache directory exists.
+    """
+    # Linux, Unix, AIX, etc.
+    if os.name == 'posix' and sys.platform != 'darwin':
+        # use ~/.cache if empty OR not set
+        base_path = os.environ.get("XDG_CACHE_HOME", None) \
+            or os.path.expanduser('~/.cache')
+        return os.path.join(base_path, 'python-entrypoints')
+
+    # Mac OS
+    elif sys.platform == 'darwin':
+        return os.path.expanduser('~/Library/Caches/Python Entry Points')
+
+    # Windows (hopefully)
+    else:
+        base_path = os.environ.get('LOCALAPPDATA', None) \
+            or os.path.expanduser('~\\AppData\\Local')
+        return os.path.join(base_path, 'Python Entry Points')
+
+
+def _get_mtime(name):
+    try:
+        s = os.stat(name)
+        return s.st_mtime
+    except OSError as err:
+        if err.errno != errno.ENOENT:
+            raise
+    return -1.0
+
+
+def _ftobytes(f):
+    return struct.Struct('f').pack(f)
+
+
+def _hash_settings_for_path(path):
+    """Return a hash and the path settings that created it."""
+    paths = []
+    h = hashlib.sha256()
+
+    # Tie the cache to the python interpreter, in case it is part of a
+    # virtualenv.
+    h.update(sys.executable.encode('utf-8'))
+    h.update(sys.prefix.encode('utf-8'))
+
+    for entry in path:
+        mtime = _get_mtime(entry)
+        h.update(entry.encode('utf-8'))
+        h.update(_ftobytes(mtime))
+        paths.append((entry, mtime))
+
+        for ep_file in itertools.chain(
+                glob.iglob(os.path.join(entry,
+                                        '*.dist-info',
+                                        'entry_points.txt')),
+                glob.iglob(os.path.join(entry,
+                                        '*.egg-info',
+                                        'entry_points.txt'))
+        ):
+            mtime = _get_mtime(ep_file)
+            h.update(ep_file.encode('utf-8'))
+            h.update(_ftobytes(mtime))
+            paths.append((ep_file, mtime))
+
+    return (h.hexdigest(), paths)
+
+
+def _build_cacheable_data():
+    real_groups = importlib_metadata.entry_points()
+
+    if not isinstance(real_groups, dict):
+        # importlib-metadata 4.0 or later (or stdlib importlib.metadata in
+        # Python 3.9 or later)
+        real_groups = {
+            group: real_groups.select(group=group)
+            for group in real_groups.groups
+        }
+
+    # Convert the namedtuple values to regular tuples
+    groups = {}
+    for name, group_data in real_groups.items():
+        existing = set()
+        members = []
+        groups[name] = members
+        for ep in group_data:
+            # Filter out duplicates that can occur when testing a
+            # package that provides entry points using tox, where the
+            # package is installed in the virtualenv that tox builds
+            # and is present in the path as '.'.
+            item = ep.name, ep.value, ep.group  # convert to tuple
+            if item in existing:
+                continue
+            existing.add(item)
+            members.append(item)
+    return {
+        'groups': groups,
+        'sys.executable': sys.executable,
+        'sys.prefix': sys.prefix,
+    }
+
+
+class Cache:
+
+    def __init__(self, cache_dir=None):
+        if cache_dir is None:
+            cache_dir = _get_cache_dir()
+        self._dir = cache_dir
+        self._internal = {}
+        self._disable_caching = False
+
+        # Caching can be disabled by either placing .disable file into the
+        # target directory or when python executable is under /tmp (this is the
+        # case when executed from ansible)
+        if any([os.path.isfile(os.path.join(self._dir, '.disable')),
+                sys.executable[0:4] == '/tmp']):
+            self._disable_caching = True
+
+    def _get_data_for_path(self, path):
+        if path is None:
+            path = sys.path
+
+        internal_key = tuple(path)
+        if internal_key in self._internal:
+            return self._internal[internal_key]
+
+        digest, path_values = _hash_settings_for_path(path)
+        filename = os.path.join(self._dir, digest)
+        try:
+            log.debug('reading %s', filename)
+            with open(filename, 'r') as f:
+                data = json.load(f)
+        except (IOError, json.JSONDecodeError):
+            data = _build_cacheable_data()
+            data['path_values'] = path_values
+            if not self._disable_caching:
+                try:
+                    log.debug('writing to %s', filename)
+                    os.makedirs(self._dir, exist_ok=True)
+                    with open(filename, 'w') as f:
+                        json.dump(data, f)
+                except (IOError, OSError):
+                    # Could not create cache dir or write file.
+                    pass
+
+        self._internal[internal_key] = data
+        return data
+
+    def get_group_all(self, group, path=None):
+        result = []
+        data = self._get_data_for_path(path)
+        group_data = data.get('groups', {}).get(group, [])
+        for vals in group_data:
+            result.append(importlib_metadata.EntryPoint(*vals))
+        return result
+
+    def get_group_named(self, group, path=None):
+        result = {}
+        for ep in self.get_group_all(group, path=path):
+            if ep.name not in result:
+                result[ep.name] = ep
+        return result
+
+    def get_single(self, group, name, path=None):
+        for name, ep in self.get_group_named(group, path=path).items():
+            if name == name:
+                return ep
+        raise ValueError('No entrypoint {!r} in group {!r}'.format(
+            group, name))
+
+
+_c = Cache()
+get_group_all = _c.get_group_all
+get_group_named = _c.get_group_named
+get_single = _c.get_single
diff --git a/libs/common/stevedore/dispatch.py b/libs/common/stevedore/dispatch.py
index a1589673..27b511fc 100644
--- a/libs/common/stevedore/dispatch.py
+++ b/libs/common/stevedore/dispatch.py
@@ -142,7 +142,7 @@ class NameDispatchExtensionManager(DispatchExtensionManager):
         then ignored
     :type invoke_on_load: bool
     :param on_load_failure_callback: Callback function that will be called when
-        a entrypoint can not be loaded. The arguments that will be provided
+        an entrypoint can not be loaded. The arguments that will be provided
         when this is called (when an entrypoint fails to load) are
         (manager, entrypoint, exception)
     :type on_load_failure_callback: function
diff --git a/libs/common/stevedore/driver.py b/libs/common/stevedore/driver.py
index 167dc671..bccbcd19 100644
--- a/libs/common/stevedore/driver.py
+++ b/libs/common/stevedore/driver.py
@@ -10,7 +10,8 @@
 #  License for the specific language governing permissions and limitations
 #  under the License.
 
-from .exception import NoMatches, MultipleMatches
+from .exception import MultipleMatches
+from .exception import NoMatches
 from .named import NamedExtensionManager
 
 
@@ -33,7 +34,7 @@ class DriverManager(NamedExtensionManager):
         is True.
     :type invoke_kwds: dict
     :param on_load_failure_callback: Callback function that will be called when
-        a entrypoint can not be loaded. The arguments that will be provided
+        an entrypoint can not be loaded. The arguments that will be provided
         when this is called (when an entrypoint fails to load) are
         (manager, entrypoint, exception)
     :type on_load_failure_callback: function
@@ -85,7 +86,7 @@ class DriverManager(NamedExtensionManager):
             and then ignored
         :type propagate_map_exceptions: bool
         :param on_load_failure_callback: Callback function that will
-            be called when a entrypoint can not be loaded. The
+            be called when an entrypoint can not be loaded. The
             arguments that will be provided when this is called (when
             an entrypoint fails to load) are (manager, entrypoint,
             exception)
@@ -142,7 +143,6 @@ class DriverManager(NamedExtensionManager):
 
     @property
     def driver(self):
-        """Returns the driver being used by this manager.
-        """
+        """Returns the driver being used by this manager."""
         ext = self.extensions[0]
         return ext.obj if ext.obj else ext.plugin
diff --git a/libs/common/stevedore/enabled.py b/libs/common/stevedore/enabled.py
index c2e0c03d..7c12499b 100644
--- a/libs/common/stevedore/enabled.py
+++ b/libs/common/stevedore/enabled.py
@@ -46,7 +46,7 @@ class EnabledExtensionManager(ExtensionManager):
         then ignored
     :type propagate_map_exceptions: bool
     :param on_load_failure_callback: Callback function that will be called when
-        a entrypoint can not be loaded. The arguments that will be provided
+        an entrypoint can not be loaded. The arguments that will be provided
         when this is called (when an entrypoint fails to load) are
         (manager, entrypoint, exception)
     :type on_load_failure_callback: function
diff --git a/libs/common/stevedore/example/base.py b/libs/common/stevedore/example/base.py
index 08f8a5c6..cd48bf21 100644
--- a/libs/common/stevedore/example/base.py
+++ b/libs/common/stevedore/example/base.py
@@ -1,3 +1,18 @@
+# -*- coding: utf-8 -*-
+# Copyright (C) 2020 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import abc
 
 
diff --git a/libs/common/stevedore/example/load_as_driver.py b/libs/common/stevedore/example/load_as_driver.py
index 6838c9da..a3de5df2 100644
--- a/libs/common/stevedore/example/load_as_driver.py
+++ b/libs/common/stevedore/example/load_as_driver.py
@@ -1,3 +1,17 @@
+# Copyright (C) 2020 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import argparse
 
 from stevedore import driver
diff --git a/libs/common/stevedore/example/load_as_extension.py b/libs/common/stevedore/example/load_as_extension.py
index f7518529..1af1f46c 100644
--- a/libs/common/stevedore/example/load_as_extension.py
+++ b/libs/common/stevedore/example/load_as_extension.py
@@ -1,3 +1,17 @@
+# Copyright (C) 2020 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import argparse
 
 from stevedore import extension
diff --git a/libs/common/stevedore/example/setup.py b/libs/common/stevedore/example/setup.py
index 33e2aeec..c0ea667b 100644
--- a/libs/common/stevedore/example/setup.py
+++ b/libs/common/stevedore/example/setup.py
@@ -1,4 +1,19 @@
-from setuptools import setup, find_packages
+# Copyright (C) 2020 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from setuptools import find_packages
+from setuptools import setup
 
 setup(
     name='stevedore-examples',
diff --git a/libs/common/stevedore/example/simple.py b/libs/common/stevedore/example/simple.py
index 1cad96af..0cc3acd9 100644
--- a/libs/common/stevedore/example/simple.py
+++ b/libs/common/stevedore/example/simple.py
@@ -1,9 +1,21 @@
+# Copyright (C) 2020 Red Hat, Inc.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License"); you may
+#  not use this file except in compliance with the License. You may obtain
+#  a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#  License for the specific language governing permissions and limitations
+#  under the License.
 from stevedore.example import base
 
 
 class Simple(base.FormatterBase):
-    """A very basic formatter.
-    """
+    """A very basic formatter."""
 
     def format(self, data):
         """Format the data and return unicode text.
diff --git a/libs/common/stevedore/example2/fields.py b/libs/common/stevedore/example2/fields.py
index f5c8e194..82db7474 100644
--- a/libs/common/stevedore/example2/fields.py
+++ b/libs/common/stevedore/example2/fields.py
@@ -1,3 +1,18 @@
+# -*- coding: utf-8 -*-
+# Copyright (C) 2020 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import textwrap
 
 from stevedore.example import base
diff --git a/libs/common/stevedore/example2/setup.py b/libs/common/stevedore/example2/setup.py
index 31d71454..2293c661 100644
--- a/libs/common/stevedore/example2/setup.py
+++ b/libs/common/stevedore/example2/setup.py
@@ -1,4 +1,19 @@
-from setuptools import setup, find_packages
+# Copyright (C) 2020 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from setuptools import find_packages
+from setuptools import setup
 
 setup(
     name='stevedore-examples2',
diff --git a/libs/common/stevedore/extension.py b/libs/common/stevedore/extension.py
index f5c22928..d9ee0840 100644
--- a/libs/common/stevedore/extension.py
+++ b/libs/common/stevedore/extension.py
@@ -13,11 +13,10 @@
 """ExtensionManager
 """
 
-import operator
-import pkg_resources
-
 import logging
+import operator
 
+from . import _cache
 from .exception import NoMatches
 
 LOG = logging.getLogger(__name__)
@@ -34,7 +33,7 @@ class Extension(object):
     :param name: The entry point name.
     :type name: str
     :param entry_point: The EntryPoint instance returned by
-        :mod:`pkg_resources`.
+        :mod:`entrypoints`.
     :type entry_point: EntryPoint
     :param plugin: The value returned by entry_point.load()
     :param obj: The object returned by ``plugin(*args, **kwds)`` if the
@@ -48,6 +47,46 @@ class Extension(object):
         self.plugin = plugin
         self.obj = obj
 
+    @property
+    def module_name(self):
+        """The name of the module from which the entry point is loaded.
+
+        :return: A string in 'dotted.module' format.
+        """
+        # NOTE: importlib_metadata from PyPI includes this but the
+        # Python 3.8 standard library does not.
+        match = self.entry_point.pattern.match(self.entry_point.value)
+        return match.group('module')
+
+    @property
+    def extras(self):
+        """The 'extras' settings for the plugin."""
+        # NOTE: The underlying package returned re.Match objects until this was
+        # fixed in importlib-metadata 4.11.3. This was fixed in Python 3.10 and
+        # backported to Python 3.9.11. For older versions without this fix,
+        # translate the re.Match objects to the matched strings, which seem
+        # more useful.
+        extras = []
+        for extra in self.entry_point.extras:
+            if isinstance(extra, str):
+                # We were previously returning the whole string including
+                # backets. We need to continue doing so to preserve API
+                # compatibility.
+                extras.append(f'[{extra}]')
+            else:
+                # Python 3.6 returns _sre.SRE_Match objects. Later
+                # versions of python return re.Match objects. Both types
+                # have a 'string' attribute containing the text that
+                # matched the pattern.
+                extras.append(getattr(extra, 'string', extra))
+        return extras
+
+    @property
+    def attr(self):
+        """The attribute of the module to be loaded."""
+        match = self.entry_point.pattern.match(self.entry_point.value)
+        return match.group('attr')
+
     @property
     def entry_point_target(self):
         """The module and attribute referenced by this extension's entry_point.
@@ -55,8 +94,7 @@ class Extension(object):
         :return: A string representation of the target of the entry point in
             'dotted.module:object' format.
         """
-        return '%s:%s' % (self.entry_point.module_name,
-                          self.entry_point.attrs[0])
+        return self.entry_point.value
 
 
 class ExtensionManager(object):
@@ -80,7 +118,7 @@ class ExtensionManager(object):
         then ignored
     :type propagate_map_exceptions: bool
     :param on_load_failure_callback: Callback function that will be called when
-        a entrypoint can not be loaded. The arguments that will be provided
+        an entrypoint can not be loaded. The arguments that will be provided
         when this is called (when an entrypoint fails to load) are
         (manager, entrypoint, exception)
     :type on_load_failure_callback: function
@@ -126,7 +164,7 @@ class ExtensionManager(object):
             are logged and then ignored
         :type propagate_map_exceptions: bool
         :param on_load_failure_callback: Callback function that will
-            be called when a entrypoint can not be loaded. The
+            be called when an entrypoint can not be loaded. The
             arguments that will be provided when this is called (when
             an entrypoint fails to load) are (manager, entrypoint,
             exception)
@@ -174,7 +212,7 @@ class ExtensionManager(object):
 
         """
         if self.namespace not in self.ENTRY_POINT_CACHE:
-            eps = list(pkg_resources.iter_entry_points(self.namespace))
+            eps = list(_cache.get_group_all(self.namespace))
             self.ENTRY_POINT_CACHE[self.namespace] = eps
         return self.ENTRY_POINT_CACHE[self.namespace]
 
@@ -222,7 +260,7 @@ class ExtensionManager(object):
                 ep.require()
             plugin = ep.resolve()
         else:
-            plugin = ep.load(require=verify_requirements)
+            plugin = ep.load()
         if invoke_on_load:
             obj = plugin(*invoke_args, **invoke_kwds)
         else:
@@ -301,8 +339,7 @@ class ExtensionManager(object):
                 LOG.exception(err)
 
     def items(self):
-        """
-        Return an iterator of tuples of the form (name, extension).
+        """Return an iterator of tuples of the form (name, extension).
 
         This is analogous to the Mapping.items() method.
         """
@@ -326,6 +363,5 @@ class ExtensionManager(object):
         return self._extensions_by_name[name]
 
     def __contains__(self, name):
-        """Return true if name is in list of enabled extensions.
-        """
+        """Return true if name is in list of enabled extensions."""
         return any(extension.name == name for extension in self.extensions)
diff --git a/libs/common/stevedore/hook.py b/libs/common/stevedore/hook.py
index 4df2b0f7..4225db33 100644
--- a/libs/common/stevedore/hook.py
+++ b/libs/common/stevedore/hook.py
@@ -32,7 +32,7 @@ class HookManager(NamedExtensionManager):
         is True.
     :type invoke_kwds: dict
     :param on_load_failure_callback: Callback function that will be called when
-        a entrypoint can not be loaded. The arguments that will be provided
+        an entrypoint can not be loaded. The arguments that will be provided
         when this is called (when an entrypoint fails to load) are
         (manager, entrypoint, exception)
     :type on_load_failure_callback: function
diff --git a/libs/common/stevedore/named.py b/libs/common/stevedore/named.py
index 3b47dfd3..1be3922b 100644
--- a/libs/common/stevedore/named.py
+++ b/libs/common/stevedore/named.py
@@ -46,7 +46,7 @@ class NamedExtensionManager(ExtensionManager):
         then ignored
     :type propagate_map_exceptions: bool
     :param on_load_failure_callback: Callback function that will be called when
-        a entrypoint can not be loaded. The arguments that will be provided
+        an entrypoint can not be loaded. The arguments that will be provided
         when this is called (when an entrypoint fails to load) are
         (manager, entrypoint, exception)
     :type on_load_failure_callback: function
@@ -108,7 +108,7 @@ class NamedExtensionManager(ExtensionManager):
             and then ignored
         :type propagate_map_exceptions: bool
         :param on_load_failure_callback: Callback function that will
-            be called when a entrypoint can not be loaded. The
+            be called when an entrypoint can not be loaded. The
             arguments that will be provided when this is called (when
             an entrypoint fails to load) are (manager, entrypoint,
             exception)
diff --git a/libs/common/stevedore/sphinxext.py b/libs/common/stevedore/sphinxext.py
index 26f01e0d..250122ea 100644
--- a/libs/common/stevedore/sphinxext.py
+++ b/libs/common/stevedore/sphinxext.py
@@ -34,29 +34,32 @@ def _simple_list(mgr):
         doc = _get_docstring(ext.plugin) or '\n'
         summary = doc.splitlines()[0].strip()
         yield('* %s -- %s' % (ext.name, summary),
-              ext.entry_point.module_name)
+              ext.module_name)
 
 
 def _detailed_list(mgr, over='', under='-', titlecase=False):
     for name in sorted(mgr.names()):
         ext = mgr[name]
         if over:
-            yield (over * len(ext.name), ext.entry_point.module_name)
+            yield (over * len(ext.name), ext.module_name)
         if titlecase:
-            yield (ext.name.title(), ext.entry_point.module_name)
+            yield (ext.name.title(), ext.module_name)
         else:
-            yield (ext.name, ext.entry_point.module_name)
+            yield (ext.name, ext.module_name)
         if under:
-            yield (under * len(ext.name), ext.entry_point.module_name)
-        yield ('\n', ext.entry_point.module_name)
+            yield (under * len(ext.name), ext.module_name)
+        yield ('\n', ext.module_name)
         doc = _get_docstring(ext.plugin)
         if doc:
-            yield (doc, ext.entry_point.module_name)
+            yield (doc, ext.module_name)
         else:
-            yield ('.. warning:: No documentation found in %s'
-                   % ext.entry_point,
-                   ext.entry_point.module_name)
-        yield ('\n', ext.entry_point.module_name)
+            yield (
+                '.. warning:: No documentation found for {} in {}'.format(
+                    ext.name, ext.entry_point_target,
+                ),
+                ext.module_name,
+            )
+        yield ('\n', ext.module_name)
 
 
 class ListPluginsDirective(rst.Directive):
@@ -79,7 +82,7 @@ class ListPluginsDirective(rst.Directive):
         underline_style = self.options.get('underline-style', '=')
 
         def report_load_failure(mgr, ep, err):
-            LOG.warning(u'Failed to load %s: %s' % (ep.module_name, err))
+            LOG.warning(u'Failed to load %s: %s' % (ep.module, err))
 
         mgr = extension.ExtensionManager(
             namespace,
diff --git a/libs/common/stevedore/tests/test_cache.py b/libs/common/stevedore/tests/test_cache.py
new file mode 100644
index 00000000..0c5af67e
--- /dev/null
+++ b/libs/common/stevedore/tests/test_cache.py
@@ -0,0 +1,64 @@
+#  Licensed under the Apache License, Version 2.0 (the "License"); you may
+#  not use this file except in compliance with the License. You may obtain
+#  a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#  License for the specific language governing permissions and limitations
+#  under the License.
+
+"""Tests for stevedore._cache
+"""
+import sys
+
+from unittest import mock
+
+from stevedore import _cache
+from stevedore.tests import utils
+
+
+class TestCache(utils.TestCase):
+
+    def test_disable_caching_executable(self):
+        """Test caching is disabled if python interpreter is located under /tmp
+        directory (Ansible)
+        """
+        with mock.patch.object(sys, 'executable', '/tmp/fake'):
+            sot = _cache.Cache()
+            self.assertTrue(sot._disable_caching)
+
+    def test_disable_caching_file(self):
+        """Test caching is disabled if .disable file is present in target
+        dir
+        """
+        cache_dir = _cache._get_cache_dir()
+
+        with mock.patch('os.path.isfile') as mock_path:
+            mock_path.return_value = True
+            sot = _cache.Cache()
+            mock_path.assert_called_with('%s/.disable' % cache_dir)
+            self.assertTrue(sot._disable_caching)
+
+            mock_path.return_value = False
+            sot = _cache.Cache()
+            self.assertFalse(sot._disable_caching)
+
+    @mock.patch('os.makedirs')
+    @mock.patch('builtins.open')
+    def test__get_data_for_path_no_write(self, mock_open, mock_mkdir):
+        sot = _cache.Cache()
+        sot._disable_caching = True
+        mock_open.side_effect = IOError
+        sot._get_data_for_path('fake')
+        mock_mkdir.assert_not_called()
+
+    def test__build_cacheable_data(self):
+        # this is a rubbish test as we don't actually do anything with the
+        # data, but it's too hard to script since it's totally environmentally
+        # dependent and mocking out the underlying calls would remove the value
+        # of this test (we want to test those underlying API calls)
+        ret = _cache._build_cacheable_data()
+        self.assertIsInstance(ret['groups'], dict)
diff --git a/libs/common/stevedore/tests/test_dispatch.py b/libs/common/stevedore/tests/test_dispatch.py
index f1c305ab..e54e4928 100644
--- a/libs/common/stevedore/tests/test_dispatch.py
+++ b/libs/common/stevedore/tests/test_dispatch.py
@@ -10,8 +10,8 @@
 #  License for the specific language governing permissions and limitations
 #  under the License.
 
-from stevedore.tests import utils
 from stevedore import dispatch
+from stevedore.tests import utils
 
 
 def check_dispatch(ep, *args, **kwds):
diff --git a/libs/common/stevedore/tests/test_driver.py b/libs/common/stevedore/tests/test_driver.py
index c568a3a8..92308359 100644
--- a/libs/common/stevedore/tests/test_driver.py
+++ b/libs/common/stevedore/tests/test_driver.py
@@ -13,7 +13,12 @@
 """Tests for stevedore.extension
 """
 
-import pkg_resources
+try:
+    # For python 3.8 and later
+    import importlib.metadata as importlib_metadata
+except ImportError:
+    # For everyone else
+    import importlib_metadata
 
 from stevedore import driver
 from stevedore import exception
@@ -68,13 +73,15 @@ class TestCallback(utils.TestCase):
         extensions = [
             extension.Extension(
                 'backend',
-                pkg_resources.EntryPoint.parse('backend = pkg1:driver'),
+                importlib_metadata.EntryPoint(
+                    'backend', 'pkg1:driver', 'backend'),
                 'pkg backend',
                 None,
             ),
             extension.Extension(
                 'backend',
-                pkg_resources.EntryPoint.parse('backend = pkg2:driver'),
+                importlib_metadata.EntryPoint(
+                    'backend', 'pkg2:driver', 'backend'),
                 'pkg backend',
                 None,
             ),
diff --git a/libs/common/stevedore/tests/test_extension.py b/libs/common/stevedore/tests/test_extension.py
index c06afb52..405fb88b 100644
--- a/libs/common/stevedore/tests/test_extension.py
+++ b/libs/common/stevedore/tests/test_extension.py
@@ -16,6 +16,13 @@
 import operator
 from unittest import mock
 
+try:
+    # For python 3.8 and later
+    import importlib.metadata as importlib_metadata
+except ImportError:
+    # For everyone else
+    import importlib_metadata
+
 from stevedore import exception
 from stevedore import extension
 from stevedore.tests import utils
@@ -96,13 +103,13 @@ class TestCallback(utils.TestCase):
 
     def test_use_cache(self):
         # If we insert something into the cache of entry points,
-        # the manager should not have to call into pkg_resources
+        # the manager should not have to call into entrypoints
         # to find the plugins.
         cache = extension.ExtensionManager.ENTRY_POINT_CACHE
         cache['stevedore.test.faux'] = []
-        with mock.patch('pkg_resources.iter_entry_points',
+        with mock.patch('stevedore._cache.get_group_all',
                         side_effect=
-                        AssertionError('called iter_entry_points')):
+                        AssertionError('called get_group_all')):
             em = extension.ExtensionManager('stevedore.test.faux')
             names = em.names()
         self.assertEqual(names, [])
@@ -235,9 +242,48 @@ class TestLoadRequirementsOldSetuptools(utils.TestCase):
     def test_verify_requirements(self):
         self.em._load_one_plugin(self.mock_ep, False, (), {},
                                  verify_requirements=True)
-        self.mock_ep.load.assert_called_once_with(require=True)
+        self.mock_ep.load.assert_called_once_with()
 
     def test_no_verify_requirements(self):
         self.em._load_one_plugin(self.mock_ep, False, (), {},
                                  verify_requirements=False)
-        self.mock_ep.load.assert_called_once_with(require=False)
+        self.mock_ep.load.assert_called_once_with()
+
+
+class TestExtensionProperties(utils.TestCase):
+
+    def setUp(self):
+        self.ext1 = extension.Extension(
+            'name',
+            importlib_metadata.EntryPoint(
+                'name', 'module.name:attribute.name [extra]', 'group_name',
+            ),
+            mock.Mock(),
+            None,
+        )
+        self.ext2 = extension.Extension(
+            'name',
+            importlib_metadata.EntryPoint(
+                'name', 'module:attribute', 'group_name',
+            ),
+            mock.Mock(),
+            None,
+        )
+
+    def test_module_name(self):
+        self.assertEqual('module.name', self.ext1.module_name)
+        self.assertEqual('module', self.ext2.module_name)
+
+    def test_extras(self):
+        self.assertEqual(['[extra]'], self.ext1.extras)
+        self.assertEqual([], self.ext2.extras)
+
+    def test_attr(self):
+        self.assertEqual('attribute.name', self.ext1.attr)
+        self.assertEqual('attribute', self.ext2.attr)
+
+    def test_entry_point_target(self):
+        self.assertEqual('module.name:attribute.name [extra]',
+                         self.ext1.entry_point_target)
+        self.assertEqual('module:attribute',
+                         self.ext2.entry_point_target)
diff --git a/libs/common/stevedore/tests/test_sphinxext.py b/libs/common/stevedore/tests/test_sphinxext.py
index 8b6a8882..e90bd679 100644
--- a/libs/common/stevedore/tests/test_sphinxext.py
+++ b/libs/common/stevedore/tests/test_sphinxext.py
@@ -12,24 +12,26 @@
 """Tests for the sphinx extension
 """
 
-from unittest import mock
+try:
+    # For python 3.8 and later
+    import importlib.metadata as importlib_metadata
+except ImportError:
+    # For everyone else
+    import importlib_metadata
 
 from stevedore import extension
 from stevedore import sphinxext
 from stevedore.tests import utils
 
-import pkg_resources
-
 
 def _make_ext(name, docstring):
     def inner():
         pass
 
     inner.__doc__ = docstring
-    m1 = mock.Mock(spec=pkg_resources.EntryPoint)
-    m1.module_name = '%s_module' % name
-    s = mock.Mock(return_value='ENTRY_POINT(%s)' % name)
-    m1.__str__ = s
+    m1 = importlib_metadata.EntryPoint(
+        name, '{}_module:{}'.format(name, name), 'group',
+    )
     return extension.Extension(name, m1, inner, None)
 
 
@@ -111,7 +113,8 @@ class TestSphinxExt(utils.TestCase):
                 ('nodoc', 'nodoc_module'),
                 ('-----', 'nodoc_module'),
                 ('\n', 'nodoc_module'),
-                ('.. warning:: No documentation found in ENTRY_POINT(nodoc)',
+                (('.. warning:: No documentation found for '
+                 'nodoc in nodoc_module:nodoc'),
                  'nodoc_module'),
                 ('\n', 'nodoc_module'),
             ],
diff --git a/libs/common/stevedore/tests/test_test_manager.py b/libs/common/stevedore/tests/test_test_manager.py
index 54bb454f..3ada1397 100644
--- a/libs/common/stevedore/tests/test_test_manager.py
+++ b/libs/common/stevedore/tests/test_test_manager.py
@@ -10,15 +10,20 @@
 #  License for the specific language governing permissions and limitations
 #  under the License.
 
-from unittest.mock import Mock, sentinel
+from unittest.mock import Mock
+from unittest.mock import sentinel
 
-from stevedore import (ExtensionManager, NamedExtensionManager, HookManager,
-                       DriverManager, EnabledExtensionManager)
-from stevedore.dispatch import (DispatchExtensionManager,
-                                NameDispatchExtensionManager)
+from stevedore.dispatch import DispatchExtensionManager
+from stevedore.dispatch import NameDispatchExtensionManager
 from stevedore.extension import Extension
 from stevedore.tests import utils
 
+from stevedore import DriverManager
+from stevedore import EnabledExtensionManager
+from stevedore import ExtensionManager
+from stevedore import HookManager
+from stevedore import NamedExtensionManager
+
 
 test_extension = Extension('test_extension', None, None, None)
 test_extension2 = Extension('another_one', None, None, None)
diff --git a/libs/common/typing_extensions.py b/libs/common/typing_extensions.py
index 194731cd..ef42417c 100644
--- a/libs/common/typing_extensions.py
+++ b/libs/common/typing_extensions.py
@@ -1,33 +1,26 @@
 import abc
 import collections
 import collections.abc
+import functools
 import operator
 import sys
 import types as _types
 import typing
 
-# After PEP 560, internal typing API was substantially reworked.
-# This is especially important for Protocol class which uses internal APIs
-# quite extensively.
-PEP_560 = sys.version_info[:3] >= (3, 7, 0)
 
-if PEP_560:
-    GenericMeta = type
-else:
-    # 3.6
-    from typing import GenericMeta, _type_vars  # noqa
-
-
-# Please keep __all__ alphabetized within each category.
 __all__ = [
     # Super-special typing primitives.
+    'Any',
     'ClassVar',
     'Concatenate',
     'Final',
     'LiteralString',
     'ParamSpec',
+    'ParamSpecArgs',
+    'ParamSpecKwargs',
     'Self',
     'Type',
+    'TypeVar',
     'TypeVarTuple',
     'Unpack',
 
@@ -45,6 +38,7 @@ __all__ = [
     'Counter',
     'Deque',
     'DefaultDict',
+    'NamedTuple',
     'OrderedDict',
     'TypedDict',
 
@@ -54,13 +48,20 @@ __all__ = [
     # One-off things.
     'Annotated',
     'assert_never',
+    'assert_type',
+    'clear_overloads',
     'dataclass_transform',
+    'get_overloads',
     'final',
+    'get_args',
+    'get_origin',
+    'get_type_hints',
     'IntVar',
     'is_typeddict',
     'Literal',
     'NewType',
     'overload',
+    'override',
     'Protocol',
     'reveal_type',
     'runtime',
@@ -75,21 +76,13 @@ __all__ = [
     'NotRequired',
 ]
 
-if PEP_560:
-    __all__.extend(["get_args", "get_origin", "get_type_hints"])
+# for backward compatibility
+PEP_560 = True
+GenericMeta = type
 
 # The functions below are modified copies of typing internal helpers.
 # They are needed by _ProtocolMeta and they provide support for PEP 646.
 
-
-def _no_slots_copy(dct):
-    dict_copy = dict(dct)
-    if '__slots__' in dict_copy:
-        for slot in dict_copy['__slots__']:
-            dict_copy.pop(slot, None)
-    return dict_copy
-
-
 _marker = object()
 
 
@@ -148,32 +141,7 @@ def _collect_type_vars(types, typevar_types=None):
     return tuple(tvars)
 
 
-# 3.6.2+
-if hasattr(typing, 'NoReturn'):
-    NoReturn = typing.NoReturn
-# 3.6.0-3.6.1
-else:
-    class _NoReturn(typing._FinalTypingBase, _root=True):
-        """Special type indicating functions that never return.
-        Example::
-
-          from typing import NoReturn
-
-          def stop() -> NoReturn:
-              raise Exception('no way')
-
-        This type is invalid in other positions, e.g., ``List[NoReturn]``
-        will fail in static type checkers.
-        """
-        __slots__ = ()
-
-        def __instancecheck__(self, obj):
-            raise TypeError("NoReturn cannot be used with isinstance().")
-
-        def __subclasscheck__(self, cls):
-            raise TypeError("NoReturn cannot be used with issubclass().")
-
-    NoReturn = _NoReturn(_root=True)
+NoReturn = typing.NoReturn
 
 # Some unconstrained type variables.  These are used by the container types.
 # (These are not for export.)
@@ -183,6 +151,37 @@ VT = typing.TypeVar('VT')  # Value type.
 T_co = typing.TypeVar('T_co', covariant=True)  # Any type covariant containers.
 T_contra = typing.TypeVar('T_contra', contravariant=True)  # Ditto contravariant.
 
+
+if sys.version_info >= (3, 11):
+    from typing import Any
+else:
+
+    class _AnyMeta(type):
+        def __instancecheck__(self, obj):
+            if self is Any:
+                raise TypeError("typing_extensions.Any cannot be used with isinstance()")
+            return super().__instancecheck__(obj)
+
+        def __repr__(self):
+            if self is Any:
+                return "typing_extensions.Any"
+            return super().__repr__()
+
+    class Any(metaclass=_AnyMeta):
+        """Special type indicating an unconstrained type.
+        - Any is compatible with every type.
+        - Any assumed to have all methods.
+        - All values assumed to be instances of Any.
+        Note that all the above statements are true from the point of view of
+        static type checkers. At runtime, Any should not be used with instance
+        checks.
+        """
+        def __new__(cls, *args, **kwargs):
+            if cls is Any:
+                raise TypeError("Any cannot be instantiated")
+            return super().__new__(cls, *args, **kwargs)
+
+
 ClassVar = typing.ClassVar
 
 # On older versions of typing there is an internal class named "Final".
@@ -190,7 +189,7 @@ ClassVar = typing.ClassVar
 if hasattr(typing, 'Final') and sys.version_info[:2] >= (3, 7):
     Final = typing.Final
 # 3.7
-elif sys.version_info[:2] >= (3, 7):
+else:
     class _FinalForm(typing._SpecialForm, _root=True):
 
         def __repr__(self):
@@ -198,7 +197,7 @@ elif sys.version_info[:2] >= (3, 7):
 
         def __getitem__(self, parameters):
             item = typing._type_check(parameters,
-                                      f'{self._name} accepts only single type')
+                                      f'{self._name} accepts only a single type.')
             return typing._GenericAlias(self, (item,))
 
     Final = _FinalForm('Final',
@@ -215,61 +214,6 @@ elif sys.version_info[:2] >= (3, 7):
                                TIMEOUT = 1  # Error reported by type checker
 
                        There is no runtime checking of these properties.""")
-# 3.6
-else:
-    class _Final(typing._FinalTypingBase, _root=True):
-        """A special typing construct to indicate that a name
-        cannot be re-assigned or overridden in a subclass.
-        For example:
-
-            MAX_SIZE: Final = 9000
-            MAX_SIZE += 1  # Error reported by type checker
-
-            class Connection:
-                TIMEOUT: Final[int] = 10
-            class FastConnector(Connection):
-                TIMEOUT = 1  # Error reported by type checker
-
-        There is no runtime checking of these properties.
-        """
-
-        __slots__ = ('__type__',)
-
-        def __init__(self, tp=None, **kwds):
-            self.__type__ = tp
-
-        def __getitem__(self, item):
-            cls = type(self)
-            if self.__type__ is None:
-                return cls(typing._type_check(item,
-                           f'{cls.__name__[1:]} accepts only single type.'),
-                           _root=True)
-            raise TypeError(f'{cls.__name__[1:]} cannot be further subscripted')
-
-        def _eval_type(self, globalns, localns):
-            new_tp = typing._eval_type(self.__type__, globalns, localns)
-            if new_tp == self.__type__:
-                return self
-            return type(self)(new_tp, _root=True)
-
-        def __repr__(self):
-            r = super().__repr__()
-            if self.__type__ is not None:
-                r += f'[{typing._type_repr(self.__type__)}]'
-            return r
-
-        def __hash__(self):
-            return hash((type(self).__name__, self.__type__))
-
-        def __eq__(self, other):
-            if not isinstance(other, _Final):
-                return NotImplemented
-            if self.__type__ is not None:
-                return self.__type__ == other.__type__
-            return self is other
-
-    Final = _Final(_root=True)
-
 
 if sys.version_info >= (3, 11):
     final = typing.final
@@ -317,7 +261,7 @@ def IntVar(name):
 if hasattr(typing, 'Literal'):
     Literal = typing.Literal
 # 3.7:
-elif sys.version_info[:2] >= (3, 7):
+else:
     class _LiteralForm(typing._SpecialForm, _root=True):
 
         def __repr__(self):
@@ -339,59 +283,75 @@ elif sys.version_info[:2] >= (3, 7):
                            Literal[...] cannot be subclassed. There is no runtime
                            checking verifying that the parameter is actually a value
                            instead of a type.""")
-# 3.6:
-else:
-    class _Literal(typing._FinalTypingBase, _root=True):
-        """A type that can be used to indicate to type checkers that the
-        corresponding value has a value literally equivalent to the
-        provided parameter. For example:
-
-            var: Literal[4] = 4
-
-        The type checker understands that 'var' is literally equal to the
-        value 4 and no other value.
-
-        Literal[...] cannot be subclassed. There is no runtime checking
-        verifying that the parameter is actually a value instead of a type.
-        """
-
-        __slots__ = ('__values__',)
-
-        def __init__(self, values=None, **kwds):
-            self.__values__ = values
-
-        def __getitem__(self, values):
-            cls = type(self)
-            if self.__values__ is None:
-                if not isinstance(values, tuple):
-                    values = (values,)
-                return cls(values, _root=True)
-            raise TypeError(f'{cls.__name__[1:]} cannot be further subscripted')
-
-        def _eval_type(self, globalns, localns):
-            return self
-
-        def __repr__(self):
-            r = super().__repr__()
-            if self.__values__ is not None:
-                r += f'[{", ".join(map(typing._type_repr, self.__values__))}]'
-            return r
-
-        def __hash__(self):
-            return hash((type(self).__name__, self.__values__))
-
-        def __eq__(self, other):
-            if not isinstance(other, _Literal):
-                return NotImplemented
-            if self.__values__ is not None:
-                return self.__values__ == other.__values__
-            return self is other
-
-    Literal = _Literal(_root=True)
 
 
 _overload_dummy = typing._overload_dummy  # noqa
-overload = typing.overload
+
+
+if hasattr(typing, "get_overloads"):  # 3.11+
+    overload = typing.overload
+    get_overloads = typing.get_overloads
+    clear_overloads = typing.clear_overloads
+else:
+    # {module: {qualname: {firstlineno: func}}}
+    _overload_registry = collections.defaultdict(
+        functools.partial(collections.defaultdict, dict)
+    )
+
+    def overload(func):
+        """Decorator for overloaded functions/methods.
+
+        In a stub file, place two or more stub definitions for the same
+        function in a row, each decorated with @overload.  For example:
+
+        @overload
+        def utf8(value: None) -> None: ...
+        @overload
+        def utf8(value: bytes) -> bytes: ...
+        @overload
+        def utf8(value: str) -> bytes: ...
+
+        In a non-stub file (i.e. a regular .py file), do the same but
+        follow it with an implementation.  The implementation should *not*
+        be decorated with @overload.  For example:
+
+        @overload
+        def utf8(value: None) -> None: ...
+        @overload
+        def utf8(value: bytes) -> bytes: ...
+        @overload
+        def utf8(value: str) -> bytes: ...
+        def utf8(value):
+            # implementation goes here
+
+        The overloads for a function can be retrieved at runtime using the
+        get_overloads() function.
+        """
+        # classmethod and staticmethod
+        f = getattr(func, "__func__", func)
+        try:
+            _overload_registry[f.__module__][f.__qualname__][
+                f.__code__.co_firstlineno
+            ] = func
+        except AttributeError:
+            # Not a normal function; ignore.
+            pass
+        return _overload_dummy
+
+    def get_overloads(func):
+        """Return all defined overloads for *func* as a sequence."""
+        # classmethod and staticmethod
+        f = getattr(func, "__func__", func)
+        if f.__module__ not in _overload_registry:
+            return []
+        mod_dict = _overload_registry[f.__module__]
+        if f.__qualname__ not in mod_dict:
+            return []
+        return list(mod_dict[f.__qualname__].values())
+
+    def clear_overloads():
+        """Clear all overloads in the registry."""
+        _overload_registry.clear()
 
 
 # This is not a real generic class.  Don't use outside annotations.
@@ -401,154 +361,30 @@ Type = typing.Type
 # A few are simply re-exported for completeness.
 
 
-class _ExtensionsGenericMeta(GenericMeta):
-    def __subclasscheck__(self, subclass):
-        """This mimics a more modern GenericMeta.__subclasscheck__() logic
-        (that does not have problems with recursion) to work around interactions
-        between collections, typing, and typing_extensions on older
-        versions of Python, see https://github.com/python/typing/issues/501.
-        """
-        if self.__origin__ is not None:
-            if sys._getframe(1).f_globals['__name__'] not in ['abc', 'functools']:
-                raise TypeError("Parameterized generics cannot be used with class "
-                                "or instance checks")
-            return False
-        if not self.__extra__:
-            return super().__subclasscheck__(subclass)
-        res = self.__extra__.__subclasshook__(subclass)
-        if res is not NotImplemented:
-            return res
-        if self.__extra__ in subclass.__mro__:
-            return True
-        for scls in self.__extra__.__subclasses__():
-            if isinstance(scls, GenericMeta):
-                continue
-            if issubclass(subclass, scls):
-                return True
-        return False
-
-
 Awaitable = typing.Awaitable
 Coroutine = typing.Coroutine
 AsyncIterable = typing.AsyncIterable
 AsyncIterator = typing.AsyncIterator
-
-# 3.6.1+
-if hasattr(typing, 'Deque'):
-    Deque = typing.Deque
-# 3.6.0
-else:
-    class Deque(collections.deque, typing.MutableSequence[T],
-                metaclass=_ExtensionsGenericMeta,
-                extra=collections.deque):
-        __slots__ = ()
-
-        def __new__(cls, *args, **kwds):
-            if cls._gorg is Deque:
-                return collections.deque(*args, **kwds)
-            return typing._generic_new(collections.deque, cls, *args, **kwds)
-
+Deque = typing.Deque
 ContextManager = typing.ContextManager
-# 3.6.2+
-if hasattr(typing, 'AsyncContextManager'):
-    AsyncContextManager = typing.AsyncContextManager
-# 3.6.0-3.6.1
-else:
-    from _collections_abc import _check_methods as _check_methods_in_mro  # noqa
-
-    class AsyncContextManager(typing.Generic[T_co]):
-        __slots__ = ()
-
-        async def __aenter__(self):
-            return self
-
-        @abc.abstractmethod
-        async def __aexit__(self, exc_type, exc_value, traceback):
-            return None
-
-        @classmethod
-        def __subclasshook__(cls, C):
-            if cls is AsyncContextManager:
-                return _check_methods_in_mro(C, "__aenter__", "__aexit__")
-            return NotImplemented
-
+AsyncContextManager = typing.AsyncContextManager
 DefaultDict = typing.DefaultDict
 
 # 3.7.2+
 if hasattr(typing, 'OrderedDict'):
     OrderedDict = typing.OrderedDict
 # 3.7.0-3.7.2
-elif (3, 7, 0) <= sys.version_info[:3] < (3, 7, 2):
+else:
     OrderedDict = typing._alias(collections.OrderedDict, (KT, VT))
-# 3.6
-else:
-    class OrderedDict(collections.OrderedDict, typing.MutableMapping[KT, VT],
-                      metaclass=_ExtensionsGenericMeta,
-                      extra=collections.OrderedDict):
-
-        __slots__ = ()
-
-        def __new__(cls, *args, **kwds):
-            if cls._gorg is OrderedDict:
-                return collections.OrderedDict(*args, **kwds)
-            return typing._generic_new(collections.OrderedDict, cls, *args, **kwds)
-
-# 3.6.2+
-if hasattr(typing, 'Counter'):
-    Counter = typing.Counter
-# 3.6.0-3.6.1
-else:
-    class Counter(collections.Counter,
-                  typing.Dict[T, int],
-                  metaclass=_ExtensionsGenericMeta, extra=collections.Counter):
-
-        __slots__ = ()
-
-        def __new__(cls, *args, **kwds):
-            if cls._gorg is Counter:
-                return collections.Counter(*args, **kwds)
-            return typing._generic_new(collections.Counter, cls, *args, **kwds)
-
-# 3.6.1+
-if hasattr(typing, 'ChainMap'):
-    ChainMap = typing.ChainMap
-elif hasattr(collections, 'ChainMap'):
-    class ChainMap(collections.ChainMap, typing.MutableMapping[KT, VT],
-                   metaclass=_ExtensionsGenericMeta,
-                   extra=collections.ChainMap):
-
-        __slots__ = ()
-
-        def __new__(cls, *args, **kwds):
-            if cls._gorg is ChainMap:
-                return collections.ChainMap(*args, **kwds)
-            return typing._generic_new(collections.ChainMap, cls, *args, **kwds)
-
-# 3.6.1+
-if hasattr(typing, 'AsyncGenerator'):
-    AsyncGenerator = typing.AsyncGenerator
-# 3.6.0
-else:
-    class AsyncGenerator(AsyncIterator[T_co], typing.Generic[T_co, T_contra],
-                         metaclass=_ExtensionsGenericMeta,
-                         extra=collections.abc.AsyncGenerator):
-        __slots__ = ()
 
+Counter = typing.Counter
+ChainMap = typing.ChainMap
+AsyncGenerator = typing.AsyncGenerator
 NewType = typing.NewType
 Text = typing.Text
 TYPE_CHECKING = typing.TYPE_CHECKING
 
 
-def _gorg(cls):
-    """This function exists for compatibility with old typing versions."""
-    assert isinstance(cls, GenericMeta)
-    if hasattr(cls, '_gorg'):
-        return cls._gorg
-    while cls.__origin__ is not None:
-        cls = cls.__origin__
-    return cls
-
-
 _PROTO_WHITELIST = ['Callable', 'Awaitable',
                     'Iterable', 'Iterator', 'AsyncIterable', 'AsyncIterator',
                     'Hashable', 'Sized', 'Container', 'Collection', 'Reversible',
@@ -578,17 +414,57 @@ def _is_callable_members_only(cls):
     return all(callable(getattr(cls, attr, None)) for attr in _get_protocol_attrs(cls))
 
 
+def _maybe_adjust_parameters(cls):
+    """Helper function used in Protocol.__init_subclass__ and _TypedDictMeta.__new__.
+
+    The contents of this function are very similar
+    to logic found in typing.Generic.__init_subclass__
+    on the CPython main branch.
+    """
+    tvars = []
+    if '__orig_bases__' in cls.__dict__:
+        tvars = typing._collect_type_vars(cls.__orig_bases__)
+        # Look for Generic[T1, ..., Tn] or Protocol[T1, ..., Tn].
+        # If found, tvars must be a subset of it.
+        # If not found, tvars is it.
+        # Also check for and reject plain Generic,
+        # and reject multiple Generic[...] and/or Protocol[...].
+        gvars = None
+        for base in cls.__orig_bases__:
+            if (isinstance(base, typing._GenericAlias) and
+                    base.__origin__ in (typing.Generic, Protocol)):
+                # for error messages
+                the_base = base.__origin__.__name__
+                if gvars is not None:
+                    raise TypeError(
+                        "Cannot inherit from Generic[...]"
+                        " and/or Protocol[...] multiple types.")
+                gvars = base.__parameters__
+        if gvars is None:
+            gvars = tvars
+        else:
+            tvarset = set(tvars)
+            gvarset = set(gvars)
+            if not tvarset <= gvarset:
+                s_vars = ', '.join(str(t) for t in tvars if t not in gvarset)
+                s_args = ', '.join(str(g) for g in gvars)
+                raise TypeError(f"Some type variables ({s_vars}) are"
+                                f" not listed in {the_base}[{s_args}]")
+            tvars = gvars
+    cls.__parameters__ = tuple(tvars)
+
+
 # 3.8+
 if hasattr(typing, 'Protocol'):
     Protocol = typing.Protocol
 # 3.7
-elif PEP_560:
+else:
 
     def _no_init(self, *args, **kwargs):
         if type(self)._is_protocol:
             raise TypeError('Protocols cannot be instantiated')
 
-    class _ProtocolMeta(abc.ABCMeta):
+    class _ProtocolMeta(abc.ABCMeta):  # noqa: B024
         # This metaclass is a bit unfortunate and exists only because of the lack
         # of __instancehook__.
         def __instancecheck__(cls, instance):
@@ -674,43 +550,13 @@ elif PEP_560:
             return typing._GenericAlias(cls, params)
 
         def __init_subclass__(cls, *args, **kwargs):
-            tvars = []
             if '__orig_bases__' in cls.__dict__:
                 error = typing.Generic in cls.__orig_bases__
             else:
                 error = typing.Generic in cls.__bases__
             if error:
                 raise TypeError("Cannot inherit from plain Generic")
-            if '__orig_bases__' in cls.__dict__:
-                tvars = typing._collect_type_vars(cls.__orig_bases__)
-                # Look for Generic[T1, ..., Tn] or Protocol[T1, ..., Tn].
-                # If found, tvars must be a subset of it.
-                # If not found, tvars is it.
-                # Also check for and reject plain Generic,
-                # and reject multiple Generic[...] and/or Protocol[...].
-                gvars = None
-                for base in cls.__orig_bases__:
-                    if (isinstance(base, typing._GenericAlias) and
-                            base.__origin__ in (typing.Generic, Protocol)):
-                        # for error messages
-                        the_base = base.__origin__.__name__
-                        if gvars is not None:
-                            raise TypeError(
-                                "Cannot inherit from Generic[...]"
-                                " and/or Protocol[...] multiple types.")
-                        gvars = base.__parameters__
-                if gvars is None:
-                    gvars = tvars
-                else:
-                    tvarset = set(tvars)
-                    gvarset = set(gvars)
-                    if not tvarset <= gvarset:
-                        s_vars = ', '.join(str(t) for t in tvars if t not in gvarset)
-                        s_args = ', '.join(str(g) for g in gvars)
-                        raise TypeError(f"Some type variables ({s_vars}) are"
-                                        f" not listed in {the_base}[{s_args}]")
-                    tvars = gvars
-            cls.__parameters__ = tuple(tvars)
+            _maybe_adjust_parameters(cls)
 
             # Determine if this is a protocol or a concrete subclass.
             if not cls.__dict__.get('_is_protocol', None):
@@ -764,250 +610,12 @@ elif PEP_560:
                     raise TypeError('Protocols can only inherit from other'
                                     f' protocols, got {repr(base)}')
             cls.__init__ = _no_init
-# 3.6
-else:
-    from typing import _next_in_mro, _type_check  # noqa
-
-    def _no_init(self, *args, **kwargs):
-        if type(self)._is_protocol:
-            raise TypeError('Protocols cannot be instantiated')
-
-    class _ProtocolMeta(GenericMeta):
-        """Internal metaclass for Protocol.
-
-        This exists so Protocol classes can be generic without deriving
-        from Generic.
-        """
-        def __new__(cls, name, bases, namespace,
-                    tvars=None, args=None, origin=None, extra=None, orig_bases=None):
-            # This is just a version copied from GenericMeta.__new__ that
-            # includes "Protocol" special treatment. (Comments removed for brevity.)
-            assert extra is None  # Protocols should not have extra
-            if tvars is not None:
-                assert origin is not None
-                assert all(isinstance(t, typing.TypeVar) for t in tvars), tvars
-            else:
-                tvars = _type_vars(bases)
-                gvars = None
-                for base in bases:
-                    if base is typing.Generic:
-                        raise TypeError("Cannot inherit from plain Generic")
-                    if (isinstance(base, GenericMeta) and
-                            base.__origin__ in (typing.Generic, Protocol)):
-                        if gvars is not None:
-                            raise TypeError(
-                                "Cannot inherit from Generic[...] or"
-                                " Protocol[...] multiple times.")
-                        gvars = base.__parameters__
-                if gvars is None:
-                    gvars = tvars
-                else:
-                    tvarset = set(tvars)
-                    gvarset = set(gvars)
-                    if not tvarset <= gvarset:
-                        s_vars = ", ".join(str(t) for t in tvars if t not in gvarset)
-                        s_args = ", ".join(str(g) for g in gvars)
-                        cls_name = "Generic" if any(b.__origin__ is typing.Generic
-                                                    for b in bases) else "Protocol"
-                        raise TypeError(f"Some type variables ({s_vars}) are"
-                                        f" not listed in {cls_name}[{s_args}]")
-                    tvars = gvars
-
-            initial_bases = bases
-            if (extra is not None and type(extra) is abc.ABCMeta and
-                    extra not in bases):
-                bases = (extra,) + bases
-            bases = tuple(_gorg(b) if isinstance(b, GenericMeta) else b
-                          for b in bases)
-            if any(isinstance(b, GenericMeta) and b is not typing.Generic for b in bases):
-                bases = tuple(b for b in bases if b is not typing.Generic)
-            namespace.update({'__origin__': origin, '__extra__': extra})
-            self = super(GenericMeta, cls).__new__(cls, name, bases, namespace,
-                                                   _root=True)
-            super(GenericMeta, self).__setattr__('_gorg',
-                                                 self if not origin else
-                                                 _gorg(origin))
-            self.__parameters__ = tvars
-            self.__args__ = tuple(... if a is typing._TypingEllipsis else
-                                  () if a is typing._TypingEmpty else
-                                  a for a in args) if args else None
-            self.__next_in_mro__ = _next_in_mro(self)
-            if orig_bases is None:
-                self.__orig_bases__ = initial_bases
-            elif origin is not None:
-                self._abc_registry = origin._abc_registry
-                self._abc_cache = origin._abc_cache
-            if hasattr(self, '_subs_tree'):
-                self.__tree_hash__ = (hash(self._subs_tree()) if origin else
-                                      super(GenericMeta, self).__hash__())
-            return self
-
-        def __init__(cls, *args, **kwargs):
-            super().__init__(*args, **kwargs)
-            if not cls.__dict__.get('_is_protocol', None):
-                cls._is_protocol = any(b is Protocol or
-                                       isinstance(b, _ProtocolMeta) and
-                                       b.__origin__ is Protocol
-                                       for b in cls.__bases__)
-            if cls._is_protocol:
-                for base in cls.__mro__[1:]:
-                    if not (base in (object, typing.Generic) or
-                            base.__module__ == 'collections.abc' and
-                            base.__name__ in _PROTO_WHITELIST or
-                            isinstance(base, typing.TypingMeta) and base._is_protocol or
-                            isinstance(base, GenericMeta) and
-                            base.__origin__ is typing.Generic):
-                        raise TypeError(f'Protocols can only inherit from other'
-                                        f' protocols, got {repr(base)}')
-
-                cls.__init__ = _no_init
-
-            def _proto_hook(other):
-                if not cls.__dict__.get('_is_protocol', None):
-                    return NotImplemented
-                if not isinstance(other, type):
-                    # Same error as for issubclass(1, int)
-                    raise TypeError('issubclass() arg 1 must be a class')
-                for attr in _get_protocol_attrs(cls):
-                    for base in other.__mro__:
-                        if attr in base.__dict__:
-                            if base.__dict__[attr] is None:
-                                return NotImplemented
-                            break
-                        annotations = getattr(base, '__annotations__', {})
-                        if (isinstance(annotations, typing.Mapping) and
-                                attr in annotations and
-                                isinstance(other, _ProtocolMeta) and
-                                other._is_protocol):
-                            break
-                    else:
-                        return NotImplemented
-                return True
-            if '__subclasshook__' not in cls.__dict__:
-                cls.__subclasshook__ = _proto_hook
-
-        def __instancecheck__(self, instance):
-            # We need this method for situations where attributes are
-            # assigned in __init__.
-            if ((not getattr(self, '_is_protocol', False) or
-                    _is_callable_members_only(self)) and
-                    issubclass(instance.__class__, self)):
-                return True
-            if self._is_protocol:
-                if all(hasattr(instance, attr) and
-                        (not callable(getattr(self, attr, None)) or
-                         getattr(instance, attr) is not None)
-                        for attr in _get_protocol_attrs(self)):
-                    return True
-            return super(GenericMeta, self).__instancecheck__(instance)
-
-        def __subclasscheck__(self, cls):
-            if self.__origin__ is not None:
-                if sys._getframe(1).f_globals['__name__'] not in ['abc', 'functools']:
-                    raise TypeError("Parameterized generics cannot be used with class "
-                                    "or instance checks")
-                return False
-            if (self.__dict__.get('_is_protocol', None) and
-                    not self.__dict__.get('_is_runtime_protocol', None)):
-                if sys._getframe(1).f_globals['__name__'] in ['abc',
-                                                              'functools',
-                                                              'typing']:
-                    return False
-                raise TypeError("Instance and class checks can only be used with"
-                                " @runtime protocols")
-            if (self.__dict__.get('_is_runtime_protocol', None) and
-                    not _is_callable_members_only(self)):
-                if sys._getframe(1).f_globals['__name__'] in ['abc',
-                                                              'functools',
-                                                              'typing']:
-                    return super(GenericMeta, self).__subclasscheck__(cls)
-                raise TypeError("Protocols with non-method members"
-                                " don't support issubclass()")
-            return super(GenericMeta, self).__subclasscheck__(cls)
-
-        @typing._tp_cache
-        def __getitem__(self, params):
-            # We also need to copy this from GenericMeta.__getitem__ to get
-            # special treatment of "Protocol". (Comments removed for brevity.)
-            if not isinstance(params, tuple):
-                params = (params,)
-            if not params and _gorg(self) is not typing.Tuple:
-                raise TypeError(
-                    f"Parameter list to {self.__qualname__}[...] cannot be empty")
-            msg = "Parameters to generic types must be types."
-            params = tuple(_type_check(p, msg) for p in params)
-            if self in (typing.Generic, Protocol):
-                if not all(isinstance(p, typing.TypeVar) for p in params):
-                    raise TypeError(
-                        f"Parameters to {repr(self)}[...] must all be type variables")
-                if len(set(params)) != len(params):
-                    raise TypeError(
-                        f"Parameters to {repr(self)}[...] must all be unique")
-                tvars = params
-                args = params
-            elif self in (typing.Tuple, typing.Callable):
-                tvars = _type_vars(params)
-                args = params
-            elif self.__origin__ in (typing.Generic, Protocol):
-                raise TypeError(f"Cannot subscript already-subscripted {repr(self)}")
-            else:
-                _check_generic(self, params, len(self.__parameters__))
-                tvars = _type_vars(params)
-                args = params
-
-            prepend = (self,) if self.__origin__ is None else ()
-            return self.__class__(self.__name__,
-                                  prepend + self.__bases__,
-                                  _no_slots_copy(self.__dict__),
-                                  tvars=tvars,
-                                  args=args,
-                                  origin=self,
-                                  extra=self.__extra__,
-                                  orig_bases=self.__orig_bases__)
-
-    class Protocol(metaclass=_ProtocolMeta):
-        """Base class for protocol classes. Protocol classes are defined as::
-
-          class Proto(Protocol):
-              def meth(self) -> int:
-                  ...
-
-        Such classes are primarily used with static type checkers that recognize
-        structural subtyping (static duck-typing), for example::
-
-          class C:
-              def meth(self) -> int:
-                  return 0
-
-          def func(x: Proto) -> int:
-              return x.meth()
-
-          func(C())  # Passes static type check
-
-        See PEP 544 for details. Protocol classes decorated with
-        @typing_extensions.runtime act as simple-minded runtime protocol that checks
-        only the presence of given attributes, ignoring their type signatures.
-
-        Protocol classes can be generic, they are defined as::
-
-          class GenProto(Protocol[T]):
-              def meth(self) -> T:
-                  ...
-        """
-        __slots__ = ()
-        _is_protocol = True
-
-        def __new__(cls, *args, **kwds):
-            if _gorg(cls) is Protocol:
-                raise TypeError("Type Protocol cannot be instantiated; "
-                                "it can be used only as a base class")
-            return typing._generic_new(cls.__next_in_mro__, cls, *args, **kwds)
 
 
 # 3.8+
 if hasattr(typing, 'runtime_checkable'):
     runtime_checkable = typing.runtime_checkable
-# 3.6-3.7
+# 3.7
 else:
     def runtime_checkable(cls):
         """Mark a protocol class as a runtime protocol, so that it
@@ -1031,7 +639,7 @@ runtime = runtime_checkable
 # 3.8+
 if hasattr(typing, 'SupportsIndex'):
     SupportsIndex = typing.SupportsIndex
-# 3.6-3.7
+# 3.7
 else:
     @runtime_checkable
     class SupportsIndex(Protocol):
@@ -1049,6 +657,7 @@ if hasattr(typing, "Required"):
     # keyword with old-style TypedDict().  See https://bugs.python.org/issue42059
     # The standard library TypedDict below Python 3.11 does not store runtime
     # information about optional and required keys when using Required or NotRequired.
+    # Generic TypedDicts are also impossible using typing.TypedDict on Python <3.11.
     TypedDict = typing.TypedDict
     _TypedDictMeta = typing._TypedDictMeta
     is_typeddict = typing.is_typeddict
@@ -1131,8 +740,16 @@ else:
             # Subclasses and instances of TypedDict return actual dictionaries
             # via _dict_new.
             ns['__new__'] = _typeddict_new if name == 'TypedDict' else _dict_new
+            # Don't insert typing.Generic into __bases__ here,
+            # or Generic.__init_subclass__ will raise TypeError
+            # in the super().__new__() call.
+            # Instead, monkey-patch __bases__ onto the class after it's been created.
             tp_dict = super().__new__(cls, name, (dict,), ns)
 
+            if any(issubclass(base, typing.Generic) for base in bases):
+                tp_dict.__bases__ = (typing.Generic, dict)
+                _maybe_adjust_parameters(tp_dict)
+
             annotations = {}
             own_annotations = ns.get('__annotations__', {})
             msg = "TypedDict('Name', {f0: t0, f1: t1, ...}); each t must be a type"
@@ -1148,29 +765,22 @@ else:
                 optional_keys.update(base.__dict__.get('__optional_keys__', ()))
 
             annotations.update(own_annotations)
-            if PEP_560:
-                for annotation_key, annotation_type in own_annotations.items():
-                    annotation_origin = get_origin(annotation_type)
-                    if annotation_origin is Annotated:
-                        annotation_args = get_args(annotation_type)
-                        if annotation_args:
-                            annotation_type = annotation_args[0]
-                            annotation_origin = get_origin(annotation_type)
+            for annotation_key, annotation_type in own_annotations.items():
+                annotation_origin = get_origin(annotation_type)
+                if annotation_origin is Annotated:
+                    annotation_args = get_args(annotation_type)
+                    if annotation_args:
+                        annotation_type = annotation_args[0]
+                        annotation_origin = get_origin(annotation_type)
 
-                    if annotation_origin is Required:
-                        required_keys.add(annotation_key)
-                    elif annotation_origin is NotRequired:
-                        optional_keys.add(annotation_key)
-                    elif total:
-                        required_keys.add(annotation_key)
-                    else:
-                        optional_keys.add(annotation_key)
-            else:
-                own_annotation_keys = set(own_annotations.keys())
-                if total:
-                    required_keys.update(own_annotation_keys)
+                if annotation_origin is Required:
+                    required_keys.add(annotation_key)
+                elif annotation_origin is NotRequired:
+                    optional_keys.add(annotation_key)
+                elif total:
+                    required_keys.add(annotation_key)
                 else:
-                    optional_keys.update(own_annotation_keys)
+                    optional_keys.add(annotation_key)
 
             tp_dict.__annotations__ = annotations
             tp_dict.__required_keys__ = frozenset(required_keys)
@@ -1231,9 +841,30 @@ else:
         """
         return isinstance(tp, tuple(_TYPEDDICT_TYPES))
 
+
+if hasattr(typing, "assert_type"):
+    assert_type = typing.assert_type
+
+else:
+    def assert_type(__val, __typ):
+        """Assert (to the type checker) that the value is of the given type.
+
+        When the type checker encounters a call to assert_type(), it
+        emits an error if the value is not of the specified type::
+
+            def greet(name: str) -> None:
+                assert_type(name, str)  # ok
+                assert_type(name, int)  # type checker error
+
+        At runtime this returns the first argument unchanged and otherwise
+        does nothing.
+        """
+        return __val
+
+
 if hasattr(typing, "Required"):
     get_type_hints = typing.get_type_hints
-elif PEP_560:
+else:
     import functools
     import types
 
@@ -1312,7 +943,7 @@ if hasattr(typing, 'Annotated'):
     # to work.
     _AnnotatedAlias = typing._AnnotatedAlias
 # 3.7-3.8
-elif PEP_560:
+else:
     class _AnnotatedAlias(typing._GenericAlias, _root=True):
         """Runtime representation of an annotated type.
 
@@ -1409,152 +1040,6 @@ elif PEP_560:
             raise TypeError(
                 f"Cannot subclass {cls.__module__}.Annotated"
             )
-# 3.6
-else:
-
-    def _is_dunder(name):
-        """Returns True if name is a __dunder_variable_name__."""
-        return len(name) > 4 and name.startswith('__') and name.endswith('__')
-
-    # Prior to Python 3.7 types did not have `copy_with`. A lot of the equality
-    # checks, argument expansion etc. are done on the _subs_tre. As a result we
-    # can't provide a get_type_hints function that strips out annotations.
-
-    class AnnotatedMeta(typing.GenericMeta):
-        """Metaclass for Annotated"""
-
-        def __new__(cls, name, bases, namespace, **kwargs):
-            if any(b is not object for b in bases):
-                raise TypeError("Cannot subclass " + str(Annotated))
-            return super().__new__(cls, name, bases, namespace, **kwargs)
-
-        @property
-        def __metadata__(self):
-            return self._subs_tree()[2]
-
-        def _tree_repr(self, tree):
-            cls, origin, metadata = tree
-            if not isinstance(origin, tuple):
-                tp_repr = typing._type_repr(origin)
-            else:
-                tp_repr = origin[0]._tree_repr(origin)
-            metadata_reprs = ", ".join(repr(arg) for arg in metadata)
-            return f'{cls}[{tp_repr}, {metadata_reprs}]'
-
-        def _subs_tree(self, tvars=None, args=None):  # noqa
-            if self is Annotated:
-                return Annotated
-            res = super()._subs_tree(tvars=tvars, args=args)
-            # Flatten nested Annotated
-            if isinstance(res[1], tuple) and res[1][0] is Annotated:
-                sub_tp = res[1][1]
-                sub_annot = res[1][2]
-                return (Annotated, sub_tp, sub_annot + res[2])
-            return res
-
-        def _get_cons(self):
-            """Return the class used to create instance of this type."""
-            if self.__origin__ is None:
-                raise TypeError("Cannot get the underlying type of a "
-                                "non-specialized Annotated type.")
-            tree = self._subs_tree()
-            while isinstance(tree, tuple) and tree[0] is Annotated:
-                tree = tree[1]
-            if isinstance(tree, tuple):
-                return tree[0]
-            else:
-                return tree
-
-        @typing._tp_cache
-        def __getitem__(self, params):
-            if not isinstance(params, tuple):
-                params = (params,)
-            if self.__origin__ is not None:  # specializing an instantiated type
-                return super().__getitem__(params)
-            elif not isinstance(params, tuple) or len(params) < 2:
-                raise TypeError("Annotated[...] should be instantiated "
-                                "with at least two arguments (a type and an "
-                                "annotation).")
-            else:
-                if (
-                    isinstance(params[0], typing._TypingBase) and
-                    type(params[0]).__name__ == "_ClassVar"
-                ):
-                    tp = params[0]
-                else:
-                    msg = "Annotated[t, ...]: t must be a type."
-                    tp = typing._type_check(params[0], msg)
-                metadata = tuple(params[1:])
-            return self.__class__(
-                self.__name__,
-                self.__bases__,
-                _no_slots_copy(self.__dict__),
-                tvars=_type_vars((tp,)),
-                # Metadata is a tuple so it won't be touched by _replace_args et al.
-                args=(tp, metadata),
-                origin=self,
-            )
-
-        def __call__(self, *args, **kwargs):
-            cons = self._get_cons()
-            result = cons(*args, **kwargs)
-            try:
-                result.__orig_class__ = self
-            except AttributeError:
-                pass
-            return result
-
-        def __getattr__(self, attr):
-            # For simplicity we just don't relay all dunder names
-            if self.__origin__ is not None and not _is_dunder(attr):
-                return getattr(self._get_cons(), attr)
-            raise AttributeError(attr)
-
-        def __setattr__(self, attr, value):
-            if _is_dunder(attr) or attr.startswith('_abc_'):
-                super().__setattr__(attr, value)
-            elif self.__origin__ is None:
-                raise AttributeError(attr)
-            else:
-                setattr(self._get_cons(), attr, value)
-
-        def __instancecheck__(self, obj):
-            raise TypeError("Annotated cannot be used with isinstance().")
-
-        def __subclasscheck__(self, cls):
-            raise TypeError("Annotated cannot be used with issubclass().")
-
-    class Annotated(metaclass=AnnotatedMeta):
-        """Add context specific metadata to a type.
-
-        Example: Annotated[int, runtime_check.Unsigned] indicates to the
-        hypothetical runtime_check module that this type is an unsigned int.
-        Every other consumer of this type can ignore this metadata and treat
-        this type as int.
-
-        The first argument to Annotated must be a valid type, the remaining
-        arguments are kept as a tuple in the __metadata__ field.
-
-        Details:
-
-        - It's an error to call `Annotated` with less than two arguments.
-        - Nested Annotated are flattened::
-
-            Annotated[Annotated[T, Ann1, Ann2], Ann3] == Annotated[T, Ann1, Ann2, Ann3]
-
-        - Instantiating an annotated type is equivalent to instantiating the
-        underlying type::
-
-            Annotated[C, Ann1](5) == C(5)
-
-        - Annotated can be used as a generic type alias::
-
-            Optimized = Annotated[T, runtime.Optimize()]
-            Optimized[int] == Annotated[int, runtime.Optimize()]
-
-            OptimizedList = Annotated[List[T], runtime.Optimize()]
-            OptimizedList[int] == Annotated[List[int], runtime.Optimize()]
-        """
 
 # Python 3.8 has get_origin() and get_args() but those implementations aren't
 # Annotated-aware, so we can't use those. Python 3.9's versions don't support
@@ -1563,7 +1048,7 @@ if sys.version_info[:2] >= (3, 10):
     get_origin = typing.get_origin
     get_args = typing.get_args
 # 3.7-3.9
-elif PEP_560:
+else:
     try:
         # 3.9+
         from typing import _BaseGenericAlias
@@ -1571,9 +1056,9 @@ elif PEP_560:
         _BaseGenericAlias = typing._GenericAlias
     try:
         # 3.9+
-        from typing import GenericAlias
+        from typing import GenericAlias as _typing_GenericAlias
     except ImportError:
-        GenericAlias = typing._GenericAlias
+        _typing_GenericAlias = typing._GenericAlias
 
     def get_origin(tp):
         """Get the unsubscripted version of a type.
@@ -1592,7 +1077,7 @@ elif PEP_560:
         """
         if isinstance(tp, _AnnotatedAlias):
             return Annotated
-        if isinstance(tp, (typing._GenericAlias, GenericAlias, _BaseGenericAlias,
+        if isinstance(tp, (typing._GenericAlias, _typing_GenericAlias, _BaseGenericAlias,
                            ParamSpecArgs, ParamSpecKwargs)):
             return tp.__origin__
         if tp is typing.Generic:
@@ -1612,7 +1097,7 @@ elif PEP_560:
         """
         if isinstance(tp, _AnnotatedAlias):
             return (tp.__origin__,) + tp.__metadata__
-        if isinstance(tp, (typing._GenericAlias, GenericAlias)):
+        if isinstance(tp, (typing._GenericAlias, _typing_GenericAlias)):
             if getattr(tp, "_special", False):
                 return ()
             res = tp.__args__
@@ -1645,7 +1130,7 @@ elif sys.version_info[:2] >= (3, 9):
         """
         raise TypeError(f"{self} is not subscriptable")
 # 3.7-3.8
-elif sys.version_info[:2] >= (3, 7):
+else:
     class _TypeAliasForm(typing._SpecialForm, _root=True):
         def __repr__(self):
             return 'typing_extensions.' + self._name
@@ -1661,44 +1146,51 @@ elif sys.version_info[:2] >= (3, 7):
 
                                It's invalid when used anywhere except as in the example
                                above.""")
-# 3.6
-else:
-    class _TypeAliasMeta(typing.TypingMeta):
-        """Metaclass for TypeAlias"""
 
-        def __repr__(self):
-            return 'typing_extensions.TypeAlias'
 
-    class _TypeAliasBase(typing._FinalTypingBase, metaclass=_TypeAliasMeta, _root=True):
-        """Special marker indicating that an assignment should
-        be recognized as a proper type alias definition by type
-        checkers.
+class _DefaultMixin:
+    """Mixin for TypeVarLike defaults."""
 
-        For example::
+    __slots__ = ()
 
-            Predicate: TypeAlias = Callable[..., bool]
+    def __init__(self, default):
+        if isinstance(default, (tuple, list)):
+            self.__default__ = tuple((typing._type_check(d, "Default must be a type")
+                                      for d in default))
+        elif default:
+            self.__default__ = typing._type_check(default, "Default must be a type")
+        else:
+            self.__default__ = None
 
-        It's invalid when used anywhere except as in the example above.
-        """
-        __slots__ = ()
 
-        def __instancecheck__(self, obj):
-            raise TypeError("TypeAlias cannot be used with isinstance().")
+# Add default and infer_variance parameters from PEP 696 and 695
+class TypeVar(typing.TypeVar, _DefaultMixin, _root=True):
+    """Type variable."""
 
-        def __subclasscheck__(self, cls):
-            raise TypeError("TypeAlias cannot be used with issubclass().")
+    __module__ = 'typing'
 
-        def __repr__(self):
-            return 'typing_extensions.TypeAlias'
+    def __init__(self, name, *constraints, bound=None,
+                 covariant=False, contravariant=False,
+                 default=None, infer_variance=False):
+        super().__init__(name, *constraints, bound=bound, covariant=covariant,
+                         contravariant=contravariant)
+        _DefaultMixin.__init__(self, default)
+        self.__infer_variance__ = infer_variance
 
-    TypeAlias = _TypeAliasBase(_root=True)
+        # for pickling:
+        try:
+            def_mod = sys._getframe(1).f_globals.get('__name__', '__main__')
+        except (AttributeError, ValueError):
+            def_mod = None
+        if def_mod != 'typing_extensions':
+            self.__module__ = def_mod
 
 
 # Python 3.10+ has PEP 612
 if hasattr(typing, 'ParamSpecArgs'):
     ParamSpecArgs = typing.ParamSpecArgs
     ParamSpecKwargs = typing.ParamSpecKwargs
-# 3.6-3.9
+# 3.7-3.9
 else:
     class _Immutable:
         """Mixin to indicate that object should not be copied."""
@@ -1758,12 +1250,32 @@ else:
 
 # 3.10+
 if hasattr(typing, 'ParamSpec'):
-    ParamSpec = typing.ParamSpec
-# 3.6-3.9
+
+    # Add default Parameter - PEP 696
+    class ParamSpec(typing.ParamSpec, _DefaultMixin, _root=True):
+        """Parameter specification variable."""
+
+        __module__ = 'typing'
+
+        def __init__(self, name, *, bound=None, covariant=False, contravariant=False,
+                     default=None):
+            super().__init__(name, bound=bound, covariant=covariant,
+                             contravariant=contravariant)
+            _DefaultMixin.__init__(self, default)
+
+            # for pickling:
+            try:
+                def_mod = sys._getframe(1).f_globals.get('__name__', '__main__')
+            except (AttributeError, ValueError):
+                def_mod = None
+            if def_mod != 'typing_extensions':
+                self.__module__ = def_mod
+
+# 3.7-3.9
 else:
 
     # Inherits from list as a workaround for Callable checks in Python < 3.9.2.
-    class ParamSpec(list):
+    class ParamSpec(list, _DefaultMixin):
         """Parameter specification variable.
 
         Usage::
@@ -1821,7 +1333,8 @@ else:
         def kwargs(self):
             return ParamSpecKwargs(self)
 
-        def __init__(self, name, *, bound=None, covariant=False, contravariant=False):
+        def __init__(self, name, *, bound=None, covariant=False, contravariant=False,
+                     default=None):
             super().__init__([self])
             self.__name__ = name
             self.__covariant__ = bool(covariant)
@@ -1830,6 +1343,7 @@ else:
                 self.__bound__ = typing._type_check(bound, 'Bound must be a type.')
             else:
                 self.__bound__ = None
+            _DefaultMixin.__init__(self, default)
 
             # for pickling:
             try:
@@ -1861,28 +1375,17 @@ else:
         def __call__(self, *args, **kwargs):
             pass
 
-        if not PEP_560:
-            # Only needed in 3.6.
-            def _get_type_vars(self, tvars):
-                if self not in tvars:
-                    tvars.append(self)
 
-
-# 3.6-3.9
+# 3.7-3.9
 if not hasattr(typing, 'Concatenate'):
     # Inherits from list as a workaround for Callable checks in Python < 3.9.2.
     class _ConcatenateGenericAlias(list):
 
         # Trick Generic into looking into this for __parameters__.
-        if PEP_560:
-            __class__ = typing._GenericAlias
-        else:
-            __class__ = typing._TypingBase
+        __class__ = typing._GenericAlias
 
         # Flag in 3.8.
         _special = False
-        # Attribute in 3.6 and earlier.
-        _gorg = typing.Generic
 
         def __init__(self, origin, args):
             super().__init__(args)
@@ -1907,14 +1410,8 @@ if not hasattr(typing, 'Concatenate'):
                 tp for tp in self.__args__ if isinstance(tp, (typing.TypeVar, ParamSpec))
             )
 
-        if not PEP_560:
-            # Only required in 3.6.
-            def _get_type_vars(self, tvars):
-                if self.__origin__ and self.__parameters__:
-                    typing._get_type_vars(self.__parameters__, tvars)
 
-
-# 3.6-3.9
+# 3.7-3.9
 @typing._tp_cache
 def _concatenate_getitem(self, parameters):
     if parameters == ():
@@ -1949,7 +1446,7 @@ elif sys.version_info[:2] >= (3, 9):
         """
         return _concatenate_getitem(self, parameters)
 # 3.7-8
-elif sys.version_info[:2] >= (3, 7):
+else:
     class _ConcatenateForm(typing._SpecialForm, _root=True):
         def __repr__(self):
             return 'typing_extensions.' + self._name
@@ -1969,42 +1466,6 @@ elif sys.version_info[:2] >= (3, 7):
 
         See PEP 612 for detailed information.
         """)
-# 3.6
-else:
-    class _ConcatenateAliasMeta(typing.TypingMeta):
-        """Metaclass for Concatenate."""
-
-        def __repr__(self):
-            return 'typing_extensions.Concatenate'
-
-    class _ConcatenateAliasBase(typing._FinalTypingBase,
-                                metaclass=_ConcatenateAliasMeta,
-                                _root=True):
-        """Used in conjunction with ``ParamSpec`` and ``Callable`` to represent a
-        higher order function which adds, removes or transforms parameters of a
-        callable.
-
-        For example::
-
-           Callable[Concatenate[int, P], int]
-
-        See PEP 612 for detailed information.
-        """
-        __slots__ = ()
-
-        def __instancecheck__(self, obj):
-            raise TypeError("Concatenate cannot be used with isinstance().")
-
-        def __subclasscheck__(self, cls):
-            raise TypeError("Concatenate cannot be used with issubclass().")
-
-        def __repr__(self):
-            return 'typing_extensions.Concatenate'
-
-        def __getitem__(self, parameters):
-            return _concatenate_getitem(self, parameters)
-
-    Concatenate = _ConcatenateAliasBase(_root=True)
 
 # 3.10+
 if hasattr(typing, 'TypeGuard'):
@@ -2059,10 +1520,10 @@ elif sys.version_info[:2] >= (3, 9):
         ``TypeGuard`` also works with type variables.  For more information, see
         PEP 647 (User-Defined Type Guards).
         """
-        item = typing._type_check(parameters, f'{self} accepts only single type.')
+        item = typing._type_check(parameters, f'{self} accepts only a single type.')
         return typing._GenericAlias(self, (item,))
 # 3.7-3.8
-elif sys.version_info[:2] >= (3, 7):
+else:
     class _TypeGuardForm(typing._SpecialForm, _root=True):
 
         def __repr__(self):
@@ -2117,138 +1578,55 @@ elif sys.version_info[:2] >= (3, 7):
         ``TypeGuard`` also works with type variables.  For more information, see
         PEP 647 (User-Defined Type Guards).
         """)
-# 3.6
-else:
-    class _TypeGuard(typing._FinalTypingBase, _root=True):
-        """Special typing form used to annotate the return type of a user-defined
-        type guard function.  ``TypeGuard`` only accepts a single type argument.
-        At runtime, functions marked this way should return a boolean.
-
-        ``TypeGuard`` aims to benefit *type narrowing* -- a technique used by static
-        type checkers to determine a more precise type of an expression within a
-        program's code flow.  Usually type narrowing is done by analyzing
-        conditional code flow and applying the narrowing to a block of code.  The
-        conditional expression here is sometimes referred to as a "type guard".
-
-        Sometimes it would be convenient to use a user-defined boolean function
-        as a type guard.  Such a function should use ``TypeGuard[...]`` as its
-        return type to alert static type checkers to this intention.
-
-        Using  ``-> TypeGuard`` tells the static type checker that for a given
-        function:
-
-        1. The return value is a boolean.
-        2. If the return value is ``True``, the type of its argument
-        is the type inside ``TypeGuard``.
-
-        For example::
-
-            def is_str(val: Union[str, float]):
-                # "isinstance" type guard
-                if isinstance(val, str):
-                    # Type of ``val`` is narrowed to ``str``
-                    ...
-                else:
-                    # Else, type of ``val`` is narrowed to ``float``.
-                    ...
-
-        Strict type narrowing is not enforced -- ``TypeB`` need not be a narrower
-        form of ``TypeA`` (it can even be a wider form) and this may lead to
-        type-unsafe results.  The main reason is to allow for things like
-        narrowing ``List[object]`` to ``List[str]`` even though the latter is not
-        a subtype of the former, since ``List`` is invariant.  The responsibility of
-        writing type-safe type guards is left to the user.
-
-        ``TypeGuard`` also works with type variables.  For more information, see
-        PEP 647 (User-Defined Type Guards).
-        """
-
-        __slots__ = ('__type__',)
-
-        def __init__(self, tp=None, **kwds):
-            self.__type__ = tp
-
-        def __getitem__(self, item):
-            cls = type(self)
-            if self.__type__ is None:
-                return cls(typing._type_check(item,
-                           f'{cls.__name__[1:]} accepts only a single type.'),
-                           _root=True)
-            raise TypeError(f'{cls.__name__[1:]} cannot be further subscripted')
-
-        def _eval_type(self, globalns, localns):
-            new_tp = typing._eval_type(self.__type__, globalns, localns)
-            if new_tp == self.__type__:
-                return self
-            return type(self)(new_tp, _root=True)
-
-        def __repr__(self):
-            r = super().__repr__()
-            if self.__type__ is not None:
-                r += f'[{typing._type_repr(self.__type__)}]'
-            return r
-
-        def __hash__(self):
-            return hash((type(self).__name__, self.__type__))
-
-        def __eq__(self, other):
-            if not isinstance(other, _TypeGuard):
-                return NotImplemented
-            if self.__type__ is not None:
-                return self.__type__ == other.__type__
-            return self is other
-
-    TypeGuard = _TypeGuard(_root=True)
 
 
-if sys.version_info[:2] >= (3, 7):
-    # Vendored from cpython typing._SpecialFrom
-    class _SpecialForm(typing._Final, _root=True):
-        __slots__ = ('_name', '__doc__', '_getitem')
+# Vendored from cpython typing._SpecialFrom
+class _SpecialForm(typing._Final, _root=True):
+    __slots__ = ('_name', '__doc__', '_getitem')
 
-        def __init__(self, getitem):
-            self._getitem = getitem
-            self._name = getitem.__name__
-            self.__doc__ = getitem.__doc__
+    def __init__(self, getitem):
+        self._getitem = getitem
+        self._name = getitem.__name__
+        self.__doc__ = getitem.__doc__
 
-        def __getattr__(self, item):
-            if item in {'__name__', '__qualname__'}:
-                return self._name
-
-            raise AttributeError(item)
-
-        def __mro_entries__(self, bases):
-            raise TypeError(f"Cannot subclass {self!r}")
-
-        def __repr__(self):
-            return f'typing_extensions.{self._name}'
-
-        def __reduce__(self):
+    def __getattr__(self, item):
+        if item in {'__name__', '__qualname__'}:
             return self._name
 
-        def __call__(self, *args, **kwds):
-            raise TypeError(f"Cannot instantiate {self!r}")
+        raise AttributeError(item)
 
-        def __or__(self, other):
-            return typing.Union[self, other]
+    def __mro_entries__(self, bases):
+        raise TypeError(f"Cannot subclass {self!r}")
 
-        def __ror__(self, other):
-            return typing.Union[other, self]
+    def __repr__(self):
+        return f'typing_extensions.{self._name}'
 
-        def __instancecheck__(self, obj):
-            raise TypeError(f"{self} cannot be used with isinstance()")
+    def __reduce__(self):
+        return self._name
 
-        def __subclasscheck__(self, cls):
-            raise TypeError(f"{self} cannot be used with issubclass()")
+    def __call__(self, *args, **kwds):
+        raise TypeError(f"Cannot instantiate {self!r}")
 
-        @typing._tp_cache
-        def __getitem__(self, parameters):
-            return self._getitem(self, parameters)
+    def __or__(self, other):
+        return typing.Union[self, other]
+
+    def __ror__(self, other):
+        return typing.Union[other, self]
+
+    def __instancecheck__(self, obj):
+        raise TypeError(f"{self} cannot be used with isinstance()")
+
+    def __subclasscheck__(self, cls):
+        raise TypeError(f"{self} cannot be used with issubclass()")
+
+    @typing._tp_cache
+    def __getitem__(self, parameters):
+        return self._getitem(self, parameters)
 
 
 if hasattr(typing, "LiteralString"):
     LiteralString = typing.LiteralString
-elif sys.version_info[:2] >= (3, 7):
+else:
     @_SpecialForm
     def LiteralString(self, params):
         """Represents an arbitrary literal string.
@@ -2267,38 +1645,11 @@ elif sys.version_info[:2] >= (3, 7):
 
         """
         raise TypeError(f"{self} is not subscriptable")
-else:
-    class _LiteralString(typing._FinalTypingBase, _root=True):
-        """Represents an arbitrary literal string.
-
-        Example::
-
-          from typing_extensions import LiteralString
-
-          def query(sql: LiteralString) -> ...:
-              ...
-
-          query("SELECT * FROM table")  # ok
-          query(f"SELECT * FROM {input()}")  # not ok
-
-        See PEP 675 for details.
-
-        """
-
-        __slots__ = ()
-
-        def __instancecheck__(self, obj):
-            raise TypeError(f"{self} cannot be used with isinstance().")
-
-        def __subclasscheck__(self, cls):
-            raise TypeError(f"{self} cannot be used with issubclass().")
-
-    LiteralString = _LiteralString(_root=True)
 
 
 if hasattr(typing, "Self"):
     Self = typing.Self
-elif sys.version_info[:2] >= (3, 7):
+else:
     @_SpecialForm
     def Self(self, params):
         """Used to spell the type of "self" in classes.
@@ -2315,35 +1666,11 @@ elif sys.version_info[:2] >= (3, 7):
         """
 
         raise TypeError(f"{self} is not subscriptable")
-else:
-    class _Self(typing._FinalTypingBase, _root=True):
-        """Used to spell the type of "self" in classes.
-
-        Example::
-
-          from typing import Self
-
-          class ReturnsSelf:
-              def parse(self, data: bytes) -> Self:
-                  ...
-                  return self
-
-        """
-
-        __slots__ = ()
-
-        def __instancecheck__(self, obj):
-            raise TypeError(f"{self} cannot be used with isinstance().")
-
-        def __subclasscheck__(self, cls):
-            raise TypeError(f"{self} cannot be used with issubclass().")
-
-    Self = _Self(_root=True)
 
 
 if hasattr(typing, "Never"):
     Never = typing.Never
-elif sys.version_info[:2] >= (3, 7):
+else:
     @_SpecialForm
     def Never(self, params):
         """The bottom type, a type that has no members.
@@ -2369,39 +1696,6 @@ elif sys.version_info[:2] >= (3, 7):
         """
 
         raise TypeError(f"{self} is not subscriptable")
-else:
-    class _Never(typing._FinalTypingBase, _root=True):
-        """The bottom type, a type that has no members.
-
-        This can be used to define a function that should never be
-        called, or a function that never returns::
-
-            from typing_extensions import Never
-
-            def never_call_me(arg: Never) -> None:
-                pass
-
-            def int_or_str(arg: int | str) -> None:
-                never_call_me(arg)  # type checker error
-                match arg:
-                    case int():
-                        print("It's an int")
-                    case str():
-                        print("It's a str")
-                    case _:
-                        never_call_me(arg)  # ok, arg is of type Never
-
-        """
-
-        __slots__ = ()
-
-        def __instancecheck__(self, obj):
-            raise TypeError(f"{self} cannot be used with isinstance().")
-
-        def __subclasscheck__(self, cls):
-            raise TypeError(f"{self} cannot be used with issubclass().")
-
-    Never = _Never(_root=True)
 
 
 if hasattr(typing, 'Required'):
@@ -2429,7 +1723,7 @@ elif sys.version_info[:2] >= (3, 9):
         There is no runtime checking that a required key is actually provided
         when instantiating a related TypedDict.
         """
-        item = typing._type_check(parameters, f'{self._name} accepts only single type')
+        item = typing._type_check(parameters, f'{self._name} accepts only a single type.')
         return typing._GenericAlias(self, (item,))
 
     @_ExtensionsSpecialForm
@@ -2446,17 +1740,17 @@ elif sys.version_info[:2] >= (3, 9):
                 year=1999,
             )
         """
-        item = typing._type_check(parameters, f'{self._name} accepts only single type')
+        item = typing._type_check(parameters, f'{self._name} accepts only a single type.')
         return typing._GenericAlias(self, (item,))
 
-elif sys.version_info[:2] >= (3, 7):
+else:
     class _RequiredForm(typing._SpecialForm, _root=True):
         def __repr__(self):
             return 'typing_extensions.' + self._name
 
         def __getitem__(self, parameters):
             item = typing._type_check(parameters,
-                                      '{} accepts only single type'.format(self._name))
+                                      f'{self._name} accepts only a single type.')
             return typing._GenericAlias(self, (item,))
 
     Required = _RequiredForm(
@@ -2490,81 +1784,11 @@ elif sys.version_info[:2] >= (3, 7):
                 year=1999,
             )
         """)
-else:
-    # NOTE: Modeled after _Final's implementation when _FinalTypingBase available
-    class _MaybeRequired(typing._FinalTypingBase, _root=True):
-        __slots__ = ('__type__',)
-
-        def __init__(self, tp=None, **kwds):
-            self.__type__ = tp
-
-        def __getitem__(self, item):
-            cls = type(self)
-            if self.__type__ is None:
-                return cls(typing._type_check(item,
-                           '{} accepts only single type.'.format(cls.__name__[1:])),
-                           _root=True)
-            raise TypeError('{} cannot be further subscripted'
-                            .format(cls.__name__[1:]))
-
-        def _eval_type(self, globalns, localns):
-            new_tp = typing._eval_type(self.__type__, globalns, localns)
-            if new_tp == self.__type__:
-                return self
-            return type(self)(new_tp, _root=True)
-
-        def __repr__(self):
-            r = super().__repr__()
-            if self.__type__ is not None:
-                r += '[{}]'.format(typing._type_repr(self.__type__))
-            return r
-
-        def __hash__(self):
-            return hash((type(self).__name__, self.__type__))
-
-        def __eq__(self, other):
-            if not isinstance(other, type(self)):
-                return NotImplemented
-            if self.__type__ is not None:
-                return self.__type__ == other.__type__
-            return self is other
-
-    class _Required(_MaybeRequired, _root=True):
-        """A special typing construct to mark a key of a total=False TypedDict
-        as required. For example:
-
-            class Movie(TypedDict, total=False):
-                title: Required[str]
-                year: int
-
-            m = Movie(
-                title='The Matrix',  # typechecker error if key is omitted
-                year=1999,
-            )
-
-        There is no runtime checking that a required key is actually provided
-        when instantiating a related TypedDict.
-        """
-
-    class _NotRequired(_MaybeRequired, _root=True):
-        """A special typing construct to mark a key of a TypedDict as
-        potentially missing. For example:
-
-            class Movie(TypedDict):
-                title: str
-                year: NotRequired[int]
-
-            m = Movie(
-                title='The Matrix',  # typechecker error if key is omitted
-                year=1999,
-            )
-        """
-
-    Required = _Required(_root=True)
-    NotRequired = _NotRequired(_root=True)
 
 
-if sys.version_info[:2] >= (3, 9):
+if hasattr(typing, "Unpack"):  # 3.11+
+    Unpack = typing.Unpack
+elif sys.version_info[:2] >= (3, 9):
     class _UnpackSpecialForm(typing._SpecialForm, _root=True):
         def __repr__(self):
             return 'typing_extensions.' + self._name
@@ -2584,13 +1808,13 @@ if sys.version_info[:2] >= (3, 9):
             ) -> Array[Batch, Unpack[Shape]]: ...
 
         """
-        item = typing._type_check(parameters, f'{self._name} accepts only single type')
+        item = typing._type_check(parameters, f'{self._name} accepts only a single type.')
         return _UnpackAlias(self, (item,))
 
     def _is_unpack(obj):
         return isinstance(obj, _UnpackAlias)
 
-elif sys.version_info[:2] >= (3, 7):
+else:
     class _UnpackAlias(typing._GenericAlias, _root=True):
         __class__ = typing.TypeVar
 
@@ -2600,7 +1824,7 @@ elif sys.version_info[:2] >= (3, 7):
 
         def __getitem__(self, parameters):
             item = typing._type_check(parameters,
-                                      f'{self._name} accepts only single type')
+                                      f'{self._name} accepts only a single type.')
             return _UnpackAlias(self, (item,))
 
     Unpack = _UnpackForm(
@@ -2619,149 +1843,105 @@ elif sys.version_info[:2] >= (3, 7):
     def _is_unpack(obj):
         return isinstance(obj, _UnpackAlias)
 
+
+if hasattr(typing, "TypeVarTuple"):  # 3.11+
+
+    # Add default Parameter - PEP 696
+    class TypeVarTuple(typing.TypeVarTuple, _DefaultMixin, _root=True):
+        """Type variable tuple."""
+
+        def __init__(self, name, *, default=None):
+            super().__init__(name)
+            _DefaultMixin.__init__(self, default)
+
+            # for pickling:
+            try:
+                def_mod = sys._getframe(1).f_globals.get('__name__', '__main__')
+            except (AttributeError, ValueError):
+                def_mod = None
+            if def_mod != 'typing_extensions':
+                self.__module__ = def_mod
+
 else:
-    # NOTE: Modeled after _Final's implementation when _FinalTypingBase available
-    class _Unpack(typing._FinalTypingBase, _root=True):
-        """A special typing construct to unpack a variadic type. For example:
+    class TypeVarTuple(_DefaultMixin):
+        """Type variable tuple.
 
-            Shape = TypeVarTuple('Shape')
-            Batch = NewType('Batch', int)
+        Usage::
 
-            def add_batch_axis(
-                x: Array[Unpack[Shape]]
-            ) -> Array[Batch, Unpack[Shape]]: ...
+            Ts = TypeVarTuple('Ts')
+
+        In the same way that a normal type variable is a stand-in for a single
+        type such as ``int``, a type variable *tuple* is a stand-in for a *tuple*
+        type such as ``Tuple[int, str]``.
+
+        Type variable tuples can be used in ``Generic`` declarations.
+        Consider the following example::
+
+            class Array(Generic[*Ts]): ...
+
+        The ``Ts`` type variable tuple here behaves like ``tuple[T1, T2]``,
+        where ``T1`` and ``T2`` are type variables. To use these type variables
+        as type parameters of ``Array``, we must *unpack* the type variable tuple using
+        the star operator: ``*Ts``. The signature of ``Array`` then behaves
+        as if we had simply written ``class Array(Generic[T1, T2]): ...``.
+        In contrast to ``Generic[T1, T2]``, however, ``Generic[*Shape]`` allows
+        us to parameterise the class with an *arbitrary* number of type parameters.
+
+        Type variable tuples can be used anywhere a normal ``TypeVar`` can.
+        This includes class definitions, as shown above, as well as function
+        signatures and variable annotations::
+
+            class Array(Generic[*Ts]):
+
+                def __init__(self, shape: Tuple[*Ts]):
+                    self._shape: Tuple[*Ts] = shape
+
+                def get_shape(self) -> Tuple[*Ts]:
+                    return self._shape
+
+            shape = (Height(480), Width(640))
+            x: Array[Height, Width] = Array(shape)
+            y = abs(x)  # Inferred type is Array[Height, Width]
+            z = x + x   #        ...    is Array[Height, Width]
+            x.get_shape()  #     ...    is tuple[Height, Width]
 
         """
-        __slots__ = ('__type__',)
+
+        # Trick Generic __parameters__.
         __class__ = typing.TypeVar
 
-        def __init__(self, tp=None, **kwds):
-            self.__type__ = tp
+        def __iter__(self):
+            yield self.__unpacked__
 
-        def __getitem__(self, item):
-            cls = type(self)
-            if self.__type__ is None:
-                return cls(typing._type_check(item,
-                           'Unpack accepts only single type.'),
-                           _root=True)
-            raise TypeError('Unpack cannot be further subscripted')
+        def __init__(self, name, *, default=None):
+            self.__name__ = name
+            _DefaultMixin.__init__(self, default)
 
-        def _eval_type(self, globalns, localns):
-            new_tp = typing._eval_type(self.__type__, globalns, localns)
-            if new_tp == self.__type__:
-                return self
-            return type(self)(new_tp, _root=True)
+            # for pickling:
+            try:
+                def_mod = sys._getframe(1).f_globals.get('__name__', '__main__')
+            except (AttributeError, ValueError):
+                def_mod = None
+            if def_mod != 'typing_extensions':
+                self.__module__ = def_mod
+
+            self.__unpacked__ = Unpack[self]
 
         def __repr__(self):
-            r = super().__repr__()
-            if self.__type__ is not None:
-                r += '[{}]'.format(typing._type_repr(self.__type__))
-            return r
+            return self.__name__
 
         def __hash__(self):
-            return hash((type(self).__name__, self.__type__))
+            return object.__hash__(self)
 
         def __eq__(self, other):
-            if not isinstance(other, _Unpack):
-                return NotImplemented
-            if self.__type__ is not None:
-                return self.__type__ == other.__type__
             return self is other
 
-        # For 3.6 only
-        def _get_type_vars(self, tvars):
-            self.__type__._get_type_vars(tvars)
+        def __reduce__(self):
+            return self.__name__
 
-    Unpack = _Unpack(_root=True)
-
-    def _is_unpack(obj):
-        return isinstance(obj, _Unpack)
-
-
-class TypeVarTuple:
-    """Type variable tuple.
-
-    Usage::
-
-        Ts = TypeVarTuple('Ts')
-
-    In the same way that a normal type variable is a stand-in for a single
-    type such as ``int``, a type variable *tuple* is a stand-in for a *tuple* type such as
-    ``Tuple[int, str]``.
-
-    Type variable tuples can be used in ``Generic`` declarations.
-    Consider the following example::
-
-        class Array(Generic[*Ts]): ...
-
-    The ``Ts`` type variable tuple here behaves like ``tuple[T1, T2]``,
-    where ``T1`` and ``T2`` are type variables. To use these type variables
-    as type parameters of ``Array``, we must *unpack* the type variable tuple using
-    the star operator: ``*Ts``. The signature of ``Array`` then behaves
-    as if we had simply written ``class Array(Generic[T1, T2]): ...``.
-    In contrast to ``Generic[T1, T2]``, however, ``Generic[*Shape]`` allows
-    us to parameterise the class with an *arbitrary* number of type parameters.
-
-    Type variable tuples can be used anywhere a normal ``TypeVar`` can.
-    This includes class definitions, as shown above, as well as function
-    signatures and variable annotations::
-
-        class Array(Generic[*Ts]):
-
-            def __init__(self, shape: Tuple[*Ts]):
-                self._shape: Tuple[*Ts] = shape
-
-            def get_shape(self) -> Tuple[*Ts]:
-                return self._shape
-
-        shape = (Height(480), Width(640))
-        x: Array[Height, Width] = Array(shape)
-        y = abs(x)  # Inferred type is Array[Height, Width]
-        z = x + x   #        ...    is Array[Height, Width]
-        x.get_shape()  #     ...    is tuple[Height, Width]
-
-    """
-
-    # Trick Generic __parameters__.
-    __class__ = typing.TypeVar
-
-    def __iter__(self):
-        yield self.__unpacked__
-
-    def __init__(self, name):
-        self.__name__ = name
-
-        # for pickling:
-        try:
-            def_mod = sys._getframe(1).f_globals.get('__name__', '__main__')
-        except (AttributeError, ValueError):
-            def_mod = None
-        if def_mod != 'typing_extensions':
-            self.__module__ = def_mod
-
-        self.__unpacked__ = Unpack[self]
-
-    def __repr__(self):
-        return self.__name__
-
-    def __hash__(self):
-        return object.__hash__(self)
-
-    def __eq__(self, other):
-        return self is other
-
-    def __reduce__(self):
-        return self.__name__
-
-    def __init_subclass__(self, *args, **kwds):
-        if '_root' not in kwds:
-            raise TypeError("Cannot subclass special typing classes")
-
-    if not PEP_560:
-        # Only needed in 3.6.
-        def _get_type_vars(self, tvars):
-            if self not in tvars:
-                tvars.append(self)
+        def __init_subclass__(self, *args, **kwds):
+            if '_root' not in kwds:
+                raise TypeError("Cannot subclass special typing classes")
 
 
 if hasattr(typing, "reveal_type"):
@@ -2821,10 +2001,11 @@ else:
         eq_default: bool = True,
         order_default: bool = False,
         kw_only_default: bool = False,
-        field_descriptors: typing.Tuple[
+        field_specifiers: typing.Tuple[
             typing.Union[typing.Type[typing.Any], typing.Callable[..., typing.Any]],
             ...
         ] = (),
+        **kwargs: typing.Any,
     ) -> typing.Callable[[T], T]:
         """Decorator that marks a function, class, or metaclass as providing
         dataclass-like behavior.
@@ -2876,8 +2057,8 @@ else:
           assumed to be True or False if it is omitted by the caller.
         - ``kw_only_default`` indicates whether the ``kw_only`` parameter is
           assumed to be True or False if it is omitted by the caller.
-        - ``field_descriptors`` specifies a static list of supported classes
-          or functions, that describe fields, similar to ``dataclasses.field()``.
+        - ``field_specifiers`` specifies a static list of supported classes
+          or functions that describe fields, similar to ``dataclasses.field()``.
 
         At runtime, this decorator records its arguments in the
         ``__dataclass_transform__`` attribute on the decorated object.
@@ -2890,12 +2071,43 @@ else:
                 "eq_default": eq_default,
                 "order_default": order_default,
                 "kw_only_default": kw_only_default,
-                "field_descriptors": field_descriptors,
+                "field_specifiers": field_specifiers,
+                "kwargs": kwargs,
             }
             return cls_or_fn
         return decorator
 
 
+if hasattr(typing, "override"):
+    override = typing.override
+else:
+    _F = typing.TypeVar("_F", bound=typing.Callable[..., typing.Any])
+
+    def override(__arg: _F) -> _F:
+        """Indicate that a method is intended to override a method in a base class.
+
+        Usage:
+
+            class Base:
+                def method(self) -> None: ...
+                    pass
+
+            class Child(Base):
+                @override
+                def method(self) -> None:
+                    super().method()
+
+        When this decorator is applied to a method, the type checker will
+        validate that it overrides a method with the same name on a base class.
+        This helps prevent bugs that may occur when a base class is changed
+        without an equivalent change to a child class.
+
+        See PEP 698 for details.
+
+        """
+        return __arg
+
+
 # We have to do some monkey patching to deal with the dual nature of
 # Unpack/TypeVarTuple:
 # - We want Unpack to be a kind of TypeVar so it gets accepted in
@@ -2906,3 +2118,92 @@ else:
 if not hasattr(typing, "TypeVarTuple"):
     typing._collect_type_vars = _collect_type_vars
     typing._check_generic = _check_generic
+
+
+# Backport typing.NamedTuple as it exists in Python 3.11.
+# In 3.11, the ability to define generic `NamedTuple`s was supported.
+# This was explicitly disallowed in 3.9-3.10, and only half-worked in <=3.8.
+if sys.version_info >= (3, 11):
+    NamedTuple = typing.NamedTuple
+else:
+    def _caller():
+        try:
+            return sys._getframe(2).f_globals.get('__name__', '__main__')
+        except (AttributeError, ValueError):  # For platforms without _getframe()
+            return None
+
+    def _make_nmtuple(name, types, module, defaults=()):
+        fields = [n for n, t in types]
+        annotations = {n: typing._type_check(t, f"field {n} annotation must be a type")
+                       for n, t in types}
+        nm_tpl = collections.namedtuple(name, fields,
+                                        defaults=defaults, module=module)
+        nm_tpl.__annotations__ = nm_tpl.__new__.__annotations__ = annotations
+        # The `_field_types` attribute was removed in 3.9;
+        # in earlier versions, it is the same as the `__annotations__` attribute
+        if sys.version_info < (3, 9):
+            nm_tpl._field_types = annotations
+        return nm_tpl
+
+    _prohibited_namedtuple_fields = typing._prohibited
+    _special_namedtuple_fields = frozenset({'__module__', '__name__', '__annotations__'})
+
+    class _NamedTupleMeta(type):
+        def __new__(cls, typename, bases, ns):
+            assert _NamedTuple in bases
+            for base in bases:
+                if base is not _NamedTuple and base is not typing.Generic:
+                    raise TypeError(
+                        'can only inherit from a NamedTuple type and Generic')
+            bases = tuple(tuple if base is _NamedTuple else base for base in bases)
+            types = ns.get('__annotations__', {})
+            default_names = []
+            for field_name in types:
+                if field_name in ns:
+                    default_names.append(field_name)
+                elif default_names:
+                    raise TypeError(f"Non-default namedtuple field {field_name} "
+                                    f"cannot follow default field"
+                                    f"{'s' if len(default_names) > 1 else ''} "
+                                    f"{', '.join(default_names)}")
+            nm_tpl = _make_nmtuple(
+                typename, types.items(),
+                defaults=[ns[n] for n in default_names],
+                module=ns['__module__']
+            )
+            nm_tpl.__bases__ = bases
+            if typing.Generic in bases:
+                class_getitem = typing.Generic.__class_getitem__.__func__
+                nm_tpl.__class_getitem__ = classmethod(class_getitem)
+            # update from user namespace without overriding special namedtuple attributes
+            for key in ns:
+                if key in _prohibited_namedtuple_fields:
+                    raise AttributeError("Cannot overwrite NamedTuple attribute " + key)
+                elif key not in _special_namedtuple_fields and key not in nm_tpl._fields:
+                    setattr(nm_tpl, key, ns[key])
+            if typing.Generic in bases:
+                nm_tpl.__init_subclass__()
+            return nm_tpl
+
+    def NamedTuple(__typename, __fields=None, **kwargs):
+        if __fields is None:
+            __fields = kwargs.items()
+        elif kwargs:
+            raise TypeError("Either list of fields or keywords"
+                            " can be provided to NamedTuple, not both")
+        return _make_nmtuple(__typename, __fields, module=_caller())
+
+    NamedTuple.__doc__ = typing.NamedTuple.__doc__
+    _NamedTuple = type.__new__(_NamedTupleMeta, 'NamedTuple', (), {})
+
+    # On 3.8+, alter the signature so that it matches typing.NamedTuple.
+    # The signature of typing.NamedTuple on >=3.8 is invalid syntax in Python 3.7,
+    # so just leave the signature as it is on 3.7.
+    if sys.version_info >= (3, 8):
+        NamedTuple.__text_signature__ = '(typename, fields=None, /, **kwargs)'
+
+    def _namedtuple_mro_entries(bases):
+        assert NamedTuple in bases
+        return (_NamedTuple,)
+
+    NamedTuple.__mro_entries__ = _namedtuple_mro_entries
diff --git a/libs/common/zipp.py b/libs/common/zipp.py
deleted file mode 100644
index 8ab7d099..00000000
--- a/libs/common/zipp.py
+++ /dev/null
@@ -1,220 +0,0 @@
-# coding: utf-8
-
-from __future__ import division
-
-import io
-import sys
-import posixpath
-import zipfile
-import functools
-import itertools
-
-import more_itertools
-
-__metaclass__ = type
-
-
-def _parents(path):
-    """
-    Given a path with elements separated by
-    posixpath.sep, generate all parents of that path.
-
-    >>> list(_parents('b/d'))
-    ['b']
-    >>> list(_parents('/b/d/'))
-    ['/b']
-    >>> list(_parents('b/d/f/'))
-    ['b/d', 'b']
-    >>> list(_parents('b'))
-    []
-    >>> list(_parents(''))
-    []
-    """
-    return itertools.islice(_ancestry(path), 1, None)
-
-
-def _ancestry(path):
-    """
-    Given a path with elements separated by
-    posixpath.sep, generate all elements of that path
-
-    >>> list(_ancestry('b/d'))
-    ['b/d', 'b']
-    >>> list(_ancestry('/b/d/'))
-    ['/b/d', '/b']
-    >>> list(_ancestry('b/d/f/'))
-    ['b/d/f', 'b/d', 'b']
-    >>> list(_ancestry('b'))
-    ['b']
-    >>> list(_ancestry(''))
-    []
-    """
-    path = path.rstrip(posixpath.sep)
-    while path and path != posixpath.sep:
-        yield path
-        path, tail = posixpath.split(path)
-
-
-class Path:
-    """
-    A pathlib-compatible interface for zip files.
-
-    Consider a zip file with this structure::
-
-        .
-        ├── a.txt
-        └── b
-            ├── c.txt
-            └── d
-                └── e.txt
-
-    >>> data = io.BytesIO()
-    >>> zf = zipfile.ZipFile(data, 'w')
-    >>> zf.writestr('a.txt', 'content of a')
-    >>> zf.writestr('b/c.txt', 'content of c')
-    >>> zf.writestr('b/d/e.txt', 'content of e')
-    >>> zf.filename = 'abcde.zip'
-
-    Path accepts the zipfile object itself or a filename
-
-    >>> root = Path(zf)
-
-    From there, several path operations are available.
-
-    Directory iteration (including the zip file itself):
-
-    >>> a, b = root.iterdir()
-    >>> a
-    Path('abcde.zip', 'a.txt')
-    >>> b
-    Path('abcde.zip', 'b/')
-
-    name property:
-
-    >>> b.name
-    'b'
-
-    join with divide operator:
-
-    >>> c = b / 'c.txt'
-    >>> c
-    Path('abcde.zip', 'b/c.txt')
-    >>> c.name
-    'c.txt'
-
-    Read text:
-
-    >>> c.read_text()
-    'content of c'
-
-    existence:
-
-    >>> c.exists()
-    True
-    >>> (b / 'missing.txt').exists()
-    False
-
-    Coercion to string:
-
-    >>> str(c)
-    'abcde.zip/b/c.txt'
-    """
-
-    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"
-
-    def __init__(self, root, at=""):
-        self.root = (
-            root
-            if isinstance(root, zipfile.ZipFile)
-            else zipfile.ZipFile(self._pathlib_compat(root))
-        )
-        self.at = at
-
-    @staticmethod
-    def _pathlib_compat(path):
-        """
-        For path-like objects, convert to a filename for compatibility
-        on Python 3.6.1 and earlier.
-        """
-        try:
-            return path.__fspath__()
-        except AttributeError:
-            return str(path)
-
-    @property
-    def open(self):
-        return functools.partial(self.root.open, self.at)
-
-    @property
-    def name(self):
-        return posixpath.basename(self.at.rstrip("/"))
-
-    def read_text(self, *args, **kwargs):
-        with self.open() as strm:
-            return io.TextIOWrapper(strm, *args, **kwargs).read()
-
-    def read_bytes(self):
-        with self.open() as strm:
-            return strm.read()
-
-    def _is_child(self, path):
-        return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")
-
-    def _next(self, at):
-        return Path(self.root, at)
-
-    def is_dir(self):
-        return not self.at or self.at.endswith("/")
-
-    def is_file(self):
-        return not self.is_dir()
-
-    def exists(self):
-        return self.at in self._names()
-
-    def iterdir(self):
-        if not self.is_dir():
-            raise ValueError("Can't listdir a file")
-        subs = map(self._next, self._names())
-        return filter(self._is_child, subs)
-
-    def __str__(self):
-        return posixpath.join(self.root.filename, self.at)
-
-    def __repr__(self):
-        return self.__repr.format(self=self)
-
-    def joinpath(self, add):
-        add = self._pathlib_compat(add)
-        next = posixpath.join(self.at, add)
-        next_dir = posixpath.join(self.at, add, "")
-        names = self._names()
-        return self._next(next_dir if next not in names and next_dir in names else next)
-
-    __truediv__ = joinpath
-
-    @staticmethod
-    def _implied_dirs(names):
-        return more_itertools.unique_everseen(
-            parent + "/"
-            for name in names
-            for parent in _parents(name)
-            if parent + "/" not in names
-        )
-
-    @classmethod
-    def _add_implied_dirs(cls, names):
-        return names + list(cls._implied_dirs(names))
-
-    @property
-    def parent(self):
-        parent_at = posixpath.dirname(self.at.rstrip('/'))
-        if parent_at:
-            parent_at += '/'
-        return self._next(parent_at)
-
-    def _names(self):
-        return self._add_implied_dirs(self.root.namelist())
-
-    if sys.version_info < (3,):
-        __div__ = __truediv__
diff --git a/libs/common/zipp/__init__.py b/libs/common/zipp/__init__.py
new file mode 100644
index 00000000..ad01e27e
--- /dev/null
+++ b/libs/common/zipp/__init__.py
@@ -0,0 +1,381 @@
+import io
+import posixpath
+import zipfile
+import itertools
+import contextlib
+import pathlib
+import re
+import fnmatch
+
+from .py310compat import text_encoding
+
+
+__all__ = ['Path']
+
+
+def _parents(path):
+    """
+    Given a path with elements separated by
+    posixpath.sep, generate all parents of that path.
+
+    >>> list(_parents('b/d'))
+    ['b']
+    >>> list(_parents('/b/d/'))
+    ['/b']
+    >>> list(_parents('b/d/f/'))
+    ['b/d', 'b']
+    >>> list(_parents('b'))
+    []
+    >>> list(_parents(''))
+    []
+    """
+    return itertools.islice(_ancestry(path), 1, None)
+
+
+def _ancestry(path):
+    """
+    Given a path with elements separated by
+    posixpath.sep, generate all elements of that path
+
+    >>> list(_ancestry('b/d'))
+    ['b/d', 'b']
+    >>> list(_ancestry('/b/d/'))
+    ['/b/d', '/b']
+    >>> list(_ancestry('b/d/f/'))
+    ['b/d/f', 'b/d', 'b']
+    >>> list(_ancestry('b'))
+    ['b']
+    >>> list(_ancestry(''))
+    []
+    """
+    path = path.rstrip(posixpath.sep)
+    while path and path != posixpath.sep:
+        yield path
+        path, tail = posixpath.split(path)
+
+
+_dedupe = dict.fromkeys
+"""Deduplicate an iterable in original order"""
+
+
+def _difference(minuend, subtrahend):
+    """
+    Return items in minuend not in subtrahend, retaining order
+    with O(1) lookup.
+    """
+    return itertools.filterfalse(set(subtrahend).__contains__, minuend)
+
+
+class InitializedState:
+    """
+    Mix-in to save the initialization state for pickling.
+    """
+
+    def __init__(self, *args, **kwargs):
+        self.__args = args
+        self.__kwargs = kwargs
+        super().__init__(*args, **kwargs)
+
+    def __getstate__(self):
+        return self.__args, self.__kwargs
+
+    def __setstate__(self, state):
+        args, kwargs = state
+        super().__init__(*args, **kwargs)
+
+
+class CompleteDirs(InitializedState, zipfile.ZipFile):
+    """
+    A ZipFile subclass that ensures that implied directories
+    are always included in the namelist.
+    """
+
+    @staticmethod
+    def _implied_dirs(names):
+        parents = itertools.chain.from_iterable(map(_parents, names))
+        as_dirs = (p + posixpath.sep for p in parents)
+        return _dedupe(_difference(as_dirs, names))
+
+    def namelist(self):
+        names = super(CompleteDirs, self).namelist()
+        return names + list(self._implied_dirs(names))
+
+    def _name_set(self):
+        return set(self.namelist())
+
+    def resolve_dir(self, name):
+        """
+        If the name represents a directory, return that name
+        as a directory (with the trailing slash).
+        """
+        names = self._name_set()
+        dirname = name + '/'
+        dir_match = name not in names and dirname in names
+        return dirname if dir_match else name
+
+    @classmethod
+    def make(cls, source):
+        """
+        Given a source (filename or zipfile), return an
+        appropriate CompleteDirs subclass.
+        """
+        if isinstance(source, CompleteDirs):
+            return source
+
+        if not isinstance(source, zipfile.ZipFile):
+            return cls(source)
+
+        # Only allow for FastLookup when supplied zipfile is read-only
+        if 'r' not in source.mode:
+            cls = CompleteDirs
+
+        source.__class__ = cls
+        return source
+
+
+class FastLookup(CompleteDirs):
+    """
+    ZipFile subclass to ensure implicit
+    dirs exist and are resolved rapidly.
+    """
+
+    def namelist(self):
+        with contextlib.suppress(AttributeError):
+            return self.__names
+        self.__names = super(FastLookup, self).namelist()
+        return self.__names
+
+    def _name_set(self):
+        with contextlib.suppress(AttributeError):
+            return self.__lookup
+        self.__lookup = super(FastLookup, self)._name_set()
+        return self.__lookup
+
+
+class Path:
+    """
+    A pathlib-compatible interface for zip files.
+
+    Consider a zip file with this structure::
+
+        .
+        ├── a.txt
+        └── b
+            ├── c.txt
+            └── d
+                └── e.txt
+
+    >>> data = io.BytesIO()
+    >>> zf = zipfile.ZipFile(data, 'w')
+    >>> zf.writestr('a.txt', 'content of a')
+    >>> zf.writestr('b/c.txt', 'content of c')
+    >>> zf.writestr('b/d/e.txt', 'content of e')
+    >>> zf.filename = 'mem/abcde.zip'
+
+    Path accepts the zipfile object itself or a filename
+
+    >>> root = Path(zf)
+
+    From there, several path operations are available.
+
+    Directory iteration (including the zip file itself):
+
+    >>> a, b = root.iterdir()
+    >>> a
+    Path('mem/abcde.zip', 'a.txt')
+    >>> b
+    Path('mem/abcde.zip', 'b/')
+
+    name property:
+
+    >>> b.name
+    'b'
+
+    join with divide operator:
+
+    >>> c = b / 'c.txt'
+    >>> c
+    Path('mem/abcde.zip', 'b/c.txt')
+    >>> c.name
+    'c.txt'
+
+    Read text:
+
+    >>> c.read_text()
+    'content of c'
+
+    existence:
+
+    >>> c.exists()
+    True
+    >>> (b / 'missing.txt').exists()
+    False
+
+    Coercion to string:
+
+    >>> import os
+    >>> str(c).replace(os.sep, posixpath.sep)
+    'mem/abcde.zip/b/c.txt'
+
+    At the root, ``name``, ``filename``, and ``parent``
+    resolve to the zipfile. Note these attributes are not
+    valid and will raise a ``ValueError`` if the zipfile
+    has no filename.
+
+    >>> root.name
+    'abcde.zip'
+    >>> str(root.filename).replace(os.sep, posixpath.sep)
+    'mem/abcde.zip'
+    >>> str(root.parent)
+    'mem'
+    """
+
+    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"
+
+    def __init__(self, root, at=""):
+        """
+        Construct a Path from a ZipFile or filename.
+
+        Note: When the source is an existing ZipFile object,
+        its type (__class__) will be mutated to a
+        specialized type. If the caller wishes to retain the
+        original type, the caller should either create a
+        separate ZipFile object or pass a filename.
+        """
+        self.root = FastLookup.make(root)
+        self.at = at
+
+    def __eq__(self, other):
+        """
+        >>> Path(zipfile.ZipFile(io.BytesIO(), 'w')) == 'foo'
+        False
+        """
+        if self.__class__ is not other.__class__:
+            return NotImplemented
+        return (self.root, self.at) == (other.root, other.at)
+
+    def __hash__(self):
+        return hash((self.root, self.at))
+
+    def open(self, mode='r', *args, pwd=None, **kwargs):
+        """
+        Open this entry as text or binary following the semantics
+        of ``pathlib.Path.open()`` by passing arguments through
+        to io.TextIOWrapper().
+        """
+        if self.is_dir():
+            raise IsADirectoryError(self)
+        zip_mode = mode[0]
+        if not self.exists() and zip_mode == 'r':
+            raise FileNotFoundError(self)
+        stream = self.root.open(self.at, zip_mode, pwd=pwd)
+        if 'b' in mode:
+            if args or kwargs:
+                raise ValueError("encoding args invalid for binary operation")
+            return stream
+        else:
+            kwargs["encoding"] = text_encoding(kwargs.get("encoding"))
+        return io.TextIOWrapper(stream, *args, **kwargs)
+
+    @property
+    def name(self):
+        return pathlib.Path(self.at).name or self.filename.name
+
+    @property
+    def suffix(self):
+        return pathlib.Path(self.at).suffix or self.filename.suffix
+
+    @property
+    def suffixes(self):
+        return pathlib.Path(self.at).suffixes or self.filename.suffixes
+
+    @property
+    def stem(self):
+        return pathlib.Path(self.at).stem or self.filename.stem
+
+    @property
+    def filename(self):
+        return pathlib.Path(self.root.filename).joinpath(self.at)
+
+    def read_text(self, *args, **kwargs):
+        kwargs["encoding"] = text_encoding(kwargs.get("encoding"))
+        with self.open('r', *args, **kwargs) as strm:
+            return strm.read()
+
+    def read_bytes(self):
+        with self.open('rb') as strm:
+            return strm.read()
+
+    def _is_child(self, path):
+        return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")
+
+    def _next(self, at):
+        return self.__class__(self.root, at)
+
+    def is_dir(self):
+        return not self.at or self.at.endswith("/")
+
+    def is_file(self):
+        return self.exists() and not self.is_dir()
+
+    def exists(self):
+        return self.at in self.root._name_set()
+
+    def iterdir(self):
+        if not self.is_dir():
+            raise ValueError("Can't listdir a file")
+        subs = map(self._next, self.root.namelist())
+        return filter(self._is_child, subs)
+
+    def match(self, path_pattern):
+        return pathlib.Path(self.at).match(path_pattern)
+
+    def is_symlink(self):
+        """
+        Return whether this path is a symlink. Always false (python/cpython#82102).
+        """
+        return False
+
+    def _descendants(self):
+        for child in self.iterdir():
+            yield child
+            if child.is_dir():
+                yield from child._descendants()
+
+    def glob(self, pattern):
+        if not pattern:
+            raise ValueError("Unacceptable pattern: {!r}".format(pattern))
+
+        matches = re.compile(fnmatch.translate(pattern)).fullmatch
+        return (
+            child
+            for child in self._descendants()
+            if matches(str(child.relative_to(self)))
+        )
+
+    def rglob(self, pattern):
+        return self.glob(f'**/{pattern}')
+
+    def relative_to(self, other, *extra):
+        return posixpath.relpath(str(self), str(other.joinpath(*extra)))
+
+    def __str__(self):
+        return posixpath.join(self.root.filename, self.at)
+
+    def __repr__(self):
+        return self.__repr.format(self=self)
+
+    def joinpath(self, *other):
+        next = posixpath.join(self.at, *other)
+        return self._next(self.root.resolve_dir(next))
+
+    __truediv__ = joinpath
+
+    @property
+    def parent(self):
+        if not self.at:
+            return self.filename.parent
+        parent_at = posixpath.dirname(self.at.rstrip('/'))
+        if parent_at:
+            parent_at += '/'
+        return self._next(parent_at)
diff --git a/libs/common/zipp/py310compat.py b/libs/common/zipp/py310compat.py
new file mode 100644
index 00000000..8244124c
--- /dev/null
+++ b/libs/common/zipp/py310compat.py
@@ -0,0 +1,12 @@
+import sys
+import io
+
+
+te_impl = 'lambda encoding, stacklevel=2, /: encoding'
+te_impl_37 = te_impl.replace(', /', '')
+_text_encoding = eval(te_impl) if sys.version_info > (3, 8) else eval(te_impl_37)
+
+
+text_encoding = (
+    io.text_encoding if sys.version_info > (3, 10) else _text_encoding  # type: ignore
+)