diff --git a/lib/certifi/__init__.py b/lib/certifi/__init__.py index b700d3a6..af4bcc15 100644 --- a/lib/certifi/__init__.py +++ b/lib/certifi/__init__.py @@ -1,4 +1,4 @@ from .core import contents, where __all__ = ["contents", "where"] -__version__ = "2022.05.18.1" +__version__ = "2022.09.24" diff --git a/lib/certifi/cacert.pem b/lib/certifi/cacert.pem index f597fb3a..40051551 100644 --- a/lib/certifi/cacert.pem +++ b/lib/certifi/cacert.pem @@ -1323,78 +1323,6 @@ t/2jioSgrGK+KwmHNPBqAbubKVY8/gA3zyNs8U6qtnRGEmyR7jTV7JqR50S+kDFy SjnRBUkLp7Y3gaVdjKozXoEofKd9J+sAro03 -----END CERTIFICATE----- -# Issuer: CN=EC-ACC O=Agencia Catalana de Certificacio (NIF Q-0801176-I) OU=Serveis Publics de Certificacio/Vegeu https://www.catcert.net/verarrel (c)03/Jerarquia Entitats de Certificacio Catalanes -# Subject: CN=EC-ACC O=Agencia Catalana de Certificacio (NIF Q-0801176-I) OU=Serveis Publics de Certificacio/Vegeu https://www.catcert.net/verarrel (c)03/Jerarquia Entitats de Certificacio Catalanes -# Label: "EC-ACC" -# Serial: -23701579247955709139626555126524820479 -# MD5 Fingerprint: eb:f5:9d:29:0d:61:f9:42:1f:7c:c2:ba:6d:e3:15:09 -# SHA1 Fingerprint: 28:90:3a:63:5b:52:80:fa:e6:77:4c:0b:6d:a7:d6:ba:a6:4a:f2:e8 -# SHA256 Fingerprint: 88:49:7f:01:60:2f:31:54:24:6a:e2:8c:4d:5a:ef:10:f1:d8:7e:bb:76:62:6f:4a:e0:b7:f9:5b:a7:96:87:99 ------BEGIN CERTIFICATE----- -MIIFVjCCBD6gAwIBAgIQ7is969Qh3hSoYqwE893EATANBgkqhkiG9w0BAQUFADCB -8zELMAkGA1UEBhMCRVMxOzA5BgNVBAoTMkFnZW5jaWEgQ2F0YWxhbmEgZGUgQ2Vy -dGlmaWNhY2lvIChOSUYgUS0wODAxMTc2LUkpMSgwJgYDVQQLEx9TZXJ2ZWlzIFB1 -YmxpY3MgZGUgQ2VydGlmaWNhY2lvMTUwMwYDVQQLEyxWZWdldSBodHRwczovL3d3 -dy5jYXRjZXJ0Lm5ldC92ZXJhcnJlbCAoYykwMzE1MDMGA1UECxMsSmVyYXJxdWlh -IEVudGl0YXRzIGRlIENlcnRpZmljYWNpbyBDYXRhbGFuZXMxDzANBgNVBAMTBkVD -LUFDQzAeFw0wMzAxMDcyMzAwMDBaFw0zMTAxMDcyMjU5NTlaMIHzMQswCQYDVQQG -EwJFUzE7MDkGA1UEChMyQWdlbmNpYSBDYXRhbGFuYSBkZSBDZXJ0aWZpY2FjaW8g -KE5JRiBRLTA4MDExNzYtSSkxKDAmBgNVBAsTH1NlcnZlaXMgUHVibGljcyBkZSBD -ZXJ0aWZpY2FjaW8xNTAzBgNVBAsTLFZlZ2V1IGh0dHBzOi8vd3d3LmNhdGNlcnQu -bmV0L3ZlcmFycmVsIChjKTAzMTUwMwYDVQQLEyxKZXJhcnF1aWEgRW50aXRhdHMg -ZGUgQ2VydGlmaWNhY2lvIENhdGFsYW5lczEPMA0GA1UEAxMGRUMtQUNDMIIBIjAN -BgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAsyLHT+KXQpWIR4NA9h0X84NzJB5R -85iKw5K4/0CQBXCHYMkAqbWUZRkiFRfCQ2xmRJoNBD45b6VLeqpjt4pEndljkYRm -4CgPukLjbo73FCeTae6RDqNfDrHrZqJyTxIThmV6PttPB/SnCWDaOkKZx7J/sxaV -HMf5NLWUhdWZXqBIoH7nF2W4onW4HvPlQn2v7fOKSGRdghST2MDk/7NQcvJ29rNd -QlB50JQ+awwAvthrDk4q7D7SzIKiGGUzE3eeml0aE9jD2z3Il3rucO2n5nzbcc8t -lGLfbdb1OL4/pYUKGbio2Al1QnDE6u/LDsg0qBIimAy4E5S2S+zw0JDnJwIDAQAB -o4HjMIHgMB0GA1UdEQQWMBSBEmVjX2FjY0BjYXRjZXJ0Lm5ldDAPBgNVHRMBAf8E -BTADAQH/MA4GA1UdDwEB/wQEAwIBBjAdBgNVHQ4EFgQUoMOLRKo3pUW/l4Ba0fF4 -opvpXY0wfwYDVR0gBHgwdjB0BgsrBgEEAfV4AQMBCjBlMCwGCCsGAQUFBwIBFiBo -dHRwczovL3d3dy5jYXRjZXJ0Lm5ldC92ZXJhcnJlbDA1BggrBgEFBQcCAjApGidW -ZWdldSBodHRwczovL3d3dy5jYXRjZXJ0Lm5ldC92ZXJhcnJlbCAwDQYJKoZIhvcN -AQEFBQADggEBAKBIW4IB9k1IuDlVNZyAelOZ1Vr/sXE7zDkJlF7W2u++AVtd0x7Y -/X1PzaBB4DSTv8vihpw3kpBWHNzrKQXlxJ7HNd+KDM3FIUPpqojlNcAZQmNaAl6k -SBg6hW/cnbw/nZzBh7h6YQjpdwt/cKt63dmXLGQehb+8dJahw3oS7AwaboMMPOhy -Rp/7SNVel+axofjk70YllJyJ22k4vuxcDlbHZVHlUIiIv0LVKz3l+bqeLrPK9HOS -Agu+TGbrIP65y7WZf+a2E/rKS03Z7lNGBjvGTq2TWoF+bCpLagVFjPIhpDGQh2xl -nJ2lYJU6Un/10asIbvPuW/mIPX64b24D5EI= ------END CERTIFICATE----- - -# Issuer: CN=Hellenic Academic and Research Institutions RootCA 2011 O=Hellenic Academic and Research Institutions Cert. Authority -# Subject: CN=Hellenic Academic and Research Institutions RootCA 2011 O=Hellenic Academic and Research Institutions Cert. Authority -# Label: "Hellenic Academic and Research Institutions RootCA 2011" -# Serial: 0 -# MD5 Fingerprint: 73:9f:4c:4b:73:5b:79:e9:fa:ba:1c:ef:6e:cb:d5:c9 -# SHA1 Fingerprint: fe:45:65:9b:79:03:5b:98:a1:61:b5:51:2e:ac:da:58:09:48:22:4d -# SHA256 Fingerprint: bc:10:4f:15:a4:8b:e7:09:dc:a5:42:a7:e1:d4:b9:df:6f:05:45:27:e8:02:ea:a9:2d:59:54:44:25:8a:fe:71 ------BEGIN CERTIFICATE----- -MIIEMTCCAxmgAwIBAgIBADANBgkqhkiG9w0BAQUFADCBlTELMAkGA1UEBhMCR1Ix -RDBCBgNVBAoTO0hlbGxlbmljIEFjYWRlbWljIGFuZCBSZXNlYXJjaCBJbnN0aXR1 -dGlvbnMgQ2VydC4gQXV0aG9yaXR5MUAwPgYDVQQDEzdIZWxsZW5pYyBBY2FkZW1p -YyBhbmQgUmVzZWFyY2ggSW5zdGl0dXRpb25zIFJvb3RDQSAyMDExMB4XDTExMTIw -NjEzNDk1MloXDTMxMTIwMTEzNDk1MlowgZUxCzAJBgNVBAYTAkdSMUQwQgYDVQQK -EztIZWxsZW5pYyBBY2FkZW1pYyBhbmQgUmVzZWFyY2ggSW5zdGl0dXRpb25zIENl -cnQuIEF1dGhvcml0eTFAMD4GA1UEAxM3SGVsbGVuaWMgQWNhZGVtaWMgYW5kIFJl -c2VhcmNoIEluc3RpdHV0aW9ucyBSb290Q0EgMjAxMTCCASIwDQYJKoZIhvcNAQEB -BQADggEPADCCAQoCggEBAKlTAOMupvaO+mDYLZU++CwqVE7NuYRhlFhPjz2L5EPz -dYmNUeTDN9KKiE15HrcS3UN4SoqS5tdI1Q+kOilENbgH9mgdVc04UfCMJDGFr4PJ -fel3r+0ae50X+bOdOFAPplp5kYCvN66m0zH7tSYJnTxa71HFK9+WXesyHgLacEns -bgzImjeN9/E2YEsmLIKe0HjzDQ9jpFEw4fkrJxIH2Oq9GGKYsFk3fb7u8yBRQlqD -75O6aRXxYp2fmTmCobd0LovUxQt7L/DICto9eQqakxylKHJzkUOap9FNhYS5qXSP -FEDH3N6sQWRstBmbAmNtJGSPRLIl6s5ddAxjMlyNh+UCAwEAAaOBiTCBhjAPBgNV -HRMBAf8EBTADAQH/MAsGA1UdDwQEAwIBBjAdBgNVHQ4EFgQUppFC/RNhSiOeCKQp -5dgTBCPuQSUwRwYDVR0eBEAwPqA8MAWCAy5ncjAFggMuZXUwBoIELmVkdTAGggQu -b3JnMAWBAy5ncjAFgQMuZXUwBoEELmVkdTAGgQQub3JnMA0GCSqGSIb3DQEBBQUA -A4IBAQAf73lB4XtuP7KMhjdCSk4cNx6NZrokgclPEg8hwAOXhiVtXdMiKahsog2p -6z0GW5k6x8zDmjR/qw7IThzh+uTczQ2+vyT+bOdrwg3IBp5OjWEopmr95fZi6hg8 -TqBTnbI6nOulnJEWtk2C4AwFSKls9cz4y51JtPACpf1wA+2KIaWuE4ZJwzNzvoc7 -dIsXRSZMFpGD/md9zU1jZ/rzAxKWeAaNsWftjj++n08C9bMJL/NMh98qy5V8Acys -Nnq/onN694/BtZqhFLKPM58N7yLcZnuEvUUXBj08yrl3NI/K6s8/MT7jiOOASSXI -l7WdmplNsDz4SgCbZN2fOUvRJ9e4 ------END CERTIFICATE----- - # Issuer: CN=Actalis Authentication Root CA O=Actalis S.p.A./03358520967 # Subject: CN=Actalis Authentication Root CA O=Actalis S.p.A./03358520967 # Label: "Actalis Authentication Root CA" @@ -4528,3 +4456,253 @@ PQQDAwNpADBmAjEAyjzGKnXCXnViOTYAYFqLwZOZzNnbQTs7h5kXO9XMT8oi96CA y/m0sRtW9XLS/BnRAjEAkfcwkz8QRitxpNA7RJvAKQIFskF3UfN5Wp6OFKBOQtJb gfM0agPnIjhQW+0ZT0MW -----END CERTIFICATE----- + +# Issuer: CN=DigiCert TLS ECC P384 Root G5 O=DigiCert, Inc. +# Subject: CN=DigiCert TLS ECC P384 Root G5 O=DigiCert, Inc. +# Label: "DigiCert TLS ECC P384 Root G5" +# Serial: 13129116028163249804115411775095713523 +# MD5 Fingerprint: d3:71:04:6a:43:1c:db:a6:59:e1:a8:a3:aa:c5:71:ed +# SHA1 Fingerprint: 17:f3:de:5e:9f:0f:19:e9:8e:f6:1f:32:26:6e:20:c4:07:ae:30:ee +# SHA256 Fingerprint: 01:8e:13:f0:77:25:32:cf:80:9b:d1:b1:72:81:86:72:83:fc:48:c6:e1:3b:e9:c6:98:12:85:4a:49:0c:1b:05 +-----BEGIN CERTIFICATE----- +MIICGTCCAZ+gAwIBAgIQCeCTZaz32ci5PhwLBCou8zAKBggqhkjOPQQDAzBOMQsw +CQYDVQQGEwJVUzEXMBUGA1UEChMORGlnaUNlcnQsIEluYy4xJjAkBgNVBAMTHURp +Z2lDZXJ0IFRMUyBFQ0MgUDM4NCBSb290IEc1MB4XDTIxMDExNTAwMDAwMFoXDTQ2 +MDExNDIzNTk1OVowTjELMAkGA1UEBhMCVVMxFzAVBgNVBAoTDkRpZ2lDZXJ0LCBJ +bmMuMSYwJAYDVQQDEx1EaWdpQ2VydCBUTFMgRUNDIFAzODQgUm9vdCBHNTB2MBAG +ByqGSM49AgEGBSuBBAAiA2IABMFEoc8Rl1Ca3iOCNQfN0MsYndLxf3c1TzvdlHJS +7cI7+Oz6e2tYIOyZrsn8aLN1udsJ7MgT9U7GCh1mMEy7H0cKPGEQQil8pQgO4CLp +0zVozptjn4S1mU1YoI71VOeVyaNCMEAwHQYDVR0OBBYEFMFRRVBZqz7nLFr6ICIS +B4CIfBFqMA4GA1UdDwEB/wQEAwIBhjAPBgNVHRMBAf8EBTADAQH/MAoGCCqGSM49 +BAMDA2gAMGUCMQCJao1H5+z8blUD2WdsJk6Dxv3J+ysTvLd6jLRl0mlpYxNjOyZQ +LgGheQaRnUi/wr4CMEfDFXuxoJGZSZOoPHzoRgaLLPIxAJSdYsiJvRmEFOml+wG4 +DXZDjC5Ty3zfDBeWUA== +-----END CERTIFICATE----- + +# Issuer: CN=DigiCert TLS RSA4096 Root G5 O=DigiCert, Inc. +# Subject: CN=DigiCert TLS RSA4096 Root G5 O=DigiCert, Inc. +# Label: "DigiCert TLS RSA4096 Root G5" +# Serial: 11930366277458970227240571539258396554 +# MD5 Fingerprint: ac:fe:f7:34:96:a9:f2:b3:b4:12:4b:e4:27:41:6f:e1 +# SHA1 Fingerprint: a7:88:49:dc:5d:7c:75:8c:8c:de:39:98:56:b3:aa:d0:b2:a5:71:35 +# SHA256 Fingerprint: 37:1a:00:dc:05:33:b3:72:1a:7e:eb:40:e8:41:9e:70:79:9d:2b:0a:0f:2c:1d:80:69:31:65:f7:ce:c4:ad:75 +-----BEGIN CERTIFICATE----- +MIIFZjCCA06gAwIBAgIQCPm0eKj6ftpqMzeJ3nzPijANBgkqhkiG9w0BAQwFADBN +MQswCQYDVQQGEwJVUzEXMBUGA1UEChMORGlnaUNlcnQsIEluYy4xJTAjBgNVBAMT +HERpZ2lDZXJ0IFRMUyBSU0E0MDk2IFJvb3QgRzUwHhcNMjEwMTE1MDAwMDAwWhcN +NDYwMTE0MjM1OTU5WjBNMQswCQYDVQQGEwJVUzEXMBUGA1UEChMORGlnaUNlcnQs +IEluYy4xJTAjBgNVBAMTHERpZ2lDZXJ0IFRMUyBSU0E0MDk2IFJvb3QgRzUwggIi +MA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQCz0PTJeRGd/fxmgefM1eS87IE+ +ajWOLrfn3q/5B03PMJ3qCQuZvWxX2hhKuHisOjmopkisLnLlvevxGs3npAOpPxG0 +2C+JFvuUAT27L/gTBaF4HI4o4EXgg/RZG5Wzrn4DReW+wkL+7vI8toUTmDKdFqgp +wgscONyfMXdcvyej/Cestyu9dJsXLfKB2l2w4SMXPohKEiPQ6s+d3gMXsUJKoBZM +pG2T6T867jp8nVid9E6P/DsjyG244gXazOvswzH016cpVIDPRFtMbzCe88zdH5RD +nU1/cHAN1DrRN/BsnZvAFJNY781BOHW8EwOVfH/jXOnVDdXifBBiqmvwPXbzP6Po +sMH976pXTayGpxi0KcEsDr9kvimM2AItzVwv8n/vFfQMFawKsPHTDU9qTXeXAaDx +Zre3zu/O7Oyldcqs4+Fj97ihBMi8ez9dLRYiVu1ISf6nL3kwJZu6ay0/nTvEF+cd +Lvvyz6b84xQslpghjLSR6Rlgg/IwKwZzUNWYOwbpx4oMYIwo+FKbbuH2TbsGJJvX +KyY//SovcfXWJL5/MZ4PbeiPT02jP/816t9JXkGPhvnxd3lLG7SjXi/7RgLQZhNe +XoVPzthwiHvOAbWWl9fNff2C+MIkwcoBOU+NosEUQB+cZtUMCUbW8tDRSHZWOkPL +tgoRObqME2wGtZ7P6wIDAQABo0IwQDAdBgNVHQ4EFgQUUTMc7TZArxfTJc1paPKv +TiM+s0EwDgYDVR0PAQH/BAQDAgGGMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcN +AQEMBQADggIBAGCmr1tfV9qJ20tQqcQjNSH/0GEwhJG3PxDPJY7Jv0Y02cEhJhxw +GXIeo8mH/qlDZJY6yFMECrZBu8RHANmfGBg7sg7zNOok992vIGCukihfNudd5N7H +PNtQOa27PShNlnx2xlv0wdsUpasZYgcYQF+Xkdycx6u1UQ3maVNVzDl92sURVXLF +O4uJ+DQtpBflF+aZfTCIITfNMBc9uPK8qHWgQ9w+iUuQrm0D4ByjoJYJu32jtyoQ +REtGBzRj7TG5BO6jm5qu5jF49OokYTurWGT/u4cnYiWB39yhL/btp/96j1EuMPik +AdKFOV8BmZZvWltwGUb+hmA+rYAQCd05JS9Yf7vSdPD3Rh9GOUrYU9DzLjtxpdRv +/PNn5AeP3SYZ4Y1b+qOTEZvpyDrDVWiakuFSdjjo4bq9+0/V77PnSIMx8IIh47a+ +p6tv75/fTM8BuGJqIz3nCU2AG3swpMPdB380vqQmsvZB6Akd4yCYqjdP//fx4ilw +MUc/dNAUFvohigLVigmUdy7yWSiLfFCSCmZ4OIN1xLVaqBHG5cGdZlXPU8Sv13WF +qUITVuwhd4GTWgzqltlJyqEI8pc7bZsEGCREjnwB8twl2F6GmrE52/WRMmrRpnCK +ovfepEWFJqgejF0pW8hL2JpqA15w8oVPbEtoL8pU9ozaMv7Da4M/OMZ+ +-----END CERTIFICATE----- + +# Issuer: CN=Certainly Root R1 O=Certainly +# Subject: CN=Certainly Root R1 O=Certainly +# Label: "Certainly Root R1" +# Serial: 188833316161142517227353805653483829216 +# MD5 Fingerprint: 07:70:d4:3e:82:87:a0:fa:33:36:13:f4:fa:33:e7:12 +# SHA1 Fingerprint: a0:50:ee:0f:28:71:f4:27:b2:12:6d:6f:50:96:25:ba:cc:86:42:af +# SHA256 Fingerprint: 77:b8:2c:d8:64:4c:43:05:f7:ac:c5:cb:15:6b:45:67:50:04:03:3d:51:c6:0c:62:02:a8:e0:c3:34:67:d3:a0 +-----BEGIN CERTIFICATE----- +MIIFRzCCAy+gAwIBAgIRAI4P+UuQcWhlM1T01EQ5t+AwDQYJKoZIhvcNAQELBQAw +PTELMAkGA1UEBhMCVVMxEjAQBgNVBAoTCUNlcnRhaW5seTEaMBgGA1UEAxMRQ2Vy +dGFpbmx5IFJvb3QgUjEwHhcNMjEwNDAxMDAwMDAwWhcNNDYwNDAxMDAwMDAwWjA9 +MQswCQYDVQQGEwJVUzESMBAGA1UEChMJQ2VydGFpbmx5MRowGAYDVQQDExFDZXJ0 +YWlubHkgUm9vdCBSMTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBANA2 +1B/q3avk0bbm+yLA3RMNansiExyXPGhjZjKcA7WNpIGD2ngwEc/csiu+kr+O5MQT +vqRoTNoCaBZ0vrLdBORrKt03H2As2/X3oXyVtwxwhi7xOu9S98zTm/mLvg7fMbed +aFySpvXl8wo0tf97ouSHocavFwDvA5HtqRxOcT3Si2yJ9HiG5mpJoM610rCrm/b0 +1C7jcvk2xusVtyWMOvwlDbMicyF0yEqWYZL1LwsYpfSt4u5BvQF5+paMjRcCMLT5 +r3gajLQ2EBAHBXDQ9DGQilHFhiZ5shGIXsXwClTNSaa/ApzSRKft43jvRl5tcdF5 +cBxGX1HpyTfcX35pe0HfNEXgO4T0oYoKNp43zGJS4YkNKPl6I7ENPT2a/Z2B7yyQ +wHtETrtJ4A5KVpK8y7XdeReJkd5hiXSSqOMyhb5OhaRLWcsrxXiOcVTQAjeZjOVJ +6uBUcqQRBi8LjMFbvrWhsFNunLhgkR9Za/kt9JQKl7XsxXYDVBtlUrpMklZRNaBA +2CnbrlJ2Oy0wQJuK0EJWtLeIAaSHO1OWzaMWj/Nmqhexx2DgwUMFDO6bW2BvBlyH +Wyf5QBGenDPBt+U1VwV/J84XIIwc/PH72jEpSe31C4SnT8H2TsIonPru4K8H+zMR +eiFPCyEQtkA6qyI6BJyLm4SGcprSp6XEtHWRqSsjAgMBAAGjQjBAMA4GA1UdDwEB +/wQEAwIBBjAPBgNVHRMBAf8EBTADAQH/MB0GA1UdDgQWBBTgqj8ljZ9EXME66C6u +d0yEPmcM9DANBgkqhkiG9w0BAQsFAAOCAgEAuVevuBLaV4OPaAszHQNTVfSVcOQr +PbA56/qJYv331hgELyE03fFo8NWWWt7CgKPBjcZq91l3rhVkz1t5BXdm6ozTaw3d +8VkswTOlMIAVRQdFGjEitpIAq5lNOo93r6kiyi9jyhXWx8bwPWz8HA2YEGGeEaIi +1wrykXprOQ4vMMM2SZ/g6Q8CRFA3lFV96p/2O7qUpUzpvD5RtOjKkjZUbVwlKNrd +rRT90+7iIgXr0PK3aBLXWopBGsaSpVo7Y0VPv+E6dyIvXL9G+VoDhRNCX8reU9di +taY1BMJH/5n9hN9czulegChB8n3nHpDYT3Y+gjwN/KUD+nsa2UUeYNrEjvn8K8l7 +lcUq/6qJ34IxD3L/DCfXCh5WAFAeDJDBlrXYFIW7pw0WwfgHJBu6haEaBQmAupVj +yTrsJZ9/nbqkRxWbRHDxakvWOF5D8xh+UG7pWijmZeZ3Gzr9Hb4DJqPb1OG7fpYn +Kx3upPvaJVQTA945xsMfTZDsjxtK0hzthZU4UHlG1sGQUDGpXJpuHfUzVounmdLy +yCwzk5Iwx06MZTMQZBf9JBeW0Y3COmor6xOLRPIh80oat3df1+2IpHLlOR+Vnb5n +wXARPbv0+Em34yaXOp/SX3z7wJl8OSngex2/DaeP0ik0biQVy96QXr8axGbqwua6 +OV+KmalBWQewLK8= +-----END CERTIFICATE----- + +# Issuer: CN=Certainly Root E1 O=Certainly +# Subject: CN=Certainly Root E1 O=Certainly +# Label: "Certainly Root E1" +# Serial: 8168531406727139161245376702891150584 +# MD5 Fingerprint: 0a:9e:ca:cd:3e:52:50:c6:36:f3:4b:a3:ed:a7:53:e9 +# SHA1 Fingerprint: f9:e1:6d:dc:01:89:cf:d5:82:45:63:3e:c5:37:7d:c2:eb:93:6f:2b +# SHA256 Fingerprint: b4:58:5f:22:e4:ac:75:6a:4e:86:12:a1:36:1c:5d:9d:03:1a:93:fd:84:fe:bb:77:8f:a3:06:8b:0f:c4:2d:c2 +-----BEGIN CERTIFICATE----- +MIIB9zCCAX2gAwIBAgIQBiUzsUcDMydc+Y2aub/M+DAKBggqhkjOPQQDAzA9MQsw +CQYDVQQGEwJVUzESMBAGA1UEChMJQ2VydGFpbmx5MRowGAYDVQQDExFDZXJ0YWlu +bHkgUm9vdCBFMTAeFw0yMTA0MDEwMDAwMDBaFw00NjA0MDEwMDAwMDBaMD0xCzAJ +BgNVBAYTAlVTMRIwEAYDVQQKEwlDZXJ0YWlubHkxGjAYBgNVBAMTEUNlcnRhaW5s +eSBSb290IEUxMHYwEAYHKoZIzj0CAQYFK4EEACIDYgAE3m/4fxzf7flHh4axpMCK ++IKXgOqPyEpeKn2IaKcBYhSRJHpcnqMXfYqGITQYUBsQ3tA3SybHGWCA6TS9YBk2 +QNYphwk8kXr2vBMj3VlOBF7PyAIcGFPBMdjaIOlEjeR2o0IwQDAOBgNVHQ8BAf8E +BAMCAQYwDwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4EFgQU8ygYy2R17ikq6+2uI1g4 +hevIIgcwCgYIKoZIzj0EAwMDaAAwZQIxALGOWiDDshliTd6wT99u0nCK8Z9+aozm +ut6Dacpps6kFtZaSF4fC0urQe87YQVt8rgIwRt7qy12a7DLCZRawTDBcMPPaTnOG +BtjOiQRINzf43TNRnXCve1XYAS59BWQOhriR +-----END CERTIFICATE----- + +# Issuer: CN=E-Tugra Global Root CA RSA v3 O=E-Tugra EBG A.S. OU=E-Tugra Trust Center +# Subject: CN=E-Tugra Global Root CA RSA v3 O=E-Tugra EBG A.S. OU=E-Tugra Trust Center +# Label: "E-Tugra Global Root CA RSA v3" +# Serial: 75951268308633135324246244059508261641472512052 +# MD5 Fingerprint: 22:be:10:f6:c2:f8:03:88:73:5f:33:29:47:28:47:a4 +# SHA1 Fingerprint: e9:a8:5d:22:14:52:1c:5b:aa:0a:b4:be:24:6a:23:8a:c9:ba:e2:a9 +# SHA256 Fingerprint: ef:66:b0:b1:0a:3c:db:9f:2e:36:48:c7:6b:d2:af:18:ea:d2:bf:e6:f1:17:65:5e:28:c4:06:0d:a1:a3:f4:c2 +-----BEGIN CERTIFICATE----- +MIIF8zCCA9ugAwIBAgIUDU3FzRYilZYIfrgLfxUGNPt5EDQwDQYJKoZIhvcNAQEL +BQAwgYAxCzAJBgNVBAYTAlRSMQ8wDQYDVQQHEwZBbmthcmExGTAXBgNVBAoTEEUt +VHVncmEgRUJHIEEuUy4xHTAbBgNVBAsTFEUtVHVncmEgVHJ1c3QgQ2VudGVyMSYw +JAYDVQQDEx1FLVR1Z3JhIEdsb2JhbCBSb290IENBIFJTQSB2MzAeFw0yMDAzMTgw +OTA3MTdaFw00NTAzMTIwOTA3MTdaMIGAMQswCQYDVQQGEwJUUjEPMA0GA1UEBxMG +QW5rYXJhMRkwFwYDVQQKExBFLVR1Z3JhIEVCRyBBLlMuMR0wGwYDVQQLExRFLVR1 +Z3JhIFRydXN0IENlbnRlcjEmMCQGA1UEAxMdRS1UdWdyYSBHbG9iYWwgUm9vdCBD +QSBSU0EgdjMwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQCiZvCJt3J7 +7gnJY9LTQ91ew6aEOErxjYG7FL1H6EAX8z3DeEVypi6Q3po61CBxyryfHUuXCscx +uj7X/iWpKo429NEvx7epXTPcMHD4QGxLsqYxYdE0PD0xesevxKenhOGXpOhL9hd8 +7jwH7eKKV9y2+/hDJVDqJ4GohryPUkqWOmAalrv9c/SF/YP9f4RtNGx/ardLAQO/ +rWm31zLZ9Vdq6YaCPqVmMbMWPcLzJmAy01IesGykNz709a/r4d+ABs8qQedmCeFL +l+d3vSFtKbZnwy1+7dZ5ZdHPOrbRsV5WYVB6Ws5OUDGAA5hH5+QYfERaxqSzO8bG +wzrwbMOLyKSRBfP12baqBqG3q+Sx6iEUXIOk/P+2UNOMEiaZdnDpwA+mdPy70Bt4 +znKS4iicvObpCdg604nmvi533wEKb5b25Y08TVJ2Glbhc34XrD2tbKNSEhhw5oBO +M/J+JjKsBY04pOZ2PJ8QaQ5tndLBeSBrW88zjdGUdjXnXVXHt6woq0bM5zshtQoK +5EpZ3IE1S0SVEgpnpaH/WwAH0sDM+T/8nzPyAPiMbIedBi3x7+PmBvrFZhNb/FAH +nnGGstpvdDDPk1Po3CLW3iAfYY2jLqN4MpBs3KwytQXk9TwzDdbgh3cXTJ2w2Amo +DVf3RIXwyAS+XF1a4xeOVGNpf0l0ZAWMowIDAQABo2MwYTAPBgNVHRMBAf8EBTAD +AQH/MB8GA1UdIwQYMBaAFLK0ruYt9ybVqnUtdkvAG1Mh0EjvMB0GA1UdDgQWBBSy +tK7mLfcm1ap1LXZLwBtTIdBI7zAOBgNVHQ8BAf8EBAMCAQYwDQYJKoZIhvcNAQEL +BQADggIBAImocn+M684uGMQQgC0QDP/7FM0E4BQ8Tpr7nym/Ip5XuYJzEmMmtcyQ +6dIqKe6cLcwsmb5FJ+Sxce3kOJUxQfJ9emN438o2Fi+CiJ+8EUdPdk3ILY7r3y18 +Tjvarvbj2l0Upq7ohUSdBm6O++96SmotKygY/r+QLHUWnw/qln0F7psTpURs+APQ +3SPh/QMSEgj0GDSz4DcLdxEBSL9htLX4GdnLTeqjjO/98Aa1bZL0SmFQhO3sSdPk +vmjmLuMxC1QLGpLWgti2omU8ZgT5Vdps+9u1FGZNlIM7zR6mK7L+d0CGq+ffCsn9 +9t2HVhjYsCxVYJb6CH5SkPVLpi6HfMsg2wY+oF0Dd32iPBMbKaITVaA9FCKvb7jQ +mhty3QUBjYZgv6Rn7rWlDdF/5horYmbDB7rnoEgcOMPpRfunf/ztAmgayncSd6YA +VSgU7NbHEqIbZULpkejLPoeJVF3Zr52XnGnnCv8PWniLYypMfUeUP95L6VPQMPHF +9p5J3zugkaOj/s1YzOrfr28oO6Bpm4/srK4rVJ2bBLFHIK+WEj5jlB0E5y67hscM +moi/dkfv97ALl2bSRM9gUgfh1SxKOidhd8rXj+eHDjD/DLsE4mHDosiXYY60MGo8 +bcIHX0pzLz/5FooBZu+6kcpSV3uu1OYP3Qt6f4ueJiDPO++BcYNZ +-----END CERTIFICATE----- + +# Issuer: CN=E-Tugra Global Root CA ECC v3 O=E-Tugra EBG A.S. OU=E-Tugra Trust Center +# Subject: CN=E-Tugra Global Root CA ECC v3 O=E-Tugra EBG A.S. OU=E-Tugra Trust Center +# Label: "E-Tugra Global Root CA ECC v3" +# Serial: 218504919822255052842371958738296604628416471745 +# MD5 Fingerprint: 46:bc:81:bb:f1:b5:1e:f7:4b:96:bc:14:e2:e7:27:64 +# SHA1 Fingerprint: 8a:2f:af:57:53:b1:b0:e6:a1:04:ec:5b:6a:69:71:6d:f6:1c:e2:84 +# SHA256 Fingerprint: 87:3f:46:85:fa:7f:56:36:25:25:2e:6d:36:bc:d7:f1:6f:c2:49:51:f2:64:e4:7e:1b:95:4f:49:08:cd:ca:13 +-----BEGIN CERTIFICATE----- +MIICpTCCAiqgAwIBAgIUJkYZdzHhT28oNt45UYbm1JeIIsEwCgYIKoZIzj0EAwMw +gYAxCzAJBgNVBAYTAlRSMQ8wDQYDVQQHEwZBbmthcmExGTAXBgNVBAoTEEUtVHVn +cmEgRUJHIEEuUy4xHTAbBgNVBAsTFEUtVHVncmEgVHJ1c3QgQ2VudGVyMSYwJAYD +VQQDEx1FLVR1Z3JhIEdsb2JhbCBSb290IENBIEVDQyB2MzAeFw0yMDAzMTgwOTQ2 +NThaFw00NTAzMTIwOTQ2NThaMIGAMQswCQYDVQQGEwJUUjEPMA0GA1UEBxMGQW5r +YXJhMRkwFwYDVQQKExBFLVR1Z3JhIEVCRyBBLlMuMR0wGwYDVQQLExRFLVR1Z3Jh +IFRydXN0IENlbnRlcjEmMCQGA1UEAxMdRS1UdWdyYSBHbG9iYWwgUm9vdCBDQSBF +Q0MgdjMwdjAQBgcqhkjOPQIBBgUrgQQAIgNiAASOmCm/xxAeJ9urA8woLNheSBkQ +KczLWYHMjLiSF4mDKpL2w6QdTGLVn9agRtwcvHbB40fQWxPa56WzZkjnIZpKT4YK +fWzqTTKACrJ6CZtpS5iB4i7sAnCWH/31Rs7K3IKjYzBhMA8GA1UdEwEB/wQFMAMB +Af8wHwYDVR0jBBgwFoAU/4Ixcj75xGZsrTie0bBRiKWQzPUwHQYDVR0OBBYEFP+C +MXI++cRmbK04ntGwUYilkMz1MA4GA1UdDwEB/wQEAwIBBjAKBggqhkjOPQQDAwNp +ADBmAjEA5gVYaWHlLcoNy/EZCL3W/VGSGn5jVASQkZo1kTmZ+gepZpO6yGjUij/6 +7W4WAie3AjEA3VoXK3YdZUKWpqxdinlW2Iob35reX8dQj7FbcQwm32pAAOwzkSFx +vmjkI6TZraE3 +-----END CERTIFICATE----- + +# Issuer: CN=Security Communication RootCA3 O=SECOM Trust Systems CO.,LTD. +# Subject: CN=Security Communication RootCA3 O=SECOM Trust Systems CO.,LTD. +# Label: "Security Communication RootCA3" +# Serial: 16247922307909811815 +# MD5 Fingerprint: 1c:9a:16:ff:9e:5c:e0:4d:8a:14:01:f4:35:5d:29:26 +# SHA1 Fingerprint: c3:03:c8:22:74:92:e5:61:a2:9c:5f:79:91:2b:1e:44:13:91:30:3a +# SHA256 Fingerprint: 24:a5:5c:2a:b0:51:44:2d:06:17:76:65:41:23:9a:4a:d0:32:d7:c5:51:75:aa:34:ff:de:2f:bc:4f:5c:52:94 +-----BEGIN CERTIFICATE----- +MIIFfzCCA2egAwIBAgIJAOF8N0D9G/5nMA0GCSqGSIb3DQEBDAUAMF0xCzAJBgNV +BAYTAkpQMSUwIwYDVQQKExxTRUNPTSBUcnVzdCBTeXN0ZW1zIENPLixMVEQuMScw +JQYDVQQDEx5TZWN1cml0eSBDb21tdW5pY2F0aW9uIFJvb3RDQTMwHhcNMTYwNjE2 +MDYxNzE2WhcNMzgwMTE4MDYxNzE2WjBdMQswCQYDVQQGEwJKUDElMCMGA1UEChMc +U0VDT00gVHJ1c3QgU3lzdGVtcyBDTy4sTFRELjEnMCUGA1UEAxMeU2VjdXJpdHkg +Q29tbXVuaWNhdGlvbiBSb290Q0EzMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIIC +CgKCAgEA48lySfcw3gl8qUCBWNO0Ot26YQ+TUG5pPDXC7ltzkBtnTCHsXzW7OT4r +CmDvu20rhvtxosis5FaU+cmvsXLUIKx00rgVrVH+hXShuRD+BYD5UpOzQD11EKzA +lrenfna84xtSGc4RHwsENPXY9Wk8d/Nk9A2qhd7gCVAEF5aEt8iKvE1y/By7z/MG +TfmfZPd+pmaGNXHIEYBMwXFAWB6+oHP2/D5Q4eAvJj1+XCO1eXDe+uDRpdYMQXF7 +9+qMHIjH7Iv10S9VlkZ8WjtYO/u62C21Jdp6Ts9EriGmnpjKIG58u4iFW/vAEGK7 +8vknR+/RiTlDxN/e4UG/VHMgly1s2vPUB6PmudhvrvyMGS7TZ2crldtYXLVqAvO4 +g160a75BflcJdURQVc1aEWEhCmHCqYj9E7wtiS/NYeCVvsq1e+F7NGcLH7YMx3we +GVPKp7FKFSBWFHA9K4IsD50VHUeAR/94mQ4xr28+j+2GaR57GIgUssL8gjMunEst ++3A7caoreyYn8xrC3PsXuKHqy6C0rtOUfnrQq8PsOC0RLoi/1D+tEjtCrI8Cbn3M +0V9hvqG8OmpI6iZVIhZdXw3/JzOfGAN0iltSIEdrRU0id4xVJ/CvHozJgyJUt5rQ +T9nO/NkuHJYosQLTA70lUhw0Zk8jq/R3gpYd0VcwCBEF/VfR2ccCAwEAAaNCMEAw +HQYDVR0OBBYEFGQUfPxYchamCik0FW8qy7z8r6irMA4GA1UdDwEB/wQEAwIBBjAP +BgNVHRMBAf8EBTADAQH/MA0GCSqGSIb3DQEBDAUAA4ICAQDcAiMI4u8hOscNtybS +YpOnpSNyByCCYN8Y11StaSWSntkUz5m5UoHPrmyKO1o5yGwBQ8IibQLwYs1OY0PA +FNr0Y/Dq9HHuTofjcan0yVflLl8cebsjqodEV+m9NU1Bu0soo5iyG9kLFwfl9+qd +9XbXv8S2gVj/yP9kaWJ5rW4OH3/uHWnlt3Jxs/6lATWUVCvAUm2PVcTJ0rjLyjQI +UYWg9by0F1jqClx6vWPGOi//lkkZhOpn2ASxYfQAW0q3nHE3GYV5v4GwxxMOdnE+ +OoAGrgYWp421wsTL/0ClXI2lyTrtcoHKXJg80jQDdwj98ClZXSEIx2C/pHF7uNke +gr4Jr2VvKKu/S7XuPghHJ6APbw+LP6yVGPO5DtxnVW5inkYO0QR4ynKudtml+LLf +iAlhi+8kTtFZP1rUPcmTPCtk9YENFpb3ksP+MW/oKjJ0DvRMmEoYDjBU1cXrvMUV +nuiZIesnKwkK2/HmcBhWuwzkvvnoEKQTkrgc4NtnHVMDpCKn3F2SEDzq//wbEBrD +2NCcnWXL0CsnMQMeNuE9dnUM/0Umud1RvCPHX9jYhxBAEg09ODfnRDwYwFMJZI// +1ZqmfHAuc1Uh6N//g7kdPjIe1qZ9LPFm6Vwdp6POXiUyK+OVrCoHzrQoeIY8Laad +TdJ0MN1kURXbg4NR16/9M51NZg== +-----END CERTIFICATE----- + +# Issuer: CN=Security Communication ECC RootCA1 O=SECOM Trust Systems CO.,LTD. +# Subject: CN=Security Communication ECC RootCA1 O=SECOM Trust Systems CO.,LTD. +# Label: "Security Communication ECC RootCA1" +# Serial: 15446673492073852651 +# MD5 Fingerprint: 7e:43:b0:92:68:ec:05:43:4c:98:ab:5d:35:2e:7e:86 +# SHA1 Fingerprint: b8:0e:26:a9:bf:d2:b2:3b:c0:ef:46:c9:ba:c7:bb:f6:1d:0d:41:41 +# SHA256 Fingerprint: e7:4f:bd:a5:5b:d5:64:c4:73:a3:6b:44:1a:a7:99:c8:a6:8e:07:74:40:e8:28:8b:9f:a1:e5:0e:4b:ba:ca:11 +-----BEGIN CERTIFICATE----- +MIICODCCAb6gAwIBAgIJANZdm7N4gS7rMAoGCCqGSM49BAMDMGExCzAJBgNVBAYT +AkpQMSUwIwYDVQQKExxTRUNPTSBUcnVzdCBTeXN0ZW1zIENPLixMVEQuMSswKQYD +VQQDEyJTZWN1cml0eSBDb21tdW5pY2F0aW9uIEVDQyBSb290Q0ExMB4XDTE2MDYx +NjA1MTUyOFoXDTM4MDExODA1MTUyOFowYTELMAkGA1UEBhMCSlAxJTAjBgNVBAoT +HFNFQ09NIFRydXN0IFN5c3RlbXMgQ08uLExURC4xKzApBgNVBAMTIlNlY3VyaXR5 +IENvbW11bmljYXRpb24gRUNDIFJvb3RDQTEwdjAQBgcqhkjOPQIBBgUrgQQAIgNi +AASkpW9gAwPDvTH00xecK4R1rOX9PVdu12O/5gSJko6BnOPpR27KkBLIE+Cnnfdl +dB9sELLo5OnvbYUymUSxXv3MdhDYW72ixvnWQuRXdtyQwjWpS4g8EkdtXP9JTxpK +ULGjQjBAMB0GA1UdDgQWBBSGHOf+LaVKiwj+KBH6vqNm+GBZLzAOBgNVHQ8BAf8E +BAMCAQYwDwYDVR0TAQH/BAUwAwEB/zAKBggqhkjOPQQDAwNoADBlAjAVXUI9/Lbu +9zuxNuie9sRGKEkz0FhDKmMpzE2xtHqiuQ04pV1IKv3LsnNdo4gIxwwCMQDAqy0O +be0YottT6SXbVQjgUMzfRGEWgqtJsLKB7HOHeLRMsmIbEvoWTSVLY70eN9k= +-----END CERTIFICATE----- diff --git a/lib/certifi/core.py b/lib/certifi/core.py index 497d938d..de028981 100644 --- a/lib/certifi/core.py +++ b/lib/certifi/core.py @@ -4,12 +4,12 @@ certifi.py This module returns the installation location of cacert.pem or its contents. """ -import os -import types -from typing import Union +import sys -try: - from importlib.resources import path as get_path, read_text + +if sys.version_info >= (3, 11): + + from importlib.resources import as_file, files _CACERT_CTX = None _CACERT_PATH = None @@ -33,13 +33,54 @@ try: # We also have to hold onto the actual context manager, because # it will do the cleanup whenever it gets garbage collected, so # we will also store that at the global level as well. + _CACERT_CTX = as_file(files("certifi").joinpath("cacert.pem")) + _CACERT_PATH = str(_CACERT_CTX.__enter__()) + + return _CACERT_PATH + + def contents() -> str: + return files("certifi").joinpath("cacert.pem").read_text(encoding="ascii") + +elif sys.version_info >= (3, 7): + + from importlib.resources import path as get_path, read_text + + _CACERT_CTX = None + _CACERT_PATH = None + + def where() -> str: + # This is slightly terrible, but we want to delay extracting the + # file in cases where we're inside of a zipimport situation until + # someone actually calls where(), but we don't want to re-extract + # the file on every call of where(), so we'll do it once then store + # it in a global variable. + global _CACERT_CTX + global _CACERT_PATH + if _CACERT_PATH is None: + # This is slightly janky, the importlib.resources API wants you + # to manage the cleanup of this file, so it doesn't actually + # return a path, it returns a context manager that will give + # you the path when you enter it and will do any cleanup when + # you leave it. In the common case of not needing a temporary + # file, it will just return the file system location and the + # __exit__() is a no-op. + # + # We also have to hold onto the actual context manager, because + # it will do the cleanup whenever it gets garbage collected, so + # we will also store that at the global level as well. _CACERT_CTX = get_path("certifi", "cacert.pem") _CACERT_PATH = str(_CACERT_CTX.__enter__()) return _CACERT_PATH + def contents() -> str: + return read_text("certifi", "cacert.pem", encoding="ascii") + +else: + import os + import types + from typing import Union -except ImportError: Package = Union[types.ModuleType, str] Resource = Union[str, "os.PathLike"] @@ -63,6 +104,5 @@ except ImportError: return os.path.join(f, "cacert.pem") - -def contents() -> str: - return read_text("certifi", "cacert.pem", encoding="ascii") + def contents() -> str: + return read_text("certifi", "cacert.pem", encoding="ascii") diff --git a/lib/charset_normalizer/__init__.py b/lib/charset_normalizer/__init__.py index 1aea851a..2dcaf56f 100644 --- a/lib/charset_normalizer/__init__.py +++ b/lib/charset_normalizer/__init__.py @@ -1,4 +1,4 @@ -# -*- coding: utf_8 -*- +# -*- coding: utf-8 -*- """ Charset-Normalizer ~~~~~~~~~~~~~~ diff --git a/lib/charset_normalizer/api.py b/lib/charset_normalizer/api.py index bdc8ed98..72907f94 100644 --- a/lib/charset_normalizer/api.py +++ b/lib/charset_normalizer/api.py @@ -1,11 +1,8 @@ import logging +import warnings +from os import PathLike from os.path import basename, splitext -from typing import BinaryIO, List, Optional, Set - -try: - from os import PathLike -except ImportError: # pragma: no cover - PathLike = str # type: ignore +from typing import Any, BinaryIO, List, Optional, Set from .cd import ( coherence_ratio, @@ -18,6 +15,7 @@ from .md import mess_ratio from .models import CharsetMatch, CharsetMatches from .utils import ( any_specified_encoding, + cut_sequence_chunks, iana_name, identify_sig_or_bom, is_cp_similar, @@ -39,8 +37,8 @@ def from_bytes( steps: int = 5, chunk_size: int = 512, threshold: float = 0.2, - cp_isolation: List[str] = None, - cp_exclusion: List[str] = None, + cp_isolation: Optional[List[str]] = None, + cp_exclusion: Optional[List[str]] = None, preemptive_behaviour: bool = True, explain: bool = False, ) -> CharsetMatches: @@ -70,11 +68,11 @@ def from_bytes( ) if explain: - previous_logger_level = logger.level # type: int + previous_logger_level: int = logger.level logger.addHandler(explain_handler) logger.setLevel(TRACE) - length = len(sequences) # type: int + length: int = len(sequences) if length == 0: logger.debug("Encoding detection on empty bytes, assuming utf_8 intention.") @@ -119,8 +117,8 @@ def from_bytes( if steps > 1 and length / steps < chunk_size: chunk_size = int(length / steps) - is_too_small_sequence = len(sequences) < TOO_SMALL_SEQUENCE # type: bool - is_too_large_sequence = len(sequences) >= TOO_BIG_SEQUENCE # type: bool + is_too_small_sequence: bool = len(sequences) < TOO_SMALL_SEQUENCE + is_too_large_sequence: bool = len(sequences) >= TOO_BIG_SEQUENCE if is_too_small_sequence: logger.log( @@ -137,11 +135,11 @@ def from_bytes( ), ) - prioritized_encodings = [] # type: List[str] + prioritized_encodings: List[str] = [] - specified_encoding = ( + specified_encoding: Optional[str] = ( any_specified_encoding(sequences) if preemptive_behaviour else None - ) # type: Optional[str] + ) if specified_encoding is not None: prioritized_encodings.append(specified_encoding) @@ -151,15 +149,15 @@ def from_bytes( specified_encoding, ) - tested = set() # type: Set[str] - tested_but_hard_failure = [] # type: List[str] - tested_but_soft_failure = [] # type: List[str] + tested: Set[str] = set() + tested_but_hard_failure: List[str] = [] + tested_but_soft_failure: List[str] = [] - fallback_ascii = None # type: Optional[CharsetMatch] - fallback_u8 = None # type: Optional[CharsetMatch] - fallback_specified = None # type: Optional[CharsetMatch] + fallback_ascii: Optional[CharsetMatch] = None + fallback_u8: Optional[CharsetMatch] = None + fallback_specified: Optional[CharsetMatch] = None - results = CharsetMatches() # type: CharsetMatches + results: CharsetMatches = CharsetMatches() sig_encoding, sig_payload = identify_sig_or_bom(sequences) @@ -190,11 +188,11 @@ def from_bytes( tested.add(encoding_iana) - decoded_payload = None # type: Optional[str] - bom_or_sig_available = sig_encoding == encoding_iana # type: bool - strip_sig_or_bom = bom_or_sig_available and should_strip_sig_or_bom( + decoded_payload: Optional[str] = None + bom_or_sig_available: bool = sig_encoding == encoding_iana + strip_sig_or_bom: bool = bom_or_sig_available and should_strip_sig_or_bom( encoding_iana - ) # type: bool + ) if encoding_iana in {"utf_16", "utf_32"} and not bom_or_sig_available: logger.log( @@ -205,7 +203,7 @@ def from_bytes( continue try: - is_multi_byte_decoder = is_multi_byte_encoding(encoding_iana) # type: bool + is_multi_byte_decoder: bool = is_multi_byte_encoding(encoding_iana) except (ModuleNotFoundError, ImportError): logger.log( TRACE, @@ -240,7 +238,7 @@ def from_bytes( tested_but_hard_failure.append(encoding_iana) continue - similar_soft_failure_test = False # type: bool + similar_soft_failure_test: bool = False for encoding_soft_failed in tested_but_soft_failure: if is_cp_similar(encoding_iana, encoding_soft_failed): @@ -262,11 +260,11 @@ def from_bytes( int(length / steps), ) - multi_byte_bonus = ( + multi_byte_bonus: bool = ( is_multi_byte_decoder and decoded_payload is not None and len(decoded_payload) < length - ) # type: bool + ) if multi_byte_bonus: logger.log( @@ -276,72 +274,47 @@ def from_bytes( encoding_iana, ) - max_chunk_gave_up = int(len(r_) / 4) # type: int + max_chunk_gave_up: int = int(len(r_) / 4) max_chunk_gave_up = max(max_chunk_gave_up, 2) - early_stop_count = 0 # type: int + early_stop_count: int = 0 lazy_str_hard_failure = False - md_chunks = [] # type: List[str] + md_chunks: List[str] = [] md_ratios = [] - for i in r_: - if i + chunk_size > length + 8: - continue - - cut_sequence = sequences[i : i + chunk_size] - - if bom_or_sig_available and strip_sig_or_bom is False: - cut_sequence = sig_payload + cut_sequence - - try: - chunk = cut_sequence.decode( - encoding_iana, - errors="ignore" if is_multi_byte_decoder else "strict", - ) # type: str - except UnicodeDecodeError as e: # Lazy str loading may have missed something there - logger.log( - TRACE, - "LazyStr Loading: After MD chunk decode, code page %s does not fit given bytes sequence at ALL. %s", - encoding_iana, - str(e), - ) - early_stop_count = max_chunk_gave_up - lazy_str_hard_failure = True - break - - # multi-byte bad cutting detector and adjustment - # not the cleanest way to perform that fix but clever enough for now. - if is_multi_byte_decoder and i > 0 and sequences[i] >= 0x80: - - chunk_partial_size_chk = min(chunk_size, 16) # type: int - - if ( - decoded_payload - and chunk[:chunk_partial_size_chk] not in decoded_payload - ): - for j in range(i, i - 4, -1): - cut_sequence = sequences[j : i + chunk_size] - - if bom_or_sig_available and strip_sig_or_bom is False: - cut_sequence = sig_payload + cut_sequence - - chunk = cut_sequence.decode(encoding_iana, errors="ignore") - - if chunk[:chunk_partial_size_chk] in decoded_payload: - break - - md_chunks.append(chunk) - - md_ratios.append(mess_ratio(chunk, threshold)) - - if md_ratios[-1] >= threshold: - early_stop_count += 1 - - if (early_stop_count >= max_chunk_gave_up) or ( - bom_or_sig_available and strip_sig_or_bom is False + try: + for chunk in cut_sequence_chunks( + sequences, + encoding_iana, + r_, + chunk_size, + bom_or_sig_available, + strip_sig_or_bom, + sig_payload, + is_multi_byte_decoder, + decoded_payload, ): - break + md_chunks.append(chunk) + + md_ratios.append(mess_ratio(chunk, threshold)) + + if md_ratios[-1] >= threshold: + early_stop_count += 1 + + if (early_stop_count >= max_chunk_gave_up) or ( + bom_or_sig_available and strip_sig_or_bom is False + ): + break + except UnicodeDecodeError as e: # Lazy str loading may have missed something there + logger.log( + TRACE, + "LazyStr Loading: After MD chunk decode, code page %s does not fit given bytes sequence at ALL. %s", + encoding_iana, + str(e), + ) + early_stop_count = max_chunk_gave_up + lazy_str_hard_failure = True # We might want to check the sequence again with the whole content # Only if initial MD tests passes @@ -362,9 +335,7 @@ def from_bytes( tested_but_hard_failure.append(encoding_iana) continue - mean_mess_ratio = ( - sum(md_ratios) / len(md_ratios) if md_ratios else 0.0 - ) # type: float + mean_mess_ratio: float = sum(md_ratios) / len(md_ratios) if md_ratios else 0.0 if mean_mess_ratio >= threshold or early_stop_count >= max_chunk_gave_up: tested_but_soft_failure.append(encoding_iana) logger.log( @@ -399,7 +370,7 @@ def from_bytes( ) if not is_multi_byte_decoder: - target_languages = encoding_languages(encoding_iana) # type: List[str] + target_languages: List[str] = encoding_languages(encoding_iana) else: target_languages = mb_encoding_languages(encoding_iana) @@ -516,8 +487,8 @@ def from_fp( steps: int = 5, chunk_size: int = 512, threshold: float = 0.20, - cp_isolation: List[str] = None, - cp_exclusion: List[str] = None, + cp_isolation: Optional[List[str]] = None, + cp_exclusion: Optional[List[str]] = None, preemptive_behaviour: bool = True, explain: bool = False, ) -> CharsetMatches: @@ -538,12 +509,12 @@ def from_fp( def from_path( - path: PathLike, + path: "PathLike[Any]", steps: int = 5, chunk_size: int = 512, threshold: float = 0.20, - cp_isolation: List[str] = None, - cp_exclusion: List[str] = None, + cp_isolation: Optional[List[str]] = None, + cp_exclusion: Optional[List[str]] = None, preemptive_behaviour: bool = True, explain: bool = False, ) -> CharsetMatches: @@ -565,17 +536,22 @@ def from_path( def normalize( - path: PathLike, + path: "PathLike[Any]", steps: int = 5, chunk_size: int = 512, threshold: float = 0.20, - cp_isolation: List[str] = None, - cp_exclusion: List[str] = None, + cp_isolation: Optional[List[str]] = None, + cp_exclusion: Optional[List[str]] = None, preemptive_behaviour: bool = True, ) -> CharsetMatch: """ Take a (text-based) file path and try to create another file next to it, this time using UTF-8. """ + warnings.warn( + "normalize is deprecated and will be removed in 3.0", + DeprecationWarning, + ) + results = from_path( path, steps, diff --git a/lib/charset_normalizer/assets/__init__.py b/lib/charset_normalizer/assets/__init__.py index b2e56ff3..3c33ba30 100644 --- a/lib/charset_normalizer/assets/__init__.py +++ b/lib/charset_normalizer/assets/__init__.py @@ -1,1244 +1,1122 @@ -# -*- coding: utf_8 -*- -from collections import OrderedDict +# -*- coding: utf-8 -*- +from typing import Dict, List -FREQUENCIES = OrderedDict( - [ - ( - "English", - [ - "e", - "a", - "t", - "i", - "o", - "n", - "s", - "r", - "h", - "l", - "d", - "c", - "u", - "m", - "f", - "p", - "g", - "w", - "y", - "b", - "v", - "k", - "x", - "j", - "z", - "q", - ], - ), - ( - "German", - [ - "e", - "n", - "i", - "r", - "s", - "t", - "a", - "d", - "h", - "u", - "l", - "g", - "o", - "c", - "m", - "b", - "f", - "k", - "w", - "z", - "p", - "v", - "ü", - "ä", - "ö", - "j", - ], - ), - ( - "French", - [ - "e", - "a", - "s", - "n", - "i", - "t", - "r", - "l", - "u", - "o", - "d", - "c", - "p", - "m", - "é", - "v", - "g", - "f", - "b", - "h", - "q", - "à", - "x", - "è", - "y", - "j", - ], - ), - ( - "Dutch", - [ - "e", - "n", - "a", - "i", - "r", - "t", - "o", - "d", - "s", - "l", - "g", - "h", - "v", - "m", - "u", - "k", - "c", - "p", - "b", - "w", - "j", - "z", - "f", - "y", - "x", - "ë", - ], - ), - ( - "Italian", - [ - "e", - "i", - "a", - "o", - "n", - "l", - "t", - "r", - "s", - "c", - "d", - "u", - "p", - "m", - "g", - "v", - "f", - "b", - "z", - "h", - "q", - "è", - "à", - "k", - "y", - "ò", - ], - ), - ( - "Polish", - [ - "a", - "i", - "o", - "e", - "n", - "r", - "z", - "w", - "s", - "c", - "t", - "k", - "y", - "d", - "p", - "m", - "u", - "l", - "j", - "ł", - "g", - "b", - "h", - "ą", - "ę", - "ó", - ], - ), - ( - "Spanish", - [ - "e", - "a", - "o", - "n", - "s", - "r", - "i", - "l", - "d", - "t", - "c", - "u", - "m", - "p", - "b", - "g", - "v", - "f", - "y", - "ó", - "h", - "q", - "í", - "j", - "z", - "á", - ], - ), - ( - "Russian", - [ - "о", - "а", - "е", - "и", - "н", - "с", - "т", - "р", - "в", - "л", - "к", - "м", - "д", - "п", - "у", - "г", - "я", - "ы", - "з", - "б", - "й", - "ь", - "ч", - "х", - "ж", - "ц", - ], - ), - ( - "Japanese", - [ - "の", - "に", - "る", - "た", - "は", - "ー", - "と", - "し", - "を", - "で", - "て", - "が", - "い", - "ン", - "れ", - "な", - "年", - "ス", - "っ", - "ル", - "か", - "ら", - "あ", - "さ", - "も", - "り", - ], - ), - ( - "Portuguese", - [ - "a", - "e", - "o", - "s", - "i", - "r", - "d", - "n", - "t", - "m", - "u", - "c", - "l", - "p", - "g", - "v", - "b", - "f", - "h", - "ã", - "q", - "é", - "ç", - "á", - "z", - "í", - ], - ), - ( - "Swedish", - [ - "e", - "a", - "n", - "r", - "t", - "s", - "i", - "l", - "d", - "o", - "m", - "k", - "g", - "v", - "h", - "f", - "u", - "p", - "ä", - "c", - "b", - "ö", - "å", - "y", - "j", - "x", - ], - ), - ( - "Chinese", - [ - "的", - "一", - "是", - "不", - "了", - "在", - "人", - "有", - "我", - "他", - "这", - "个", - "们", - "中", - "来", - "上", - "大", - "为", - "和", - "国", - "地", - "到", - "以", - "说", - "时", - "要", - "就", - "出", - "会", - ], - ), - ( - "Ukrainian", - [ - "о", - "а", - "н", - "і", - "и", - "р", - "в", - "т", - "е", - "с", - "к", - "л", - "у", - "д", - "м", - "п", - "з", - "я", - "ь", - "б", - "г", - "й", - "ч", - "х", - "ц", - "ї", - ], - ), - ( - "Norwegian", - [ - "e", - "r", - "n", - "t", - "a", - "s", - "i", - "o", - "l", - "d", - "g", - "k", - "m", - "v", - "f", - "p", - "u", - "b", - "h", - "å", - "y", - "j", - "ø", - "c", - "æ", - "w", - ], - ), - ( - "Finnish", - [ - "a", - "i", - "n", - "t", - "e", - "s", - "l", - "o", - "u", - "k", - "ä", - "m", - "r", - "v", - "j", - "h", - "p", - "y", - "d", - "ö", - "g", - "c", - "b", - "f", - "w", - "z", - ], - ), - ( - "Vietnamese", - [ - "n", - "h", - "t", - "i", - "c", - "g", - "a", - "o", - "u", - "m", - "l", - "r", - "à", - "đ", - "s", - "e", - "v", - "p", - "b", - "y", - "ư", - "d", - "á", - "k", - "ộ", - "ế", - ], - ), - ( - "Czech", - [ - "o", - "e", - "a", - "n", - "t", - "s", - "i", - "l", - "v", - "r", - "k", - "d", - "u", - "m", - "p", - "í", - "c", - "h", - "z", - "á", - "y", - "j", - "b", - "ě", - "é", - "ř", - ], - ), - ( - "Hungarian", - [ - "e", - "a", - "t", - "l", - "s", - "n", - "k", - "r", - "i", - "o", - "z", - "á", - "é", - "g", - "m", - "b", - "y", - "v", - "d", - "h", - "u", - "p", - "j", - "ö", - "f", - "c", - ], - ), - ( - "Korean", - [ - "이", - "다", - "에", - "의", - "는", - "로", - "하", - "을", - "가", - "고", - "지", - "서", - "한", - "은", - "기", - "으", - "년", - "대", - "사", - "시", - "를", - "리", - "도", - "인", - "스", - "일", - ], - ), - ( - "Indonesian", - [ - "a", - "n", - "e", - "i", - "r", - "t", - "u", - "s", - "d", - "k", - "m", - "l", - "g", - "p", - "b", - "o", - "h", - "y", - "j", - "c", - "w", - "f", - "v", - "z", - "x", - "q", - ], - ), - ( - "Turkish", - [ - "a", - "e", - "i", - "n", - "r", - "l", - "ı", - "k", - "d", - "t", - "s", - "m", - "y", - "u", - "o", - "b", - "ü", - "ş", - "v", - "g", - "z", - "h", - "c", - "p", - "ç", - "ğ", - ], - ), - ( - "Romanian", - [ - "e", - "i", - "a", - "r", - "n", - "t", - "u", - "l", - "o", - "c", - "s", - "d", - "p", - "m", - "ă", - "f", - "v", - "î", - "g", - "b", - "ș", - "ț", - "z", - "h", - "â", - "j", - ], - ), - ( - "Farsi", - [ - "ا", - "ی", - "ر", - "د", - "ن", - "ه", - "و", - "م", - "ت", - "ب", - "س", - "ل", - "ک", - "ش", - "ز", - "ف", - "گ", - "ع", - "خ", - "ق", - "ج", - "آ", - "پ", - "ح", - "ط", - "ص", - ], - ), - ( - "Arabic", - [ - "ا", - "ل", - "ي", - "م", - "و", - "ن", - "ر", - "ت", - "ب", - "ة", - "ع", - "د", - "س", - "ف", - "ه", - "ك", - "ق", - "أ", - "ح", - "ج", - "ش", - "ط", - "ص", - "ى", - "خ", - "إ", - ], - ), - ( - "Danish", - [ - "e", - "r", - "n", - "t", - "a", - "i", - "s", - "d", - "l", - "o", - "g", - "m", - "k", - "f", - "v", - "u", - "b", - "h", - "p", - "å", - "y", - "ø", - "æ", - "c", - "j", - "w", - ], - ), - ( - "Serbian", - [ - "а", - "и", - "о", - "е", - "н", - "р", - "с", - "у", - "т", - "к", - "ј", - "в", - "д", - "м", - "п", - "л", - "г", - "з", - "б", - "a", - "i", - "e", - "o", - "n", - "ц", - "ш", - ], - ), - ( - "Lithuanian", - [ - "i", - "a", - "s", - "o", - "r", - "e", - "t", - "n", - "u", - "k", - "m", - "l", - "p", - "v", - "d", - "j", - "g", - "ė", - "b", - "y", - "ų", - "š", - "ž", - "c", - "ą", - "į", - ], - ), - ( - "Slovene", - [ - "e", - "a", - "i", - "o", - "n", - "r", - "s", - "l", - "t", - "j", - "v", - "k", - "d", - "p", - "m", - "u", - "z", - "b", - "g", - "h", - "č", - "c", - "š", - "ž", - "f", - "y", - ], - ), - ( - "Slovak", - [ - "o", - "a", - "e", - "n", - "i", - "r", - "v", - "t", - "s", - "l", - "k", - "d", - "m", - "p", - "u", - "c", - "h", - "j", - "b", - "z", - "á", - "y", - "ý", - "í", - "č", - "é", - ], - ), - ( - "Hebrew", - [ - "י", - "ו", - "ה", - "ל", - "ר", - "ב", - "ת", - "מ", - "א", - "ש", - "נ", - "ע", - "ם", - "ד", - "ק", - "ח", - "פ", - "ס", - "כ", - "ג", - "ט", - "צ", - "ן", - "ז", - "ך", - ], - ), - ( - "Bulgarian", - [ - "а", - "и", - "о", - "е", - "н", - "т", - "р", - "с", - "в", - "л", - "к", - "д", - "п", - "м", - "з", - "г", - "я", - "ъ", - "у", - "б", - "ч", - "ц", - "й", - "ж", - "щ", - "х", - ], - ), - ( - "Croatian", - [ - "a", - "i", - "o", - "e", - "n", - "r", - "j", - "s", - "t", - "u", - "k", - "l", - "v", - "d", - "m", - "p", - "g", - "z", - "b", - "c", - "č", - "h", - "š", - "ž", - "ć", - "f", - ], - ), - ( - "Hindi", - [ - "क", - "र", - "स", - "न", - "त", - "म", - "ह", - "प", - "य", - "ल", - "व", - "ज", - "द", - "ग", - "ब", - "श", - "ट", - "अ", - "ए", - "थ", - "भ", - "ड", - "च", - "ध", - "ष", - "इ", - ], - ), - ( - "Estonian", - [ - "a", - "i", - "e", - "s", - "t", - "l", - "u", - "n", - "o", - "k", - "r", - "d", - "m", - "v", - "g", - "p", - "j", - "h", - "ä", - "b", - "õ", - "ü", - "f", - "c", - "ö", - "y", - ], - ), - ( - "Simple English", - [ - "e", - "a", - "t", - "i", - "o", - "n", - "s", - "r", - "h", - "l", - "d", - "c", - "m", - "u", - "f", - "p", - "g", - "w", - "b", - "y", - "v", - "k", - "j", - "x", - "z", - "q", - ], - ), - ( - "Thai", - [ - "า", - "น", - "ร", - "อ", - "ก", - "เ", - "ง", - "ม", - "ย", - "ล", - "ว", - "ด", - "ท", - "ส", - "ต", - "ะ", - "ป", - "บ", - "ค", - "ห", - "แ", - "จ", - "พ", - "ช", - "ข", - "ใ", - ], - ), - ( - "Greek", - [ - "α", - "τ", - "ο", - "ι", - "ε", - "ν", - "ρ", - "σ", - "κ", - "η", - "π", - "ς", - "υ", - "μ", - "λ", - "ί", - "ό", - "ά", - "γ", - "έ", - "δ", - "ή", - "ω", - "χ", - "θ", - "ύ", - ], - ), - ( - "Tamil", - [ - "க", - "த", - "ப", - "ட", - "ர", - "ம", - "ல", - "ன", - "வ", - "ற", - "ய", - "ள", - "ச", - "ந", - "இ", - "ண", - "அ", - "ஆ", - "ழ", - "ங", - "எ", - "உ", - "ஒ", - "ஸ", - ], - ), - ( - "Classical Chinese", - [ - "之", - "年", - "為", - "也", - "以", - "一", - "人", - "其", - "者", - "國", - "有", - "二", - "十", - "於", - "曰", - "三", - "不", - "大", - "而", - "子", - "中", - "五", - "四", - ], - ), - ( - "Kazakh", - [ - "а", - "ы", - "е", - "н", - "т", - "р", - "л", - "і", - "д", - "с", - "м", - "қ", - "к", - "о", - "б", - "и", - "у", - "ғ", - "ж", - "ң", - "з", - "ш", - "й", - "п", - "г", - "ө", - ], - ), - ] -) +FREQUENCIES: Dict[str, List[str]] = { + "English": [ + "e", + "a", + "t", + "i", + "o", + "n", + "s", + "r", + "h", + "l", + "d", + "c", + "u", + "m", + "f", + "p", + "g", + "w", + "y", + "b", + "v", + "k", + "x", + "j", + "z", + "q", + ], + "German": [ + "e", + "n", + "i", + "r", + "s", + "t", + "a", + "d", + "h", + "u", + "l", + "g", + "o", + "c", + "m", + "b", + "f", + "k", + "w", + "z", + "p", + "v", + "ü", + "ä", + "ö", + "j", + ], + "French": [ + "e", + "a", + "s", + "n", + "i", + "t", + "r", + "l", + "u", + "o", + "d", + "c", + "p", + "m", + "é", + "v", + "g", + "f", + "b", + "h", + "q", + "à", + "x", + "è", + "y", + "j", + ], + "Dutch": [ + "e", + "n", + "a", + "i", + "r", + "t", + "o", + "d", + "s", + "l", + "g", + "h", + "v", + "m", + "u", + "k", + "c", + "p", + "b", + "w", + "j", + "z", + "f", + "y", + "x", + "ë", + ], + "Italian": [ + "e", + "i", + "a", + "o", + "n", + "l", + "t", + "r", + "s", + "c", + "d", + "u", + "p", + "m", + "g", + "v", + "f", + "b", + "z", + "h", + "q", + "è", + "à", + "k", + "y", + "ò", + ], + "Polish": [ + "a", + "i", + "o", + "e", + "n", + "r", + "z", + "w", + "s", + "c", + "t", + "k", + "y", + "d", + "p", + "m", + "u", + "l", + "j", + "ł", + "g", + "b", + "h", + "ą", + "ę", + "ó", + ], + "Spanish": [ + "e", + "a", + "o", + "n", + "s", + "r", + "i", + "l", + "d", + "t", + "c", + "u", + "m", + "p", + "b", + "g", + "v", + "f", + "y", + "ó", + "h", + "q", + "í", + "j", + "z", + "á", + ], + "Russian": [ + "о", + "а", + "е", + "и", + "н", + "с", + "т", + "р", + "в", + "л", + "к", + "м", + "д", + "п", + "у", + "г", + "я", + "ы", + "з", + "б", + "й", + "ь", + "ч", + "х", + "ж", + "ц", + ], + "Japanese": [ + "の", + "に", + "る", + "た", + "は", + "ー", + "と", + "し", + "を", + "で", + "て", + "が", + "い", + "ン", + "れ", + "な", + "年", + "ス", + "っ", + "ル", + "か", + "ら", + "あ", + "さ", + "も", + "り", + ], + "Portuguese": [ + "a", + "e", + "o", + "s", + "i", + "r", + "d", + "n", + "t", + "m", + "u", + "c", + "l", + "p", + "g", + "v", + "b", + "f", + "h", + "ã", + "q", + "é", + "ç", + "á", + "z", + "í", + ], + "Swedish": [ + "e", + "a", + "n", + "r", + "t", + "s", + "i", + "l", + "d", + "o", + "m", + "k", + "g", + "v", + "h", + "f", + "u", + "p", + "ä", + "c", + "b", + "ö", + "å", + "y", + "j", + "x", + ], + "Chinese": [ + "的", + "一", + "是", + "不", + "了", + "在", + "人", + "有", + "我", + "他", + "这", + "个", + "们", + "中", + "来", + "上", + "大", + "为", + "和", + "国", + "地", + "到", + "以", + "说", + "时", + "要", + "就", + "出", + "会", + ], + "Ukrainian": [ + "о", + "а", + "н", + "і", + "и", + "р", + "в", + "т", + "е", + "с", + "к", + "л", + "у", + "д", + "м", + "п", + "з", + "я", + "ь", + "б", + "г", + "й", + "ч", + "х", + "ц", + "ї", + ], + "Norwegian": [ + "e", + "r", + "n", + "t", + "a", + "s", + "i", + "o", + "l", + "d", + "g", + "k", + "m", + "v", + "f", + "p", + "u", + "b", + "h", + "å", + "y", + "j", + "ø", + "c", + "æ", + "w", + ], + "Finnish": [ + "a", + "i", + "n", + "t", + "e", + "s", + "l", + "o", + "u", + "k", + "ä", + "m", + "r", + "v", + "j", + "h", + "p", + "y", + "d", + "ö", + "g", + "c", + "b", + "f", + "w", + "z", + ], + "Vietnamese": [ + "n", + "h", + "t", + "i", + "c", + "g", + "a", + "o", + "u", + "m", + "l", + "r", + "à", + "đ", + "s", + "e", + "v", + "p", + "b", + "y", + "ư", + "d", + "á", + "k", + "ộ", + "ế", + ], + "Czech": [ + "o", + "e", + "a", + "n", + "t", + "s", + "i", + "l", + "v", + "r", + "k", + "d", + "u", + "m", + "p", + "í", + "c", + "h", + "z", + "á", + "y", + "j", + "b", + "ě", + "é", + "ř", + ], + "Hungarian": [ + "e", + "a", + "t", + "l", + "s", + "n", + "k", + "r", + "i", + "o", + "z", + "á", + "é", + "g", + "m", + "b", + "y", + "v", + "d", + "h", + "u", + "p", + "j", + "ö", + "f", + "c", + ], + "Korean": [ + "이", + "다", + "에", + "의", + "는", + "로", + "하", + "을", + "가", + "고", + "지", + "서", + "한", + "은", + "기", + "으", + "년", + "대", + "사", + "시", + "를", + "리", + "도", + "인", + "스", + "일", + ], + "Indonesian": [ + "a", + "n", + "e", + "i", + "r", + "t", + "u", + "s", + "d", + "k", + "m", + "l", + "g", + "p", + "b", + "o", + "h", + "y", + "j", + "c", + "w", + "f", + "v", + "z", + "x", + "q", + ], + "Turkish": [ + "a", + "e", + "i", + "n", + "r", + "l", + "ı", + "k", + "d", + "t", + "s", + "m", + "y", + "u", + "o", + "b", + "ü", + "ş", + "v", + "g", + "z", + "h", + "c", + "p", + "ç", + "ğ", + ], + "Romanian": [ + "e", + "i", + "a", + "r", + "n", + "t", + "u", + "l", + "o", + "c", + "s", + "d", + "p", + "m", + "ă", + "f", + "v", + "î", + "g", + "b", + "ș", + "ț", + "z", + "h", + "â", + "j", + ], + "Farsi": [ + "ا", + "ی", + "ر", + "د", + "ن", + "ه", + "و", + "م", + "ت", + "ب", + "س", + "ل", + "ک", + "ش", + "ز", + "ف", + "گ", + "ع", + "خ", + "ق", + "ج", + "آ", + "پ", + "ح", + "ط", + "ص", + ], + "Arabic": [ + "ا", + "ل", + "ي", + "م", + "و", + "ن", + "ر", + "ت", + "ب", + "ة", + "ع", + "د", + "س", + "ف", + "ه", + "ك", + "ق", + "أ", + "ح", + "ج", + "ش", + "ط", + "ص", + "ى", + "خ", + "إ", + ], + "Danish": [ + "e", + "r", + "n", + "t", + "a", + "i", + "s", + "d", + "l", + "o", + "g", + "m", + "k", + "f", + "v", + "u", + "b", + "h", + "p", + "å", + "y", + "ø", + "æ", + "c", + "j", + "w", + ], + "Serbian": [ + "а", + "и", + "о", + "е", + "н", + "р", + "с", + "у", + "т", + "к", + "ј", + "в", + "д", + "м", + "п", + "л", + "г", + "з", + "б", + "a", + "i", + "e", + "o", + "n", + "ц", + "ш", + ], + "Lithuanian": [ + "i", + "a", + "s", + "o", + "r", + "e", + "t", + "n", + "u", + "k", + "m", + "l", + "p", + "v", + "d", + "j", + "g", + "ė", + "b", + "y", + "ų", + "š", + "ž", + "c", + "ą", + "į", + ], + "Slovene": [ + "e", + "a", + "i", + "o", + "n", + "r", + "s", + "l", + "t", + "j", + "v", + "k", + "d", + "p", + "m", + "u", + "z", + "b", + "g", + "h", + "č", + "c", + "š", + "ž", + "f", + "y", + ], + "Slovak": [ + "o", + "a", + "e", + "n", + "i", + "r", + "v", + "t", + "s", + "l", + "k", + "d", + "m", + "p", + "u", + "c", + "h", + "j", + "b", + "z", + "á", + "y", + "ý", + "í", + "č", + "é", + ], + "Hebrew": [ + "י", + "ו", + "ה", + "ל", + "ר", + "ב", + "ת", + "מ", + "א", + "ש", + "נ", + "ע", + "ם", + "ד", + "ק", + "ח", + "פ", + "ס", + "כ", + "ג", + "ט", + "צ", + "ן", + "ז", + "ך", + ], + "Bulgarian": [ + "а", + "и", + "о", + "е", + "н", + "т", + "р", + "с", + "в", + "л", + "к", + "д", + "п", + "м", + "з", + "г", + "я", + "ъ", + "у", + "б", + "ч", + "ц", + "й", + "ж", + "щ", + "х", + ], + "Croatian": [ + "a", + "i", + "o", + "e", + "n", + "r", + "j", + "s", + "t", + "u", + "k", + "l", + "v", + "d", + "m", + "p", + "g", + "z", + "b", + "c", + "č", + "h", + "š", + "ž", + "ć", + "f", + ], + "Hindi": [ + "क", + "र", + "स", + "न", + "त", + "म", + "ह", + "प", + "य", + "ल", + "व", + "ज", + "द", + "ग", + "ब", + "श", + "ट", + "अ", + "ए", + "थ", + "भ", + "ड", + "च", + "ध", + "ष", + "इ", + ], + "Estonian": [ + "a", + "i", + "e", + "s", + "t", + "l", + "u", + "n", + "o", + "k", + "r", + "d", + "m", + "v", + "g", + "p", + "j", + "h", + "ä", + "b", + "õ", + "ü", + "f", + "c", + "ö", + "y", + ], + "Simple English": [ + "e", + "a", + "t", + "i", + "o", + "n", + "s", + "r", + "h", + "l", + "d", + "c", + "m", + "u", + "f", + "p", + "g", + "w", + "b", + "y", + "v", + "k", + "j", + "x", + "z", + "q", + ], + "Thai": [ + "า", + "น", + "ร", + "อ", + "ก", + "เ", + "ง", + "ม", + "ย", + "ล", + "ว", + "ด", + "ท", + "ส", + "ต", + "ะ", + "ป", + "บ", + "ค", + "ห", + "แ", + "จ", + "พ", + "ช", + "ข", + "ใ", + ], + "Greek": [ + "α", + "τ", + "ο", + "ι", + "ε", + "ν", + "ρ", + "σ", + "κ", + "η", + "π", + "ς", + "υ", + "μ", + "λ", + "ί", + "ό", + "ά", + "γ", + "έ", + "δ", + "ή", + "ω", + "χ", + "θ", + "ύ", + ], + "Tamil": [ + "க", + "த", + "ப", + "ட", + "ர", + "ம", + "ல", + "ன", + "வ", + "ற", + "ய", + "ள", + "ச", + "ந", + "இ", + "ண", + "அ", + "ஆ", + "ழ", + "ங", + "எ", + "உ", + "ஒ", + "ஸ", + ], + "Classical Chinese": [ + "之", + "年", + "為", + "也", + "以", + "一", + "人", + "其", + "者", + "國", + "有", + "二", + "十", + "於", + "曰", + "三", + "不", + "大", + "而", + "子", + "中", + "五", + "四", + ], + "Kazakh": [ + "а", + "ы", + "е", + "н", + "т", + "р", + "л", + "і", + "д", + "с", + "м", + "қ", + "к", + "о", + "б", + "и", + "у", + "ғ", + "ж", + "ң", + "з", + "ш", + "й", + "п", + "г", + "ө", + ], +} diff --git a/lib/charset_normalizer/cd.py b/lib/charset_normalizer/cd.py index 8429a0eb..ee4b7424 100644 --- a/lib/charset_normalizer/cd.py +++ b/lib/charset_normalizer/cd.py @@ -1,8 +1,8 @@ import importlib from codecs import IncrementalDecoder -from collections import Counter, OrderedDict +from collections import Counter from functools import lru_cache -from typing import Dict, List, Optional, Tuple +from typing import Counter as TypeCounter, Dict, List, Optional, Tuple from .assets import FREQUENCIES from .constant import KO_NAMES, LANGUAGE_SUPPORTED_COUNT, TOO_SMALL_SEQUENCE, ZH_NAMES @@ -24,17 +24,19 @@ def encoding_unicode_range(iana_name: str) -> List[str]: if is_multi_byte_encoding(iana_name): raise IOError("Function not supported on multi-byte code page") - decoder = importlib.import_module("encodings.{}".format(iana_name)).IncrementalDecoder # type: ignore + decoder = importlib.import_module( + "encodings.{}".format(iana_name) + ).IncrementalDecoder - p = decoder(errors="ignore") # type: IncrementalDecoder - seen_ranges = {} # type: Dict[str, int] - character_count = 0 # type: int + p: IncrementalDecoder = decoder(errors="ignore") + seen_ranges: Dict[str, int] = {} + character_count: int = 0 for i in range(0x40, 0xFF): - chunk = p.decode(bytes([i])) # type: str + chunk: str = p.decode(bytes([i])) if chunk: - character_range = unicode_range(chunk) # type: Optional[str] + character_range: Optional[str] = unicode_range(chunk) if character_range is None: continue @@ -58,7 +60,7 @@ def unicode_range_languages(primary_range: str) -> List[str]: """ Return inferred languages used with a unicode range. """ - languages = [] # type: List[str] + languages: List[str] = [] for language, characters in FREQUENCIES.items(): for character in characters: @@ -75,8 +77,8 @@ def encoding_languages(iana_name: str) -> List[str]: Single-byte encoding language association. Some code page are heavily linked to particular language(s). This function does the correspondence. """ - unicode_ranges = encoding_unicode_range(iana_name) # type: List[str] - primary_range = None # type: Optional[str] + unicode_ranges: List[str] = encoding_unicode_range(iana_name) + primary_range: Optional[str] = None for specified_range in unicode_ranges: if "Latin" not in specified_range: @@ -115,8 +117,8 @@ def get_target_features(language: str) -> Tuple[bool, bool]: """ Determine main aspects from a supported language if it contains accents and if is pure Latin. """ - target_have_accents = False # type: bool - target_pure_latin = True # type: bool + target_have_accents: bool = False + target_pure_latin: bool = True for character in FREQUENCIES[language]: if not target_have_accents and is_accentuated(character): @@ -133,7 +135,7 @@ def alphabet_languages( """ Return associated languages associated to given characters. """ - languages = [] # type: List[Tuple[str, float]] + languages: List[Tuple[str, float]] = [] source_have_accents = any(is_accentuated(character) for character in characters) @@ -147,13 +149,13 @@ def alphabet_languages( if target_have_accents is False and source_have_accents: continue - character_count = len(language_characters) # type: int + character_count: int = len(language_characters) - character_match_count = len( + character_match_count: int = len( [c for c in language_characters if c in characters] - ) # type: int + ) - ratio = character_match_count / character_count # type: float + ratio: float = character_match_count / character_count if ratio >= 0.2: languages.append((language, ratio)) @@ -174,36 +176,33 @@ def characters_popularity_compare( if language not in FREQUENCIES: raise ValueError("{} not available".format(language)) - character_approved_count = 0 # type: int + character_approved_count: int = 0 + FREQUENCIES_language_set = set(FREQUENCIES[language]) for character in ordered_characters: - if character not in FREQUENCIES[language]: + if character not in FREQUENCIES_language_set: continue - characters_before_source = FREQUENCIES[language][ + characters_before_source: List[str] = FREQUENCIES[language][ 0 : FREQUENCIES[language].index(character) - ] # type: List[str] - characters_after_source = FREQUENCIES[language][ + ] + characters_after_source: List[str] = FREQUENCIES[language][ FREQUENCIES[language].index(character) : - ] # type: List[str] - - characters_before = ordered_characters[ + ] + characters_before: List[str] = ordered_characters[ 0 : ordered_characters.index(character) - ] # type: List[str] - characters_after = ordered_characters[ + ] + characters_after: List[str] = ordered_characters[ ordered_characters.index(character) : - ] # type: List[str] + ] - before_match_count = [ - e in characters_before for e in characters_before_source - ].count( - True - ) # type: int - after_match_count = [ - e in characters_after for e in characters_after_source - ].count( - True - ) # type: int + before_match_count: int = len( + set(characters_before) & set(characters_before_source) + ) + + after_match_count: int = len( + set(characters_after) & set(characters_after_source) + ) if len(characters_before_source) == 0 and before_match_count <= 4: character_approved_count += 1 @@ -229,18 +228,18 @@ def alpha_unicode_split(decoded_sequence: str) -> List[str]: Ex. a text containing English/Latin with a bit a Hebrew will return two items in the resulting list; One containing the latin letters and the other hebrew. """ - layers = OrderedDict() # type: Dict[str, str] + layers: Dict[str, str] = {} for character in decoded_sequence: if character.isalpha() is False: continue - character_range = unicode_range(character) # type: Optional[str] + character_range: Optional[str] = unicode_range(character) if character_range is None: continue - layer_target_range = None # type: Optional[str] + layer_target_range: Optional[str] = None for discovered_range in layers: if ( @@ -267,7 +266,7 @@ def merge_coherence_ratios(results: List[CoherenceMatches]) -> CoherenceMatches: This function merge results previously given by the function coherence_ratio. The return type is the same as coherence_ratio. """ - per_language_ratios = OrderedDict() # type: Dict[str, List[float]] + per_language_ratios: Dict[str, List[float]] = {} for result in results: for sub_result in result: language, ratio = sub_result @@ -299,10 +298,10 @@ def coherence_ratio( A layer = Character extraction by alphabets/ranges. """ - results = [] # type: List[Tuple[str, float]] - ignore_non_latin = False # type: bool + results: List[Tuple[str, float]] = [] + ignore_non_latin: bool = False - sufficient_match_count = 0 # type: int + sufficient_match_count: int = 0 lg_inclusion_list = lg_inclusion.split(",") if lg_inclusion is not None else [] if "Latin Based" in lg_inclusion_list: @@ -310,22 +309,22 @@ def coherence_ratio( lg_inclusion_list.remove("Latin Based") for layer in alpha_unicode_split(decoded_sequence): - sequence_frequencies = Counter(layer) # type: Counter + sequence_frequencies: TypeCounter[str] = Counter(layer) most_common = sequence_frequencies.most_common() - character_count = sum(o for c, o in most_common) # type: int + character_count: int = sum(o for c, o in most_common) if character_count <= TOO_SMALL_SEQUENCE: continue - popular_character_ordered = [c for c, o in most_common] # type: List[str] + popular_character_ordered: List[str] = [c for c, o in most_common] for language in lg_inclusion_list or alphabet_languages( popular_character_ordered, ignore_non_latin ): - ratio = characters_popularity_compare( + ratio: float = characters_popularity_compare( language, popular_character_ordered - ) # type: float + ) if ratio < threshold: continue diff --git a/lib/charset_normalizer/cli/normalizer.py b/lib/charset_normalizer/cli/normalizer.py index 5f912c92..b8b652a5 100644 --- a/lib/charset_normalizer/cli/normalizer.py +++ b/lib/charset_normalizer/cli/normalizer.py @@ -3,7 +3,12 @@ import sys from json import dumps from os.path import abspath from platform import python_version -from typing import List +from typing import List, Optional + +try: + from unicodedata2 import unidata_version +except ImportError: + from unicodedata import unidata_version from charset_normalizer import from_fp from charset_normalizer.models import CliDetectionResult @@ -43,7 +48,7 @@ def query_yes_no(question: str, default: str = "yes") -> bool: sys.stdout.write("Please respond with 'yes' or 'no' " "(or 'y' or 'n').\n") -def cli_detect(argv: List[str] = None) -> int: +def cli_detect(argv: Optional[List[str]] = None) -> int: """ CLI assistant using ARGV and ArgumentParser :param argv: @@ -111,7 +116,7 @@ def cli_detect(argv: List[str] = None) -> int: "-t", "--threshold", action="store", - default=0.1, + default=0.2, type=float, dest="threshold", help="Define a custom maximum amount of chaos allowed in decoded content. 0. <= chaos <= 1.", @@ -119,8 +124,8 @@ def cli_detect(argv: List[str] = None) -> int: parser.add_argument( "--version", action="version", - version="Charset-Normalizer {} - Python {}".format( - __version__, python_version() + version="Charset-Normalizer {} - Python {} - Unicode {}".format( + __version__, python_version(), unidata_version ), help="Show version information and exit.", ) @@ -229,7 +234,7 @@ def cli_detect(argv: List[str] = None) -> int: my_file.close() continue - o_ = my_file.name.split(".") # type: List[str] + o_: List[str] = my_file.name.split(".") if args.replace is False: o_.insert(-1, best_guess.encoding) diff --git a/lib/charset_normalizer/constant.py b/lib/charset_normalizer/constant.py index c32f5cf2..ac840c46 100644 --- a/lib/charset_normalizer/constant.py +++ b/lib/charset_normalizer/constant.py @@ -1,5 +1,4 @@ from codecs import BOM_UTF8, BOM_UTF16_BE, BOM_UTF16_LE, BOM_UTF32_BE, BOM_UTF32_LE -from collections import OrderedDict from encodings.aliases import aliases from re import IGNORECASE, compile as re_compile from typing import Dict, List, Set, Union @@ -7,31 +6,26 @@ from typing import Dict, List, Set, Union from .assets import FREQUENCIES # Contain for each eligible encoding a list of/item bytes SIG/BOM -ENCODING_MARKS = OrderedDict( - [ - ("utf_8", BOM_UTF8), - ( - "utf_7", - [ - b"\x2b\x2f\x76\x38", - b"\x2b\x2f\x76\x39", - b"\x2b\x2f\x76\x2b", - b"\x2b\x2f\x76\x2f", - b"\x2b\x2f\x76\x38\x2d", - ], - ), - ("gb18030", b"\x84\x31\x95\x33"), - ("utf_32", [BOM_UTF32_BE, BOM_UTF32_LE]), - ("utf_16", [BOM_UTF16_BE, BOM_UTF16_LE]), - ] -) # type: Dict[str, Union[bytes, List[bytes]]] +ENCODING_MARKS: Dict[str, Union[bytes, List[bytes]]] = { + "utf_8": BOM_UTF8, + "utf_7": [ + b"\x2b\x2f\x76\x38", + b"\x2b\x2f\x76\x39", + b"\x2b\x2f\x76\x2b", + b"\x2b\x2f\x76\x2f", + b"\x2b\x2f\x76\x38\x2d", + ], + "gb18030": b"\x84\x31\x95\x33", + "utf_32": [BOM_UTF32_BE, BOM_UTF32_LE], + "utf_16": [BOM_UTF16_BE, BOM_UTF16_LE], +} -TOO_SMALL_SEQUENCE = 32 # type: int -TOO_BIG_SEQUENCE = int(10e6) # type: int +TOO_SMALL_SEQUENCE: int = 32 +TOO_BIG_SEQUENCE: int = int(10e6) -UTF8_MAXIMAL_ALLOCATION = 1112064 # type: int +UTF8_MAXIMAL_ALLOCATION: int = 1112064 -UNICODE_RANGES_COMBINED = { +UNICODE_RANGES_COMBINED: Dict[str, range] = { "Control character": range(31 + 1), "Basic Latin": range(32, 127 + 1), "Latin-1 Supplement": range(128, 255 + 1), @@ -311,10 +305,10 @@ UNICODE_RANGES_COMBINED = { "CJK Compatibility Ideographs Supplement": range(194560, 195103 + 1), "Tags": range(917504, 917631 + 1), "Variation Selectors Supplement": range(917760, 917999 + 1), -} # type: Dict[str, range] +} -UNICODE_SECONDARY_RANGE_KEYWORD = [ +UNICODE_SECONDARY_RANGE_KEYWORD: List[str] = [ "Supplement", "Extended", "Extensions", @@ -330,25 +324,25 @@ UNICODE_SECONDARY_RANGE_KEYWORD = [ "Shapes", "Supplemental", "Tags", -] # type: List[str] +] RE_POSSIBLE_ENCODING_INDICATION = re_compile( r"(?:(?:encoding)|(?:charset)|(?:coding))(?:[\:= ]{1,10})(?:[\"\']?)([a-zA-Z0-9\-_]+)(?:[\"\']?)", IGNORECASE, ) -IANA_SUPPORTED = sorted( +IANA_SUPPORTED: List[str] = sorted( filter( lambda x: x.endswith("_codec") is False and x not in {"rot_13", "tactis", "mbcs"}, list(set(aliases.values())), ) -) # type: List[str] +) -IANA_SUPPORTED_COUNT = len(IANA_SUPPORTED) # type: int +IANA_SUPPORTED_COUNT: int = len(IANA_SUPPORTED) # pre-computed code page that are similar using the function cp_similarity. -IANA_SUPPORTED_SIMILAR = { +IANA_SUPPORTED_SIMILAR: Dict[str, List[str]] = { "cp037": ["cp1026", "cp1140", "cp273", "cp500"], "cp1026": ["cp037", "cp1140", "cp273", "cp500"], "cp1125": ["cp866"], @@ -434,10 +428,10 @@ IANA_SUPPORTED_SIMILAR = { "mac_turkish": ["mac_iceland", "mac_roman"], "ptcp154": ["cp1251", "kz1048"], "tis_620": ["iso8859_11"], -} # type: Dict[str, List[str]] +} -CHARDET_CORRESPONDENCE = { +CHARDET_CORRESPONDENCE: Dict[str, str] = { "iso2022_kr": "ISO-2022-KR", "iso2022_jp": "ISO-2022-JP", "euc_kr": "EUC-KR", @@ -470,10 +464,10 @@ CHARDET_CORRESPONDENCE = { "cp1256": "windows-1256", "cp1254": "Windows-1254", "cp949": "CP949", -} # type: Dict[str, str] +} -COMMON_SAFE_ASCII_CHARACTERS = { +COMMON_SAFE_ASCII_CHARACTERS: Set[str] = { "<", ">", "=", @@ -489,15 +483,15 @@ COMMON_SAFE_ASCII_CHARACTERS = { "|", '"', "-", -} # type: Set[str] +} -KO_NAMES = {"johab", "cp949", "euc_kr"} # type: Set[str] -ZH_NAMES = {"big5", "cp950", "big5hkscs", "hz"} # type: Set[str] +KO_NAMES: Set[str] = {"johab", "cp949", "euc_kr"} +ZH_NAMES: Set[str] = {"big5", "cp950", "big5hkscs", "hz"} NOT_PRINTABLE_PATTERN = re_compile(r"[0-9\W\n\r\t]+") -LANGUAGE_SUPPORTED_COUNT = len(FREQUENCIES) # type: int +LANGUAGE_SUPPORTED_COUNT: int = len(FREQUENCIES) # Logging LEVEL bellow DEBUG -TRACE = 5 # type: int +TRACE: int = 5 diff --git a/lib/charset_normalizer/md.py b/lib/charset_normalizer/md.py index f3d6505c..31808af8 100644 --- a/lib/charset_normalizer/md.py +++ b/lib/charset_normalizer/md.py @@ -16,6 +16,7 @@ from .utils import ( is_separator, is_symbol, is_thai, + is_unprintable, remove_accent, unicode_range, ) @@ -57,12 +58,12 @@ class MessDetectorPlugin: class TooManySymbolOrPunctuationPlugin(MessDetectorPlugin): def __init__(self) -> None: - self._punctuation_count = 0 # type: int - self._symbol_count = 0 # type: int - self._character_count = 0 # type: int + self._punctuation_count: int = 0 + self._symbol_count: int = 0 + self._character_count: int = 0 - self._last_printable_char = None # type: Optional[str] - self._frenzy_symbol_in_word = False # type: bool + self._last_printable_char: Optional[str] = None + self._frenzy_symbol_in_word: bool = False def eligible(self, character: str) -> bool: return character.isprintable() @@ -95,17 +96,17 @@ class TooManySymbolOrPunctuationPlugin(MessDetectorPlugin): if self._character_count == 0: return 0.0 - ratio_of_punctuation = ( + ratio_of_punctuation: float = ( self._punctuation_count + self._symbol_count - ) / self._character_count # type: float + ) / self._character_count return ratio_of_punctuation if ratio_of_punctuation >= 0.3 else 0.0 class TooManyAccentuatedPlugin(MessDetectorPlugin): def __init__(self) -> None: - self._character_count = 0 # type: int - self._accentuated_count = 0 # type: int + self._character_count: int = 0 + self._accentuated_count: int = 0 def eligible(self, character: str) -> bool: return character.isalpha() @@ -124,26 +125,20 @@ class TooManyAccentuatedPlugin(MessDetectorPlugin): def ratio(self) -> float: if self._character_count == 0: return 0.0 - ratio_of_accentuation = ( - self._accentuated_count / self._character_count - ) # type: float + ratio_of_accentuation: float = self._accentuated_count / self._character_count return ratio_of_accentuation if ratio_of_accentuation >= 0.35 else 0.0 class UnprintablePlugin(MessDetectorPlugin): def __init__(self) -> None: - self._unprintable_count = 0 # type: int - self._character_count = 0 # type: int + self._unprintable_count: int = 0 + self._character_count: int = 0 def eligible(self, character: str) -> bool: return True def feed(self, character: str) -> None: - if ( - character.isspace() is False # includes \n \t \r \v - and character.isprintable() is False - and character != "\x1A" # Why? Its the ASCII substitute character. - ): + if is_unprintable(character): self._unprintable_count += 1 self._character_count += 1 @@ -160,10 +155,10 @@ class UnprintablePlugin(MessDetectorPlugin): class SuspiciousDuplicateAccentPlugin(MessDetectorPlugin): def __init__(self) -> None: - self._successive_count = 0 # type: int - self._character_count = 0 # type: int + self._successive_count: int = 0 + self._character_count: int = 0 - self._last_latin_character = None # type: Optional[str] + self._last_latin_character: Optional[str] = None def eligible(self, character: str) -> bool: return character.isalpha() and is_latin(character) @@ -197,9 +192,9 @@ class SuspiciousDuplicateAccentPlugin(MessDetectorPlugin): class SuspiciousRange(MessDetectorPlugin): def __init__(self) -> None: - self._suspicious_successive_range_count = 0 # type: int - self._character_count = 0 # type: int - self._last_printable_seen = None # type: Optional[str] + self._suspicious_successive_range_count: int = 0 + self._character_count: int = 0 + self._last_printable_seen: Optional[str] = None def eligible(self, character: str) -> bool: return character.isprintable() @@ -219,10 +214,8 @@ class SuspiciousRange(MessDetectorPlugin): self._last_printable_seen = character return - unicode_range_a = unicode_range( - self._last_printable_seen - ) # type: Optional[str] - unicode_range_b = unicode_range(character) # type: Optional[str] + unicode_range_a: Optional[str] = unicode_range(self._last_printable_seen) + unicode_range_b: Optional[str] = unicode_range(character) if is_suspiciously_successive_range(unicode_range_a, unicode_range_b): self._suspicious_successive_range_count += 1 @@ -239,9 +232,9 @@ class SuspiciousRange(MessDetectorPlugin): if self._character_count == 0: return 0.0 - ratio_of_suspicious_range_usage = ( + ratio_of_suspicious_range_usage: float = ( self._suspicious_successive_range_count * 2 - ) / self._character_count # type: float + ) / self._character_count if ratio_of_suspicious_range_usage < 0.1: return 0.0 @@ -251,25 +244,25 @@ class SuspiciousRange(MessDetectorPlugin): class SuperWeirdWordPlugin(MessDetectorPlugin): def __init__(self) -> None: - self._word_count = 0 # type: int - self._bad_word_count = 0 # type: int - self._foreign_long_count = 0 # type: int + self._word_count: int = 0 + self._bad_word_count: int = 0 + self._foreign_long_count: int = 0 - self._is_current_word_bad = False # type: bool - self._foreign_long_watch = False # type: bool + self._is_current_word_bad: bool = False + self._foreign_long_watch: bool = False - self._character_count = 0 # type: int - self._bad_character_count = 0 # type: int + self._character_count: int = 0 + self._bad_character_count: int = 0 - self._buffer = "" # type: str - self._buffer_accent_count = 0 # type: int + self._buffer: str = "" + self._buffer_accent_count: int = 0 def eligible(self, character: str) -> bool: return True def feed(self, character: str) -> None: if character.isalpha(): - self._buffer = "".join([self._buffer, character]) + self._buffer += character if is_accentuated(character): self._buffer_accent_count += 1 if ( @@ -289,7 +282,7 @@ class SuperWeirdWordPlugin(MessDetectorPlugin): character.isspace() or is_punctuation(character) or is_separator(character) ) and self._buffer: self._word_count += 1 - buffer_length = len(self._buffer) # type: int + buffer_length: int = len(self._buffer) self._character_count += buffer_length @@ -346,8 +339,8 @@ class CjkInvalidStopPlugin(MessDetectorPlugin): """ def __init__(self) -> None: - self._wrong_stop_count = 0 # type: int - self._cjk_character_count = 0 # type: int + self._wrong_stop_count: int = 0 + self._cjk_character_count: int = 0 def eligible(self, character: str) -> bool: return True @@ -372,17 +365,17 @@ class CjkInvalidStopPlugin(MessDetectorPlugin): class ArchaicUpperLowerPlugin(MessDetectorPlugin): def __init__(self) -> None: - self._buf = False # type: bool + self._buf: bool = False - self._character_count_since_last_sep = 0 # type: int + self._character_count_since_last_sep: int = 0 - self._successive_upper_lower_count = 0 # type: int - self._successive_upper_lower_count_final = 0 # type: int + self._successive_upper_lower_count: int = 0 + self._successive_upper_lower_count_final: int = 0 - self._character_count = 0 # type: int + self._character_count: int = 0 - self._last_alpha_seen = None # type: Optional[str] - self._current_ascii_only = True # type: bool + self._last_alpha_seen: Optional[str] = None + self._current_ascii_only: bool = True def eligible(self, character: str) -> bool: return True @@ -446,6 +439,7 @@ class ArchaicUpperLowerPlugin(MessDetectorPlugin): return self._successive_upper_lower_count_final / self._character_count +@lru_cache(maxsize=1024) def is_suspiciously_successive_range( unicode_range_a: Optional[str], unicode_range_b: Optional[str] ) -> bool: @@ -524,16 +518,16 @@ def mess_ratio( Compute a mess ratio given a decoded bytes sequence. The maximum threshold does stop the computation earlier. """ - detectors = [ + detectors: List[MessDetectorPlugin] = [ md_class() for md_class in MessDetectorPlugin.__subclasses__() - ] # type: List[MessDetectorPlugin] + ] - length = len(decoded_sequence) + 1 # type: int + length: int = len(decoded_sequence) + 1 - mean_mess_ratio = 0.0 # type: float + mean_mess_ratio: float = 0.0 if length < 512: - intermediary_mean_mess_ratio_calc = 32 # type: int + intermediary_mean_mess_ratio_calc: int = 32 elif length <= 1024: intermediary_mean_mess_ratio_calc = 64 else: diff --git a/lib/charset_normalizer/models.py b/lib/charset_normalizer/models.py index c38da31f..ccb0d475 100644 --- a/lib/charset_normalizer/models.py +++ b/lib/charset_normalizer/models.py @@ -4,7 +4,16 @@ from encodings.aliases import aliases from hashlib import sha256 from json import dumps from re import sub -from typing import Any, Dict, Iterator, List, Optional, Tuple, Union +from typing import ( + Any, + Counter as TypeCounter, + Dict, + Iterator, + List, + Optional, + Tuple, + Union, +) from .constant import NOT_PRINTABLE_PATTERN, TOO_BIG_SEQUENCE from .md import mess_ratio @@ -21,21 +30,21 @@ class CharsetMatch: languages: "CoherenceMatches", decoded_payload: Optional[str] = None, ): - self._payload = payload # type: bytes + self._payload: bytes = payload - self._encoding = guessed_encoding # type: str - self._mean_mess_ratio = mean_mess_ratio # type: float - self._languages = languages # type: CoherenceMatches - self._has_sig_or_bom = has_sig_or_bom # type: bool - self._unicode_ranges = None # type: Optional[List[str]] + self._encoding: str = guessed_encoding + self._mean_mess_ratio: float = mean_mess_ratio + self._languages: CoherenceMatches = languages + self._has_sig_or_bom: bool = has_sig_or_bom + self._unicode_ranges: Optional[List[str]] = None - self._leaves = [] # type: List[CharsetMatch] - self._mean_coherence_ratio = 0.0 # type: float + self._leaves: List[CharsetMatch] = [] + self._mean_coherence_ratio: float = 0.0 - self._output_payload = None # type: Optional[bytes] - self._output_encoding = None # type: Optional[str] + self._output_payload: Optional[bytes] = None + self._output_encoding: Optional[str] = None - self._string = decoded_payload # type: Optional[str] + self._string: Optional[str] = decoded_payload def __eq__(self, other: object) -> bool: if not isinstance(other, CharsetMatch): @@ -53,8 +62,8 @@ class CharsetMatch: if not isinstance(other, CharsetMatch): raise ValueError - chaos_difference = abs(self.chaos - other.chaos) # type: float - coherence_difference = abs(self.coherence - other.coherence) # type: float + chaos_difference: float = abs(self.chaos - other.chaos) + coherence_difference: float = abs(self.coherence - other.coherence) # Bellow 1% difference --> Use Coherence if chaos_difference < 0.01 and coherence_difference > 0.02: @@ -95,7 +104,7 @@ class CharsetMatch: return 0.0 @property - def w_counter(self) -> Counter: + def w_counter(self) -> TypeCounter[str]: """ Word counter instance on decoded text. Notice: Will be removed in 3.0 @@ -137,7 +146,7 @@ class CharsetMatch: """ Encoding name are known by many name, using this could help when searching for IBM855 when it's listed as CP855. """ - also_known_as = [] # type: List[str] + also_known_as: List[str] = [] for u, p in aliases.items(): if self.encoding == u: also_known_as.append(p) @@ -227,9 +236,9 @@ class CharsetMatch: if self._unicode_ranges is not None: return self._unicode_ranges # list detected ranges - detected_ranges = [ + detected_ranges: List[Optional[str]] = [ unicode_range(char) for char in str(self) - ] # type: List[Optional[str]] + ] # filter and sort self._unicode_ranges = sorted(list({r for r in detected_ranges if r})) return self._unicode_ranges @@ -280,8 +289,8 @@ class CharsetMatches: Act like a list(iterable) but does not implements all related methods. """ - def __init__(self, results: List[CharsetMatch] = None): - self._results = sorted(results) if results else [] # type: List[CharsetMatch] + def __init__(self, results: Optional[List[CharsetMatch]] = None): + self._results: List[CharsetMatch] = sorted(results) if results else [] def __iter__(self) -> Iterator[CharsetMatch]: yield from self._results @@ -360,17 +369,17 @@ class CliDetectionResult: unicode_path: Optional[str], is_preferred: bool, ): - self.path = path # type: str - self.unicode_path = unicode_path # type: Optional[str] - self.encoding = encoding # type: Optional[str] - self.encoding_aliases = encoding_aliases # type: List[str] - self.alternative_encodings = alternative_encodings # type: List[str] - self.language = language # type: str - self.alphabets = alphabets # type: List[str] - self.has_sig_or_bom = has_sig_or_bom # type: bool - self.chaos = chaos # type: float - self.coherence = coherence # type: float - self.is_preferred = is_preferred # type: bool + self.path: str = path + self.unicode_path: Optional[str] = unicode_path + self.encoding: Optional[str] = encoding + self.encoding_aliases: List[str] = encoding_aliases + self.alternative_encodings: List[str] = alternative_encodings + self.language: str = language + self.alphabets: List[str] = alphabets + self.has_sig_or_bom: bool = has_sig_or_bom + self.chaos: float = chaos + self.coherence: float = coherence + self.is_preferred: bool = is_preferred @property def __dict__(self) -> Dict[str, Any]: # type: ignore diff --git a/lib/charset_normalizer/utils.py b/lib/charset_normalizer/utils.py index dcb14dfe..859f212b 100644 --- a/lib/charset_normalizer/utils.py +++ b/lib/charset_normalizer/utils.py @@ -1,4 +1,6 @@ try: + # WARNING: unicodedata2 support is going to be removed in 3.0 + # Python is quickly catching up. import unicodedata2 as unicodedata except ImportError: import unicodedata # type: ignore[no-redef] @@ -9,9 +11,9 @@ from codecs import IncrementalDecoder from encodings.aliases import aliases from functools import lru_cache from re import findall -from typing import List, Optional, Set, Tuple, Union +from typing import Generator, List, Optional, Set, Tuple, Union -from _multibytecodec import MultibyteIncrementalDecoder # type: ignore +from _multibytecodec import MultibyteIncrementalDecoder from .constant import ( ENCODING_MARKS, @@ -26,7 +28,7 @@ from .constant import ( @lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) def is_accentuated(character: str) -> bool: try: - description = unicodedata.name(character) # type: str + description: str = unicodedata.name(character) except ValueError: return False return ( @@ -41,11 +43,11 @@ def is_accentuated(character: str) -> bool: @lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) def remove_accent(character: str) -> str: - decomposed = unicodedata.decomposition(character) # type: str + decomposed: str = unicodedata.decomposition(character) if not decomposed: return character - codes = decomposed.split(" ") # type: List[str] + codes: List[str] = decomposed.split(" ") return chr(int(codes[0], 16)) @@ -55,7 +57,7 @@ def unicode_range(character: str) -> Optional[str]: """ Retrieve the Unicode range official name from a single character. """ - character_ord = ord(character) # type: int + character_ord: int = ord(character) for range_name, ord_range in UNICODE_RANGES_COMBINED.items(): if character_ord in ord_range: @@ -67,12 +69,13 @@ def unicode_range(character: str) -> Optional[str]: @lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) def is_latin(character: str) -> bool: try: - description = unicodedata.name(character) # type: str + description: str = unicodedata.name(character) except ValueError: return False return "LATIN" in description +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) def is_ascii(character: str) -> bool: try: character.encode("ascii") @@ -83,12 +86,12 @@ def is_ascii(character: str) -> bool: @lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) def is_punctuation(character: str) -> bool: - character_category = unicodedata.category(character) # type: str + character_category: str = unicodedata.category(character) if "P" in character_category: return True - character_range = unicode_range(character) # type: Optional[str] + character_range: Optional[str] = unicode_range(character) if character_range is None: return False @@ -98,12 +101,12 @@ def is_punctuation(character: str) -> bool: @lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) def is_symbol(character: str) -> bool: - character_category = unicodedata.category(character) # type: str + character_category: str = unicodedata.category(character) if "S" in character_category or "N" in character_category: return True - character_range = unicode_range(character) # type: Optional[str] + character_range: Optional[str] = unicode_range(character) if character_range is None: return False @@ -113,7 +116,7 @@ def is_symbol(character: str) -> bool: @lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) def is_emoticon(character: str) -> bool: - character_range = unicode_range(character) # type: Optional[str] + character_range: Optional[str] = unicode_range(character) if character_range is None: return False @@ -126,7 +129,7 @@ def is_separator(character: str) -> bool: if character.isspace() or character in {"|", "+", ",", ";", "<", ">"}: return True - character_category = unicodedata.category(character) # type: str + character_category: str = unicodedata.category(character) return "Z" in character_category @@ -137,7 +140,7 @@ def is_case_variable(character: str) -> bool: def is_private_use_only(character: str) -> bool: - character_category = unicodedata.category(character) # type: str + character_category: str = unicodedata.category(character) return character_category == "Co" @@ -197,6 +200,17 @@ def is_unicode_range_secondary(range_name: str) -> bool: return any(keyword in range_name for keyword in UNICODE_SECONDARY_RANGE_KEYWORD) +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_unprintable(character: str) -> bool: + return ( + character.isspace() is False # includes \n \t \r \v + and character.isprintable() is False + and character != "\x1A" # Why? Its the ASCII substitute character. + and character != "\ufeff" # bug discovered in Python, + # Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space. + ) + + def any_specified_encoding(sequence: bytes, search_zone: int = 4096) -> Optional[str]: """ Extract using ASCII-only decoder any specified encoding in the first n-bytes. @@ -204,12 +218,12 @@ def any_specified_encoding(sequence: bytes, search_zone: int = 4096) -> Optional if not isinstance(sequence, bytes): raise TypeError - seq_len = len(sequence) # type: int + seq_len: int = len(sequence) - results = findall( + results: List[str] = findall( RE_POSSIBLE_ENCODING_INDICATION, sequence[: min(seq_len, search_zone)].decode("ascii", errors="ignore"), - ) # type: List[str] + ) if len(results) == 0: return None @@ -217,6 +231,9 @@ def any_specified_encoding(sequence: bytes, search_zone: int = 4096) -> Optional for specified_encoding in results: specified_encoding = specified_encoding.lower().replace("-", "_") + encoding_alias: str + encoding_iana: str + for encoding_alias, encoding_iana in aliases.items(): if encoding_alias == specified_encoding: return encoding_iana @@ -242,7 +259,7 @@ def is_multi_byte_encoding(name: str) -> bool: "utf_32_be", "utf_7", } or issubclass( - importlib.import_module("encodings.{}".format(name)).IncrementalDecoder, # type: ignore + importlib.import_module("encodings.{}".format(name)).IncrementalDecoder, MultibyteIncrementalDecoder, ) @@ -253,7 +270,7 @@ def identify_sig_or_bom(sequence: bytes) -> Tuple[Optional[str], bytes]: """ for iana_encoding in ENCODING_MARKS: - marks = ENCODING_MARKS[iana_encoding] # type: Union[bytes, List[bytes]] + marks: Union[bytes, List[bytes]] = ENCODING_MARKS[iana_encoding] if isinstance(marks, bytes): marks = [marks] @@ -272,6 +289,9 @@ def should_strip_sig_or_bom(iana_encoding: str) -> bool: def iana_name(cp_name: str, strict: bool = True) -> str: cp_name = cp_name.lower().replace("-", "_") + encoding_alias: str + encoding_iana: str + for encoding_alias, encoding_iana in aliases.items(): if cp_name in [encoding_alias, encoding_iana]: return encoding_iana @@ -283,10 +303,10 @@ def iana_name(cp_name: str, strict: bool = True) -> str: def range_scan(decoded_sequence: str) -> List[str]: - ranges = set() # type: Set[str] + ranges: Set[str] = set() for character in decoded_sequence: - character_range = unicode_range(character) # type: Optional[str] + character_range: Optional[str] = unicode_range(character) if character_range is None: continue @@ -301,16 +321,20 @@ def cp_similarity(iana_name_a: str, iana_name_b: str) -> float: if is_multi_byte_encoding(iana_name_a) or is_multi_byte_encoding(iana_name_b): return 0.0 - decoder_a = importlib.import_module("encodings.{}".format(iana_name_a)).IncrementalDecoder # type: ignore - decoder_b = importlib.import_module("encodings.{}".format(iana_name_b)).IncrementalDecoder # type: ignore + decoder_a = importlib.import_module( + "encodings.{}".format(iana_name_a) + ).IncrementalDecoder + decoder_b = importlib.import_module( + "encodings.{}".format(iana_name_b) + ).IncrementalDecoder - id_a = decoder_a(errors="ignore") # type: IncrementalDecoder - id_b = decoder_b(errors="ignore") # type: IncrementalDecoder + id_a: IncrementalDecoder = decoder_a(errors="ignore") + id_b: IncrementalDecoder = decoder_b(errors="ignore") - character_match_count = 0 # type: int + character_match_count: int = 0 for i in range(255): - to_be_decoded = bytes([i]) # type: bytes + to_be_decoded: bytes = bytes([i]) if id_a.decode(to_be_decoded) == id_b.decode(to_be_decoded): character_match_count += 1 @@ -340,3 +364,61 @@ def set_logging_handler( handler = logging.StreamHandler() handler.setFormatter(logging.Formatter(format_string)) logger.addHandler(handler) + + +def cut_sequence_chunks( + sequences: bytes, + encoding_iana: str, + offsets: range, + chunk_size: int, + bom_or_sig_available: bool, + strip_sig_or_bom: bool, + sig_payload: bytes, + is_multi_byte_decoder: bool, + decoded_payload: Optional[str] = None, +) -> Generator[str, None, None]: + + if decoded_payload and is_multi_byte_decoder is False: + for i in offsets: + chunk = decoded_payload[i : i + chunk_size] + if not chunk: + break + yield chunk + else: + for i in offsets: + chunk_end = i + chunk_size + if chunk_end > len(sequences) + 8: + continue + + cut_sequence = sequences[i : i + chunk_size] + + if bom_or_sig_available and strip_sig_or_bom is False: + cut_sequence = sig_payload + cut_sequence + + chunk = cut_sequence.decode( + encoding_iana, + errors="ignore" if is_multi_byte_decoder else "strict", + ) + + # multi-byte bad cutting detector and adjustment + # not the cleanest way to perform that fix but clever enough for now. + if is_multi_byte_decoder and i > 0 and sequences[i] >= 0x80: + + chunk_partial_size_chk: int = min(chunk_size, 16) + + if ( + decoded_payload + and chunk[:chunk_partial_size_chk] not in decoded_payload + ): + for j in range(i, i - 4, -1): + cut_sequence = sequences[j:chunk_end] + + if bom_or_sig_available and strip_sig_or_bom is False: + cut_sequence = sig_payload + cut_sequence + + chunk = cut_sequence.decode(encoding_iana, errors="ignore") + + if chunk[:chunk_partial_size_chk] in decoded_payload: + break + + yield chunk diff --git a/lib/charset_normalizer/version.py b/lib/charset_normalizer/version.py index 77cfff25..64c0dbde 100644 --- a/lib/charset_normalizer/version.py +++ b/lib/charset_normalizer/version.py @@ -2,5 +2,5 @@ Expose version """ -__version__ = "2.0.12" +__version__ = "2.1.1" VERSION = __version__.split(".") diff --git a/lib/idna/core.py b/lib/idna/core.py index 55ab9678..4f300371 100644 --- a/lib/idna/core.py +++ b/lib/idna/core.py @@ -339,7 +339,10 @@ def uts46_remap(domain: str, std3_rules: bool = True, transitional: bool = False def encode(s: Union[str, bytes, bytearray], strict: bool = False, uts46: bool = False, std3_rules: bool = False, transitional: bool = False) -> bytes: if isinstance(s, (bytes, bytearray)): - s = s.decode('ascii') + try: + s = s.decode('ascii') + except UnicodeDecodeError: + raise IDNAError('should pass a unicode string to the function rather than a byte string.') if uts46: s = uts46_remap(s, std3_rules, transitional) trailing_dot = False diff --git a/lib/idna/idnadata.py b/lib/idna/idnadata.py index 1b5805d1..67db4625 100644 --- a/lib/idna/idnadata.py +++ b/lib/idna/idnadata.py @@ -1,6 +1,6 @@ # This file is automatically generated by tools/idna-data -__version__ = '14.0.0' +__version__ = '15.0.0' scripts = { 'Greek': ( 0x37000000374, @@ -55,12 +55,13 @@ scripts = { 0x16fe200016fe4, 0x16ff000016ff2, 0x200000002a6e0, - 0x2a7000002b739, + 0x2a7000002b73a, 0x2b7400002b81e, 0x2b8200002cea2, 0x2ceb00002ebe1, 0x2f8000002fa1e, 0x300000003134b, + 0x31350000323b0, ), 'Hebrew': ( 0x591000005c8, @@ -77,6 +78,7 @@ scripts = { 0x304100003097, 0x309d000030a0, 0x1b0010001b120, + 0x1b1320001b133, 0x1b1500001b153, 0x1f2000001f201, ), @@ -93,6 +95,7 @@ scripts = { 0x1affd0001afff, 0x1b0000001b001, 0x1b1200001b123, + 0x1b1550001b156, 0x1b1640001b168, ), } @@ -1331,7 +1334,7 @@ codepoint_classes = { 0xcdd00000cdf, 0xce000000ce4, 0xce600000cf0, - 0xcf100000cf3, + 0xcf100000cf4, 0xd0000000d0d, 0xd0e00000d11, 0xd1200000d45, @@ -1366,7 +1369,7 @@ codepoint_classes = { 0xeb400000ebe, 0xec000000ec5, 0xec600000ec7, - 0xec800000ece, + 0xec800000ecf, 0xed000000eda, 0xede00000ee0, 0xf0000000f01, @@ -1859,7 +1862,7 @@ codepoint_classes = { 0xab200000ab27, 0xab280000ab2f, 0xab300000ab5b, - 0xab600000ab6a, + 0xab600000ab69, 0xabc00000abeb, 0xabec0000abee, 0xabf00000abfa, @@ -1943,7 +1946,7 @@ codepoint_classes = { 0x10e8000010eaa, 0x10eab00010ead, 0x10eb000010eb2, - 0x10f0000010f1d, + 0x10efd00010f1d, 0x10f2700010f28, 0x10f3000010f51, 0x10f7000010f86, @@ -1966,7 +1969,7 @@ codepoint_classes = { 0x111dc000111dd, 0x1120000011212, 0x1121300011238, - 0x1123e0001123f, + 0x1123e00011242, 0x1128000011287, 0x1128800011289, 0x1128a0001128e, @@ -2047,11 +2050,16 @@ codepoint_classes = { 0x11d9300011d99, 0x11da000011daa, 0x11ee000011ef7, + 0x11f0000011f11, + 0x11f1200011f3b, + 0x11f3e00011f43, + 0x11f5000011f5a, 0x11fb000011fb1, 0x120000001239a, 0x1248000012544, 0x12f9000012ff1, - 0x130000001342f, + 0x1300000013430, + 0x1344000013456, 0x1440000014647, 0x1680000016a39, 0x16a4000016a5f, @@ -2079,7 +2087,9 @@ codepoint_classes = { 0x1aff50001affc, 0x1affd0001afff, 0x1b0000001b123, + 0x1b1320001b133, 0x1b1500001b153, + 0x1b1550001b156, 0x1b1640001b168, 0x1b1700001b2fc, 0x1bc000001bc6b, @@ -2096,17 +2106,21 @@ codepoint_classes = { 0x1da9b0001daa0, 0x1daa10001dab0, 0x1df000001df1f, + 0x1df250001df2b, 0x1e0000001e007, 0x1e0080001e019, 0x1e01b0001e022, 0x1e0230001e025, 0x1e0260001e02b, + 0x1e0300001e06e, + 0x1e08f0001e090, 0x1e1000001e12d, 0x1e1300001e13e, 0x1e1400001e14a, 0x1e14e0001e14f, 0x1e2900001e2af, 0x1e2c00001e2fa, + 0x1e4d00001e4fa, 0x1e7e00001e7e7, 0x1e7e80001e7ec, 0x1e7ed0001e7ef, @@ -2115,13 +2129,13 @@ codepoint_classes = { 0x1e8d00001e8d7, 0x1e9220001e94c, 0x1e9500001e95a, - 0x1fbf00001fbfa, 0x200000002a6e0, - 0x2a7000002b739, + 0x2a7000002b73a, 0x2b7400002b81e, 0x2b8200002cea2, 0x2ceb00002ebe1, 0x300000003134b, + 0x31350000323b0, ), 'CONTEXTJ': ( 0x200c0000200e, diff --git a/lib/idna/package_data.py b/lib/idna/package_data.py index f5ea87c1..8501893b 100644 --- a/lib/idna/package_data.py +++ b/lib/idna/package_data.py @@ -1,2 +1,2 @@ -__version__ = '3.3' +__version__ = '3.4' diff --git a/lib/idna/uts46data.py b/lib/idna/uts46data.py index 8f65705e..186796c1 100644 --- a/lib/idna/uts46data.py +++ b/lib/idna/uts46data.py @@ -7,7 +7,7 @@ from typing import List, Tuple, Union """IDNA Mapping Table from UTS46.""" -__version__ = '14.0.0' +__version__ = '15.0.0' def _seg_0() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ (0x0, '3'), @@ -1300,7 +1300,7 @@ def _seg_12() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0xCE6, 'V'), (0xCF0, 'X'), (0xCF1, 'V'), - (0xCF3, 'X'), + (0xCF4, 'X'), (0xD00, 'V'), (0xD0D, 'X'), (0xD0E, 'V'), @@ -1368,7 +1368,7 @@ def _seg_13() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0xEC6, 'V'), (0xEC7, 'X'), (0xEC8, 'V'), - (0xECE, 'X'), + (0xECF, 'X'), (0xED0, 'V'), (0xEDA, 'X'), (0xEDC, 'M', 'ຫນ'), @@ -5917,7 +5917,7 @@ def _seg_56() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x10EAE, 'X'), (0x10EB0, 'V'), (0x10EB2, 'X'), - (0x10F00, 'V'), + (0x10EFD, 'V'), (0x10F28, 'X'), (0x10F30, 'V'), (0x10F5A, 'X'), @@ -5956,7 +5956,7 @@ def _seg_57() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x11200, 'V'), (0x11212, 'X'), (0x11213, 'V'), - (0x1123F, 'X'), + (0x11242, 'X'), (0x11280, 'V'), (0x11287, 'X'), (0x11288, 'V'), @@ -6097,6 +6097,8 @@ def _seg_58() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x11AA3, 'X'), (0x11AB0, 'V'), (0x11AF9, 'X'), + (0x11B00, 'V'), + (0x11B0A, 'X'), (0x11C00, 'V'), (0x11C09, 'X'), (0x11C0A, 'V'), @@ -6139,13 +6141,19 @@ def _seg_58() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x11DAA, 'X'), (0x11EE0, 'V'), (0x11EF9, 'X'), - (0x11FB0, 'V'), - (0x11FB1, 'X'), - (0x11FC0, 'V'), + (0x11F00, 'V'), ] def _seg_59() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: return [ + (0x11F11, 'X'), + (0x11F12, 'V'), + (0x11F3B, 'X'), + (0x11F3E, 'V'), + (0x11F5A, 'X'), + (0x11FB0, 'V'), + (0x11FB1, 'X'), + (0x11FC0, 'V'), (0x11FF2, 'X'), (0x11FFF, 'V'), (0x1239A, 'X'), @@ -6158,7 +6166,9 @@ def _seg_59() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x12F90, 'V'), (0x12FF3, 'X'), (0x13000, 'V'), - (0x1342F, 'X'), + (0x13430, 'X'), + (0x13440, 'V'), + (0x13456, 'X'), (0x14400, 'V'), (0x14647, 'X'), (0x16800, 'V'), @@ -6236,6 +6246,10 @@ def _seg_59() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x18D00, 'V'), (0x18D09, 'X'), (0x1AFF0, 'V'), + ] + +def _seg_60() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ (0x1AFF4, 'X'), (0x1AFF5, 'V'), (0x1AFFC, 'X'), @@ -6243,13 +6257,13 @@ def _seg_59() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x1AFFF, 'X'), (0x1B000, 'V'), (0x1B123, 'X'), + (0x1B132, 'V'), + (0x1B133, 'X'), (0x1B150, 'V'), (0x1B153, 'X'), + (0x1B155, 'V'), + (0x1B156, 'X'), (0x1B164, 'V'), - ] - -def _seg_60() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: - return [ (0x1B168, 'X'), (0x1B170, 'V'), (0x1B2FC, 'X'), @@ -6295,6 +6309,8 @@ def _seg_60() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x1D1EB, 'X'), (0x1D200, 'V'), (0x1D246, 'X'), + (0x1D2C0, 'V'), + (0x1D2D4, 'X'), (0x1D2E0, 'V'), (0x1D2F4, 'X'), (0x1D300, 'V'), @@ -6334,6 +6350,10 @@ def _seg_60() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x1D41E, 'M', 'e'), (0x1D41F, 'M', 'f'), (0x1D420, 'M', 'g'), + ] + +def _seg_61() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ (0x1D421, 'M', 'h'), (0x1D422, 'M', 'i'), (0x1D423, 'M', 'j'), @@ -6350,10 +6370,6 @@ def _seg_60() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x1D42E, 'M', 'u'), (0x1D42F, 'M', 'v'), (0x1D430, 'M', 'w'), - ] - -def _seg_61() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: - return [ (0x1D431, 'M', 'x'), (0x1D432, 'M', 'y'), (0x1D433, 'M', 'z'), @@ -6438,6 +6454,10 @@ def _seg_61() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x1D482, 'M', 'a'), (0x1D483, 'M', 'b'), (0x1D484, 'M', 'c'), + ] + +def _seg_62() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ (0x1D485, 'M', 'd'), (0x1D486, 'M', 'e'), (0x1D487, 'M', 'f'), @@ -6454,10 +6474,6 @@ def _seg_61() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x1D492, 'M', 'q'), (0x1D493, 'M', 'r'), (0x1D494, 'M', 's'), - ] - -def _seg_62() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: - return [ (0x1D495, 'M', 't'), (0x1D496, 'M', 'u'), (0x1D497, 'M', 'v'), @@ -6542,6 +6558,10 @@ def _seg_62() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x1D4E9, 'M', 'z'), (0x1D4EA, 'M', 'a'), (0x1D4EB, 'M', 'b'), + ] + +def _seg_63() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ (0x1D4EC, 'M', 'c'), (0x1D4ED, 'M', 'd'), (0x1D4EE, 'M', 'e'), @@ -6558,10 +6578,6 @@ def _seg_62() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x1D4F9, 'M', 'p'), (0x1D4FA, 'M', 'q'), (0x1D4FB, 'M', 'r'), - ] - -def _seg_63() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: - return [ (0x1D4FC, 'M', 's'), (0x1D4FD, 'M', 't'), (0x1D4FE, 'M', 'u'), @@ -6646,6 +6662,10 @@ def _seg_63() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x1D550, 'M', 'y'), (0x1D551, 'X'), (0x1D552, 'M', 'a'), + ] + +def _seg_64() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ (0x1D553, 'M', 'b'), (0x1D554, 'M', 'c'), (0x1D555, 'M', 'd'), @@ -6662,10 +6682,6 @@ def _seg_63() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x1D560, 'M', 'o'), (0x1D561, 'M', 'p'), (0x1D562, 'M', 'q'), - ] - -def _seg_64() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: - return [ (0x1D563, 'M', 'r'), (0x1D564, 'M', 's'), (0x1D565, 'M', 't'), @@ -6750,6 +6766,10 @@ def _seg_64() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x1D5B4, 'M', 'u'), (0x1D5B5, 'M', 'v'), (0x1D5B6, 'M', 'w'), + ] + +def _seg_65() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ (0x1D5B7, 'M', 'x'), (0x1D5B8, 'M', 'y'), (0x1D5B9, 'M', 'z'), @@ -6766,10 +6786,6 @@ def _seg_64() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x1D5C4, 'M', 'k'), (0x1D5C5, 'M', 'l'), (0x1D5C6, 'M', 'm'), - ] - -def _seg_65() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: - return [ (0x1D5C7, 'M', 'n'), (0x1D5C8, 'M', 'o'), (0x1D5C9, 'M', 'p'), @@ -6854,6 +6870,10 @@ def _seg_65() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x1D618, 'M', 'q'), (0x1D619, 'M', 'r'), (0x1D61A, 'M', 's'), + ] + +def _seg_66() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ (0x1D61B, 'M', 't'), (0x1D61C, 'M', 'u'), (0x1D61D, 'M', 'v'), @@ -6870,10 +6890,6 @@ def _seg_65() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x1D628, 'M', 'g'), (0x1D629, 'M', 'h'), (0x1D62A, 'M', 'i'), - ] - -def _seg_66() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: - return [ (0x1D62B, 'M', 'j'), (0x1D62C, 'M', 'k'), (0x1D62D, 'M', 'l'), @@ -6958,6 +6974,10 @@ def _seg_66() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x1D67C, 'M', 'm'), (0x1D67D, 'M', 'n'), (0x1D67E, 'M', 'o'), + ] + +def _seg_67() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ (0x1D67F, 'M', 'p'), (0x1D680, 'M', 'q'), (0x1D681, 'M', 'r'), @@ -6974,10 +6994,6 @@ def _seg_66() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x1D68C, 'M', 'c'), (0x1D68D, 'M', 'd'), (0x1D68E, 'M', 'e'), - ] - -def _seg_67() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: - return [ (0x1D68F, 'M', 'f'), (0x1D690, 'M', 'g'), (0x1D691, 'M', 'h'), @@ -7062,6 +7078,10 @@ def _seg_67() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x1D6E2, 'M', 'α'), (0x1D6E3, 'M', 'β'), (0x1D6E4, 'M', 'γ'), + ] + +def _seg_68() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ (0x1D6E5, 'M', 'δ'), (0x1D6E6, 'M', 'ε'), (0x1D6E7, 'M', 'ζ'), @@ -7078,10 +7098,6 @@ def _seg_67() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x1D6F2, 'M', 'ρ'), (0x1D6F3, 'M', 'θ'), (0x1D6F4, 'M', 'σ'), - ] - -def _seg_68() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: - return [ (0x1D6F5, 'M', 'τ'), (0x1D6F6, 'M', 'υ'), (0x1D6F7, 'M', 'φ'), @@ -7166,6 +7182,10 @@ def _seg_68() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x1D747, 'M', 'σ'), (0x1D749, 'M', 'τ'), (0x1D74A, 'M', 'υ'), + ] + +def _seg_69() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ (0x1D74B, 'M', 'φ'), (0x1D74C, 'M', 'χ'), (0x1D74D, 'M', 'ψ'), @@ -7182,10 +7202,6 @@ def _seg_68() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x1D758, 'M', 'γ'), (0x1D759, 'M', 'δ'), (0x1D75A, 'M', 'ε'), - ] - -def _seg_69() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: - return [ (0x1D75B, 'M', 'ζ'), (0x1D75C, 'M', 'η'), (0x1D75D, 'M', 'θ'), @@ -7270,6 +7286,10 @@ def _seg_69() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x1D7AD, 'M', 'δ'), (0x1D7AE, 'M', 'ε'), (0x1D7AF, 'M', 'ζ'), + ] + +def _seg_70() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ (0x1D7B0, 'M', 'η'), (0x1D7B1, 'M', 'θ'), (0x1D7B2, 'M', 'ι'), @@ -7286,10 +7306,6 @@ def _seg_69() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x1D7BE, 'M', 'υ'), (0x1D7BF, 'M', 'φ'), (0x1D7C0, 'M', 'χ'), - ] - -def _seg_70() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: - return [ (0x1D7C1, 'M', 'ψ'), (0x1D7C2, 'M', 'ω'), (0x1D7C3, 'M', '∂'), @@ -7359,6 +7375,8 @@ def _seg_70() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x1DAB0, 'X'), (0x1DF00, 'V'), (0x1DF1F, 'X'), + (0x1DF25, 'V'), + (0x1DF2B, 'X'), (0x1E000, 'V'), (0x1E007, 'X'), (0x1E008, 'V'), @@ -7369,6 +7387,75 @@ def _seg_70() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x1E025, 'X'), (0x1E026, 'V'), (0x1E02B, 'X'), + (0x1E030, 'M', 'а'), + (0x1E031, 'M', 'б'), + (0x1E032, 'M', 'в'), + ] + +def _seg_71() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1E033, 'M', 'г'), + (0x1E034, 'M', 'д'), + (0x1E035, 'M', 'е'), + (0x1E036, 'M', 'ж'), + (0x1E037, 'M', 'з'), + (0x1E038, 'M', 'и'), + (0x1E039, 'M', 'к'), + (0x1E03A, 'M', 'л'), + (0x1E03B, 'M', 'м'), + (0x1E03C, 'M', 'о'), + (0x1E03D, 'M', 'п'), + (0x1E03E, 'M', 'р'), + (0x1E03F, 'M', 'с'), + (0x1E040, 'M', 'т'), + (0x1E041, 'M', 'у'), + (0x1E042, 'M', 'ф'), + (0x1E043, 'M', 'х'), + (0x1E044, 'M', 'ц'), + (0x1E045, 'M', 'ч'), + (0x1E046, 'M', 'ш'), + (0x1E047, 'M', 'ы'), + (0x1E048, 'M', 'э'), + (0x1E049, 'M', 'ю'), + (0x1E04A, 'M', 'ꚉ'), + (0x1E04B, 'M', 'ә'), + (0x1E04C, 'M', 'і'), + (0x1E04D, 'M', 'ј'), + (0x1E04E, 'M', 'ө'), + (0x1E04F, 'M', 'ү'), + (0x1E050, 'M', 'ӏ'), + (0x1E051, 'M', 'а'), + (0x1E052, 'M', 'б'), + (0x1E053, 'M', 'в'), + (0x1E054, 'M', 'г'), + (0x1E055, 'M', 'д'), + (0x1E056, 'M', 'е'), + (0x1E057, 'M', 'ж'), + (0x1E058, 'M', 'з'), + (0x1E059, 'M', 'и'), + (0x1E05A, 'M', 'к'), + (0x1E05B, 'M', 'л'), + (0x1E05C, 'M', 'о'), + (0x1E05D, 'M', 'п'), + (0x1E05E, 'M', 'с'), + (0x1E05F, 'M', 'у'), + (0x1E060, 'M', 'ф'), + (0x1E061, 'M', 'х'), + (0x1E062, 'M', 'ц'), + (0x1E063, 'M', 'ч'), + (0x1E064, 'M', 'ш'), + (0x1E065, 'M', 'ъ'), + (0x1E066, 'M', 'ы'), + (0x1E067, 'M', 'ґ'), + (0x1E068, 'M', 'і'), + (0x1E069, 'M', 'ѕ'), + (0x1E06A, 'M', 'џ'), + (0x1E06B, 'M', 'ҫ'), + (0x1E06C, 'M', 'ꙑ'), + (0x1E06D, 'M', 'ұ'), + (0x1E06E, 'X'), + (0x1E08F, 'V'), + (0x1E090, 'X'), (0x1E100, 'V'), (0x1E12D, 'X'), (0x1E130, 'V'), @@ -7383,6 +7470,8 @@ def _seg_70() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x1E2FA, 'X'), (0x1E2FF, 'V'), (0x1E300, 'X'), + (0x1E4D0, 'V'), + (0x1E4FA, 'X'), (0x1E7E0, 'V'), (0x1E7E7, 'X'), (0x1E7E8, 'V'), @@ -7390,10 +7479,6 @@ def _seg_70() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x1E7ED, 'V'), (0x1E7EF, 'X'), (0x1E7F0, 'V'), - ] - -def _seg_71() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: - return [ (0x1E7FF, 'X'), (0x1E800, 'V'), (0x1E8C5, 'X'), @@ -7409,6 +7494,10 @@ def _seg_71() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x1E907, 'M', '𞤩'), (0x1E908, 'M', '𞤪'), (0x1E909, 'M', '𞤫'), + ] + +def _seg_72() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ (0x1E90A, 'M', '𞤬'), (0x1E90B, 'M', '𞤭'), (0x1E90C, 'M', '𞤮'), @@ -7494,10 +7583,6 @@ def _seg_71() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x1EE31, 'M', 'ص'), (0x1EE32, 'M', 'ق'), (0x1EE33, 'X'), - ] - -def _seg_72() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: - return [ (0x1EE34, 'M', 'ش'), (0x1EE35, 'M', 'ت'), (0x1EE36, 'M', 'ث'), @@ -7513,6 +7598,10 @@ def _seg_72() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x1EE48, 'X'), (0x1EE49, 'M', 'ي'), (0x1EE4A, 'X'), + ] + +def _seg_73() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ (0x1EE4B, 'M', 'ل'), (0x1EE4C, 'X'), (0x1EE4D, 'M', 'ن'), @@ -7598,10 +7687,6 @@ def _seg_72() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x1EEA3, 'M', 'د'), (0x1EEA4, 'X'), (0x1EEA5, 'M', 'و'), - ] - -def _seg_73() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: - return [ (0x1EEA6, 'M', 'ز'), (0x1EEA7, 'M', 'ح'), (0x1EEA8, 'M', 'ط'), @@ -7617,6 +7702,10 @@ def _seg_73() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x1EEB2, 'M', 'ق'), (0x1EEB3, 'M', 'ر'), (0x1EEB4, 'M', 'ش'), + ] + +def _seg_74() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ (0x1EEB5, 'M', 'ت'), (0x1EEB6, 'M', 'ث'), (0x1EEB7, 'M', 'خ'), @@ -7702,10 +7791,6 @@ def _seg_73() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x1F141, 'M', 'r'), (0x1F142, 'M', 's'), (0x1F143, 'M', 't'), - ] - -def _seg_74() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: - return [ (0x1F144, 'M', 'u'), (0x1F145, 'M', 'v'), (0x1F146, 'M', 'w'), @@ -7721,6 +7806,10 @@ def _seg_74() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x1F150, 'V'), (0x1F16A, 'M', 'mc'), (0x1F16B, 'M', 'md'), + ] + +def _seg_75() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ (0x1F16C, 'M', 'mr'), (0x1F16D, 'V'), (0x1F190, 'M', 'dj'), @@ -7793,23 +7882,19 @@ def _seg_74() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x1F266, 'X'), (0x1F300, 'V'), (0x1F6D8, 'X'), - (0x1F6DD, 'V'), + (0x1F6DC, 'V'), (0x1F6ED, 'X'), (0x1F6F0, 'V'), (0x1F6FD, 'X'), (0x1F700, 'V'), - (0x1F774, 'X'), - (0x1F780, 'V'), - (0x1F7D9, 'X'), + (0x1F777, 'X'), + (0x1F77B, 'V'), + (0x1F7DA, 'X'), (0x1F7E0, 'V'), (0x1F7EC, 'X'), (0x1F7F0, 'V'), (0x1F7F1, 'X'), (0x1F800, 'V'), - ] - -def _seg_75() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: - return [ (0x1F80C, 'X'), (0x1F810, 'V'), (0x1F848, 'X'), @@ -7825,24 +7910,24 @@ def _seg_75() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x1FA54, 'X'), (0x1FA60, 'V'), (0x1FA6E, 'X'), + ] + +def _seg_76() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ (0x1FA70, 'V'), - (0x1FA75, 'X'), - (0x1FA78, 'V'), (0x1FA7D, 'X'), (0x1FA80, 'V'), - (0x1FA87, 'X'), + (0x1FA89, 'X'), (0x1FA90, 'V'), - (0x1FAAD, 'X'), - (0x1FAB0, 'V'), - (0x1FABB, 'X'), - (0x1FAC0, 'V'), + (0x1FABE, 'X'), + (0x1FABF, 'V'), (0x1FAC6, 'X'), - (0x1FAD0, 'V'), - (0x1FADA, 'X'), + (0x1FACE, 'V'), + (0x1FADC, 'X'), (0x1FAE0, 'V'), - (0x1FAE8, 'X'), + (0x1FAE9, 'X'), (0x1FAF0, 'V'), - (0x1FAF7, 'X'), + (0x1FAF9, 'X'), (0x1FB00, 'V'), (0x1FB93, 'X'), (0x1FB94, 'V'), @@ -7861,7 +7946,7 @@ def _seg_75() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x20000, 'V'), (0x2A6E0, 'X'), (0x2A700, 'V'), - (0x2B739, 'X'), + (0x2B73A, 'X'), (0x2B740, 'V'), (0x2B81E, 'X'), (0x2B820, 'V'), @@ -7910,10 +7995,6 @@ def _seg_75() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x2F827, 'M', '勤'), (0x2F828, 'M', '勺'), (0x2F829, 'M', '包'), - ] - -def _seg_76() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: - return [ (0x2F82A, 'M', '匆'), (0x2F82B, 'M', '北'), (0x2F82C, 'M', '卉'), @@ -7933,6 +8014,10 @@ def _seg_76() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x2F83C, 'M', '咞'), (0x2F83D, 'M', '吸'), (0x2F83E, 'M', '呈'), + ] + +def _seg_77() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ (0x2F83F, 'M', '周'), (0x2F840, 'M', '咢'), (0x2F841, 'M', '哶'), @@ -8014,10 +8099,6 @@ def _seg_76() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x2F88F, 'M', '𪎒'), (0x2F890, 'M', '廾'), (0x2F891, 'M', '𢌱'), - ] - -def _seg_77() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: - return [ (0x2F893, 'M', '舁'), (0x2F894, 'M', '弢'), (0x2F896, 'M', '㣇'), @@ -8037,6 +8118,10 @@ def _seg_77() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x2F8A4, 'M', '𢛔'), (0x2F8A5, 'M', '惇'), (0x2F8A6, 'M', '慈'), + ] + +def _seg_78() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ (0x2F8A7, 'M', '慌'), (0x2F8A8, 'M', '慎'), (0x2F8A9, 'M', '慌'), @@ -8118,10 +8203,6 @@ def _seg_77() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x2F8F5, 'M', '殺'), (0x2F8F6, 'M', '殻'), (0x2F8F7, 'M', '𣪍'), - ] - -def _seg_78() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: - return [ (0x2F8F8, 'M', '𡴋'), (0x2F8F9, 'M', '𣫺'), (0x2F8FA, 'M', '汎'), @@ -8141,6 +8222,10 @@ def _seg_78() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x2F908, 'M', '港'), (0x2F909, 'M', '湮'), (0x2F90A, 'M', '㴳'), + ] + +def _seg_79() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ (0x2F90B, 'M', '滋'), (0x2F90C, 'M', '滇'), (0x2F90D, 'M', '𣻑'), @@ -8222,10 +8307,6 @@ def _seg_78() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x2F95B, 'M', '穏'), (0x2F95C, 'M', '𥥼'), (0x2F95D, 'M', '𥪧'), - ] - -def _seg_79() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: - return [ (0x2F95F, 'X'), (0x2F960, 'M', '䈂'), (0x2F961, 'M', '𥮫'), @@ -8245,6 +8326,10 @@ def _seg_79() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x2F96F, 'M', '縂'), (0x2F970, 'M', '繅'), (0x2F971, 'M', '䌴'), + ] + +def _seg_80() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ (0x2F972, 'M', '𦈨'), (0x2F973, 'M', '𦉇'), (0x2F974, 'M', '䍙'), @@ -8326,10 +8411,6 @@ def _seg_79() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x2F9C0, 'M', '蟡'), (0x2F9C1, 'M', '蠁'), (0x2F9C2, 'M', '䗹'), - ] - -def _seg_80() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: - return [ (0x2F9C3, 'M', '衠'), (0x2F9C4, 'M', '衣'), (0x2F9C5, 'M', '𧙧'), @@ -8349,6 +8430,10 @@ def _seg_80() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x2F9D3, 'M', '𧲨'), (0x2F9D4, 'M', '貫'), (0x2F9D5, 'M', '賁'), + ] + +def _seg_81() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ (0x2F9D6, 'M', '贛'), (0x2F9D7, 'M', '起'), (0x2F9D8, 'M', '𧼯'), @@ -8423,6 +8508,8 @@ def _seg_80() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: (0x2FA1E, 'X'), (0x30000, 'V'), (0x3134B, 'X'), + (0x31350, 'V'), + (0x323B0, 'X'), (0xE0100, 'I'), (0xE01F0, 'X'), ] @@ -8509,4 +8596,5 @@ uts46data = tuple( + _seg_78() + _seg_79() + _seg_80() + + _seg_81() ) # type: Tuple[Union[Tuple[int, str], Tuple[int, str, str]], ...] diff --git a/lib/requests/__init__.py b/lib/requests/__init__.py index 53a5b42a..7ac8e297 100644 --- a/lib/requests/__init__.py +++ b/lib/requests/__init__.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - # __ # /__) _ _ _ _ _/ _ # / ( (- (/ (/ (- _) / _) @@ -40,8 +38,10 @@ is at . :license: Apache 2.0, see LICENSE for more details. """ -import urllib3 import warnings + +import urllib3 + from .exceptions import RequestsDependencyWarning try: @@ -54,13 +54,14 @@ try: except ImportError: chardet_version = None + def check_compatibility(urllib3_version, chardet_version, charset_normalizer_version): - urllib3_version = urllib3_version.split('.') - assert urllib3_version != ['dev'] # Verify urllib3 isn't installed from git. + urllib3_version = urllib3_version.split(".") + assert urllib3_version != ["dev"] # Verify urllib3 isn't installed from git. # Sometimes, urllib3 only reports its version as 16.1. if len(urllib3_version) == 2: - urllib3_version.append('0') + urllib3_version.append("0") # Check urllib3 for compatibility. major, minor, patch = urllib3_version # noqa: F811 @@ -72,36 +73,46 @@ def check_compatibility(urllib3_version, chardet_version, charset_normalizer_ver # Check charset_normalizer for compatibility. if chardet_version: - major, minor, patch = chardet_version.split('.')[:3] + major, minor, patch = chardet_version.split(".")[:3] major, minor, patch = int(major), int(minor), int(patch) - # chardet_version >= 3.0.2, < 5.0.0 - assert (3, 0, 2) <= (major, minor, patch) < (5, 0, 0) + # chardet_version >= 3.0.2, < 6.0.0 + assert (3, 0, 2) <= (major, minor, patch) < (6, 0, 0) elif charset_normalizer_version: - major, minor, patch = charset_normalizer_version.split('.')[:3] + major, minor, patch = charset_normalizer_version.split(".")[:3] major, minor, patch = int(major), int(minor), int(patch) # charset_normalizer >= 2.0.0 < 3.0.0 assert (2, 0, 0) <= (major, minor, patch) < (3, 0, 0) else: raise Exception("You need either charset_normalizer or chardet installed") + def _check_cryptography(cryptography_version): # cryptography < 1.3.4 try: - cryptography_version = list(map(int, cryptography_version.split('.'))) + cryptography_version = list(map(int, cryptography_version.split("."))) except ValueError: return if cryptography_version < [1, 3, 4]: - warning = 'Old version of cryptography ({}) may cause slowdown.'.format(cryptography_version) + warning = "Old version of cryptography ({}) may cause slowdown.".format( + cryptography_version + ) warnings.warn(warning, RequestsDependencyWarning) + # Check imported dependencies for compatibility. try: - check_compatibility(urllib3.__version__, chardet_version, charset_normalizer_version) + check_compatibility( + urllib3.__version__, chardet_version, charset_normalizer_version + ) except (AssertionError, ValueError): - warnings.warn("urllib3 ({}) or chardet ({})/charset_normalizer ({}) doesn't match a supported " - "version!".format(urllib3.__version__, chardet_version, charset_normalizer_version), - RequestsDependencyWarning) + warnings.warn( + "urllib3 ({}) or chardet ({})/charset_normalizer ({}) doesn't match a supported " + "version!".format( + urllib3.__version__, chardet_version, charset_normalizer_version + ), + RequestsDependencyWarning, + ) # Attempt to enable urllib3's fallback for SNI support # if the standard library doesn't support SNI or the @@ -114,39 +125,56 @@ try: if not getattr(ssl, "HAS_SNI", False): from urllib3.contrib import pyopenssl + pyopenssl.inject_into_urllib3() # Check cryptography version from cryptography import __version__ as cryptography_version + _check_cryptography(cryptography_version) except ImportError: pass # urllib3's DependencyWarnings should be silenced. from urllib3.exceptions import DependencyWarning -warnings.simplefilter('ignore', DependencyWarning) -from .__version__ import __title__, __description__, __url__, __version__ -from .__version__ import __build__, __author__, __author_email__, __license__ -from .__version__ import __copyright__, __cake__ - -from . import utils -from . import packages -from .models import Request, Response, PreparedRequest -from .api import request, get, head, post, patch, put, delete, options -from .sessions import session, Session -from .status_codes import codes -from .exceptions import ( - RequestException, Timeout, URLRequired, - TooManyRedirects, HTTPError, ConnectionError, - FileModeWarning, ConnectTimeout, ReadTimeout, JSONDecodeError -) +warnings.simplefilter("ignore", DependencyWarning) # Set default logging handler to avoid "No handler found" warnings. import logging from logging import NullHandler +from . import packages, utils +from .__version__ import ( + __author__, + __author_email__, + __build__, + __cake__, + __copyright__, + __description__, + __license__, + __title__, + __url__, + __version__, +) +from .api import delete, get, head, options, patch, post, put, request +from .exceptions import ( + ConnectionError, + ConnectTimeout, + FileModeWarning, + HTTPError, + JSONDecodeError, + ReadTimeout, + RequestException, + Timeout, + TooManyRedirects, + URLRequired, +) +from .models import PreparedRequest, Request, Response +from .sessions import Session, session +from .status_codes import codes + logging.getLogger(__name__).addHandler(NullHandler()) # FileModeWarnings go off per the default. -warnings.simplefilter('default', FileModeWarning, append=True) +warnings.simplefilter("default", FileModeWarning, append=True) diff --git a/lib/requests/__version__.py b/lib/requests/__version__.py index e973b03b..e725ada6 100644 --- a/lib/requests/__version__.py +++ b/lib/requests/__version__.py @@ -2,13 +2,13 @@ # |( |- |.| | | |- `-. | `-. # ' ' `-' `-`.`-' `-' `-' ' `-' -__title__ = 'requests' -__description__ = 'Python HTTP for Humans.' -__url__ = 'https://requests.readthedocs.io' -__version__ = '2.27.1' -__build__ = 0x022701 -__author__ = 'Kenneth Reitz' -__author_email__ = 'me@kennethreitz.org' -__license__ = 'Apache 2.0' -__copyright__ = 'Copyright 2022 Kenneth Reitz' -__cake__ = u'\u2728 \U0001f370 \u2728' +__title__ = "requests" +__description__ = "Python HTTP for Humans." +__url__ = "https://requests.readthedocs.io" +__version__ = "2.28.1" +__build__ = 0x022801 +__author__ = "Kenneth Reitz" +__author_email__ = "me@kennethreitz.org" +__license__ = "Apache 2.0" +__copyright__ = "Copyright 2022 Kenneth Reitz" +__cake__ = "\u2728 \U0001f370 \u2728" diff --git a/lib/requests/_internal_utils.py b/lib/requests/_internal_utils.py index 759d9a56..7dc9bc53 100644 --- a/lib/requests/_internal_utils.py +++ b/lib/requests/_internal_utils.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """ requests._internal_utils ~~~~~~~~~~~~~~ @@ -7,11 +5,22 @@ requests._internal_utils Provides utility functions that are consumed internally by Requests which depend on extremely few external helpers (such as compat) """ +import re -from .compat import is_py2, builtin_str, str +from .compat import builtin_str + +_VALID_HEADER_NAME_RE_BYTE = re.compile(rb"^[^:\s][^:\r\n]*$") +_VALID_HEADER_NAME_RE_STR = re.compile(r"^[^:\s][^:\r\n]*$") +_VALID_HEADER_VALUE_RE_BYTE = re.compile(rb"^\S[^\r\n]*$|^$") +_VALID_HEADER_VALUE_RE_STR = re.compile(r"^\S[^\r\n]*$|^$") + +HEADER_VALIDATORS = { + bytes: (_VALID_HEADER_NAME_RE_BYTE, _VALID_HEADER_VALUE_RE_BYTE), + str: (_VALID_HEADER_NAME_RE_STR, _VALID_HEADER_VALUE_RE_STR), +} -def to_native_string(string, encoding='ascii'): +def to_native_string(string, encoding="ascii"): """Given a string object, regardless of type, returns a representation of that string in the native string type, encoding and decoding where necessary. This assumes ASCII unless told otherwise. @@ -19,10 +28,7 @@ def to_native_string(string, encoding='ascii'): if isinstance(string, builtin_str): out = string else: - if is_py2: - out = string.encode(encoding) - else: - out = string.decode(encoding) + out = string.decode(encoding) return out @@ -36,7 +42,7 @@ def unicode_is_ascii(u_string): """ assert isinstance(u_string, str) try: - u_string.encode('ascii') + u_string.encode("ascii") return True except UnicodeEncodeError: return False diff --git a/lib/requests/adapters.py b/lib/requests/adapters.py index fe22ff45..d3b2d5bb 100644 --- a/lib/requests/adapters.py +++ b/lib/requests/adapters.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """ requests.adapters ~~~~~~~~~~~~~~~~~ @@ -9,58 +7,76 @@ and maintain connections. """ import os.path -import socket +import socket # noqa: F401 -from urllib3.poolmanager import PoolManager, proxy_from_url -from urllib3.response import HTTPResponse -from urllib3.util import parse_url -from urllib3.util import Timeout as TimeoutSauce -from urllib3.util.retry import Retry -from urllib3.exceptions import ClosedPoolError -from urllib3.exceptions import ConnectTimeoutError +from urllib3.exceptions import ClosedPoolError, ConnectTimeoutError from urllib3.exceptions import HTTPError as _HTTPError from urllib3.exceptions import InvalidHeader as _InvalidHeader -from urllib3.exceptions import MaxRetryError -from urllib3.exceptions import NewConnectionError +from urllib3.exceptions import ( + LocationValueError, + MaxRetryError, + NewConnectionError, + ProtocolError, +) from urllib3.exceptions import ProxyError as _ProxyError -from urllib3.exceptions import ProtocolError -from urllib3.exceptions import ReadTimeoutError +from urllib3.exceptions import ReadTimeoutError, ResponseError from urllib3.exceptions import SSLError as _SSLError -from urllib3.exceptions import ResponseError -from urllib3.exceptions import LocationValueError +from urllib3.poolmanager import PoolManager, proxy_from_url +from urllib3.response import HTTPResponse +from urllib3.util import Timeout as TimeoutSauce +from urllib3.util import parse_url +from urllib3.util.retry import Retry -from .models import Response -from .compat import urlparse, basestring -from .utils import (DEFAULT_CA_BUNDLE_PATH, extract_zipped_paths, - get_encoding_from_headers, prepend_scheme_if_needed, - get_auth_from_url, urldefragauth, select_proxy) -from .structures import CaseInsensitiveDict -from .cookies import extract_cookies_to_jar -from .exceptions import (ConnectionError, ConnectTimeout, ReadTimeout, SSLError, - ProxyError, RetryError, InvalidSchema, InvalidProxyURL, - InvalidURL, InvalidHeader) from .auth import _basic_auth_str +from .compat import basestring, urlparse +from .cookies import extract_cookies_to_jar +from .exceptions import ( + ConnectionError, + ConnectTimeout, + InvalidHeader, + InvalidProxyURL, + InvalidSchema, + InvalidURL, + ProxyError, + ReadTimeout, + RetryError, + SSLError, +) +from .models import Response +from .structures import CaseInsensitiveDict +from .utils import ( + DEFAULT_CA_BUNDLE_PATH, + extract_zipped_paths, + get_auth_from_url, + get_encoding_from_headers, + prepend_scheme_if_needed, + select_proxy, + urldefragauth, +) try: from urllib3.contrib.socks import SOCKSProxyManager except ImportError: + def SOCKSProxyManager(*args, **kwargs): raise InvalidSchema("Missing dependencies for SOCKS support.") + DEFAULT_POOLBLOCK = False DEFAULT_POOLSIZE = 10 DEFAULT_RETRIES = 0 DEFAULT_POOL_TIMEOUT = None -class BaseAdapter(object): +class BaseAdapter: """The Base Transport Adapter""" def __init__(self): - super(BaseAdapter, self).__init__() + super().__init__() - def send(self, request, stream=False, timeout=None, verify=True, - cert=None, proxies=None): + def send( + self, request, stream=False, timeout=None, verify=True, cert=None, proxies=None + ): """Sends PreparedRequest object. Returns Response object. :param request: The :class:`PreparedRequest ` being sent. @@ -108,12 +124,22 @@ class HTTPAdapter(BaseAdapter): >>> a = requests.adapters.HTTPAdapter(max_retries=3) >>> s.mount('http://', a) """ - __attrs__ = ['max_retries', 'config', '_pool_connections', '_pool_maxsize', - '_pool_block'] - def __init__(self, pool_connections=DEFAULT_POOLSIZE, - pool_maxsize=DEFAULT_POOLSIZE, max_retries=DEFAULT_RETRIES, - pool_block=DEFAULT_POOLBLOCK): + __attrs__ = [ + "max_retries", + "config", + "_pool_connections", + "_pool_maxsize", + "_pool_block", + ] + + def __init__( + self, + pool_connections=DEFAULT_POOLSIZE, + pool_maxsize=DEFAULT_POOLSIZE, + max_retries=DEFAULT_RETRIES, + pool_block=DEFAULT_POOLBLOCK, + ): if max_retries == DEFAULT_RETRIES: self.max_retries = Retry(0, read=False) else: @@ -121,7 +147,7 @@ class HTTPAdapter(BaseAdapter): self.config = {} self.proxy_manager = {} - super(HTTPAdapter, self).__init__() + super().__init__() self._pool_connections = pool_connections self._pool_maxsize = pool_maxsize @@ -141,10 +167,13 @@ class HTTPAdapter(BaseAdapter): for attr, value in state.items(): setattr(self, attr, value) - self.init_poolmanager(self._pool_connections, self._pool_maxsize, - block=self._pool_block) + self.init_poolmanager( + self._pool_connections, self._pool_maxsize, block=self._pool_block + ) - def init_poolmanager(self, connections, maxsize, block=DEFAULT_POOLBLOCK, **pool_kwargs): + def init_poolmanager( + self, connections, maxsize, block=DEFAULT_POOLBLOCK, **pool_kwargs + ): """Initializes a urllib3 PoolManager. This method should not be called from user code, and is only @@ -161,8 +190,13 @@ class HTTPAdapter(BaseAdapter): self._pool_maxsize = maxsize self._pool_block = block - self.poolmanager = PoolManager(num_pools=connections, maxsize=maxsize, - block=block, strict=True, **pool_kwargs) + self.poolmanager = PoolManager( + num_pools=connections, + maxsize=maxsize, + block=block, + strict=True, + **pool_kwargs, + ) def proxy_manager_for(self, proxy, **proxy_kwargs): """Return urllib3 ProxyManager for the given proxy. @@ -178,7 +212,7 @@ class HTTPAdapter(BaseAdapter): """ if proxy in self.proxy_manager: manager = self.proxy_manager[proxy] - elif proxy.lower().startswith('socks'): + elif proxy.lower().startswith("socks"): username, password = get_auth_from_url(proxy) manager = self.proxy_manager[proxy] = SOCKSProxyManager( proxy, @@ -187,7 +221,7 @@ class HTTPAdapter(BaseAdapter): num_pools=self._pool_connections, maxsize=self._pool_maxsize, block=self._pool_block, - **proxy_kwargs + **proxy_kwargs, ) else: proxy_headers = self.proxy_headers(proxy) @@ -197,7 +231,8 @@ class HTTPAdapter(BaseAdapter): num_pools=self._pool_connections, maxsize=self._pool_maxsize, block=self._pool_block, - **proxy_kwargs) + **proxy_kwargs, + ) return manager @@ -213,7 +248,7 @@ class HTTPAdapter(BaseAdapter): to a CA bundle to use :param cert: The SSL certificate to verify. """ - if url.lower().startswith('https') and verify: + if url.lower().startswith("https") and verify: cert_loc = None @@ -225,17 +260,19 @@ class HTTPAdapter(BaseAdapter): cert_loc = extract_zipped_paths(DEFAULT_CA_BUNDLE_PATH) if not cert_loc or not os.path.exists(cert_loc): - raise IOError("Could not find a suitable TLS CA certificate bundle, " - "invalid path: {}".format(cert_loc)) + raise OSError( + f"Could not find a suitable TLS CA certificate bundle, " + f"invalid path: {cert_loc}" + ) - conn.cert_reqs = 'CERT_REQUIRED' + conn.cert_reqs = "CERT_REQUIRED" if not os.path.isdir(cert_loc): conn.ca_certs = cert_loc else: conn.ca_cert_dir = cert_loc else: - conn.cert_reqs = 'CERT_NONE' + conn.cert_reqs = "CERT_NONE" conn.ca_certs = None conn.ca_cert_dir = None @@ -247,11 +284,14 @@ class HTTPAdapter(BaseAdapter): conn.cert_file = cert conn.key_file = None if conn.cert_file and not os.path.exists(conn.cert_file): - raise IOError("Could not find the TLS certificate file, " - "invalid path: {}".format(conn.cert_file)) + raise OSError( + f"Could not find the TLS certificate file, " + f"invalid path: {conn.cert_file}" + ) if conn.key_file and not os.path.exists(conn.key_file): - raise IOError("Could not find the TLS key file, " - "invalid path: {}".format(conn.key_file)) + raise OSError( + f"Could not find the TLS key file, invalid path: {conn.key_file}" + ) def build_response(self, req, resp): """Builds a :class:`Response ` object from a urllib3 @@ -266,10 +306,10 @@ class HTTPAdapter(BaseAdapter): response = Response() # Fallback to None if there's no status_code, for whatever reason. - response.status_code = getattr(resp, 'status', None) + response.status_code = getattr(resp, "status", None) # Make headers case-insensitive. - response.headers = CaseInsensitiveDict(getattr(resp, 'headers', {})) + response.headers = CaseInsensitiveDict(getattr(resp, "headers", {})) # Set encoding. response.encoding = get_encoding_from_headers(response.headers) @@ -277,7 +317,7 @@ class HTTPAdapter(BaseAdapter): response.reason = response.raw.reason if isinstance(req.url, bytes): - response.url = req.url.decode('utf-8') + response.url = req.url.decode("utf-8") else: response.url = req.url @@ -302,11 +342,13 @@ class HTTPAdapter(BaseAdapter): proxy = select_proxy(url, proxies) if proxy: - proxy = prepend_scheme_if_needed(proxy, 'http') + proxy = prepend_scheme_if_needed(proxy, "http") proxy_url = parse_url(proxy) if not proxy_url.host: - raise InvalidProxyURL("Please check proxy URL. It is malformed" - " and could be missing the host.") + raise InvalidProxyURL( + "Please check proxy URL. It is malformed " + "and could be missing the host." + ) proxy_manager = self.proxy_manager_for(proxy) conn = proxy_manager.connection_from_url(url) else: @@ -344,11 +386,11 @@ class HTTPAdapter(BaseAdapter): proxy = select_proxy(request.url, proxies) scheme = urlparse(request.url).scheme - is_proxied_http_request = (proxy and scheme != 'https') + is_proxied_http_request = proxy and scheme != "https" using_socks_proxy = False if proxy: proxy_scheme = urlparse(proxy).scheme.lower() - using_socks_proxy = proxy_scheme.startswith('socks') + using_socks_proxy = proxy_scheme.startswith("socks") url = request.path_url if is_proxied_http_request and not using_socks_proxy: @@ -387,12 +429,13 @@ class HTTPAdapter(BaseAdapter): username, password = get_auth_from_url(proxy) if username: - headers['Proxy-Authorization'] = _basic_auth_str(username, - password) + headers["Proxy-Authorization"] = _basic_auth_str(username, password) return headers - def send(self, request, stream=False, timeout=None, verify=True, cert=None, proxies=None): + def send( + self, request, stream=False, timeout=None, verify=True, cert=None, proxies=None + ): """Sends PreparedRequest object. Returns Response object. :param request: The :class:`PreparedRequest ` being sent. @@ -416,20 +459,26 @@ class HTTPAdapter(BaseAdapter): self.cert_verify(conn, request.url, verify, cert) url = self.request_url(request, proxies) - self.add_headers(request, stream=stream, timeout=timeout, verify=verify, cert=cert, proxies=proxies) + self.add_headers( + request, + stream=stream, + timeout=timeout, + verify=verify, + cert=cert, + proxies=proxies, + ) - chunked = not (request.body is None or 'Content-Length' in request.headers) + chunked = not (request.body is None or "Content-Length" in request.headers) if isinstance(timeout, tuple): try: connect, read = timeout timeout = TimeoutSauce(connect=connect, read=read) - except ValueError as e: - # this may raise a string formatting error. - err = ("Invalid timeout {}. Pass a (connect, read) " - "timeout tuple, or a single float to set " - "both timeouts to the same value".format(timeout)) - raise ValueError(err) + except ValueError: + raise ValueError( + f"Invalid timeout {timeout}. Pass a (connect, read) timeout tuple, " + f"or a single float to set both timeouts to the same value." + ) elif isinstance(timeout, TimeoutSauce): pass else: @@ -447,22 +496,24 @@ class HTTPAdapter(BaseAdapter): preload_content=False, decode_content=False, retries=self.max_retries, - timeout=timeout + timeout=timeout, ) # Send the request. else: - if hasattr(conn, 'proxy_pool'): + if hasattr(conn, "proxy_pool"): conn = conn.proxy_pool low_conn = conn._get_conn(timeout=DEFAULT_POOL_TIMEOUT) try: - skip_host = 'Host' in request.headers - low_conn.putrequest(request.method, - url, - skip_accept_encoding=True, - skip_host=skip_host) + skip_host = "Host" in request.headers + low_conn.putrequest( + request.method, + url, + skip_accept_encoding=True, + skip_host=skip_host, + ) for header, value in request.headers.items(): low_conn.putheader(header, value) @@ -470,34 +521,29 @@ class HTTPAdapter(BaseAdapter): low_conn.endheaders() for i in request.body: - low_conn.send(hex(len(i))[2:].encode('utf-8')) - low_conn.send(b'\r\n') + low_conn.send(hex(len(i))[2:].encode("utf-8")) + low_conn.send(b"\r\n") low_conn.send(i) - low_conn.send(b'\r\n') - low_conn.send(b'0\r\n\r\n') + low_conn.send(b"\r\n") + low_conn.send(b"0\r\n\r\n") # Receive the response from the server - try: - # For Python 2.7, use buffering of HTTP responses - r = low_conn.getresponse(buffering=True) - except TypeError: - # For compatibility with Python 3.3+ - r = low_conn.getresponse() + r = low_conn.getresponse() resp = HTTPResponse.from_httplib( r, pool=conn, connection=low_conn, preload_content=False, - decode_content=False + decode_content=False, ) - except: + except Exception: # If we hit any problems here, clean up the connection. - # Then, reraise so that we can handle the actual exception. + # Then, raise so that we can handle the actual exception. low_conn.close() raise - except (ProtocolError, socket.error) as err: + except (ProtocolError, OSError) as err: raise ConnectionError(err, request=request) except MaxRetryError as e: diff --git a/lib/requests/api.py b/lib/requests/api.py index 4cba90ee..2f71aaed 100644 --- a/lib/requests/api.py +++ b/lib/requests/api.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """ requests.api ~~~~~~~~~~~~ @@ -72,7 +70,7 @@ def get(url, params=None, **kwargs): :rtype: requests.Response """ - return request('get', url, params=params, **kwargs) + return request("get", url, params=params, **kwargs) def options(url, **kwargs): @@ -84,7 +82,7 @@ def options(url, **kwargs): :rtype: requests.Response """ - return request('options', url, **kwargs) + return request("options", url, **kwargs) def head(url, **kwargs): @@ -98,8 +96,8 @@ def head(url, **kwargs): :rtype: requests.Response """ - kwargs.setdefault('allow_redirects', False) - return request('head', url, **kwargs) + kwargs.setdefault("allow_redirects", False) + return request("head", url, **kwargs) def post(url, data=None, json=None, **kwargs): @@ -114,7 +112,7 @@ def post(url, data=None, json=None, **kwargs): :rtype: requests.Response """ - return request('post', url, data=data, json=json, **kwargs) + return request("post", url, data=data, json=json, **kwargs) def put(url, data=None, **kwargs): @@ -129,7 +127,7 @@ def put(url, data=None, **kwargs): :rtype: requests.Response """ - return request('put', url, data=data, **kwargs) + return request("put", url, data=data, **kwargs) def patch(url, data=None, **kwargs): @@ -144,7 +142,7 @@ def patch(url, data=None, **kwargs): :rtype: requests.Response """ - return request('patch', url, data=data, **kwargs) + return request("patch", url, data=data, **kwargs) def delete(url, **kwargs): @@ -156,4 +154,4 @@ def delete(url, **kwargs): :rtype: requests.Response """ - return request('delete', url, **kwargs) + return request("delete", url, **kwargs) diff --git a/lib/requests/auth.py b/lib/requests/auth.py index eeface39..9733686d 100644 --- a/lib/requests/auth.py +++ b/lib/requests/auth.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """ requests.auth ~~~~~~~~~~~~~ @@ -7,22 +5,21 @@ requests.auth This module contains the authentication handlers for Requests. """ +import hashlib import os import re -import time -import hashlib import threading +import time import warnings - from base64 import b64encode -from .compat import urlparse, str, basestring -from .cookies import extract_cookies_to_jar from ._internal_utils import to_native_string +from .compat import basestring, str, urlparse +from .cookies import extract_cookies_to_jar from .utils import parse_dict_header -CONTENT_TYPE_FORM_URLENCODED = 'application/x-www-form-urlencoded' -CONTENT_TYPE_MULTI_PART = 'multipart/form-data' +CONTENT_TYPE_FORM_URLENCODED = "application/x-www-form-urlencoded" +CONTENT_TYPE_MULTI_PART = "multipart/form-data" def _basic_auth_str(username, password): @@ -57,23 +54,23 @@ def _basic_auth_str(username, password): # -- End Removal -- if isinstance(username, str): - username = username.encode('latin1') + username = username.encode("latin1") if isinstance(password, str): - password = password.encode('latin1') + password = password.encode("latin1") - authstr = 'Basic ' + to_native_string( - b64encode(b':'.join((username, password))).strip() + authstr = "Basic " + to_native_string( + b64encode(b":".join((username, password))).strip() ) return authstr -class AuthBase(object): +class AuthBase: """Base class that all auth implementations derive from""" def __call__(self, r): - raise NotImplementedError('Auth hooks must be callable.') + raise NotImplementedError("Auth hooks must be callable.") class HTTPBasicAuth(AuthBase): @@ -84,16 +81,18 @@ class HTTPBasicAuth(AuthBase): self.password = password def __eq__(self, other): - return all([ - self.username == getattr(other, 'username', None), - self.password == getattr(other, 'password', None) - ]) + return all( + [ + self.username == getattr(other, "username", None), + self.password == getattr(other, "password", None), + ] + ) def __ne__(self, other): return not self == other def __call__(self, r): - r.headers['Authorization'] = _basic_auth_str(self.username, self.password) + r.headers["Authorization"] = _basic_auth_str(self.username, self.password) return r @@ -101,7 +100,7 @@ class HTTPProxyAuth(HTTPBasicAuth): """Attaches HTTP Proxy Authentication to a given Request object.""" def __call__(self, r): - r.headers['Proxy-Authorization'] = _basic_auth_str(self.username, self.password) + r.headers["Proxy-Authorization"] = _basic_auth_str(self.username, self.password) return r @@ -116,9 +115,9 @@ class HTTPDigestAuth(AuthBase): def init_per_thread_state(self): # Ensure state is initialized just once per-thread - if not hasattr(self._thread_local, 'init'): + if not hasattr(self._thread_local, "init"): self._thread_local.init = True - self._thread_local.last_nonce = '' + self._thread_local.last_nonce = "" self._thread_local.nonce_count = 0 self._thread_local.chal = {} self._thread_local.pos = None @@ -129,44 +128,52 @@ class HTTPDigestAuth(AuthBase): :rtype: str """ - realm = self._thread_local.chal['realm'] - nonce = self._thread_local.chal['nonce'] - qop = self._thread_local.chal.get('qop') - algorithm = self._thread_local.chal.get('algorithm') - opaque = self._thread_local.chal.get('opaque') + realm = self._thread_local.chal["realm"] + nonce = self._thread_local.chal["nonce"] + qop = self._thread_local.chal.get("qop") + algorithm = self._thread_local.chal.get("algorithm") + opaque = self._thread_local.chal.get("opaque") hash_utf8 = None if algorithm is None: - _algorithm = 'MD5' + _algorithm = "MD5" else: _algorithm = algorithm.upper() # lambdas assume digest modules are imported at the top level - if _algorithm == 'MD5' or _algorithm == 'MD5-SESS': + if _algorithm == "MD5" or _algorithm == "MD5-SESS": + def md5_utf8(x): if isinstance(x, str): - x = x.encode('utf-8') + x = x.encode("utf-8") return hashlib.md5(x).hexdigest() + hash_utf8 = md5_utf8 - elif _algorithm == 'SHA': + elif _algorithm == "SHA": + def sha_utf8(x): if isinstance(x, str): - x = x.encode('utf-8') + x = x.encode("utf-8") return hashlib.sha1(x).hexdigest() + hash_utf8 = sha_utf8 - elif _algorithm == 'SHA-256': + elif _algorithm == "SHA-256": + def sha256_utf8(x): if isinstance(x, str): - x = x.encode('utf-8') + x = x.encode("utf-8") return hashlib.sha256(x).hexdigest() + hash_utf8 = sha256_utf8 - elif _algorithm == 'SHA-512': + elif _algorithm == "SHA-512": + def sha512_utf8(x): if isinstance(x, str): - x = x.encode('utf-8') + x = x.encode("utf-8") return hashlib.sha512(x).hexdigest() + hash_utf8 = sha512_utf8 - KD = lambda s, d: hash_utf8("%s:%s" % (s, d)) + KD = lambda s, d: hash_utf8(f"{s}:{d}") # noqa:E731 if hash_utf8 is None: return None @@ -177,10 +184,10 @@ class HTTPDigestAuth(AuthBase): #: path is request-uri defined in RFC 2616 which should not be empty path = p_parsed.path or "/" if p_parsed.query: - path += '?' + p_parsed.query + path += f"?{p_parsed.query}" - A1 = '%s:%s:%s' % (self.username, realm, self.password) - A2 = '%s:%s' % (method, path) + A1 = f"{self.username}:{realm}:{self.password}" + A2 = f"{method}:{path}" HA1 = hash_utf8(A1) HA2 = hash_utf8(A2) @@ -189,22 +196,20 @@ class HTTPDigestAuth(AuthBase): self._thread_local.nonce_count += 1 else: self._thread_local.nonce_count = 1 - ncvalue = '%08x' % self._thread_local.nonce_count - s = str(self._thread_local.nonce_count).encode('utf-8') - s += nonce.encode('utf-8') - s += time.ctime().encode('utf-8') + ncvalue = f"{self._thread_local.nonce_count:08x}" + s = str(self._thread_local.nonce_count).encode("utf-8") + s += nonce.encode("utf-8") + s += time.ctime().encode("utf-8") s += os.urandom(8) - cnonce = (hashlib.sha1(s).hexdigest()[:16]) - if _algorithm == 'MD5-SESS': - HA1 = hash_utf8('%s:%s:%s' % (HA1, nonce, cnonce)) + cnonce = hashlib.sha1(s).hexdigest()[:16] + if _algorithm == "MD5-SESS": + HA1 = hash_utf8(f"{HA1}:{nonce}:{cnonce}") if not qop: - respdig = KD(HA1, "%s:%s" % (nonce, HA2)) - elif qop == 'auth' or 'auth' in qop.split(','): - noncebit = "%s:%s:%s:%s:%s" % ( - nonce, ncvalue, cnonce, 'auth', HA2 - ) + respdig = KD(HA1, f"{nonce}:{HA2}") + elif qop == "auth" or "auth" in qop.split(","): + noncebit = f"{nonce}:{ncvalue}:{cnonce}:auth:{HA2}" respdig = KD(HA1, noncebit) else: # XXX handle auth-int. @@ -213,18 +218,20 @@ class HTTPDigestAuth(AuthBase): self._thread_local.last_nonce = nonce # XXX should the partial digests be encoded too? - base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \ - 'response="%s"' % (self.username, realm, nonce, path, respdig) + base = ( + f'username="{self.username}", realm="{realm}", nonce="{nonce}", ' + f'uri="{path}", response="{respdig}"' + ) if opaque: - base += ', opaque="%s"' % opaque + base += f', opaque="{opaque}"' if algorithm: - base += ', algorithm="%s"' % algorithm + base += f', algorithm="{algorithm}"' if entdig: - base += ', digest="%s"' % entdig + base += f', digest="{entdig}"' if qop: - base += ', qop="auth", nc=%s, cnonce="%s"' % (ncvalue, cnonce) + base += f', qop="auth", nc={ncvalue}, cnonce="{cnonce}"' - return 'Digest %s' % (base) + return f"Digest {base}" def handle_redirect(self, r, **kwargs): """Reset num_401_calls counter on redirects.""" @@ -248,13 +255,13 @@ class HTTPDigestAuth(AuthBase): # Rewind the file position indicator of the body to where # it was to resend the request. r.request.body.seek(self._thread_local.pos) - s_auth = r.headers.get('www-authenticate', '') + s_auth = r.headers.get("www-authenticate", "") - if 'digest' in s_auth.lower() and self._thread_local.num_401_calls < 2: + if "digest" in s_auth.lower() and self._thread_local.num_401_calls < 2: self._thread_local.num_401_calls += 1 - pat = re.compile(r'digest ', flags=re.IGNORECASE) - self._thread_local.chal = parse_dict_header(pat.sub('', s_auth, count=1)) + pat = re.compile(r"digest ", flags=re.IGNORECASE) + self._thread_local.chal = parse_dict_header(pat.sub("", s_auth, count=1)) # Consume content and release the original connection # to allow our new request to reuse the same one. @@ -264,8 +271,9 @@ class HTTPDigestAuth(AuthBase): extract_cookies_to_jar(prep._cookies, r.request, r.raw) prep.prepare_cookies(prep._cookies) - prep.headers['Authorization'] = self.build_digest_header( - prep.method, prep.url) + prep.headers["Authorization"] = self.build_digest_header( + prep.method, prep.url + ) _r = r.connection.send(prep, **kwargs) _r.history.append(r) _r.request = prep @@ -280,7 +288,7 @@ class HTTPDigestAuth(AuthBase): self.init_per_thread_state() # If we have a saved nonce, skip the 401 if self._thread_local.last_nonce: - r.headers['Authorization'] = self.build_digest_header(r.method, r.url) + r.headers["Authorization"] = self.build_digest_header(r.method, r.url) try: self._thread_local.pos = r.body.tell() except AttributeError: @@ -289,17 +297,19 @@ class HTTPDigestAuth(AuthBase): # file position of the previous body. Ensure it's set to # None. self._thread_local.pos = None - r.register_hook('response', self.handle_401) - r.register_hook('response', self.handle_redirect) + r.register_hook("response", self.handle_401) + r.register_hook("response", self.handle_redirect) self._thread_local.num_401_calls = 1 return r def __eq__(self, other): - return all([ - self.username == getattr(other, 'username', None), - self.password == getattr(other, 'password', None) - ]) + return all( + [ + self.username == getattr(other, "username", None), + self.password == getattr(other, "password", None), + ] + ) def __ne__(self, other): return not self == other diff --git a/lib/requests/certs.py b/lib/requests/certs.py index d1a378d7..be422c3e 100644 --- a/lib/requests/certs.py +++ b/lib/requests/certs.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- """ requests.certs @@ -14,5 +13,5 @@ packaged CA bundle. """ from certifi import where -if __name__ == '__main__': +if __name__ == "__main__": print(where()) diff --git a/lib/requests/compat.py b/lib/requests/compat.py index 029ae62a..6776163c 100644 --- a/lib/requests/compat.py +++ b/lib/requests/compat.py @@ -1,11 +1,10 @@ -# -*- coding: utf-8 -*- - """ requests.compat ~~~~~~~~~~~~~~~ -This module handles import compatibility issues between Python 2 and -Python 3. +This module previously handled import compatibility issues +between Python 2 and Python 3. It remains for backwards +compatibility until the next major version. """ try: @@ -23,59 +22,58 @@ import sys _ver = sys.version_info #: Python 2.x? -is_py2 = (_ver[0] == 2) +is_py2 = _ver[0] == 2 #: Python 3.x? -is_py3 = (_ver[0] == 3) +is_py3 = _ver[0] == 3 +# json/simplejson module import resolution has_simplejson = False try: import simplejson as json + has_simplejson = True except ImportError: import json -# --------- -# Specifics -# --------- +if has_simplejson: + from simplejson import JSONDecodeError +else: + from json import JSONDecodeError -if is_py2: - from urllib import ( - quote, unquote, quote_plus, unquote_plus, urlencode, getproxies, - proxy_bypass, proxy_bypass_environment, getproxies_environment) - from urlparse import urlparse, urlunparse, urljoin, urlsplit, urldefrag - from urllib2 import parse_http_list - import cookielib - from Cookie import Morsel - from StringIO import StringIO - # Keep OrderedDict for backwards compatibility. - from collections import Callable, Mapping, MutableMapping, OrderedDict +# Keep OrderedDict for backwards compatibility. +from collections import OrderedDict +from collections.abc import Callable, Mapping, MutableMapping +from http import cookiejar as cookielib +from http.cookies import Morsel +from io import StringIO - builtin_str = str - bytes = str - str = unicode - basestring = basestring - numeric_types = (int, long, float) - integer_types = (int, long) - JSONDecodeError = ValueError +# -------------- +# Legacy Imports +# -------------- +from urllib.parse import ( + quote, + quote_plus, + unquote, + unquote_plus, + urldefrag, + urlencode, + urljoin, + urlparse, + urlsplit, + urlunparse, +) +from urllib.request import ( + getproxies, + getproxies_environment, + parse_http_list, + proxy_bypass, + proxy_bypass_environment, +) -elif is_py3: - from urllib.parse import urlparse, urlunparse, urljoin, urlsplit, urlencode, quote, unquote, quote_plus, unquote_plus, urldefrag - from urllib.request import parse_http_list, getproxies, proxy_bypass, proxy_bypass_environment, getproxies_environment - from http import cookiejar as cookielib - from http.cookies import Morsel - from io import StringIO - # Keep OrderedDict for backwards compatibility. - from collections import OrderedDict - from collections.abc import Callable, Mapping, MutableMapping - if has_simplejson: - from simplejson import JSONDecodeError - else: - from json import JSONDecodeError - - builtin_str = str - str = str - bytes = bytes - basestring = (str, bytes) - numeric_types = (int, float) - integer_types = (int,) +builtin_str = str +str = str +bytes = bytes +basestring = (str, bytes) +numeric_types = (int, float) +integer_types = (int,) diff --git a/lib/requests/cookies.py b/lib/requests/cookies.py index 56fccd9c..bf54ab23 100644 --- a/lib/requests/cookies.py +++ b/lib/requests/cookies.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """ requests.cookies ~~~~~~~~~~~~~~~~ @@ -9,12 +7,12 @@ Compatibility code to be able to use `cookielib.CookieJar` with requests. requests.utils imports from here, so be careful with imports. """ +import calendar import copy import time -import calendar from ._internal_utils import to_native_string -from .compat import cookielib, urlparse, urlunparse, Morsel, MutableMapping +from .compat import Morsel, MutableMapping, cookielib, urlparse, urlunparse try: import threading @@ -22,7 +20,7 @@ except ImportError: import dummy_threading as threading -class MockRequest(object): +class MockRequest: """Wraps a `requests.Request` to mimic a `urllib2.Request`. The code in `cookielib.CookieJar` expects this interface in order to correctly @@ -51,16 +49,22 @@ class MockRequest(object): def get_full_url(self): # Only return the response's URL if the user hadn't set the Host # header - if not self._r.headers.get('Host'): + if not self._r.headers.get("Host"): return self._r.url # If they did set it, retrieve it and reconstruct the expected domain - host = to_native_string(self._r.headers['Host'], encoding='utf-8') + host = to_native_string(self._r.headers["Host"], encoding="utf-8") parsed = urlparse(self._r.url) # Reconstruct the URL as we expect it - return urlunparse([ - parsed.scheme, host, parsed.path, parsed.params, parsed.query, - parsed.fragment - ]) + return urlunparse( + [ + parsed.scheme, + host, + parsed.path, + parsed.params, + parsed.query, + parsed.fragment, + ] + ) def is_unverifiable(self): return True @@ -73,7 +77,9 @@ class MockRequest(object): def add_header(self, key, val): """cookielib has no legitimate use for this method; add it back if you find one.""" - raise NotImplementedError("Cookie headers should be added with add_unredirected_header()") + raise NotImplementedError( + "Cookie headers should be added with add_unredirected_header()" + ) def add_unredirected_header(self, name, value): self._new_headers[name] = value @@ -94,7 +100,7 @@ class MockRequest(object): return self.get_host() -class MockResponse(object): +class MockResponse: """Wraps a `httplib.HTTPMessage` to mimic a `urllib.addinfourl`. ...what? Basically, expose the parsed HTTP headers from the server response @@ -122,8 +128,7 @@ def extract_cookies_to_jar(jar, request, response): :param request: our own requests.Request object :param response: urllib3.HTTPResponse object """ - if not (hasattr(response, '_original_response') and - response._original_response): + if not (hasattr(response, "_original_response") and response._original_response): return # the _original_response field is the wrapped httplib.HTTPResponse object, req = MockRequest(request) @@ -140,7 +145,7 @@ def get_cookie_header(jar, request): """ r = MockRequest(request) jar.add_cookie_header(r) - return r.get_new_headers().get('Cookie') + return r.get_new_headers().get("Cookie") def remove_cookie_by_name(cookiejar, name, domain=None, path=None): @@ -205,7 +210,9 @@ class RequestsCookieJar(cookielib.CookieJar, MutableMapping): """ # support client code that unsets cookies by assignment of a None value: if value is None: - remove_cookie_by_name(self, name, domain=kwargs.get('domain'), path=kwargs.get('path')) + remove_cookie_by_name( + self, name, domain=kwargs.get("domain"), path=kwargs.get("path") + ) return if isinstance(value, Morsel): @@ -305,16 +312,15 @@ class RequestsCookieJar(cookielib.CookieJar, MutableMapping): """ dictionary = {} for cookie in iter(self): - if ( - (domain is None or cookie.domain == domain) and - (path is None or cookie.path == path) + if (domain is None or cookie.domain == domain) and ( + path is None or cookie.path == path ): dictionary[cookie.name] = cookie.value return dictionary def __contains__(self, name): try: - return super(RequestsCookieJar, self).__contains__(name) + return super().__contains__(name) except CookieConflictError: return True @@ -341,9 +347,13 @@ class RequestsCookieJar(cookielib.CookieJar, MutableMapping): remove_cookie_by_name(self, name) def set_cookie(self, cookie, *args, **kwargs): - if hasattr(cookie.value, 'startswith') and cookie.value.startswith('"') and cookie.value.endswith('"'): - cookie.value = cookie.value.replace('\\"', '') - return super(RequestsCookieJar, self).set_cookie(cookie, *args, **kwargs) + if ( + hasattr(cookie.value, "startswith") + and cookie.value.startswith('"') + and cookie.value.endswith('"') + ): + cookie.value = cookie.value.replace('\\"', "") + return super().set_cookie(cookie, *args, **kwargs) def update(self, other): """Updates this jar with cookies from another CookieJar or dict-like""" @@ -351,7 +361,7 @@ class RequestsCookieJar(cookielib.CookieJar, MutableMapping): for cookie in other: self.set_cookie(copy.copy(cookie)) else: - super(RequestsCookieJar, self).update(other) + super().update(other) def _find(self, name, domain=None, path=None): """Requests uses this method internally to get cookie values. @@ -371,7 +381,7 @@ class RequestsCookieJar(cookielib.CookieJar, MutableMapping): if path is None or cookie.path == path: return cookie.value - raise KeyError('name=%r, domain=%r, path=%r' % (name, domain, path)) + raise KeyError(f"name={name!r}, domain={domain!r}, path={path!r}") def _find_no_duplicates(self, name, domain=None, path=None): """Both ``__get_item__`` and ``get`` call this function: it's never @@ -390,25 +400,29 @@ class RequestsCookieJar(cookielib.CookieJar, MutableMapping): if cookie.name == name: if domain is None or cookie.domain == domain: if path is None or cookie.path == path: - if toReturn is not None: # if there are multiple cookies that meet passed in criteria - raise CookieConflictError('There are multiple cookies with name, %r' % (name)) - toReturn = cookie.value # we will eventually return this as long as no cookie conflict + if toReturn is not None: + # if there are multiple cookies that meet passed in criteria + raise CookieConflictError( + f"There are multiple cookies with name, {name!r}" + ) + # we will eventually return this as long as no cookie conflict + toReturn = cookie.value if toReturn: return toReturn - raise KeyError('name=%r, domain=%r, path=%r' % (name, domain, path)) + raise KeyError(f"name={name!r}, domain={domain!r}, path={path!r}") def __getstate__(self): """Unlike a normal CookieJar, this class is pickleable.""" state = self.__dict__.copy() # remove the unpickleable RLock object - state.pop('_cookies_lock') + state.pop("_cookies_lock") return state def __setstate__(self, state): """Unlike a normal CookieJar, this class is pickleable.""" self.__dict__.update(state) - if '_cookies_lock' not in self.__dict__: + if "_cookies_lock" not in self.__dict__: self._cookies_lock = threading.RLock() def copy(self): @@ -427,7 +441,7 @@ def _copy_cookie_jar(jar): if jar is None: return None - if hasattr(jar, 'copy'): + if hasattr(jar, "copy"): # We're dealing with an instance of RequestsCookieJar return jar.copy() # We're dealing with a generic CookieJar instance @@ -445,31 +459,32 @@ def create_cookie(name, value, **kwargs): and sent on every request (this is sometimes called a "supercookie"). """ result = { - 'version': 0, - 'name': name, - 'value': value, - 'port': None, - 'domain': '', - 'path': '/', - 'secure': False, - 'expires': None, - 'discard': True, - 'comment': None, - 'comment_url': None, - 'rest': {'HttpOnly': None}, - 'rfc2109': False, + "version": 0, + "name": name, + "value": value, + "port": None, + "domain": "", + "path": "/", + "secure": False, + "expires": None, + "discard": True, + "comment": None, + "comment_url": None, + "rest": {"HttpOnly": None}, + "rfc2109": False, } badargs = set(kwargs) - set(result) if badargs: - err = 'create_cookie() got unexpected keyword arguments: %s' - raise TypeError(err % list(badargs)) + raise TypeError( + f"create_cookie() got unexpected keyword arguments: {list(badargs)}" + ) result.update(kwargs) - result['port_specified'] = bool(result['port']) - result['domain_specified'] = bool(result['domain']) - result['domain_initial_dot'] = result['domain'].startswith('.') - result['path_specified'] = bool(result['path']) + result["port_specified"] = bool(result["port"]) + result["domain_specified"] = bool(result["domain"]) + result["domain_initial_dot"] = result["domain"].startswith(".") + result["path_specified"] = bool(result["path"]) return cookielib.Cookie(**result) @@ -478,30 +493,28 @@ def morsel_to_cookie(morsel): """Convert a Morsel object into a Cookie containing the one k/v pair.""" expires = None - if morsel['max-age']: + if morsel["max-age"]: try: - expires = int(time.time() + int(morsel['max-age'])) + expires = int(time.time() + int(morsel["max-age"])) except ValueError: - raise TypeError('max-age: %s must be integer' % morsel['max-age']) - elif morsel['expires']: - time_template = '%a, %d-%b-%Y %H:%M:%S GMT' - expires = calendar.timegm( - time.strptime(morsel['expires'], time_template) - ) + raise TypeError(f"max-age: {morsel['max-age']} must be integer") + elif morsel["expires"]: + time_template = "%a, %d-%b-%Y %H:%M:%S GMT" + expires = calendar.timegm(time.strptime(morsel["expires"], time_template)) return create_cookie( - comment=morsel['comment'], - comment_url=bool(morsel['comment']), + comment=morsel["comment"], + comment_url=bool(morsel["comment"]), discard=False, - domain=morsel['domain'], + domain=morsel["domain"], expires=expires, name=morsel.key, - path=morsel['path'], + path=morsel["path"], port=None, - rest={'HttpOnly': morsel['httponly']}, + rest={"HttpOnly": morsel["httponly"]}, rfc2109=False, - secure=bool(morsel['secure']), + secure=bool(morsel["secure"]), value=morsel.value, - version=morsel['version'] or 0, + version=morsel["version"] or 0, ) @@ -534,11 +547,10 @@ def merge_cookies(cookiejar, cookies): :rtype: CookieJar """ if not isinstance(cookiejar, cookielib.CookieJar): - raise ValueError('You can only merge into CookieJar') + raise ValueError("You can only merge into CookieJar") if isinstance(cookies, dict): - cookiejar = cookiejar_from_dict( - cookies, cookiejar=cookiejar, overwrite=False) + cookiejar = cookiejar_from_dict(cookies, cookiejar=cookiejar, overwrite=False) elif isinstance(cookies, cookielib.CookieJar): try: cookiejar.update(cookies) diff --git a/lib/requests/exceptions.py b/lib/requests/exceptions.py index 79697635..e1cedf88 100644 --- a/lib/requests/exceptions.py +++ b/lib/requests/exceptions.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """ requests.exceptions ~~~~~~~~~~~~~~~~~~~ @@ -18,13 +16,12 @@ class RequestException(IOError): def __init__(self, *args, **kwargs): """Initialize RequestException with `request` and `response` objects.""" - response = kwargs.pop('response', None) + response = kwargs.pop("response", None) self.response = response - self.request = kwargs.pop('request', None) - if (response is not None and not self.request and - hasattr(response, 'request')): + self.request = kwargs.pop("request", None) + if response is not None and not self.request and hasattr(response, "request"): self.request = self.response.request - super(RequestException, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) class InvalidJSONError(RequestException): @@ -34,6 +31,16 @@ class InvalidJSONError(RequestException): class JSONDecodeError(InvalidJSONError, CompatJSONDecodeError): """Couldn't decode the text into json""" + def __init__(self, *args, **kwargs): + """ + Construct the JSONDecodeError instance first with all + args. Then use it's args to construct the IOError so that + the json specific args aren't used as IOError specific args + and the error message from JSONDecodeError is preserved. + """ + CompatJSONDecodeError.__init__(self, *args) + InvalidJSONError.__init__(self, *self.args, **kwargs) + class HTTPError(RequestException): """An HTTP error occurred.""" @@ -118,6 +125,7 @@ class RetryError(RequestException): class UnrewindableBodyError(RequestException): """Requests encountered an error when trying to rewind a body.""" + # Warnings diff --git a/lib/requests/help.py b/lib/requests/help.py index 4cd6389f..8fbcd656 100644 --- a/lib/requests/help.py +++ b/lib/requests/help.py @@ -1,10 +1,9 @@ """Module containing bug report helper(s).""" -from __future__ import print_function import json import platform -import sys import ssl +import sys import idna import urllib3 @@ -28,16 +27,16 @@ except ImportError: OpenSSL = None cryptography = None else: - import OpenSSL import cryptography + import OpenSSL def _implementation(): """Return a dict with the Python implementation and version. Provide both the name and the version of the Python implementation - currently running. For example, on CPython 2.7.5 it will return - {'name': 'CPython', 'version': '2.7.5'}. + currently running. For example, on CPython 3.10.3 it will return + {'name': 'CPython', 'version': '3.10.3'}. This function works best on CPython and PyPy: in particular, it probably doesn't work for Jython or IronPython. Future investigation should be done @@ -45,83 +44,83 @@ def _implementation(): """ implementation = platform.python_implementation() - if implementation == 'CPython': + if implementation == "CPython": implementation_version = platform.python_version() - elif implementation == 'PyPy': - implementation_version = '%s.%s.%s' % (sys.pypy_version_info.major, - sys.pypy_version_info.minor, - sys.pypy_version_info.micro) - if sys.pypy_version_info.releaselevel != 'final': - implementation_version = ''.join([ - implementation_version, sys.pypy_version_info.releaselevel - ]) - elif implementation == 'Jython': + elif implementation == "PyPy": + implementation_version = "{}.{}.{}".format( + sys.pypy_version_info.major, + sys.pypy_version_info.minor, + sys.pypy_version_info.micro, + ) + if sys.pypy_version_info.releaselevel != "final": + implementation_version = "".join( + [implementation_version, sys.pypy_version_info.releaselevel] + ) + elif implementation == "Jython": implementation_version = platform.python_version() # Complete Guess - elif implementation == 'IronPython': + elif implementation == "IronPython": implementation_version = platform.python_version() # Complete Guess else: - implementation_version = 'Unknown' + implementation_version = "Unknown" - return {'name': implementation, 'version': implementation_version} + return {"name": implementation, "version": implementation_version} def info(): """Generate information for a bug report.""" try: platform_info = { - 'system': platform.system(), - 'release': platform.release(), + "system": platform.system(), + "release": platform.release(), } - except IOError: + except OSError: platform_info = { - 'system': 'Unknown', - 'release': 'Unknown', + "system": "Unknown", + "release": "Unknown", } implementation_info = _implementation() - urllib3_info = {'version': urllib3.__version__} - charset_normalizer_info = {'version': None} - chardet_info = {'version': None} + urllib3_info = {"version": urllib3.__version__} + charset_normalizer_info = {"version": None} + chardet_info = {"version": None} if charset_normalizer: - charset_normalizer_info = {'version': charset_normalizer.__version__} + charset_normalizer_info = {"version": charset_normalizer.__version__} if chardet: - chardet_info = {'version': chardet.__version__} + chardet_info = {"version": chardet.__version__} pyopenssl_info = { - 'version': None, - 'openssl_version': '', + "version": None, + "openssl_version": "", } if OpenSSL: pyopenssl_info = { - 'version': OpenSSL.__version__, - 'openssl_version': '%x' % OpenSSL.SSL.OPENSSL_VERSION_NUMBER, + "version": OpenSSL.__version__, + "openssl_version": f"{OpenSSL.SSL.OPENSSL_VERSION_NUMBER:x}", } cryptography_info = { - 'version': getattr(cryptography, '__version__', ''), + "version": getattr(cryptography, "__version__", ""), } idna_info = { - 'version': getattr(idna, '__version__', ''), + "version": getattr(idna, "__version__", ""), } system_ssl = ssl.OPENSSL_VERSION_NUMBER - system_ssl_info = { - 'version': '%x' % system_ssl if system_ssl is not None else '' - } + system_ssl_info = {"version": f"{system_ssl:x}" if system_ssl is not None else ""} return { - 'platform': platform_info, - 'implementation': implementation_info, - 'system_ssl': system_ssl_info, - 'using_pyopenssl': pyopenssl is not None, - 'using_charset_normalizer': chardet is None, - 'pyOpenSSL': pyopenssl_info, - 'urllib3': urllib3_info, - 'chardet': chardet_info, - 'charset_normalizer': charset_normalizer_info, - 'cryptography': cryptography_info, - 'idna': idna_info, - 'requests': { - 'version': requests_version, + "platform": platform_info, + "implementation": implementation_info, + "system_ssl": system_ssl_info, + "using_pyopenssl": pyopenssl is not None, + "using_charset_normalizer": chardet is None, + "pyOpenSSL": pyopenssl_info, + "urllib3": urllib3_info, + "chardet": chardet_info, + "charset_normalizer": charset_normalizer_info, + "cryptography": cryptography_info, + "idna": idna_info, + "requests": { + "version": requests_version, }, } @@ -131,5 +130,5 @@ def main(): print(json.dumps(info(), sort_keys=True, indent=2)) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/lib/requests/hooks.py b/lib/requests/hooks.py index 7a51f212..d181ba2e 100644 --- a/lib/requests/hooks.py +++ b/lib/requests/hooks.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """ requests.hooks ~~~~~~~~~~~~~~ @@ -11,12 +9,13 @@ Available hooks: ``response``: The response generated from a Request. """ -HOOKS = ['response'] +HOOKS = ["response"] def default_hooks(): return {event: [] for event in HOOKS} + # TODO: response is the only one @@ -25,7 +24,7 @@ def dispatch_hook(key, hooks, hook_data, **kwargs): hooks = hooks or {} hooks = hooks.get(key) if hooks: - if hasattr(hooks, '__call__'): + if hasattr(hooks, "__call__"): hooks = [hooks] for hook in hooks: _hook_data = hook(hook_data, **kwargs) diff --git a/lib/requests/models.py b/lib/requests/models.py index dfbea854..3cd49f5b 100644 --- a/lib/requests/models.py +++ b/lib/requests/models.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """ requests.models ~~~~~~~~~~~~~~~ @@ -8,48 +6,72 @@ This module contains the primary objects that power Requests. """ import datetime -import sys # Import encoding now, to avoid implicit import later. # Implicit import within threads may cause LookupError when standard library is in a ZIP, # such as in Embedded Python. See https://github.com/psf/requests/issues/3578. -import encodings.idna +import encodings.idna # noqa: F401 +from io import UnsupportedOperation +from urllib3.exceptions import ( + DecodeError, + LocationParseError, + ProtocolError, + ReadTimeoutError, + SSLError, +) from urllib3.fields import RequestField from urllib3.filepost import encode_multipart_formdata from urllib3.util import parse_url -from urllib3.exceptions import ( - DecodeError, ReadTimeoutError, ProtocolError, LocationParseError) -from io import UnsupportedOperation -from .hooks import default_hooks -from .structures import CaseInsensitiveDict - -from .auth import HTTPBasicAuth -from .cookies import cookiejar_from_dict, get_cookie_header, _copy_cookie_jar -from .exceptions import ( - HTTPError, MissingSchema, InvalidURL, ChunkedEncodingError, - ContentDecodingError, ConnectionError, StreamConsumedError, - InvalidJSONError) -from .exceptions import JSONDecodeError as RequestsJSONDecodeError from ._internal_utils import to_native_string, unicode_is_ascii -from .utils import ( - guess_filename, get_auth_from_url, requote_uri, - stream_decode_response_unicode, to_key_val_list, parse_header_links, - iter_slices, guess_json_utf, super_len, check_header_validity) +from .auth import HTTPBasicAuth from .compat import ( - Callable, Mapping, - cookielib, urlunparse, urlsplit, urlencode, str, bytes, - is_py2, chardet, builtin_str, basestring, JSONDecodeError) + Callable, + JSONDecodeError, + Mapping, + basestring, + builtin_str, + chardet, + cookielib, +) from .compat import json as complexjson +from .compat import urlencode, urlsplit, urlunparse +from .cookies import _copy_cookie_jar, cookiejar_from_dict, get_cookie_header +from .exceptions import ( + ChunkedEncodingError, + ConnectionError, + ContentDecodingError, + HTTPError, + InvalidJSONError, + InvalidURL, +) +from .exceptions import JSONDecodeError as RequestsJSONDecodeError +from .exceptions import MissingSchema +from .exceptions import SSLError as RequestsSSLError +from .exceptions import StreamConsumedError +from .hooks import default_hooks from .status_codes import codes +from .structures import CaseInsensitiveDict +from .utils import ( + check_header_validity, + get_auth_from_url, + guess_filename, + guess_json_utf, + iter_slices, + parse_header_links, + requote_uri, + stream_decode_response_unicode, + super_len, + to_key_val_list, +) #: The set of HTTP status codes that indicate an automatically #: processable redirect. REDIRECT_STATI = ( - codes.moved, # 301 - codes.found, # 302 - codes.other, # 303 + codes.moved, # 301 + codes.found, # 302 + codes.other, # 303 codes.temporary_redirect, # 307 codes.permanent_redirect, # 308 ) @@ -59,7 +81,7 @@ CONTENT_CHUNK_SIZE = 10 * 1024 ITER_CHUNK_SIZE = 512 -class RequestEncodingMixin(object): +class RequestEncodingMixin: @property def path_url(self): """Build the path URL to use.""" @@ -70,16 +92,16 @@ class RequestEncodingMixin(object): path = p.path if not path: - path = '/' + path = "/" url.append(path) query = p.query if query: - url.append('?') + url.append("?") url.append(query) - return ''.join(url) + return "".join(url) @staticmethod def _encode_params(data): @@ -92,18 +114,21 @@ class RequestEncodingMixin(object): if isinstance(data, (str, bytes)): return data - elif hasattr(data, 'read'): + elif hasattr(data, "read"): return data - elif hasattr(data, '__iter__'): + elif hasattr(data, "__iter__"): result = [] for k, vs in to_key_val_list(data): - if isinstance(vs, basestring) or not hasattr(vs, '__iter__'): + if isinstance(vs, basestring) or not hasattr(vs, "__iter__"): vs = [vs] for v in vs: if v is not None: result.append( - (k.encode('utf-8') if isinstance(k, str) else k, - v.encode('utf-8') if isinstance(v, str) else v)) + ( + k.encode("utf-8") if isinstance(k, str) else k, + v.encode("utf-8") if isinstance(v, str) else v, + ) + ) return urlencode(result, doseq=True) else: return data @@ -118,7 +143,7 @@ class RequestEncodingMixin(object): The tuples may be 2-tuples (filename, fileobj), 3-tuples (filename, fileobj, contentype) or 4-tuples (filename, fileobj, contentype, custom_headers). """ - if (not files): + if not files: raise ValueError("Files must be provided.") elif isinstance(data, basestring): raise ValueError("Data must not be a string.") @@ -128,7 +153,7 @@ class RequestEncodingMixin(object): files = to_key_val_list(files or {}) for field, val in fields: - if isinstance(val, basestring) or not hasattr(val, '__iter__'): + if isinstance(val, basestring) or not hasattr(val, "__iter__"): val = [val] for v in val: if v is not None: @@ -137,8 +162,13 @@ class RequestEncodingMixin(object): v = str(v) new_fields.append( - (field.decode('utf-8') if isinstance(field, bytes) else field, - v.encode('utf-8') if isinstance(v, str) else v)) + ( + field.decode("utf-8") + if isinstance(field, bytes) + else field, + v.encode("utf-8") if isinstance(v, str) else v, + ) + ) for (k, v) in files: # support for explicit filename @@ -157,7 +187,7 @@ class RequestEncodingMixin(object): if isinstance(fp, (str, bytes, bytearray)): fdata = fp - elif hasattr(fp, 'read'): + elif hasattr(fp, "read"): fdata = fp.read() elif fp is None: continue @@ -173,16 +203,16 @@ class RequestEncodingMixin(object): return body, content_type -class RequestHooksMixin(object): +class RequestHooksMixin: def register_hook(self, event, hook): """Properly register a hook.""" if event not in self.hooks: - raise ValueError('Unsupported event specified, with event name "%s"' % (event)) + raise ValueError(f'Unsupported event specified, with event name "{event}"') if isinstance(hook, Callable): self.hooks[event].append(hook) - elif hasattr(hook, '__iter__'): + elif hasattr(hook, "__iter__"): self.hooks[event].extend(h for h in hook if isinstance(h, Callable)) def deregister_hook(self, event, hook): @@ -225,9 +255,19 @@ class Request(RequestHooksMixin): """ - def __init__(self, - method=None, url=None, headers=None, files=None, data=None, - params=None, auth=None, cookies=None, hooks=None, json=None): + def __init__( + self, + method=None, + url=None, + headers=None, + files=None, + data=None, + params=None, + auth=None, + cookies=None, + hooks=None, + json=None, + ): # Default empty dicts for dict params. data = [] if data is None else data @@ -251,7 +291,7 @@ class Request(RequestHooksMixin): self.cookies = cookies def __repr__(self): - return '' % (self.method) + return f"" def prepare(self): """Constructs a :class:`PreparedRequest ` for transmission and returns it.""" @@ -309,9 +349,19 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): #: integer denoting starting position of a readable file-like body. self._body_position = None - def prepare(self, - method=None, url=None, headers=None, files=None, data=None, - params=None, auth=None, cookies=None, hooks=None, json=None): + def prepare( + self, + method=None, + url=None, + headers=None, + files=None, + data=None, + params=None, + auth=None, + cookies=None, + hooks=None, + json=None, + ): """Prepares the entire request with the given parameters.""" self.prepare_method(method) @@ -328,7 +378,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): self.prepare_hooks(hooks) def __repr__(self): - return '' % (self.method) + return f"" def copy(self): p = PreparedRequest() @@ -352,7 +402,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): import idna try: - host = idna.encode(host, uts46=True).decode('utf-8') + host = idna.encode(host, uts46=True).decode("utf-8") except idna.IDNAError: raise UnicodeError return host @@ -365,9 +415,9 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): #: on python 3.x. #: https://github.com/psf/requests/pull/2238 if isinstance(url, bytes): - url = url.decode('utf8') + url = url.decode("utf8") else: - url = unicode(url) if is_py2 else str(url) + url = str(url) # Remove leading whitespaces from url url = url.lstrip() @@ -375,7 +425,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): # Don't do any URL preparation for non-HTTP schemes like `mailto`, # `data` etc to work around exceptions from `url_parse`, which # handles RFC 3986 only. - if ':' in url and not url.lower().startswith('http'): + if ":" in url and not url.lower().startswith("http"): self.url = url return @@ -386,13 +436,13 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): raise InvalidURL(*e.args) if not scheme: - error = ("Invalid URL {0!r}: No scheme supplied. Perhaps you meant http://{0}?") - error = error.format(to_native_string(url, 'utf8')) - - raise MissingSchema(error) + raise MissingSchema( + f"Invalid URL {url!r}: No scheme supplied. " + f"Perhaps you meant http://{url}?" + ) if not host: - raise InvalidURL("Invalid URL %r: No host supplied" % url) + raise InvalidURL(f"Invalid URL {url!r}: No host supplied") # In general, we want to try IDNA encoding the hostname if the string contains # non-ASCII characters. This allows users to automatically get the correct IDNA @@ -402,33 +452,21 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): try: host = self._get_idna_encoded_host(host) except UnicodeError: - raise InvalidURL('URL has an invalid label.') - elif host.startswith((u'*', u'.')): - raise InvalidURL('URL has an invalid label.') + raise InvalidURL("URL has an invalid label.") + elif host.startswith(("*", ".")): + raise InvalidURL("URL has an invalid label.") # Carefully reconstruct the network location - netloc = auth or '' + netloc = auth or "" if netloc: - netloc += '@' + netloc += "@" netloc += host if port: - netloc += ':' + str(port) + netloc += f":{port}" # Bare domains aren't valid URLs. if not path: - path = '/' - - if is_py2: - if isinstance(scheme, str): - scheme = scheme.encode('utf-8') - if isinstance(netloc, str): - netloc = netloc.encode('utf-8') - if isinstance(path, str): - path = path.encode('utf-8') - if isinstance(query, str): - query = query.encode('utf-8') - if isinstance(fragment, str): - fragment = fragment.encode('utf-8') + path = "/" if isinstance(params, (str, bytes)): params = to_native_string(params) @@ -436,7 +474,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): enc_params = self._encode_params(params) if enc_params: if query: - query = '%s&%s' % (query, enc_params) + query = f"{query}&{enc_params}" else: query = enc_params @@ -467,7 +505,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): if not data and json is not None: # urllib3 requires a bytes-like body. Python 2's json.dumps # provides this natively, but Python 3 gives a Unicode string. - content_type = 'application/json' + content_type = "application/json" try: body = complexjson.dumps(json, allow_nan=False) @@ -475,12 +513,14 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): raise InvalidJSONError(ve, request=self) if not isinstance(body, bytes): - body = body.encode('utf-8') + body = body.encode("utf-8") - is_stream = all([ - hasattr(data, '__iter__'), - not isinstance(data, (basestring, list, tuple, Mapping)) - ]) + is_stream = all( + [ + hasattr(data, "__iter__"), + not isinstance(data, (basestring, list, tuple, Mapping)), + ] + ) if is_stream: try: @@ -490,24 +530,26 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): body = data - if getattr(body, 'tell', None) is not None: + if getattr(body, "tell", None) is not None: # Record the current file position before reading. # This will allow us to rewind a file in the event # of a redirect. try: self._body_position = body.tell() - except (IOError, OSError): + except OSError: # This differentiates from None, allowing us to catch # a failed `tell()` later when trying to rewind the body self._body_position = object() if files: - raise NotImplementedError('Streamed bodies and files are mutually exclusive.') + raise NotImplementedError( + "Streamed bodies and files are mutually exclusive." + ) if length: - self.headers['Content-Length'] = builtin_str(length) + self.headers["Content-Length"] = builtin_str(length) else: - self.headers['Transfer-Encoding'] = 'chunked' + self.headers["Transfer-Encoding"] = "chunked" else: # Multi-part file uploads. if files: @@ -515,16 +557,16 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): else: if data: body = self._encode_params(data) - if isinstance(data, basestring) or hasattr(data, 'read'): + if isinstance(data, basestring) or hasattr(data, "read"): content_type = None else: - content_type = 'application/x-www-form-urlencoded' + content_type = "application/x-www-form-urlencoded" self.prepare_content_length(body) # Add content-type if it wasn't explicitly provided. - if content_type and ('content-type' not in self.headers): - self.headers['Content-Type'] = content_type + if content_type and ("content-type" not in self.headers): + self.headers["Content-Type"] = content_type self.body = body @@ -535,13 +577,16 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): if length: # If length exists, set it. Otherwise, we fallback # to Transfer-Encoding: chunked. - self.headers['Content-Length'] = builtin_str(length) - elif self.method not in ('GET', 'HEAD') and self.headers.get('Content-Length') is None: + self.headers["Content-Length"] = builtin_str(length) + elif ( + self.method not in ("GET", "HEAD") + and self.headers.get("Content-Length") is None + ): # Set Content-Length to 0 for methods that can have a body # but don't provide one. (i.e. not GET or HEAD) - self.headers['Content-Length'] = '0' + self.headers["Content-Length"] = "0" - def prepare_auth(self, auth, url=''): + def prepare_auth(self, auth, url=""): """Prepares the given HTTP auth data.""" # If no Auth is explicitly provided, extract it from the URL first. @@ -581,7 +626,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): cookie_header = get_cookie_header(self._cookies, self) if cookie_header is not None: - self.headers['Cookie'] = cookie_header + self.headers["Cookie"] = cookie_header def prepare_hooks(self, hooks): """Prepares the given hooks.""" @@ -593,14 +638,22 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): self.register_hook(event, hooks[event]) -class Response(object): +class Response: """The :class:`Response ` object, which contains a server's response to an HTTP request. """ __attrs__ = [ - '_content', 'status_code', 'headers', 'url', 'history', - 'encoding', 'reason', 'cookies', 'elapsed', 'request' + "_content", + "status_code", + "headers", + "url", + "history", + "encoding", + "reason", + "cookies", + "elapsed", + "request", ] def __init__(self): @@ -669,11 +722,11 @@ class Response(object): setattr(self, name, value) # pickled objects do not have .raw - setattr(self, '_content_consumed', True) - setattr(self, 'raw', None) + setattr(self, "_content_consumed", True) + setattr(self, "raw", None) def __repr__(self): - return '' % (self.status_code) + return f"" def __bool__(self): """Returns True if :attr:`status_code` is less than 400. @@ -719,12 +772,15 @@ class Response(object): """True if this Response is a well-formed HTTP redirect that could have been processed automatically (by :meth:`Session.resolve_redirects`). """ - return ('location' in self.headers and self.status_code in REDIRECT_STATI) + return "location" in self.headers and self.status_code in REDIRECT_STATI @property def is_permanent_redirect(self): """True if this Response one of the permanent versions of redirect.""" - return ('location' in self.headers and self.status_code in (codes.moved_permanently, codes.permanent_redirect)) + return "location" in self.headers and self.status_code in ( + codes.moved_permanently, + codes.permanent_redirect, + ) @property def next(self): @@ -734,7 +790,7 @@ class Response(object): @property def apparent_encoding(self): """The apparent encoding, provided by the charset_normalizer or chardet libraries.""" - return chardet.detect(self.content)['encoding'] + return chardet.detect(self.content)["encoding"] def iter_content(self, chunk_size=1, decode_unicode=False): """Iterates over the response data. When stream=True is set on the @@ -755,16 +811,17 @@ class Response(object): def generate(): # Special case for urllib3. - if hasattr(self.raw, 'stream'): + if hasattr(self.raw, "stream"): try: - for chunk in self.raw.stream(chunk_size, decode_content=True): - yield chunk + yield from self.raw.stream(chunk_size, decode_content=True) except ProtocolError as e: raise ChunkedEncodingError(e) except DecodeError as e: raise ContentDecodingError(e) except ReadTimeoutError as e: raise ConnectionError(e) + except SSLError as e: + raise RequestsSSLError(e) else: # Standard file-like object. while True: @@ -778,7 +835,9 @@ class Response(object): if self._content_consumed and isinstance(self._content, bool): raise StreamConsumedError() elif chunk_size is not None and not isinstance(chunk_size, int): - raise TypeError("chunk_size must be an int, it is instead a %s." % type(chunk_size)) + raise TypeError( + f"chunk_size must be an int, it is instead a {type(chunk_size)}." + ) # simulate reading small chunks of the content reused_chunks = iter_slices(self._content, chunk_size) @@ -791,7 +850,9 @@ class Response(object): return chunks - def iter_lines(self, chunk_size=ITER_CHUNK_SIZE, decode_unicode=False, delimiter=None): + def iter_lines( + self, chunk_size=ITER_CHUNK_SIZE, decode_unicode=False, delimiter=None + ): """Iterates over the response data, one line at a time. When stream=True is set on the request, this avoids reading the content at once into memory for large responses. @@ -801,7 +862,9 @@ class Response(object): pending = None - for chunk in self.iter_content(chunk_size=chunk_size, decode_unicode=decode_unicode): + for chunk in self.iter_content( + chunk_size=chunk_size, decode_unicode=decode_unicode + ): if pending is not None: chunk = pending + chunk @@ -816,8 +879,7 @@ class Response(object): else: pending = None - for line in lines: - yield line + yield from lines if pending is not None: yield pending @@ -829,13 +891,12 @@ class Response(object): if self._content is False: # Read the contents. if self._content_consumed: - raise RuntimeError( - 'The content for this response was already consumed') + raise RuntimeError("The content for this response was already consumed") if self.status_code == 0 or self.raw is None: self._content = None else: - self._content = b''.join(self.iter_content(CONTENT_CHUNK_SIZE)) or b'' + self._content = b"".join(self.iter_content(CONTENT_CHUNK_SIZE)) or b"" self._content_consumed = True # don't need to release the connection; that's been handled by urllib3 @@ -860,7 +921,7 @@ class Response(object): encoding = self.encoding if not self.content: - return str('') + return "" # Fallback to auto-detected encoding. if self.encoding is None: @@ -868,7 +929,7 @@ class Response(object): # Decode unicode from given encoding. try: - content = str(self.content, encoding, errors='replace') + content = str(self.content, encoding, errors="replace") except (LookupError, TypeError): # A LookupError is raised if the encoding was not found which could # indicate a misspelling or similar mistake. @@ -876,7 +937,7 @@ class Response(object): # A TypeError can be raised if encoding is None # # So we try blindly encoding. - content = str(self.content, errors='replace') + content = str(self.content, errors="replace") return content @@ -896,65 +957,65 @@ class Response(object): encoding = guess_json_utf(self.content) if encoding is not None: try: - return complexjson.loads( - self.content.decode(encoding), **kwargs - ) + return complexjson.loads(self.content.decode(encoding), **kwargs) except UnicodeDecodeError: # Wrong UTF codec detected; usually because it's not UTF-8 # but some other 8-bit codec. This is an RFC violation, # and the server didn't bother to tell us what codec *was* # used. pass + except JSONDecodeError as e: + raise RequestsJSONDecodeError(e.msg, e.doc, e.pos) try: return complexjson.loads(self.text, **kwargs) except JSONDecodeError as e: # Catch JSON-related errors and raise as requests.JSONDecodeError # This aliases json.JSONDecodeError and simplejson.JSONDecodeError - if is_py2: # e is a ValueError - raise RequestsJSONDecodeError(e.message) - else: - raise RequestsJSONDecodeError(e.msg, e.doc, e.pos) + raise RequestsJSONDecodeError(e.msg, e.doc, e.pos) @property def links(self): """Returns the parsed header links of the response, if any.""" - header = self.headers.get('link') + header = self.headers.get("link") - # l = MultiDict() - l = {} + resolved_links = {} if header: links = parse_header_links(header) for link in links: - key = link.get('rel') or link.get('url') - l[key] = link + key = link.get("rel") or link.get("url") + resolved_links[key] = link - return l + return resolved_links def raise_for_status(self): """Raises :class:`HTTPError`, if one occurred.""" - http_error_msg = '' + http_error_msg = "" if isinstance(self.reason, bytes): # We attempt to decode utf-8 first because some servers # choose to localize their reason strings. If the string # isn't utf-8, we fall back to iso-8859-1 for all other # encodings. (See PR #3538) try: - reason = self.reason.decode('utf-8') + reason = self.reason.decode("utf-8") except UnicodeDecodeError: - reason = self.reason.decode('iso-8859-1') + reason = self.reason.decode("iso-8859-1") else: reason = self.reason if 400 <= self.status_code < 500: - http_error_msg = u'%s Client Error: %s for url: %s' % (self.status_code, reason, self.url) + http_error_msg = ( + f"{self.status_code} Client Error: {reason} for url: {self.url}" + ) elif 500 <= self.status_code < 600: - http_error_msg = u'%s Server Error: %s for url: %s' % (self.status_code, reason, self.url) + http_error_msg = ( + f"{self.status_code} Server Error: {reason} for url: {self.url}" + ) if http_error_msg: raise HTTPError(http_error_msg, response=self) @@ -968,6 +1029,6 @@ class Response(object): if not self._content_consumed: self.raw.close() - release_conn = getattr(self.raw, 'release_conn', None) + release_conn = getattr(self.raw, "release_conn", None) if release_conn is not None: release_conn() diff --git a/lib/requests/packages.py b/lib/requests/packages.py index 00196bff..77c45c9e 100644 --- a/lib/requests/packages.py +++ b/lib/requests/packages.py @@ -3,24 +3,26 @@ import sys try: import chardet except ImportError: - import charset_normalizer as chardet import warnings - warnings.filterwarnings('ignore', 'Trying to detect', module='charset_normalizer') + import charset_normalizer as chardet + + warnings.filterwarnings("ignore", "Trying to detect", module="charset_normalizer") # This code exists for backwards compatibility reasons. # I don't like it either. Just look the other way. :) -for package in ('urllib3', 'idna'): +for package in ("urllib3", "idna"): locals()[package] = __import__(package) # This traversal is apparently necessary such that the identities are # preserved (requests.packages.urllib3.* is urllib3.*) for mod in list(sys.modules): - if mod == package or mod.startswith(package + '.'): - sys.modules['requests.packages.' + mod] = sys.modules[mod] + if mod == package or mod.startswith(f"{package}."): + sys.modules[f"requests.packages.{mod}"] = sys.modules[mod] target = chardet.__name__ for mod in list(sys.modules): - if mod == target or mod.startswith(target + '.'): - sys.modules['requests.packages.' + target.replace(target, 'chardet')] = sys.modules[mod] + if mod == target or mod.startswith(f"{target}."): + target = target.replace(target, "chardet") + sys.modules[f"requests.packages.{target}"] = sys.modules[mod] # Kinda cool, though, right? diff --git a/lib/requests/sessions.py b/lib/requests/sessions.py index 3f59cab9..6cb3b4da 100644 --- a/lib/requests/sessions.py +++ b/lib/requests/sessions.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """ requests.sessions ~~~~~~~~~~~~~~~~~ @@ -10,39 +8,52 @@ requests (cookies, auth, proxies). import os import sys import time -from datetime import timedelta from collections import OrderedDict +from datetime import timedelta -from .auth import _basic_auth_str -from .compat import cookielib, is_py3, urljoin, urlparse, Mapping -from .cookies import ( - cookiejar_from_dict, extract_cookies_to_jar, RequestsCookieJar, merge_cookies) -from .models import Request, PreparedRequest, DEFAULT_REDIRECT_LIMIT -from .hooks import default_hooks, dispatch_hook from ._internal_utils import to_native_string -from .utils import to_key_val_list, default_headers, DEFAULT_PORTS -from .exceptions import ( - TooManyRedirects, InvalidSchema, ChunkedEncodingError, ContentDecodingError) - -from .structures import CaseInsensitiveDict from .adapters import HTTPAdapter - -from .utils import ( - requote_uri, get_environ_proxies, get_netrc_auth, should_bypass_proxies, - get_auth_from_url, rewind_body, resolve_proxies +from .auth import _basic_auth_str +from .compat import Mapping, cookielib, urljoin, urlparse +from .cookies import ( + RequestsCookieJar, + cookiejar_from_dict, + extract_cookies_to_jar, + merge_cookies, ) - -from .status_codes import codes +from .exceptions import ( + ChunkedEncodingError, + ContentDecodingError, + InvalidSchema, + TooManyRedirects, +) +from .hooks import default_hooks, dispatch_hook # formerly defined here, reexposed here for backward compatibility -from .models import REDIRECT_STATI +from .models import ( # noqa: F401 + DEFAULT_REDIRECT_LIMIT, + REDIRECT_STATI, + PreparedRequest, + Request, +) +from .status_codes import codes +from .structures import CaseInsensitiveDict +from .utils import ( # noqa: F401 + DEFAULT_PORTS, + default_headers, + get_auth_from_url, + get_environ_proxies, + get_netrc_auth, + requote_uri, + resolve_proxies, + rewind_body, + should_bypass_proxies, + to_key_val_list, +) # Preferred clock, based on which one is more accurate on a given system. -if sys.platform == 'win32': - try: # Python 3.4+ - preferred_clock = time.perf_counter - except AttributeError: # Earlier than Python 3. - preferred_clock = time.clock +if sys.platform == "win32": + preferred_clock = time.perf_counter else: preferred_clock = time.time @@ -61,8 +72,7 @@ def merge_setting(request_setting, session_setting, dict_class=OrderedDict): # Bypass if not a dictionary (e.g. verify) if not ( - isinstance(session_setting, Mapping) and - isinstance(request_setting, Mapping) + isinstance(session_setting, Mapping) and isinstance(request_setting, Mapping) ): return request_setting @@ -84,17 +94,16 @@ def merge_hooks(request_hooks, session_hooks, dict_class=OrderedDict): This is necessary because when request_hooks == {'response': []}, the merge breaks Session hooks entirely. """ - if session_hooks is None or session_hooks.get('response') == []: + if session_hooks is None or session_hooks.get("response") == []: return request_hooks - if request_hooks is None or request_hooks.get('response') == []: + if request_hooks is None or request_hooks.get("response") == []: return session_hooks return merge_setting(request_hooks, session_hooks, dict_class) -class SessionRedirectMixin(object): - +class SessionRedirectMixin: def get_redirect_target(self, resp): """Receives a Response. Returns a redirect URI or ``None``""" # Due to the nature of how requests processes redirects this method will @@ -104,16 +113,15 @@ class SessionRedirectMixin(object): # to cache the redirect location onto the response object as a private # attribute. if resp.is_redirect: - location = resp.headers['location'] + location = resp.headers["location"] # Currently the underlying http module on py3 decode headers # in latin1, but empirical evidence suggests that latin1 is very # rarely used with non-ASCII characters in HTTP headers. # It is more likely to get UTF8 header rather than latin1. # This causes incorrect handling of UTF8 encoded location headers. # To solve this, we re-encode the location in latin1. - if is_py3: - location = location.encode('latin1') - return to_native_string(location, 'utf8') + location = location.encode("latin1") + return to_native_string(location, "utf8") return None def should_strip_auth(self, old_url, new_url): @@ -126,23 +134,40 @@ class SessionRedirectMixin(object): # ports. This isn't specified by RFC 7235, but is kept to avoid # breaking backwards compatibility with older versions of requests # that allowed any redirects on the same host. - if (old_parsed.scheme == 'http' and old_parsed.port in (80, None) - and new_parsed.scheme == 'https' and new_parsed.port in (443, None)): + if ( + old_parsed.scheme == "http" + and old_parsed.port in (80, None) + and new_parsed.scheme == "https" + and new_parsed.port in (443, None) + ): return False # Handle default port usage corresponding to scheme. changed_port = old_parsed.port != new_parsed.port changed_scheme = old_parsed.scheme != new_parsed.scheme default_port = (DEFAULT_PORTS.get(old_parsed.scheme, None), None) - if (not changed_scheme and old_parsed.port in default_port - and new_parsed.port in default_port): + if ( + not changed_scheme + and old_parsed.port in default_port + and new_parsed.port in default_port + ): return False # Standard case: root URI must match return changed_port or changed_scheme - def resolve_redirects(self, resp, req, stream=False, timeout=None, - verify=True, cert=None, proxies=None, yield_requests=False, **adapter_kwargs): + def resolve_redirects( + self, + resp, + req, + stream=False, + timeout=None, + verify=True, + cert=None, + proxies=None, + yield_requests=False, + **adapter_kwargs, + ): """Receives a Response. Returns a generator of Responses or Requests.""" hist = [] # keep track of history @@ -163,19 +188,21 @@ class SessionRedirectMixin(object): resp.raw.read(decode_content=False) if len(resp.history) >= self.max_redirects: - raise TooManyRedirects('Exceeded {} redirects.'.format(self.max_redirects), response=resp) + raise TooManyRedirects( + f"Exceeded {self.max_redirects} redirects.", response=resp + ) # Release the connection back into the pool. resp.close() # Handle redirection without scheme (see: RFC 1808 Section 4) - if url.startswith('//'): + if url.startswith("//"): parsed_rurl = urlparse(resp.url) - url = ':'.join([to_native_string(parsed_rurl.scheme), url]) + url = ":".join([to_native_string(parsed_rurl.scheme), url]) # Normalize url case and attach previous fragment if needed (RFC 7231 7.1.2) parsed = urlparse(url) - if parsed.fragment == '' and previous_fragment: + if parsed.fragment == "" and previous_fragment: parsed = parsed._replace(fragment=previous_fragment) elif parsed.fragment: previous_fragment = parsed.fragment @@ -194,15 +221,18 @@ class SessionRedirectMixin(object): self.rebuild_method(prepared_request, resp) # https://github.com/psf/requests/issues/1084 - if resp.status_code not in (codes.temporary_redirect, codes.permanent_redirect): + if resp.status_code not in ( + codes.temporary_redirect, + codes.permanent_redirect, + ): # https://github.com/psf/requests/issues/3490 - purged_headers = ('Content-Length', 'Content-Type', 'Transfer-Encoding') + purged_headers = ("Content-Length", "Content-Type", "Transfer-Encoding") for header in purged_headers: prepared_request.headers.pop(header, None) prepared_request.body = None headers = prepared_request.headers - headers.pop('Cookie', None) + headers.pop("Cookie", None) # Extract any cookies sent on the response to the cookiejar # in the new request. Because we've mutated our copied prepared @@ -218,9 +248,8 @@ class SessionRedirectMixin(object): # A failed tell() sets `_body_position` to `object()`. This non-None # value ensures `rewindable` will be True, allowing us to raise an # UnrewindableBodyError, instead of hanging the connection. - rewindable = ( - prepared_request._body_position is not None and - ('Content-Length' in headers or 'Transfer-Encoding' in headers) + rewindable = prepared_request._body_position is not None and ( + "Content-Length" in headers or "Transfer-Encoding" in headers ) # Attempt to rewind consumed file-like object. @@ -242,7 +271,7 @@ class SessionRedirectMixin(object): cert=cert, proxies=proxies, allow_redirects=False, - **adapter_kwargs + **adapter_kwargs, ) extract_cookies_to_jar(self.cookies, prepared_request, resp.raw) @@ -259,10 +288,12 @@ class SessionRedirectMixin(object): headers = prepared_request.headers url = prepared_request.url - if 'Authorization' in headers and self.should_strip_auth(response.request.url, url): + if "Authorization" in headers and self.should_strip_auth( + response.request.url, url + ): # If we get redirected to a new host, we should strip out any # authentication headers. - del headers['Authorization'] + del headers["Authorization"] # .netrc might have more auth for us on our new host. new_auth = get_netrc_auth(url) if self.trust_env else None @@ -285,8 +316,8 @@ class SessionRedirectMixin(object): scheme = urlparse(prepared_request.url).scheme new_proxies = resolve_proxies(prepared_request, proxies, self.trust_env) - if 'Proxy-Authorization' in headers: - del headers['Proxy-Authorization'] + if "Proxy-Authorization" in headers: + del headers["Proxy-Authorization"] try: username, password = get_auth_from_url(new_proxies[scheme]) @@ -294,7 +325,7 @@ class SessionRedirectMixin(object): username, password = None, None if username and password: - headers['Proxy-Authorization'] = _basic_auth_str(username, password) + headers["Proxy-Authorization"] = _basic_auth_str(username, password) return new_proxies @@ -305,18 +336,18 @@ class SessionRedirectMixin(object): method = prepared_request.method # https://tools.ietf.org/html/rfc7231#section-6.4.4 - if response.status_code == codes.see_other and method != 'HEAD': - method = 'GET' + if response.status_code == codes.see_other and method != "HEAD": + method = "GET" # Do what the browsers do, despite standards... # First, turn 302s into GETs. - if response.status_code == codes.found and method != 'HEAD': - method = 'GET' + if response.status_code == codes.found and method != "HEAD": + method = "GET" # Second, if a POST is responded to with a 301, turn it into a GET. # This bizarre behaviour is explained in Issue 1704. - if response.status_code == codes.moved and method == 'POST': - method = 'GET' + if response.status_code == codes.moved and method == "POST": + method = "GET" prepared_request.method = method @@ -341,9 +372,18 @@ class Session(SessionRedirectMixin): """ __attrs__ = [ - 'headers', 'cookies', 'auth', 'proxies', 'hooks', 'params', 'verify', - 'cert', 'adapters', 'stream', 'trust_env', - 'max_redirects', + "headers", + "cookies", + "auth", + "proxies", + "hooks", + "params", + "verify", + "cert", + "adapters", + "stream", + "trust_env", + "max_redirects", ] def __init__(self): @@ -405,8 +445,8 @@ class Session(SessionRedirectMixin): # Default connection adapters. self.adapters = OrderedDict() - self.mount('https://', HTTPAdapter()) - self.mount('http://', HTTPAdapter()) + self.mount("https://", HTTPAdapter()) + self.mount("http://", HTTPAdapter()) def __enter__(self): return self @@ -432,7 +472,8 @@ class Session(SessionRedirectMixin): # Merge with session cookies merged_cookies = merge_cookies( - merge_cookies(RequestsCookieJar(), self.cookies), cookies) + merge_cookies(RequestsCookieJar(), self.cookies), cookies + ) # Set environment's basic authentication if not explicitly set. auth = request.auth @@ -446,7 +487,9 @@ class Session(SessionRedirectMixin): files=request.files, data=request.data, json=request.json, - headers=merge_setting(request.headers, self.headers, dict_class=CaseInsensitiveDict), + headers=merge_setting( + request.headers, self.headers, dict_class=CaseInsensitiveDict + ), params=merge_setting(request.params, self.params), auth=merge_setting(auth, self.auth), cookies=merged_cookies, @@ -454,10 +497,25 @@ class Session(SessionRedirectMixin): ) return p - def request(self, method, url, - params=None, data=None, headers=None, cookies=None, files=None, - auth=None, timeout=None, allow_redirects=True, proxies=None, - hooks=None, stream=None, verify=None, cert=None, json=None): + def request( + self, + method, + url, + params=None, + data=None, + headers=None, + cookies=None, + files=None, + auth=None, + timeout=None, + allow_redirects=True, + proxies=None, + hooks=None, + stream=None, + verify=None, + cert=None, + json=None, + ): """Constructs a :class:`Request `, prepares it and sends it. Returns :class:`Response ` object. @@ -493,7 +551,7 @@ class Session(SessionRedirectMixin): ``False``, requests will accept any TLS certificate presented by the server, and will ignore hostname mismatches and/or expired certificates, which will make your application vulnerable to - man-in-the-middle (MitM) attacks. Setting verify to ``False`` + man-in-the-middle (MitM) attacks. Setting verify to ``False`` may be useful during local development or testing. :param cert: (optional) if String, path to ssl client cert file (.pem). If Tuple, ('cert', 'key') pair. @@ -522,8 +580,8 @@ class Session(SessionRedirectMixin): # Send the request. send_kwargs = { - 'timeout': timeout, - 'allow_redirects': allow_redirects, + "timeout": timeout, + "allow_redirects": allow_redirects, } send_kwargs.update(settings) resp = self.send(prep, **send_kwargs) @@ -538,8 +596,8 @@ class Session(SessionRedirectMixin): :rtype: requests.Response """ - kwargs.setdefault('allow_redirects', True) - return self.request('GET', url, **kwargs) + kwargs.setdefault("allow_redirects", True) + return self.request("GET", url, **kwargs) def options(self, url, **kwargs): r"""Sends a OPTIONS request. Returns :class:`Response` object. @@ -549,8 +607,8 @@ class Session(SessionRedirectMixin): :rtype: requests.Response """ - kwargs.setdefault('allow_redirects', True) - return self.request('OPTIONS', url, **kwargs) + kwargs.setdefault("allow_redirects", True) + return self.request("OPTIONS", url, **kwargs) def head(self, url, **kwargs): r"""Sends a HEAD request. Returns :class:`Response` object. @@ -560,8 +618,8 @@ class Session(SessionRedirectMixin): :rtype: requests.Response """ - kwargs.setdefault('allow_redirects', False) - return self.request('HEAD', url, **kwargs) + kwargs.setdefault("allow_redirects", False) + return self.request("HEAD", url, **kwargs) def post(self, url, data=None, json=None, **kwargs): r"""Sends a POST request. Returns :class:`Response` object. @@ -574,7 +632,7 @@ class Session(SessionRedirectMixin): :rtype: requests.Response """ - return self.request('POST', url, data=data, json=json, **kwargs) + return self.request("POST", url, data=data, json=json, **kwargs) def put(self, url, data=None, **kwargs): r"""Sends a PUT request. Returns :class:`Response` object. @@ -586,7 +644,7 @@ class Session(SessionRedirectMixin): :rtype: requests.Response """ - return self.request('PUT', url, data=data, **kwargs) + return self.request("PUT", url, data=data, **kwargs) def patch(self, url, data=None, **kwargs): r"""Sends a PATCH request. Returns :class:`Response` object. @@ -598,7 +656,7 @@ class Session(SessionRedirectMixin): :rtype: requests.Response """ - return self.request('PATCH', url, data=data, **kwargs) + return self.request("PATCH", url, data=data, **kwargs) def delete(self, url, **kwargs): r"""Sends a DELETE request. Returns :class:`Response` object. @@ -608,7 +666,7 @@ class Session(SessionRedirectMixin): :rtype: requests.Response """ - return self.request('DELETE', url, **kwargs) + return self.request("DELETE", url, **kwargs) def send(self, request, **kwargs): """Send a given PreparedRequest. @@ -617,22 +675,20 @@ class Session(SessionRedirectMixin): """ # Set defaults that the hooks can utilize to ensure they always have # the correct parameters to reproduce the previous request. - kwargs.setdefault('stream', self.stream) - kwargs.setdefault('verify', self.verify) - kwargs.setdefault('cert', self.cert) - if 'proxies' not in kwargs: - kwargs['proxies'] = resolve_proxies( - request, self.proxies, self.trust_env - ) + kwargs.setdefault("stream", self.stream) + kwargs.setdefault("verify", self.verify) + kwargs.setdefault("cert", self.cert) + if "proxies" not in kwargs: + kwargs["proxies"] = resolve_proxies(request, self.proxies, self.trust_env) # It's possible that users might accidentally send a Request object. # Guard against that specific failure case. if isinstance(request, Request): - raise ValueError('You can only send PreparedRequests.') + raise ValueError("You can only send PreparedRequests.") # Set up variables needed for resolve_redirects and dispatching of hooks - allow_redirects = kwargs.pop('allow_redirects', True) - stream = kwargs.get('stream') + allow_redirects = kwargs.pop("allow_redirects", True) + stream = kwargs.get("stream") hooks = request.hooks # Get the appropriate adapter to use @@ -649,7 +705,7 @@ class Session(SessionRedirectMixin): r.elapsed = timedelta(seconds=elapsed) # Response manipulation hooks - r = dispatch_hook('response', hooks, r, **kwargs) + r = dispatch_hook("response", hooks, r, **kwargs) # Persist cookies if r.history: @@ -679,7 +735,9 @@ class Session(SessionRedirectMixin): # If redirects aren't being followed, store the response on the Request for Response.next(). if not allow_redirects: try: - r._next = next(self.resolve_redirects(r, request, yield_requests=True, **kwargs)) + r._next = next( + self.resolve_redirects(r, request, yield_requests=True, **kwargs) + ) except StopIteration: pass @@ -697,16 +755,19 @@ class Session(SessionRedirectMixin): # Gather clues from the surrounding environment. if self.trust_env: # Set environment's proxies. - no_proxy = proxies.get('no_proxy') if proxies is not None else None + no_proxy = proxies.get("no_proxy") if proxies is not None else None env_proxies = get_environ_proxies(url, no_proxy=no_proxy) for (k, v) in env_proxies.items(): proxies.setdefault(k, v) - # Look for requests environment configuration and be compatible - # with cURL. + # Look for requests environment configuration + # and be compatible with cURL. if verify is True or verify is None: - verify = (os.environ.get('REQUESTS_CA_BUNDLE') or - os.environ.get('CURL_CA_BUNDLE')) + verify = ( + os.environ.get("REQUESTS_CA_BUNDLE") + or os.environ.get("CURL_CA_BUNDLE") + or verify + ) # Merge all the kwargs. proxies = merge_setting(proxies, self.proxies) @@ -714,8 +775,7 @@ class Session(SessionRedirectMixin): verify = merge_setting(verify, self.verify) cert = merge_setting(cert, self.cert) - return {'verify': verify, 'proxies': proxies, 'stream': stream, - 'cert': cert} + return {"proxies": proxies, "stream": stream, "verify": verify, "cert": cert} def get_adapter(self, url): """ @@ -729,7 +789,7 @@ class Session(SessionRedirectMixin): return adapter # Nothing matches :-/ - raise InvalidSchema("No connection adapters were found for {!r}".format(url)) + raise InvalidSchema(f"No connection adapters were found for {url!r}") def close(self): """Closes all adapters and as such the session""" diff --git a/lib/requests/status_codes.py b/lib/requests/status_codes.py index d80a7cd4..4bd072be 100644 --- a/lib/requests/status_codes.py +++ b/lib/requests/status_codes.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - r""" The ``codes`` object defines a mapping from common names for HTTP statuses to their numerical codes, accessible either as attributes or as dictionary @@ -23,101 +21,108 @@ the names are allowed. For example, ``codes.ok``, ``codes.OK``, and from .structures import LookupDict _codes = { - # Informational. - 100: ('continue',), - 101: ('switching_protocols',), - 102: ('processing',), - 103: ('checkpoint',), - 122: ('uri_too_long', 'request_uri_too_long'), - 200: ('ok', 'okay', 'all_ok', 'all_okay', 'all_good', '\\o/', '✓'), - 201: ('created',), - 202: ('accepted',), - 203: ('non_authoritative_info', 'non_authoritative_information'), - 204: ('no_content',), - 205: ('reset_content', 'reset'), - 206: ('partial_content', 'partial'), - 207: ('multi_status', 'multiple_status', 'multi_stati', 'multiple_stati'), - 208: ('already_reported',), - 226: ('im_used',), - + 100: ("continue",), + 101: ("switching_protocols",), + 102: ("processing",), + 103: ("checkpoint",), + 122: ("uri_too_long", "request_uri_too_long"), + 200: ("ok", "okay", "all_ok", "all_okay", "all_good", "\\o/", "✓"), + 201: ("created",), + 202: ("accepted",), + 203: ("non_authoritative_info", "non_authoritative_information"), + 204: ("no_content",), + 205: ("reset_content", "reset"), + 206: ("partial_content", "partial"), + 207: ("multi_status", "multiple_status", "multi_stati", "multiple_stati"), + 208: ("already_reported",), + 226: ("im_used",), # Redirection. - 300: ('multiple_choices',), - 301: ('moved_permanently', 'moved', '\\o-'), - 302: ('found',), - 303: ('see_other', 'other'), - 304: ('not_modified',), - 305: ('use_proxy',), - 306: ('switch_proxy',), - 307: ('temporary_redirect', 'temporary_moved', 'temporary'), - 308: ('permanent_redirect', - 'resume_incomplete', 'resume',), # These 2 to be removed in 3.0 - + 300: ("multiple_choices",), + 301: ("moved_permanently", "moved", "\\o-"), + 302: ("found",), + 303: ("see_other", "other"), + 304: ("not_modified",), + 305: ("use_proxy",), + 306: ("switch_proxy",), + 307: ("temporary_redirect", "temporary_moved", "temporary"), + 308: ( + "permanent_redirect", + "resume_incomplete", + "resume", + ), # "resume" and "resume_incomplete" to be removed in 3.0 # Client Error. - 400: ('bad_request', 'bad'), - 401: ('unauthorized',), - 402: ('payment_required', 'payment'), - 403: ('forbidden',), - 404: ('not_found', '-o-'), - 405: ('method_not_allowed', 'not_allowed'), - 406: ('not_acceptable',), - 407: ('proxy_authentication_required', 'proxy_auth', 'proxy_authentication'), - 408: ('request_timeout', 'timeout'), - 409: ('conflict',), - 410: ('gone',), - 411: ('length_required',), - 412: ('precondition_failed', 'precondition'), - 413: ('request_entity_too_large',), - 414: ('request_uri_too_large',), - 415: ('unsupported_media_type', 'unsupported_media', 'media_type'), - 416: ('requested_range_not_satisfiable', 'requested_range', 'range_not_satisfiable'), - 417: ('expectation_failed',), - 418: ('im_a_teapot', 'teapot', 'i_am_a_teapot'), - 421: ('misdirected_request',), - 422: ('unprocessable_entity', 'unprocessable'), - 423: ('locked',), - 424: ('failed_dependency', 'dependency'), - 425: ('unordered_collection', 'unordered'), - 426: ('upgrade_required', 'upgrade'), - 428: ('precondition_required', 'precondition'), - 429: ('too_many_requests', 'too_many'), - 431: ('header_fields_too_large', 'fields_too_large'), - 444: ('no_response', 'none'), - 449: ('retry_with', 'retry'), - 450: ('blocked_by_windows_parental_controls', 'parental_controls'), - 451: ('unavailable_for_legal_reasons', 'legal_reasons'), - 499: ('client_closed_request',), - + 400: ("bad_request", "bad"), + 401: ("unauthorized",), + 402: ("payment_required", "payment"), + 403: ("forbidden",), + 404: ("not_found", "-o-"), + 405: ("method_not_allowed", "not_allowed"), + 406: ("not_acceptable",), + 407: ("proxy_authentication_required", "proxy_auth", "proxy_authentication"), + 408: ("request_timeout", "timeout"), + 409: ("conflict",), + 410: ("gone",), + 411: ("length_required",), + 412: ("precondition_failed", "precondition"), + 413: ("request_entity_too_large",), + 414: ("request_uri_too_large",), + 415: ("unsupported_media_type", "unsupported_media", "media_type"), + 416: ( + "requested_range_not_satisfiable", + "requested_range", + "range_not_satisfiable", + ), + 417: ("expectation_failed",), + 418: ("im_a_teapot", "teapot", "i_am_a_teapot"), + 421: ("misdirected_request",), + 422: ("unprocessable_entity", "unprocessable"), + 423: ("locked",), + 424: ("failed_dependency", "dependency"), + 425: ("unordered_collection", "unordered"), + 426: ("upgrade_required", "upgrade"), + 428: ("precondition_required", "precondition"), + 429: ("too_many_requests", "too_many"), + 431: ("header_fields_too_large", "fields_too_large"), + 444: ("no_response", "none"), + 449: ("retry_with", "retry"), + 450: ("blocked_by_windows_parental_controls", "parental_controls"), + 451: ("unavailable_for_legal_reasons", "legal_reasons"), + 499: ("client_closed_request",), # Server Error. - 500: ('internal_server_error', 'server_error', '/o\\', '✗'), - 501: ('not_implemented',), - 502: ('bad_gateway',), - 503: ('service_unavailable', 'unavailable'), - 504: ('gateway_timeout',), - 505: ('http_version_not_supported', 'http_version'), - 506: ('variant_also_negotiates',), - 507: ('insufficient_storage',), - 509: ('bandwidth_limit_exceeded', 'bandwidth'), - 510: ('not_extended',), - 511: ('network_authentication_required', 'network_auth', 'network_authentication'), + 500: ("internal_server_error", "server_error", "/o\\", "✗"), + 501: ("not_implemented",), + 502: ("bad_gateway",), + 503: ("service_unavailable", "unavailable"), + 504: ("gateway_timeout",), + 505: ("http_version_not_supported", "http_version"), + 506: ("variant_also_negotiates",), + 507: ("insufficient_storage",), + 509: ("bandwidth_limit_exceeded", "bandwidth"), + 510: ("not_extended",), + 511: ("network_authentication_required", "network_auth", "network_authentication"), } -codes = LookupDict(name='status_codes') +codes = LookupDict(name="status_codes") + def _init(): for code, titles in _codes.items(): for title in titles: setattr(codes, title, code) - if not title.startswith(('\\', '/')): + if not title.startswith(("\\", "/")): setattr(codes, title.upper(), code) def doc(code): - names = ', '.join('``%s``' % n for n in _codes[code]) - return '* %d: %s' % (code, names) + names = ", ".join(f"``{n}``" for n in _codes[code]) + return "* %d: %s" % (code, names) global __doc__ - __doc__ = (__doc__ + '\n' + - '\n'.join(doc(code) for code in sorted(_codes)) - if __doc__ is not None else None) + __doc__ = ( + __doc__ + "\n" + "\n".join(doc(code) for code in sorted(_codes)) + if __doc__ is not None + else None + ) + _init() diff --git a/lib/requests/structures.py b/lib/requests/structures.py index 8ee0ba7a..188e13e4 100644 --- a/lib/requests/structures.py +++ b/lib/requests/structures.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """ requests.structures ~~~~~~~~~~~~~~~~~~~ @@ -64,11 +62,7 @@ class CaseInsensitiveDict(MutableMapping): def lower_items(self): """Like iteritems(), but with all lowercase keys.""" - return ( - (lowerkey, keyval[1]) - for (lowerkey, keyval) - in self._store.items() - ) + return ((lowerkey, keyval[1]) for (lowerkey, keyval) in self._store.items()) def __eq__(self, other): if isinstance(other, Mapping): @@ -91,10 +85,10 @@ class LookupDict(dict): def __init__(self, name=None): self.name = name - super(LookupDict, self).__init__() + super().__init__() def __repr__(self): - return '' % (self.name) + return f"" def __getitem__(self, key): # We allow fall-through here, so values default to None diff --git a/lib/requests/utils.py b/lib/requests/utils.py index 153776c7..ad535838 100644 --- a/lib/requests/utils.py +++ b/lib/requests/utils.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """ requests.utils ~~~~~~~~~~~~~~ @@ -20,28 +18,46 @@ import tempfile import warnings import zipfile from collections import OrderedDict -from urllib3.util import make_headers -from urllib3.util import parse_url -from .__version__ import __version__ +from urllib3.util import make_headers, parse_url + from . import certs +from .__version__ import __version__ + # to_native_string is unused here, but imported here for backwards compatibility -from ._internal_utils import to_native_string +from ._internal_utils import HEADER_VALIDATORS, to_native_string # noqa: F401 +from .compat import ( + Mapping, + basestring, + bytes, + getproxies, + getproxies_environment, + integer_types, +) from .compat import parse_http_list as _parse_list_header from .compat import ( - quote, urlparse, bytes, str, unquote, getproxies, - proxy_bypass, urlunparse, basestring, integer_types, is_py3, - proxy_bypass_environment, getproxies_environment, Mapping) + proxy_bypass, + proxy_bypass_environment, + quote, + str, + unquote, + urlparse, + urlunparse, +) from .cookies import cookiejar_from_dict -from .structures import CaseInsensitiveDict from .exceptions import ( - InvalidURL, InvalidHeader, FileModeWarning, UnrewindableBodyError) + FileModeWarning, + InvalidHeader, + InvalidURL, + UnrewindableBodyError, +) +from .structures import CaseInsensitiveDict -NETRC_FILES = ('.netrc', '_netrc') +NETRC_FILES = (".netrc", "_netrc") DEFAULT_CA_BUNDLE_PATH = certs.where() -DEFAULT_PORTS = {'http': 80, 'https': 443} +DEFAULT_PORTS = {"http": 80, "https": 443} # Ensure that ', ' is used to preserve previous delimiter behavior. DEFAULT_ACCEPT_ENCODING = ", ".join( @@ -49,28 +65,25 @@ DEFAULT_ACCEPT_ENCODING = ", ".join( ) -if sys.platform == 'win32': +if sys.platform == "win32": # provide a proxy_bypass version on Windows without DNS lookups def proxy_bypass_registry(host): try: - if is_py3: - import winreg - else: - import _winreg as winreg + import winreg except ImportError: return False try: - internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER, - r'Software\Microsoft\Windows\CurrentVersion\Internet Settings') + internetSettings = winreg.OpenKey( + winreg.HKEY_CURRENT_USER, + r"Software\Microsoft\Windows\CurrentVersion\Internet Settings", + ) # ProxyEnable could be REG_SZ or REG_DWORD, normalizing it - proxyEnable = int(winreg.QueryValueEx(internetSettings, - 'ProxyEnable')[0]) + proxyEnable = int(winreg.QueryValueEx(internetSettings, "ProxyEnable")[0]) # ProxyOverride is almost always a string - proxyOverride = winreg.QueryValueEx(internetSettings, - 'ProxyOverride')[0] - except OSError: + proxyOverride = winreg.QueryValueEx(internetSettings, "ProxyOverride")[0] + except (OSError, ValueError): return False if not proxyEnable or not proxyOverride: return False @@ -78,15 +91,15 @@ if sys.platform == 'win32': # make a check value list from the registry entry: replace the # '' string by the localhost entry and the corresponding # canonical entry. - proxyOverride = proxyOverride.split(';') + proxyOverride = proxyOverride.split(";") # now check if we match one of the registry values. for test in proxyOverride: - if test == '': - if '.' not in host: + if test == "": + if "." not in host: return True - test = test.replace(".", r"\.") # mask dots - test = test.replace("*", r".*") # change glob sequence - test = test.replace("?", r".") # change glob char + test = test.replace(".", r"\.") # mask dots + test = test.replace("*", r".*") # change glob sequence + test = test.replace("?", r".") # change glob char if re.match(test, host, re.I): return True return False @@ -106,7 +119,7 @@ if sys.platform == 'win32': def dict_to_sequence(d): """Returns an internal sequence dictionary update.""" - if hasattr(d, 'items'): + if hasattr(d, "items"): d = d.items() return d @@ -116,13 +129,13 @@ def super_len(o): total_length = None current_position = 0 - if hasattr(o, '__len__'): + if hasattr(o, "__len__"): total_length = len(o) - elif hasattr(o, 'len'): + elif hasattr(o, "len"): total_length = o.len - elif hasattr(o, 'fileno'): + elif hasattr(o, "fileno"): try: fileno = o.fileno() except (io.UnsupportedOperation, AttributeError): @@ -135,21 +148,23 @@ def super_len(o): # Having used fstat to determine the file length, we need to # confirm that this file was opened up in binary mode. - if 'b' not in o.mode: - warnings.warn(( - "Requests has determined the content-length for this " - "request using the binary size of the file: however, the " - "file has been opened in text mode (i.e. without the 'b' " - "flag in the mode). This may lead to an incorrect " - "content-length. In Requests 3.0, support will be removed " - "for files in text mode."), - FileModeWarning + if "b" not in o.mode: + warnings.warn( + ( + "Requests has determined the content-length for this " + "request using the binary size of the file: however, the " + "file has been opened in text mode (i.e. without the 'b' " + "flag in the mode). This may lead to an incorrect " + "content-length. In Requests 3.0, support will be removed " + "for files in text mode." + ), + FileModeWarning, ) - if hasattr(o, 'tell'): + if hasattr(o, "tell"): try: current_position = o.tell() - except (OSError, IOError): + except OSError: # This can happen in some weird situations, such as when the file # is actually a special file descriptor like stdin. In this # instance, we don't know what the length is, so set it to zero and @@ -157,7 +172,7 @@ def super_len(o): if total_length is not None: current_position = total_length else: - if hasattr(o, 'seek') and total_length is None: + if hasattr(o, "seek") and total_length is None: # StringIO and BytesIO have seek but no usable fileno try: # seek to end of file @@ -167,7 +182,7 @@ def super_len(o): # seek back to current position to support # partially read file-like objects o.seek(current_position or 0) - except (OSError, IOError): + except OSError: total_length = 0 if total_length is None: @@ -179,14 +194,14 @@ def super_len(o): def get_netrc_auth(url, raise_errors=False): """Returns the Requests tuple auth for a given url from netrc.""" - netrc_file = os.environ.get('NETRC') + netrc_file = os.environ.get("NETRC") if netrc_file is not None: netrc_locations = (netrc_file,) else: - netrc_locations = ('~/{}'.format(f) for f in NETRC_FILES) + netrc_locations = (f"~/{f}" for f in NETRC_FILES) try: - from netrc import netrc, NetrcParseError + from netrc import NetrcParseError, netrc netrc_path = None @@ -211,18 +226,18 @@ def get_netrc_auth(url, raise_errors=False): # Strip port numbers from netloc. This weird `if...encode`` dance is # used for Python 3.2, which doesn't support unicode literals. - splitstr = b':' + splitstr = b":" if isinstance(url, str): - splitstr = splitstr.decode('ascii') + splitstr = splitstr.decode("ascii") host = ri.netloc.split(splitstr)[0] try: _netrc = netrc(netrc_path).authenticators(host) if _netrc: # Return with login / password - login_i = (0 if _netrc[0] else 1) + login_i = 0 if _netrc[0] else 1 return (_netrc[login_i], _netrc[2]) - except (NetrcParseError, IOError): + except (NetrcParseError, OSError): # If there was a parsing error or a permissions issue reading the file, # we'll just skip netrc auth unless explicitly asked to raise errors. if raise_errors: @@ -235,9 +250,8 @@ def get_netrc_auth(url, raise_errors=False): def guess_filename(obj): """Tries to guess the filename of the given object.""" - name = getattr(obj, 'name', None) - if (name and isinstance(name, basestring) and name[0] != '<' and - name[-1] != '>'): + name = getattr(obj, "name", None) + if name and isinstance(name, basestring) and name[0] != "<" and name[-1] != ">": return os.path.basename(name) @@ -259,7 +273,7 @@ def extract_zipped_paths(path): # If we don't check for an empty prefix after the split (in other words, archive remains unchanged after the split), # we _can_ end up in an infinite loop on a rare corner case affecting a small number of users break - member = '/'.join([prefix, member]) + member = "/".join([prefix, member]) if not zipfile.is_zipfile(archive): return path @@ -270,7 +284,7 @@ def extract_zipped_paths(path): # we have a valid zip archive and a valid member of that archive tmp = tempfile.gettempdir() - extracted_path = os.path.join(tmp, member.split('/')[-1]) + extracted_path = os.path.join(tmp, member.split("/")[-1]) if not os.path.exists(extracted_path): # use read + write to avoid the creating nested folders, we only want the file, avoids mkdir racing condition with atomic_open(extracted_path) as file_handler: @@ -281,12 +295,11 @@ def extract_zipped_paths(path): @contextlib.contextmanager def atomic_open(filename): """Write a file to the disk in an atomic fashion""" - replacer = os.rename if sys.version_info[0] == 2 else os.replace tmp_descriptor, tmp_name = tempfile.mkstemp(dir=os.path.dirname(filename)) try: - with os.fdopen(tmp_descriptor, 'wb') as tmp_handler: + with os.fdopen(tmp_descriptor, "wb") as tmp_handler: yield tmp_handler - replacer(tmp_name, filename) + os.replace(tmp_name, filename) except BaseException: os.remove(tmp_name) raise @@ -314,7 +327,7 @@ def from_key_val_list(value): return None if isinstance(value, (str, bytes, bool, int)): - raise ValueError('cannot encode objects that are not 2-tuples') + raise ValueError("cannot encode objects that are not 2-tuples") return OrderedDict(value) @@ -340,7 +353,7 @@ def to_key_val_list(value): return None if isinstance(value, (str, bytes, bool, int)): - raise ValueError('cannot encode objects that are not 2-tuples') + raise ValueError("cannot encode objects that are not 2-tuples") if isinstance(value, Mapping): value = value.items() @@ -405,10 +418,10 @@ def parse_dict_header(value): """ result = {} for item in _parse_list_header(value): - if '=' not in item: + if "=" not in item: result[item] = None continue - name, value = item.split('=', 1) + name, value = item.split("=", 1) if value[:1] == value[-1:] == '"': value = unquote_header_value(value[1:-1]) result[name] = value @@ -436,8 +449,8 @@ def unquote_header_value(value, is_filename=False): # replace sequence below on a UNC path has the effect of turning # the leading double slash into a single slash and then # _fix_ie_filename() doesn't work correctly. See #458. - if not is_filename or value[:2] != '\\\\': - return value.replace('\\\\', '\\').replace('\\"', '"') + if not is_filename or value[:2] != "\\\\": + return value.replace("\\\\", "\\").replace('\\"', '"') return value @@ -472,19 +485,24 @@ def get_encodings_from_content(content): :param content: bytestring to extract encodings from. """ - warnings.warn(( - 'In requests 3.0, get_encodings_from_content will be removed. For ' - 'more information, please see the discussion on issue #2266. (This' - ' warning should only appear once.)'), - DeprecationWarning) + warnings.warn( + ( + "In requests 3.0, get_encodings_from_content will be removed. For " + "more information, please see the discussion on issue #2266. (This" + " warning should only appear once.)" + ), + DeprecationWarning, + ) charset_re = re.compile(r']', flags=re.I) pragma_re = re.compile(r']', flags=re.I) xml_re = re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]') - return (charset_re.findall(content) + - pragma_re.findall(content) + - xml_re.findall(content)) + return ( + charset_re.findall(content) + + pragma_re.findall(content) + + xml_re.findall(content) + ) def _parse_content_type_header(header): @@ -495,7 +513,7 @@ def _parse_content_type_header(header): parameters """ - tokens = header.split(';') + tokens = header.split(";") content_type, params = tokens[0].strip(), tokens[1:] params_dict = {} items_to_strip = "\"' " @@ -507,7 +525,7 @@ def _parse_content_type_header(header): index_of_equals = param.find("=") if index_of_equals != -1: key = param[:index_of_equals].strip(items_to_strip) - value = param[index_of_equals + 1:].strip(items_to_strip) + value = param[index_of_equals + 1 :].strip(items_to_strip) params_dict[key.lower()] = value return content_type, params_dict @@ -519,38 +537,37 @@ def get_encoding_from_headers(headers): :rtype: str """ - content_type = headers.get('content-type') + content_type = headers.get("content-type") if not content_type: return None content_type, params = _parse_content_type_header(content_type) - if 'charset' in params: - return params['charset'].strip("'\"") + if "charset" in params: + return params["charset"].strip("'\"") - if 'text' in content_type: - return 'ISO-8859-1' + if "text" in content_type: + return "ISO-8859-1" - if 'application/json' in content_type: + if "application/json" in content_type: # Assume UTF-8 based on RFC 4627: https://www.ietf.org/rfc/rfc4627.txt since the charset was unset - return 'utf-8' + return "utf-8" def stream_decode_response_unicode(iterator, r): - """Stream decodes a iterator.""" + """Stream decodes an iterator.""" if r.encoding is None: - for item in iterator: - yield item + yield from iterator return - decoder = codecs.getincrementaldecoder(r.encoding)(errors='replace') + decoder = codecs.getincrementaldecoder(r.encoding)(errors="replace") for chunk in iterator: rv = decoder.decode(chunk) if rv: yield rv - rv = decoder.decode(b'', final=True) + rv = decoder.decode(b"", final=True) if rv: yield rv @@ -561,7 +578,7 @@ def iter_slices(string, slice_length): if slice_length is None or slice_length <= 0: slice_length = len(string) while pos < len(string): - yield string[pos:pos + slice_length] + yield string[pos : pos + slice_length] pos += slice_length @@ -577,11 +594,14 @@ def get_unicode_from_response(r): :rtype: str """ - warnings.warn(( - 'In requests 3.0, get_unicode_from_response will be removed. For ' - 'more information, please see the discussion on issue #2266. (This' - ' warning should only appear once.)'), - DeprecationWarning) + warnings.warn( + ( + "In requests 3.0, get_unicode_from_response will be removed. For " + "more information, please see the discussion on issue #2266. (This" + " warning should only appear once.)" + ), + DeprecationWarning, + ) tried_encodings = [] @@ -596,14 +616,15 @@ def get_unicode_from_response(r): # Fall back: try: - return str(r.content, encoding, errors='replace') + return str(r.content, encoding, errors="replace") except TypeError: return r.content # The unreserved URI characters (RFC 3986) UNRESERVED_SET = frozenset( - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + "0123456789-._~") + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + "0123456789-._~" +) def unquote_unreserved(uri): @@ -612,22 +633,22 @@ def unquote_unreserved(uri): :rtype: str """ - parts = uri.split('%') + parts = uri.split("%") for i in range(1, len(parts)): h = parts[i][0:2] if len(h) == 2 and h.isalnum(): try: c = chr(int(h, 16)) except ValueError: - raise InvalidURL("Invalid percent-escape sequence: '%s'" % h) + raise InvalidURL(f"Invalid percent-escape sequence: '{h}'") if c in UNRESERVED_SET: parts[i] = c + parts[i][2:] else: - parts[i] = '%' + parts[i] + parts[i] = f"%{parts[i]}" else: - parts[i] = '%' + parts[i] - return ''.join(parts) + parts[i] = f"%{parts[i]}" + return "".join(parts) def requote_uri(uri): @@ -660,10 +681,10 @@ def address_in_network(ip, net): :rtype: bool """ - ipaddr = struct.unpack('=L', socket.inet_aton(ip))[0] - netaddr, bits = net.split('/') - netmask = struct.unpack('=L', socket.inet_aton(dotted_netmask(int(bits))))[0] - network = struct.unpack('=L', socket.inet_aton(netaddr))[0] & netmask + ipaddr = struct.unpack("=L", socket.inet_aton(ip))[0] + netaddr, bits = net.split("/") + netmask = struct.unpack("=L", socket.inet_aton(dotted_netmask(int(bits))))[0] + network = struct.unpack("=L", socket.inet_aton(netaddr))[0] & netmask return (ipaddr & netmask) == (network & netmask) @@ -674,8 +695,8 @@ def dotted_netmask(mask): :rtype: str """ - bits = 0xffffffff ^ (1 << 32 - mask) - 1 - return socket.inet_ntoa(struct.pack('>I', bits)) + bits = 0xFFFFFFFF ^ (1 << 32 - mask) - 1 + return socket.inet_ntoa(struct.pack(">I", bits)) def is_ipv4_address(string_ip): @@ -684,7 +705,7 @@ def is_ipv4_address(string_ip): """ try: socket.inet_aton(string_ip) - except socket.error: + except OSError: return False return True @@ -695,9 +716,9 @@ def is_valid_cidr(string_network): :rtype: bool """ - if string_network.count('/') == 1: + if string_network.count("/") == 1: try: - mask = int(string_network.split('/')[1]) + mask = int(string_network.split("/")[1]) except ValueError: return False @@ -705,8 +726,8 @@ def is_valid_cidr(string_network): return False try: - socket.inet_aton(string_network.split('/')[0]) - except socket.error: + socket.inet_aton(string_network.split("/")[0]) + except OSError: return False else: return False @@ -743,13 +764,14 @@ def should_bypass_proxies(url, no_proxy): """ # Prioritize lowercase environment variables over uppercase # to keep a consistent behaviour with other http projects (curl, wget). - get_proxy = lambda k: os.environ.get(k) or os.environ.get(k.upper()) + def get_proxy(key): + return os.environ.get(key) or os.environ.get(key.upper()) # First check whether no_proxy is defined. If it is, check that the URL # we're getting isn't in the no_proxy list. no_proxy_arg = no_proxy if no_proxy is None: - no_proxy = get_proxy('no_proxy') + no_proxy = get_proxy("no_proxy") parsed = urlparse(url) if parsed.hostname is None: @@ -759,9 +781,7 @@ def should_bypass_proxies(url, no_proxy): if no_proxy: # We need to check whether we match here. We need to see if we match # the end of the hostname, both with and without the port. - no_proxy = ( - host for host in no_proxy.replace(' ', '').split(',') if host - ) + no_proxy = (host for host in no_proxy.replace(" ", "").split(",") if host) if is_ipv4_address(parsed.hostname): for proxy_ip in no_proxy: @@ -775,7 +795,7 @@ def should_bypass_proxies(url, no_proxy): else: host_with_port = parsed.hostname if parsed.port: - host_with_port += ':{}'.format(parsed.port) + host_with_port += f":{parsed.port}" for host in no_proxy: if parsed.hostname.endswith(host) or host_with_port.endswith(host): @@ -783,7 +803,7 @@ def should_bypass_proxies(url, no_proxy): # to apply the proxies on this URL. return True - with set_environ('no_proxy', no_proxy_arg): + with set_environ("no_proxy", no_proxy_arg): # parsed.hostname can be `None` in cases such as a file URI. try: bypass = proxy_bypass(parsed.hostname) @@ -817,13 +837,13 @@ def select_proxy(url, proxies): proxies = proxies or {} urlparts = urlparse(url) if urlparts.hostname is None: - return proxies.get(urlparts.scheme, proxies.get('all')) + return proxies.get(urlparts.scheme, proxies.get("all")) proxy_keys = [ - urlparts.scheme + '://' + urlparts.hostname, + urlparts.scheme + "://" + urlparts.hostname, urlparts.scheme, - 'all://' + urlparts.hostname, - 'all', + "all://" + urlparts.hostname, + "all", ] proxy = None for proxy_key in proxy_keys: @@ -848,13 +868,13 @@ def resolve_proxies(request, proxies, trust_env=True): proxies = proxies if proxies is not None else {} url = request.url scheme = urlparse(url).scheme - no_proxy = proxies.get('no_proxy') + no_proxy = proxies.get("no_proxy") new_proxies = proxies.copy() if trust_env and not should_bypass_proxies(url, no_proxy=no_proxy): environ_proxies = get_environ_proxies(url, no_proxy=no_proxy) - proxy = environ_proxies.get(scheme, environ_proxies.get('all')) + proxy = environ_proxies.get(scheme, environ_proxies.get("all")) if proxy: new_proxies.setdefault(scheme, proxy) @@ -867,19 +887,21 @@ def default_user_agent(name="python-requests"): :rtype: str """ - return '%s/%s' % (name, __version__) + return f"{name}/{__version__}" def default_headers(): """ :rtype: requests.structures.CaseInsensitiveDict """ - return CaseInsensitiveDict({ - 'User-Agent': default_user_agent(), - 'Accept-Encoding': DEFAULT_ACCEPT_ENCODING, - 'Accept': '*/*', - 'Connection': 'keep-alive', - }) + return CaseInsensitiveDict( + { + "User-Agent": default_user_agent(), + "Accept-Encoding": DEFAULT_ACCEPT_ENCODING, + "Accept": "*/*", + "Connection": "keep-alive", + } + ) def parse_header_links(value): @@ -892,23 +914,23 @@ def parse_header_links(value): links = [] - replace_chars = ' \'"' + replace_chars = " '\"" value = value.strip(replace_chars) if not value: return links - for val in re.split(', *<', value): + for val in re.split(", *<", value): try: - url, params = val.split(';', 1) + url, params = val.split(";", 1) except ValueError: - url, params = val, '' + url, params = val, "" - link = {'url': url.strip('<> \'"')} + link = {"url": url.strip("<> '\"")} - for param in params.split(';'): + for param in params.split(";"): try: - key, value = param.split('=') + key, value = param.split("=") except ValueError: break @@ -920,7 +942,7 @@ def parse_header_links(value): # Null bytes; no need to recreate these on each call to guess_json_utf -_null = '\x00'.encode('ascii') # encoding to ASCII for Python 3 +_null = "\x00".encode("ascii") # encoding to ASCII for Python 3 _null2 = _null * 2 _null3 = _null * 3 @@ -934,25 +956,25 @@ def guess_json_utf(data): # determine the encoding. Also detect a BOM, if present. sample = data[:4] if sample in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE): - return 'utf-32' # BOM included + return "utf-32" # BOM included if sample[:3] == codecs.BOM_UTF8: - return 'utf-8-sig' # BOM included, MS style (discouraged) + return "utf-8-sig" # BOM included, MS style (discouraged) if sample[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE): - return 'utf-16' # BOM included + return "utf-16" # BOM included nullcount = sample.count(_null) if nullcount == 0: - return 'utf-8' + return "utf-8" if nullcount == 2: - if sample[::2] == _null2: # 1st and 3rd are null - return 'utf-16-be' + if sample[::2] == _null2: # 1st and 3rd are null + return "utf-16-be" if sample[1::2] == _null2: # 2nd and 4th are null - return 'utf-16-le' + return "utf-16-le" # Did not detect 2 valid UTF-16 ascii-range characters if nullcount == 3: if sample[:3] == _null3: - return 'utf-32-be' + return "utf-32-be" if sample[1:] == _null3: - return 'utf-32-le' + return "utf-32-le" # Did not detect a valid UTF-32 ascii-range character return None @@ -977,13 +999,13 @@ def prepend_scheme_if_needed(url, new_scheme): if auth: # parse_url doesn't provide the netloc with auth # so we'll add it ourselves. - netloc = '@'.join([auth, netloc]) + netloc = "@".join([auth, netloc]) if scheme is None: scheme = new_scheme if path is None: - path = '' + path = "" - return urlunparse((scheme, netloc, path, '', query, fragment)) + return urlunparse((scheme, netloc, path, "", query, fragment)) def get_auth_from_url(url): @@ -997,35 +1019,36 @@ def get_auth_from_url(url): try: auth = (unquote(parsed.username), unquote(parsed.password)) except (AttributeError, TypeError): - auth = ('', '') + auth = ("", "") return auth -# Moved outside of function to avoid recompile every call -_CLEAN_HEADER_REGEX_BYTE = re.compile(b'^\\S[^\\r\\n]*$|^$') -_CLEAN_HEADER_REGEX_STR = re.compile(r'^\S[^\r\n]*$|^$') - - def check_header_validity(header): - """Verifies that header value is a string which doesn't contain - leading whitespace or return characters. This prevents unintended - header injection. + """Verifies that header parts don't contain leading whitespace + reserved characters, or return characters. :param header: tuple, in the format (name, value). """ name, value = header - if isinstance(value, bytes): - pat = _CLEAN_HEADER_REGEX_BYTE - else: - pat = _CLEAN_HEADER_REGEX_STR - try: - if not pat.match(value): - raise InvalidHeader("Invalid return character or leading space in header: %s" % name) - except TypeError: - raise InvalidHeader("Value for header {%s: %s} must be of type str or " - "bytes, not %s" % (name, value, type(value))) + for part in header: + if type(part) not in HEADER_VALIDATORS: + raise InvalidHeader( + f"Header part ({part!r}) from {{{name!r}: {value!r}}} must be " + f"of type str or bytes, not {type(part)}" + ) + + _validate_header_part(name, "name", HEADER_VALIDATORS[type(name)][0]) + _validate_header_part(value, "value", HEADER_VALIDATORS[type(value)][1]) + + +def _validate_header_part(header_part, header_kind, validator): + if not validator.match(header_part): + raise InvalidHeader( + f"Invalid leading whitespace, reserved character(s), or return" + f"character(s) in header {header_kind}: {header_part!r}" + ) def urldefragauth(url): @@ -1040,21 +1063,24 @@ def urldefragauth(url): if not netloc: netloc, path = path, netloc - netloc = netloc.rsplit('@', 1)[-1] + netloc = netloc.rsplit("@", 1)[-1] - return urlunparse((scheme, netloc, path, params, query, '')) + return urlunparse((scheme, netloc, path, params, query, "")) def rewind_body(prepared_request): """Move file pointer back to its recorded starting position so it can be read again on redirect. """ - body_seek = getattr(prepared_request.body, 'seek', None) - if body_seek is not None and isinstance(prepared_request._body_position, integer_types): + body_seek = getattr(prepared_request.body, "seek", None) + if body_seek is not None and isinstance( + prepared_request._body_position, integer_types + ): try: body_seek(prepared_request._body_position) - except (IOError, OSError): - raise UnrewindableBodyError("An error occurred when rewinding request " - "body for redirect.") + except OSError: + raise UnrewindableBodyError( + "An error occurred when rewinding request body for redirect." + ) else: raise UnrewindableBodyError("Unable to rewind request body for redirect.") diff --git a/lib/urllib3/__init__.py b/lib/urllib3/__init__.py index fe86b59d..c6fa3821 100644 --- a/lib/urllib3/__init__.py +++ b/lib/urllib3/__init__.py @@ -19,6 +19,23 @@ from .util.retry import Retry from .util.timeout import Timeout from .util.url import get_host +# === NOTE TO REPACKAGERS AND VENDORS === +# Please delete this block, this logic is only +# for urllib3 being distributed via PyPI. +# See: https://github.com/urllib3/urllib3/issues/2680 +try: + import urllib3_secure_extra # type: ignore # noqa: F401 +except ImportError: + pass +else: + warnings.warn( + "'urllib3[secure]' extra is deprecated and will be removed " + "in a future release of urllib3 2.x. Read more in this issue: " + "https://github.com/urllib3/urllib3/issues/2680", + category=DeprecationWarning, + stacklevel=2, + ) + __author__ = "Andrey Petrov (andrey.petrov@shazow.net)" __license__ = "MIT" __version__ = __version__ diff --git a/lib/urllib3/_version.py b/lib/urllib3/_version.py index d905b697..6fbc84b3 100644 --- a/lib/urllib3/_version.py +++ b/lib/urllib3/_version.py @@ -1,2 +1,2 @@ # This file is protected via CODEOWNERS -__version__ = "1.26.9" +__version__ = "1.26.12" diff --git a/lib/urllib3/connection.py b/lib/urllib3/connection.py index 7bf395bd..10fb36c4 100644 --- a/lib/urllib3/connection.py +++ b/lib/urllib3/connection.py @@ -68,7 +68,7 @@ port_by_scheme = {"http": 80, "https": 443} # When it comes time to update this value as a part of regular maintenance # (ie test_recent_date is failing) update it to ~6 months before the current date. -RECENT_DATE = datetime.date(2020, 7, 1) +RECENT_DATE = datetime.date(2022, 1, 1) _CONTAINS_CONTROL_CHAR_RE = re.compile(r"[^-!#$%&'*+.^_`|~0-9a-zA-Z]") diff --git a/lib/urllib3/connectionpool.py b/lib/urllib3/connectionpool.py index 15bffcb2..96339e90 100644 --- a/lib/urllib3/connectionpool.py +++ b/lib/urllib3/connectionpool.py @@ -767,6 +767,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): isinstance(e, BaseSSLError) and self.proxy and _is_ssl_error_message_from_http_proxy(e) + and conn.proxy + and conn.proxy.scheme == "https" ): e = ProxyError( "Your proxy appears to only use HTTP and not HTTPS, " diff --git a/lib/urllib3/contrib/pyopenssl.py b/lib/urllib3/contrib/pyopenssl.py index def83afd..50a07d59 100644 --- a/lib/urllib3/contrib/pyopenssl.py +++ b/lib/urllib3/contrib/pyopenssl.py @@ -73,11 +73,20 @@ except ImportError: # Platform-specific: Python 3 import logging import ssl import sys +import warnings from .. import util from ..packages import six from ..util.ssl_ import PROTOCOL_TLS_CLIENT +warnings.warn( + "'urllib3.contrib.pyopenssl' module is deprecated and will be removed " + "in a future release of urllib3 2.x. Read more in this issue: " + "https://github.com/urllib3/urllib3/issues/2680", + category=DeprecationWarning, + stacklevel=2, +) + __all__ = ["inject_into_urllib3", "extract_from_urllib3"] # SNI always works. @@ -406,7 +415,6 @@ if _fileobject: # Platform-specific: Python 2 self._makefile_refs += 1 return _fileobject(self, mode, bufsize, close=True) - else: # Platform-specific: Python 3 makefile = backport_makefile diff --git a/lib/urllib3/contrib/securetransport.py b/lib/urllib3/contrib/securetransport.py index 554c015f..6c46a3b9 100644 --- a/lib/urllib3/contrib/securetransport.py +++ b/lib/urllib3/contrib/securetransport.py @@ -770,7 +770,6 @@ if _fileobject: # Platform-specific: Python 2 self._makefile_refs += 1 return _fileobject(self, mode, bufsize, close=True) - else: # Platform-specific: Python 3 def makefile(self, mode="r", buffering=None, *args, **kwargs): diff --git a/lib/urllib3/packages/six.py b/lib/urllib3/packages/six.py index ba50acb0..f099a3dc 100644 --- a/lib/urllib3/packages/six.py +++ b/lib/urllib3/packages/six.py @@ -772,7 +772,6 @@ if PY3: value = None tb = None - else: def exec_(_code_, _globs_=None, _locs_=None): diff --git a/lib/urllib3/response.py b/lib/urllib3/response.py index fdb50ddb..01f08eee 100644 --- a/lib/urllib3/response.py +++ b/lib/urllib3/response.py @@ -2,6 +2,7 @@ from __future__ import absolute_import import io import logging +import sys import zlib from contextlib import contextmanager from socket import error as SocketError @@ -15,6 +16,7 @@ try: except ImportError: brotli = None +from . import util from ._collections import HTTPHeaderDict from .connection import BaseSSLError, HTTPException from .exceptions import ( @@ -481,6 +483,54 @@ class HTTPResponse(io.IOBase): if self._original_response and self._original_response.isclosed(): self.release_conn() + def _fp_read(self, amt): + """ + Read a response with the thought that reading the number of bytes + larger than can fit in a 32-bit int at a time via SSL in some + known cases leads to an overflow error that has to be prevented + if `amt` or `self.length_remaining` indicate that a problem may + happen. + + The known cases: + * 3.8 <= CPython < 3.9.7 because of a bug + https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900. + * urllib3 injected with pyOpenSSL-backed SSL-support. + * CPython < 3.10 only when `amt` does not fit 32-bit int. + """ + assert self._fp + c_int_max = 2 ** 31 - 1 + if ( + ( + (amt and amt > c_int_max) + or (self.length_remaining and self.length_remaining > c_int_max) + ) + and not util.IS_SECURETRANSPORT + and (util.IS_PYOPENSSL or sys.version_info < (3, 10)) + ): + buffer = io.BytesIO() + # Besides `max_chunk_amt` being a maximum chunk size, it + # affects memory overhead of reading a response by this + # method in CPython. + # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum + # chunk size that does not lead to an overflow error, but + # 256 MiB is a compromise. + max_chunk_amt = 2 ** 28 + while amt is None or amt != 0: + if amt is not None: + chunk_amt = min(amt, max_chunk_amt) + amt -= chunk_amt + else: + chunk_amt = max_chunk_amt + data = self._fp.read(chunk_amt) + if not data: + break + buffer.write(data) + del data # to reduce peak memory usage by `max_chunk_amt`. + return buffer.getvalue() + else: + # StringIO doesn't like amt=None + return self._fp.read(amt) if amt is not None else self._fp.read() + def read(self, amt=None, decode_content=None, cache_content=False): """ Similar to :meth:`http.client.HTTPResponse.read`, but with two additional @@ -513,13 +563,11 @@ class HTTPResponse(io.IOBase): fp_closed = getattr(self._fp, "closed", False) with self._error_catcher(): + data = self._fp_read(amt) if not fp_closed else b"" if amt is None: - # cStringIO doesn't like amt=None - data = self._fp.read() if not fp_closed else b"" flush_decoder = True else: cache_content = False - data = self._fp.read(amt) if not fp_closed else b"" if ( amt != 0 and not data ): # Platform-specific: Buggy versions of Python. diff --git a/lib/urllib3/util/url.py b/lib/urllib3/util/url.py index 81a03da9..b667c160 100644 --- a/lib/urllib3/util/url.py +++ b/lib/urllib3/util/url.py @@ -279,6 +279,9 @@ def _normalize_host(host, scheme): if scheme in NORMALIZABLE_SCHEMES: is_ipv6 = IPV6_ADDRZ_RE.match(host) if is_ipv6: + # IPv6 hosts of the form 'a::b%zone' are encoded in a URL as + # such per RFC 6874: 'a::b%25zone'. Unquote the ZoneID + # separator as necessary to return a valid RFC 4007 scoped IP. match = ZONE_ID_RE.search(host) if match: start, end = match.span(1) @@ -331,7 +334,7 @@ def parse_url(url): """ Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is performed to parse incomplete urls. Fields not provided will be None. - This parser is RFC 3986 compliant. + This parser is RFC 3986 and RFC 6874 compliant. The parser logic and helper functions are based heavily on work done in the ``rfc3986`` module. diff --git a/lib/urllib3/util/wait.py b/lib/urllib3/util/wait.py index c280646c..21b4590b 100644 --- a/lib/urllib3/util/wait.py +++ b/lib/urllib3/util/wait.py @@ -42,7 +42,6 @@ if sys.version_info >= (3, 5): def _retry_on_intr(fn, timeout): return fn(timeout) - else: # Old and broken Pythons. def _retry_on_intr(fn, timeout): diff --git a/requirements.txt b/requirements.txt index 10edc91e..4d6f3505 100644 --- a/requirements.txt +++ b/requirements.txt @@ -35,7 +35,7 @@ pyparsing==3.0.9 python-dateutil==2.8.2 python-twitter==3.5 pytz==2022.1 -requests==2.27.1 +requests==2.28.1 requests-oauthlib==1.3.1 rumps==0.3.0; platform_system == "Darwin" simplejson==3.17.6