From bb5ebe0fa51093cb5630763b662c5262ab274e54 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 4 Jan 2022 13:20:40 -0800
Subject: [PATCH] Bump requests from 2.26.0 to 2.27.0 (#1602)

* Bump requests from 2.26.0 to 2.27.0

Bumps [requests](https://github.com/psf/requests) from 2.26.0 to 2.27.0.
- [Release notes](https://github.com/psf/requests/releases)
- [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md)
- [Commits](https://github.com/psf/requests/compare/v2.26.0...v2.27.0)

---
updated-dependencies:
- dependency-name: requests
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot]

* Update requests==2.27.0

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: JonnyWong16 <9099342+JonnyWong16@users.noreply.github.com>
---
 lib/charset_normalizer/api.py     | 86 ++++++++++++++++++-------------
 lib/charset_normalizer/version.py |  2 +-
 lib/requests/__init__.py          |  2 +-
 lib/requests/__version__.py       |  6 +--
 lib/requests/adapters.py          |  9 +++-
 lib/requests/compat.py            |  8 ++-
 lib/requests/exceptions.py        | 10 +++-
 lib/requests/models.py            | 33 +++++++-----
 lib/requests/sessions.py          | 24 +++------
 lib/requests/utils.py             | 57 +++++++++++++++++---
 requirements.txt                  |  2 +-
 11 files changed, 156 insertions(+), 83 deletions(-)

diff --git a/lib/charset_normalizer/api.py b/lib/charset_normalizer/api.py
index 80e608b4..b3e198a7 100644
--- a/lib/charset_normalizer/api.py
+++ b/lib/charset_normalizer/api.py
@@ -68,20 +68,21 @@ def from_bytes(
         )

     if explain:
+        previous_logger_level = logger.level  # type: int
         logger.addHandler(explain_handler)
+        logger.setLevel(logging.DEBUG)

     length = len(sequences)  # type: int

     if length == 0:
-        logger.warning(
-            "Given content is empty, stopping the process very early, returning empty utf_8 str match"
-        )
+        logger.warning("Encoding detection on empty bytes, assuming utf_8 intention.")
         if explain:
             logger.removeHandler(explain_handler)
+            logger.setLevel(previous_logger_level or logging.WARNING)
         return CharsetMatches([CharsetMatch(sequences, "utf_8", 0.0, False, [], "")])

     if cp_isolation is not None:
-        logger.warning(
+        logger.debug(
             "cp_isolation is set. use this flag for debugging purpose. "
             "limited list of encoding allowed : %s.",
             ", ".join(cp_isolation),
@@ -91,7 +92,7 @@ def from_bytes(
         cp_isolation = []

     if cp_exclusion is not None:
-        logger.warning(
+        logger.debug(
             "cp_exclusion is set. use this flag for debugging purpose. "
             "limited list of encoding excluded : %s.",
             ", ".join(cp_exclusion),
@@ -101,7 +102,7 @@ def from_bytes(
         cp_exclusion = []

     if length <= (chunk_size * steps):
-        logger.warning(
+        logger.debug(
             "override steps (%i) and chunk_size (%i) as content does not fit (%i byte(s) given) parameters.",
             steps,
             chunk_size,
@@ -187,7 +188,7 @@ def from_bytes(
         )  # type: bool

         if encoding_iana in {"utf_16", "utf_32"} and not bom_or_sig_available:
-            logger.info(
+            logger.debug(
                 "Encoding %s wont be tested as-is because it require a BOM. Will try some sub-encoder LE/BE.",
                 encoding_iana,
             )
@@ -218,7 +219,7 @@ def from_bytes(
                 )
         except (UnicodeDecodeError, LookupError) as e:
             if not isinstance(e, LookupError):
-                logger.warning(
+                logger.debug(
                     "Code page %s does not fit given bytes sequence at ALL. %s",
                     encoding_iana,
                     str(e),
@@ -234,7 +235,7 @@ def from_bytes(
                 break

         if similar_soft_failure_test:
-            logger.warning(
+            logger.debug(
                 "%s is deemed too similar to code page %s and was consider unsuited already. Continuing!",
                 encoding_iana,
                 encoding_soft_failed,
@@ -254,7 +255,7 @@ def from_bytes(
         )  # type: bool

         if multi_byte_bonus:
-            logger.info(
+            logger.debug(
                 "Code page %s is a multi byte encoding table and it appear that at least one character "
                 "was encoded using n-bytes.",
                 encoding_iana,
@@ -264,6 +265,7 @@ def from_bytes(

         max_chunk_gave_up = max(max_chunk_gave_up, 2)
         early_stop_count = 0  # type: int
+        lazy_str_hard_failure = False

         md_chunks = []  # type: List[str]
         md_ratios = []
@@ -283,12 +285,13 @@ def from_bytes(
                     errors="ignore" if is_multi_byte_decoder else "strict",
                 )  # type: str
             except UnicodeDecodeError as e:  # Lazy str loading may have missed something there
-                logger.warning(
+                logger.debug(
                     "LazyStr Loading: After MD chunk decode, code page %s does not fit given bytes sequence at ALL. %s",
                     encoding_iana,
                     str(e),
                 )
                 early_stop_count = max_chunk_gave_up
+                lazy_str_hard_failure = True
                 break

             # multi-byte bad cutting detector and adjustment
@@ -324,12 +327,30 @@ def from_bytes(
             ):
                 break

+        # We might want to check the sequence again with the whole content
+        # Only if initial MD tests passes
+        if (
+            not lazy_str_hard_failure
+            and is_too_large_sequence
+            and not is_multi_byte_decoder
+        ):
+            try:
+                sequences[int(50e3) :].decode(encoding_iana, errors="strict")
+            except UnicodeDecodeError as e:
+                logger.debug(
+                    "LazyStr Loading: After final lookup, code page %s does not fit given bytes sequence at ALL. %s",
+                    encoding_iana,
+                    str(e),
+                )
+                tested_but_hard_failure.append(encoding_iana)
+                continue
+
         mean_mess_ratio = (
             sum(md_ratios) / len(md_ratios) if md_ratios else 0.0
         )  # type: float
         if mean_mess_ratio >= threshold or early_stop_count >= max_chunk_gave_up:
             tested_but_soft_failure.append(encoding_iana)
-            logger.warning(
+            logger.info(
                 "%s was excluded because of initial chaos probing. Gave up %i time(s). "
                 "Computed mean chaos is %f %%.",
                 encoding_iana,
@@ -337,7 +358,10 @@ def from_bytes(
                 round(mean_mess_ratio * 100, ndigits=3),
             )
             # Preparing those fallbacks in case we got nothing.
-            if encoding_iana in ["ascii", "utf_8", specified_encoding]:
+            if (
+                encoding_iana in ["ascii", "utf_8", specified_encoding]
+                and not lazy_str_hard_failure
+            ):
                 fallback_entry = CharsetMatch(
                     sequences, encoding_iana, threshold, False, [], decoded_payload
                 )
@@ -361,7 +385,7 @@ def from_bytes(
             target_languages = mb_encoding_languages(encoding_iana)

         if target_languages:
-            logger.info(
+            logger.debug(
                 "{} should target any language(s) of {}".format(
                     encoding_iana, str(target_languages)
                 )
@@ -369,12 +393,15 @@ def from_bytes(

         cd_ratios = []

-        for chunk in md_chunks:
-            chunk_languages = coherence_ratio(
-                chunk, 0.1, ",".join(target_languages) if target_languages else None
-            )
+        # We shall skip the CD when its about ASCII
+        # Most of the time its not relevant to run "language-detection" on it.
+        if encoding_iana != "ascii":
+            for chunk in md_chunks:
+                chunk_languages = coherence_ratio(
+                    chunk, 0.1, ",".join(target_languages) if target_languages else None
+                )

-            cd_ratios.append(chunk_languages)
+                cd_ratios.append(chunk_languages)

         cd_ratios_merged = merge_coherence_ratios(cd_ratios)
@@ -385,20 +412,6 @@ def from_bytes(
                 )
             )

-        # We might want to check the sequence again with the whole content
-        # Only if initial MD/CD tests passes
-        if is_too_large_sequence and not is_multi_byte_decoder:
-            try:
-                sequences[int(50e3) :].decode(encoding_iana, errors="strict")
-            except UnicodeDecodeError as e:
-                logger.warning(
-                    "LazyStr Loading: After final lookup, code page %s does not fit given bytes sequence at ALL. %s",
%s", - encoding_iana, - str(e), - ) - tested_but_hard_failure.append(encoding_iana) - continue - results.append( CharsetMatch( sequences, @@ -419,6 +432,7 @@ def from_bytes( ) if explain: logger.removeHandler(explain_handler) + logger.setLevel(previous_logger_level) return CharsetMatches([results[encoding_iana]]) if encoding_iana == sig_encoding: @@ -428,16 +442,17 @@ def from_bytes( ) if explain: logger.removeHandler(explain_handler) + logger.setLevel(previous_logger_level) return CharsetMatches([results[encoding_iana]]) if len(results) == 0: if fallback_u8 or fallback_ascii or fallback_specified: - logger.warning( + logger.debug( "Nothing got out of the detection process. Using ASCII/UTF-8/Specified fallback." ) if fallback_specified: - logger.warning( + logger.debug( "%s will be used as a fallback match", fallback_specified.encoding ) results.append(fallback_specified) @@ -458,6 +473,7 @@ def from_bytes( if explain: logger.removeHandler(explain_handler) + logger.setLevel(previous_logger_level) return results diff --git a/lib/charset_normalizer/version.py b/lib/charset_normalizer/version.py index d48da8ab..a8d66597 100644 --- a/lib/charset_normalizer/version.py +++ b/lib/charset_normalizer/version.py @@ -2,5 +2,5 @@ Expose version """ -__version__ = "2.0.8" +__version__ = "2.0.10" VERSION = __version__.split(".") diff --git a/lib/requests/__init__.py b/lib/requests/__init__.py index 0ac7713b..53a5b42a 100644 --- a/lib/requests/__init__.py +++ b/lib/requests/__init__.py @@ -139,7 +139,7 @@ from .status_codes import codes from .exceptions import ( RequestException, Timeout, URLRequired, TooManyRedirects, HTTPError, ConnectionError, - FileModeWarning, ConnectTimeout, ReadTimeout + FileModeWarning, ConnectTimeout, ReadTimeout, JSONDecodeError ) # Set default logging handler to avoid "No handler found" warnings. diff --git a/lib/requests/__version__.py b/lib/requests/__version__.py index 0d7cde1d..cd009315 100644 --- a/lib/requests/__version__.py +++ b/lib/requests/__version__.py @@ -5,10 +5,10 @@ __title__ = 'requests' __description__ = 'Python HTTP for Humans.' 
 __url__ = 'https://requests.readthedocs.io'
-__version__ = '2.26.0'
-__build__ = 0x022600
+__version__ = '2.27.0'
+__build__ = 0x022700
 __author__ = 'Kenneth Reitz'
 __author_email__ = 'me@kennethreitz.org'
 __license__ = 'Apache 2.0'
-__copyright__ = 'Copyright 2020 Kenneth Reitz'
+__copyright__ = 'Copyright 2022 Kenneth Reitz'
 __cake__ = u'\u2728 \U0001f370 \u2728'
diff --git a/lib/requests/adapters.py b/lib/requests/adapters.py
index fa4d9b3c..fe22ff45 100644
--- a/lib/requests/adapters.py
+++ b/lib/requests/adapters.py
@@ -19,6 +19,7 @@ from urllib3.util.retry import Retry
 from urllib3.exceptions import ClosedPoolError
 from urllib3.exceptions import ConnectTimeoutError
 from urllib3.exceptions import HTTPError as _HTTPError
+from urllib3.exceptions import InvalidHeader as _InvalidHeader
 from urllib3.exceptions import MaxRetryError
 from urllib3.exceptions import NewConnectionError
 from urllib3.exceptions import ProxyError as _ProxyError
@@ -37,7 +38,7 @@ from .structures import CaseInsensitiveDict
 from .cookies import extract_cookies_to_jar
 from .exceptions import (ConnectionError, ConnectTimeout, ReadTimeout, SSLError,
                          ProxyError, RetryError, InvalidSchema, InvalidProxyURL,
-                         InvalidURL)
+                         InvalidURL, InvalidHeader)
 from .auth import _basic_auth_str

 try:
@@ -457,9 +458,11 @@ class HTTPAdapter(BaseAdapter):
                 low_conn = conn._get_conn(timeout=DEFAULT_POOL_TIMEOUT)

                 try:
+                    skip_host = 'Host' in request.headers
                     low_conn.putrequest(request.method,
                                         url,
-                                        skip_accept_encoding=True)
+                                        skip_accept_encoding=True,
+                                        skip_host=skip_host)

                     for header, value in request.headers.items():
                         low_conn.putheader(header, value)
@@ -527,6 +530,8 @@ class HTTPAdapter(BaseAdapter):
                 raise SSLError(e, request=request)
             elif isinstance(e, ReadTimeoutError):
                 raise ReadTimeout(e, request=request)
+            elif isinstance(e, _InvalidHeader):
+                raise InvalidHeader(e, request=request)
             else:
                 raise
diff --git a/lib/requests/compat.py b/lib/requests/compat.py
index 0b14f501..029ae62a 100644
--- a/lib/requests/compat.py
+++ b/lib/requests/compat.py
@@ -28,8 +28,10 @@ is_py2 = (_ver[0] == 2)
 #: Python 3.x?
 is_py3 = (_ver[0] == 3)

+has_simplejson = False
 try:
     import simplejson as json
+    has_simplejson = True
 except ImportError:
     import json

@@ -49,13 +51,13 @@ if is_py2:
     # Keep OrderedDict for backwards compatibility.
     from collections import Callable, Mapping, MutableMapping, OrderedDict

-
     builtin_str = str
     bytes = str
     str = unicode
     basestring = basestring
     numeric_types = (int, long, float)
     integer_types = (int, long)
+    JSONDecodeError = ValueError

 elif is_py3:
     from urllib.parse import urlparse, urlunparse, urljoin, urlsplit, urlencode, quote, unquote, quote_plus, unquote_plus, urldefrag
@@ -66,6 +68,10 @@ elif is_py3:
     # Keep OrderedDict for backwards compatibility.
     from collections import OrderedDict
     from collections.abc import Callable, Mapping, MutableMapping
+    if has_simplejson:
+        from simplejson import JSONDecodeError
+    else:
+        from json import JSONDecodeError

     builtin_str = str
     str = str
diff --git a/lib/requests/exceptions.py b/lib/requests/exceptions.py
index c412ec98..79697635 100644
--- a/lib/requests/exceptions.py
+++ b/lib/requests/exceptions.py
@@ -8,6 +8,8 @@ This module contains the set of Requests' exceptions.
""" from urllib3.exceptions import HTTPError as BaseHTTPError +from .compat import JSONDecodeError as CompatJSONDecodeError + class RequestException(IOError): """There was an ambiguous exception that occurred while handling your @@ -29,6 +31,10 @@ class InvalidJSONError(RequestException): """A JSON error occurred.""" +class JSONDecodeError(InvalidJSONError, CompatJSONDecodeError): + """Couldn't decode the text into json""" + + class HTTPError(RequestException): """An HTTP error occurred.""" @@ -74,11 +80,11 @@ class TooManyRedirects(RequestException): class MissingSchema(RequestException, ValueError): - """The URL schema (e.g. http or https) is missing.""" + """The URL scheme (e.g. http or https) is missing.""" class InvalidSchema(RequestException, ValueError): - """See defaults.py for valid schemas.""" + """The URL scheme provided is either invalid or unsupported.""" class InvalidURL(RequestException, ValueError): diff --git a/lib/requests/models.py b/lib/requests/models.py index aa6fb86e..dfbea854 100644 --- a/lib/requests/models.py +++ b/lib/requests/models.py @@ -29,7 +29,9 @@ from .auth import HTTPBasicAuth from .cookies import cookiejar_from_dict, get_cookie_header, _copy_cookie_jar from .exceptions import ( HTTPError, MissingSchema, InvalidURL, ChunkedEncodingError, - ContentDecodingError, ConnectionError, StreamConsumedError, InvalidJSONError) + ContentDecodingError, ConnectionError, StreamConsumedError, + InvalidJSONError) +from .exceptions import JSONDecodeError as RequestsJSONDecodeError from ._internal_utils import to_native_string, unicode_is_ascii from .utils import ( guess_filename, get_auth_from_url, requote_uri, @@ -38,7 +40,7 @@ from .utils import ( from .compat import ( Callable, Mapping, cookielib, urlunparse, urlsplit, urlencode, str, bytes, - is_py2, chardet, builtin_str, basestring) + is_py2, chardet, builtin_str, basestring, JSONDecodeError) from .compat import json as complexjson from .status_codes import codes @@ -384,7 +386,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): raise InvalidURL(*e.args) if not scheme: - error = ("Invalid URL {0!r}: No schema supplied. Perhaps you meant http://{0}?") + error = ("Invalid URL {0!r}: No scheme supplied. Perhaps you meant http://{0}?") error = error.format(to_native_string(url, 'utf8')) raise MissingSchema(error) @@ -401,7 +403,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): host = self._get_idna_encoded_host(host) except UnicodeError: raise InvalidURL('URL has an invalid label.') - elif host.startswith(u'*'): + elif host.startswith((u'*', u'.')): raise InvalidURL('URL has an invalid label.') # Carefully reconstruct the network location @@ -468,9 +470,9 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): content_type = 'application/json' try: - body = complexjson.dumps(json, allow_nan=False) + body = complexjson.dumps(json, allow_nan=False) except ValueError as ve: - raise InvalidJSONError(ve, request=self) + raise InvalidJSONError(ve, request=self) if not isinstance(body, bytes): body = body.encode('utf-8') @@ -882,12 +884,8 @@ class Response(object): r"""Returns the json-encoded content of a response, if any. :param \*\*kwargs: Optional arguments that ``json.loads`` takes. - :raises simplejson.JSONDecodeError: If the response body does not - contain valid json and simplejson is installed. - :raises json.JSONDecodeError: If the response body does not contain - valid json and simplejson is not installed on Python 3. 
-        :raises ValueError: If the response body does not contain valid
-            json and simplejson is not installed on Python 2.
+        :raises requests.exceptions.JSONDecodeError: If the response body does not
+            contain valid json.
         """

         if not self.encoding and self.content and len(self.content) > 3:
@@ -907,7 +905,16 @@ class Response(object):
                 # and the server didn't bother to tell us what codec *was*
                 # used.
                 pass
-        return complexjson.loads(self.text, **kwargs)
+
+        try:
+            return complexjson.loads(self.text, **kwargs)
+        except JSONDecodeError as e:
+            # Catch JSON-related errors and raise as requests.JSONDecodeError
+            # This aliases json.JSONDecodeError and simplejson.JSONDecodeError
+            if is_py2: # e is a ValueError
+                raise RequestsJSONDecodeError(e.message)
+            else:
+                raise RequestsJSONDecodeError(e.msg, e.doc, e.pos)

     @property
     def links(self):
diff --git a/lib/requests/sessions.py b/lib/requests/sessions.py
index ae4bcc8e..3f59cab9 100644
--- a/lib/requests/sessions.py
+++ b/lib/requests/sessions.py
@@ -29,7 +29,7 @@ from .adapters import HTTPAdapter

 from .utils import (
     requote_uri, get_environ_proxies, get_netrc_auth, should_bypass_proxies,
-    get_auth_from_url, rewind_body
+    get_auth_from_url, rewind_body, resolve_proxies
 )

 from .status_codes import codes
@@ -269,7 +269,6 @@ class SessionRedirectMixin(object):
         if new_auth is not None:
             prepared_request.prepare_auth(new_auth)

-
     def rebuild_proxies(self, prepared_request, proxies):
         """This method re-evaluates the proxy configuration by considering the
         environment variables. If we are redirected to a URL covered by
@@ -282,21 +281,9 @@ class SessionRedirectMixin(object):

         :rtype: dict
         """
-        proxies = proxies if proxies is not None else {}
         headers = prepared_request.headers
-        url = prepared_request.url
-        scheme = urlparse(url).scheme
-        new_proxies = proxies.copy()
-        no_proxy = proxies.get('no_proxy')
-
-        bypass_proxy = should_bypass_proxies(url, no_proxy=no_proxy)
-        if self.trust_env and not bypass_proxy:
-            environ_proxies = get_environ_proxies(url, no_proxy=no_proxy)
-
-            proxy = environ_proxies.get(scheme, environ_proxies.get('all'))
-
-            if proxy:
-                new_proxies.setdefault(scheme, proxy)
+        scheme = urlparse(prepared_request.url).scheme
+        new_proxies = resolve_proxies(prepared_request, proxies, self.trust_env)

         if 'Proxy-Authorization' in headers:
             del headers['Proxy-Authorization']
@@ -633,7 +620,10 @@ class Session(SessionRedirectMixin):
         kwargs.setdefault('stream', self.stream)
         kwargs.setdefault('verify', self.verify)
         kwargs.setdefault('cert', self.cert)
-        kwargs.setdefault('proxies', self.rebuild_proxies(request, self.proxies))
+        if 'proxies' not in kwargs:
+            kwargs['proxies'] = resolve_proxies(
+                request, self.proxies, self.trust_env
+            )

         # It's possible that users might accidentally send a Request object.
         # Guard against that specific failure case.
diff --git a/lib/requests/utils.py b/lib/requests/utils.py
index dbb02a0d..1c2ae4e0 100644
--- a/lib/requests/utils.py
+++ b/lib/requests/utils.py
@@ -21,6 +21,7 @@ import warnings
 import zipfile
 from collections import OrderedDict
 from urllib3.util import make_headers
+from urllib3.util import parse_url

 from .__version__ import __version__
 from . import certs
@@ -124,7 +125,10 @@ def super_len(o):
     elif hasattr(o, 'fileno'):
         try:
             fileno = o.fileno()
-        except io.UnsupportedOperation:
+        except (io.UnsupportedOperation, AttributeError):
+            # AttributeError is a surprising exception, seeing as how we've just checked
+            # that `hasattr(o, 'fileno')`. It happens for objects obtained via
+            # `Tarfile.extractfile()`, per issue 5229.
             pass
         else:
             total_length = os.fstat(fileno).st_size
@@ -154,7 +158,7 @@ def super_len(o):
         current_position = total_length
     else:
         if hasattr(o, 'seek') and total_length is None:
-            # StringIO and BytesIO have seek but no useable fileno
+            # StringIO and BytesIO have seek but no usable fileno
             try:
                 # seek to end of file
                 o.seek(0, 2)
@@ -251,6 +255,10 @@ def extract_zipped_paths(path):
     archive, member = os.path.split(path)
     while archive and not os.path.exists(archive):
         archive, prefix = os.path.split(archive)
+        if not prefix:
+            # If we don't check for an empty prefix after the split (in other words, archive remains unchanged after the split),
+            # we _can_ end up in an infinite loop on a rare corner case affecting a small number of users
+            break
         member = '/'.join([prefix, member])

     if not zipfile.is_zipfile(archive):
@@ -826,6 +834,33 @@ def select_proxy(url, proxies):
     return proxy


+def resolve_proxies(request, proxies, trust_env=True):
+    """This method takes proxy information from a request and configuration
+    input to resolve a mapping of target proxies. This will consider settings
+    such a NO_PROXY to strip proxy configurations.
+
+    :param request: Request or PreparedRequest
+    :param proxies: A dictionary of schemes or schemes and hosts to proxy URLs
+    :param trust_env: Boolean declaring whether to trust environment configs
+
+    :rtype: dict
+    """
+    proxies = proxies if proxies is not None else {}
+    url = request.url
+    scheme = urlparse(url).scheme
+    no_proxy = proxies.get('no_proxy')
+    new_proxies = proxies.copy()
+
+    if trust_env and not should_bypass_proxies(url, no_proxy=no_proxy):
+        environ_proxies = get_environ_proxies(url, no_proxy=no_proxy)
+
+        proxy = environ_proxies.get(scheme, environ_proxies.get('all'))
+
+        if proxy:
+            new_proxies.setdefault(scheme, proxy)
+    return new_proxies
+
+
 def default_user_agent(name="python-requests"):
     """
     Return a string representing the default user agent.
@@ -928,15 +963,23 @@ def prepend_scheme_if_needed(url, new_scheme):

     :rtype: str
     """
-    scheme, netloc, path, params, query, fragment = urlparse(url, new_scheme)
+    parsed = parse_url(url)
+    scheme, auth, host, port, path, query, fragment = parsed

-    # urlparse is a finicky beast, and sometimes decides that there isn't a
-    # netloc present. Assume that it's being over-cautious, and switch netloc
-    # and path if urlparse decided there was no netloc.
+    # A defect in urlparse determines that there isn't a netloc present in some
+    # urls. We previously assumed parsing was overly cautious, and swapped the
+    # netloc and path. Due to a lack of tests on the original defect, this is
+    # maintained with parse_url for backwards compatibility.
+    netloc = parsed.netloc
     if not netloc:
         netloc, path = path, netloc

-    return urlunparse((scheme, netloc, path, params, query, fragment))
+    if scheme is None:
+        scheme = new_scheme
+    if path is None:
+        path = ''
+
+    return urlunparse((scheme, netloc, path, '', query, fragment))


 def get_auth_from_url(url):
diff --git a/requirements.txt b/requirements.txt
index 71486686..7d3d7922 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -35,7 +35,7 @@ pyparsing==3.0.6
 python-dateutil==2.8.2
 python-twitter==3.5
 pytz==2021.3
-requests==2.26.0
+requests==2.27.0
 requests-oauthlib==1.3.0
 rumps==0.3.0; platform_system == "Darwin"
 simplejson==3.17.6
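The most user-visible change in this bump is that requests 2.27.0 adds requests.exceptions.JSONDecodeError, and Response.json() now raises it instead of letting json.JSONDecodeError, simplejson.JSONDecodeError, or a bare ValueError escape. A minimal sketch of how calling code can rely on that single exception type follows; the URL and helper name here are illustrative assumptions, not part of the patch above.

import requests

def fetch_json(url):
    # Illustrative helper: fetch a URL and parse its body as JSON.
    response = requests.get(url, timeout=10)
    try:
        return response.json()
    except requests.exceptions.JSONDecodeError as exc:
        # With requests 2.27.0 one except clause covers both the json and
        # simplejson backends. The new exception still derives from ValueError
        # through the underlying decoder error, so older call sites that
        # catch ValueError keep working.
        print("Response body was not valid JSON: {}".format(exc))
        return None

if __name__ == "__main__":
    fetch_json("https://httpbin.org/html")  # endpoint that returns HTML, not JSON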