Bump requests from 2.28.2 to 2.31.0 (#2078)

* Bump requests from 2.28.2 to 2.31.0

  Bumps [requests](https://github.com/psf/requests) from 2.28.2 to 2.31.0.
  - [Release notes](https://github.com/psf/requests/releases)
  - [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md)
  - [Commits](https://github.com/psf/requests/compare/v2.28.2...v2.31.0)

  ---
  updated-dependencies:
  - dependency-name: requests
    dependency-type: direct:production
    update-type: version-update:semver-minor
  ...

  Signed-off-by: dependabot[bot] <support@github.com>

* Update requests==2.31.0

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: JonnyWong16 <9099342+JonnyWong16@users.noreply.github.com>

[skip ci]
2025-07-07 05:31:15 -07:00 · 2023-08-23 21:40:02 -07:00 · 2023-08-23 21:40:02 -07:00 · 6b6d43ef43
commit 6b6d43ef43
parent 478d9e6aa5
54 changed files with 4861 additions and 4958 deletions
--- a/lib/urllib3/util/url.py
+++ b/lib/urllib3/util/url.py
@ -1,22 +1,20 @@
-from __future__ import absolute_import
+from __future__ import annotations

 import re
-from collections import namedtuple
+import typing

 from ..exceptions import LocationParseError
-from ..packages import six
-
-url_attrs = ["scheme", "auth", "host", "port", "path", "query", "fragment"]
+from .util import to_str

 # We only want to normalize urls with an HTTP(S) scheme.
 # urllib3 infers URLs without a scheme (None) to be http.
-NORMALIZABLE_SCHEMES = ("http", "https", None)
+_NORMALIZABLE_SCHEMES = ("http", "https", None)

 # Almost all of these patterns were derived from the
 # 'rfc3986' module: https://github.com/python-hyper/rfc3986
-PERCENT_RE = re.compile(r"%[a-fA-F0-9]{2}")
-SCHEME_RE = re.compile(r"^(?:[a-zA-Z][a-zA-Z0-9+-]*:|/)")
-URI_RE = re.compile(
+_PERCENT_RE = re.compile(r"%[a-fA-F0-9]{2}")
+_SCHEME_RE = re.compile(r"^(?:[a-zA-Z][a-zA-Z0-9+-]*:|/)")
+_URI_RE = re.compile(
    r"^(?:([a-zA-Z][a-zA-Z0-9+.-]*):)?"
    r"(?://([^\\/?#]*))?"
    r"([^?#]*)"
@ -25,10 +23,10 @@ URI_RE = re.compile(
    re.UNICODE | re.DOTALL,
 )

-IPV4_PAT = r"(?:[0-9]{1,3}\.){3}[0-9]{1,3}"
-HEX_PAT = "[0-9A-Fa-f]{1,4}"
-LS32_PAT = "(?:{hex}:{hex}|{ipv4})".format(hex=HEX_PAT, ipv4=IPV4_PAT)
-_subs = {"hex": HEX_PAT, "ls32": LS32_PAT}
+_IPV4_PAT = r"(?:[0-9]{1,3}\.){3}[0-9]{1,3}"
+_HEX_PAT = "[0-9A-Fa-f]{1,4}"
+_LS32_PAT = "(?:{hex}:{hex}|{ipv4})".format(hex=_HEX_PAT, ipv4=_IPV4_PAT)
+_subs = {"hex": _HEX_PAT, "ls32": _LS32_PAT}
 _variations = [
    #                            6( h16 ":" ) ls32
    "(?:%(hex)s:){6}%(ls32)s",
@ -50,69 +48,78 @@ _variations = [
    "(?:(?:%(hex)s:){0,6}%(hex)s)?::",
 ]

-UNRESERVED_PAT = r"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._\-~"
-IPV6_PAT = "(?:" + "|".join([x % _subs for x in _variations]) + ")"
-ZONE_ID_PAT = "(?:%25|%)(?:[" + UNRESERVED_PAT + "]|%[a-fA-F0-9]{2})+"
-IPV6_ADDRZ_PAT = r"\[" + IPV6_PAT + r"(?:" + ZONE_ID_PAT + r")?\]"
-REG_NAME_PAT = r"(?:[^\[\]%:/?#]|%[a-fA-F0-9]{2})*"
-TARGET_RE = re.compile(r"^(/[^?#]*)(?:\?([^#]*))?(?:#.*)?$")
+_UNRESERVED_PAT = r"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._\-~"
+_IPV6_PAT = "(?:" + "|".join([x % _subs for x in _variations]) + ")"
+_ZONE_ID_PAT = "(?:%25|%)(?:[" + _UNRESERVED_PAT + "]|%[a-fA-F0-9]{2})+"
+_IPV6_ADDRZ_PAT = r"\[" + _IPV6_PAT + r"(?:" + _ZONE_ID_PAT + r")?\]"
+_REG_NAME_PAT = r"(?:[^\[\]%:/?#]|%[a-fA-F0-9]{2})*"
+_TARGET_RE = re.compile(r"^(/[^?#]*)(?:\?([^#]*))?(?:#.*)?$")

-IPV4_RE = re.compile("^" + IPV4_PAT + "$")
-IPV6_RE = re.compile("^" + IPV6_PAT + "$")
-IPV6_ADDRZ_RE = re.compile("^" + IPV6_ADDRZ_PAT + "$")
-BRACELESS_IPV6_ADDRZ_RE = re.compile("^" + IPV6_ADDRZ_PAT[2:-2] + "$")
-ZONE_ID_RE = re.compile("(" + ZONE_ID_PAT + r")\]$")
+_IPV4_RE = re.compile("^" + _IPV4_PAT + "$")
+_IPV6_RE = re.compile("^" + _IPV6_PAT + "$")
+_IPV6_ADDRZ_RE = re.compile("^" + _IPV6_ADDRZ_PAT + "$")
+_BRACELESS_IPV6_ADDRZ_RE = re.compile("^" + _IPV6_ADDRZ_PAT[2:-2] + "$")
+_ZONE_ID_RE = re.compile("(" + _ZONE_ID_PAT + r")\]$")

 _HOST_PORT_PAT = ("^(%s|%s|%s)(?::0*?(|0|[1-9][0-9]{0,4}))?$") % (
-    REG_NAME_PAT,
-    IPV4_PAT,
-    IPV6_ADDRZ_PAT,
+    _REG_NAME_PAT,
+    _IPV4_PAT,
+    _IPV6_ADDRZ_PAT,
 )
 _HOST_PORT_RE = re.compile(_HOST_PORT_PAT, re.UNICODE | re.DOTALL)

-UNRESERVED_CHARS = set(
+_UNRESERVED_CHARS = set(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._-~"
 )
-SUB_DELIM_CHARS = set("!$&'()*+,;=")
-USERINFO_CHARS = UNRESERVED_CHARS | SUB_DELIM_CHARS | {":"}
-PATH_CHARS = USERINFO_CHARS | {"@", "/"}
-QUERY_CHARS = FRAGMENT_CHARS = PATH_CHARS | {"?"}
+_SUB_DELIM_CHARS = set("!$&'()*+,;=")
+_USERINFO_CHARS = _UNRESERVED_CHARS | _SUB_DELIM_CHARS | {":"}
+_PATH_CHARS = _USERINFO_CHARS | {"@", "/"}
+_QUERY_CHARS = _FRAGMENT_CHARS = _PATH_CHARS | {"?"}


-class Url(namedtuple("Url", url_attrs)):
+class Url(
+    typing.NamedTuple(
+        "Url",
+        [
+            ("scheme", typing.Optional[str]),
+            ("auth", typing.Optional[str]),
+            ("host", typing.Optional[str]),
+            ("port", typing.Optional[int]),
+            ("path", typing.Optional[str]),
+            ("query", typing.Optional[str]),
+            ("fragment", typing.Optional[str]),
+        ],
+    )
+):
    """
    Data structure for representing an HTTP URL. Used as a return value for
    :func:`parse_url`. Both the scheme and host are normalized as they are
    both case-insensitive according to RFC 3986.
    """

-    __slots__ = ()
-
-    def __new__(
+    def __new__(  # type: ignore[no-untyped-def]
        cls,
-        scheme=None,
-        auth=None,
-        host=None,
-        port=None,
-        path=None,
-        query=None,
-        fragment=None,
+        scheme: str | None = None,
+        auth: str | None = None,
+        host: str | None = None,
+        port: int | None = None,
+        path: str | None = None,
+        query: str | None = None,
+        fragment: str | None = None,
    ):
        if path and not path.startswith("/"):
            path = "/" + path
        if scheme is not None:
            scheme = scheme.lower()
-        return super(Url, cls).__new__(
-            cls, scheme, auth, host, port, path, query, fragment
-        )
+        return super().__new__(cls, scheme, auth, host, port, path, query, fragment)

    @property
-    def hostname(self):
+    def hostname(self) -> str | None:
        """For backwards-compatibility with urlparse. We're nice like that."""
        return self.host

    @property
-    def request_uri(self):
+    def request_uri(self) -> str:
        """Absolute path including the query string."""
        uri = self.path or "/"

@ -122,14 +129,37 @@ class Url(namedtuple("Url", url_attrs)):
        return uri

    @property
-    def netloc(self):
-        """Network location including host and port"""
+    def authority(self) -> str | None:
+        """
+        Authority component as defined in RFC 3986 3.2.
+        This includes userinfo (auth), host and port.
+
+        i.e.
+            userinfo@host:port
+        """
+        userinfo = self.auth
+        netloc = self.netloc
+        if netloc is None or userinfo is None:
+            return netloc
+        else:
+            return f"{userinfo}@{netloc}"
+
+    @property
+    def netloc(self) -> str | None:
+        """
+        Network location including host and port.
+
+        If you need the equivalent of urllib.parse's ``netloc``,
+        use the ``authority`` property instead.
+        """
+        if self.host is None:
+            return None
        if self.port:
-            return "%s:%d" % (self.host, self.port)
+            return f"{self.host}:{self.port}"
        return self.host

    @property
-    def url(self):
+    def url(self) -> str:
        """
        Convert self into a url

@ -138,88 +168,77 @@ class Url(namedtuple("Url", url_attrs)):
        :func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls
        with a blank port will have : removed).

-        Example: ::
+        Example:

-            >>> U = parse_url('http://google.com/mail/')
-            >>> U.url
-            'http://google.com/mail/'
-            >>> Url('http', 'username:password', 'host.com', 80,
-            ... '/path', 'query', 'fragment').url
-            'http://username:password@host.com:80/path?query#fragment'
+        .. code-block:: python
+
+            import urllib3
+
+            U = urllib3.util.parse_url("https://google.com/mail/")
+
+            print(U.url)
+            # "https://google.com/mail/"
+
+            print( urllib3.util.Url("https", "username:password",
+                                    "host.com", 80, "/path", "query", "fragment"
+                                    ).url
+                )
+            # "https://username:password@host.com:80/path?query#fragment"
        """
        scheme, auth, host, port, path, query, fragment = self
-        url = u""
+        url = ""

        # We use "is not None" because we want things to happen with empty strings (or 0 port)
        if scheme is not None:
-            url += scheme + u"://"
+            url += scheme + "://"
        if auth is not None:
-            url += auth + u"@"
+            url += auth + "@"
        if host is not None:
            url += host
        if port is not None:
-            url += u":" + str(port)
+            url += ":" + str(port)
        if path is not None:
            url += path
        if query is not None:
-            url += u"?" + query
+            url += "?" + query
        if fragment is not None:
-            url += u"#" + fragment
+            url += "#" + fragment

        return url

-    def __str__(self):
+    def __str__(self) -> str:
        return self.url


-def split_first(s, delims):
-    """
-    .. deprecated:: 1.25
-
-    Given a string and an iterable of delimiters, split on the first found
-    delimiter. Return two split parts and the matched delimiter.
-
-    If not found, then the first part is the full input string.
-
-    Example::
-
-        >>> split_first('foo/bar?baz', '?/=')
-        ('foo', 'bar?baz', '/')
-        >>> split_first('foo/bar?baz', '123')
-        ('foo/bar?baz', '', None)
-
-    Scales linearly with number of delims. Not ideal for large number of delims.
-    """
-    min_idx = None
-    min_delim = None
-    for d in delims:
-        idx = s.find(d)
-        if idx < 0:
-            continue
-
-        if min_idx is None or idx < min_idx:
-            min_idx = idx
-            min_delim = d
-
-    if min_idx is None or min_idx < 0:
-        return s, "", None
-
-    return s[:min_idx], s[min_idx + 1 :], min_delim
+@typing.overload
+def _encode_invalid_chars(
+    component: str, allowed_chars: typing.Container[str]
+) -> str:  # Abstract
+    ...


-def _encode_invalid_chars(component, allowed_chars, encoding="utf-8"):
+@typing.overload
+def _encode_invalid_chars(
+    component: None, allowed_chars: typing.Container[str]
+) -> None:  # Abstract
+    ...
+
+
+def _encode_invalid_chars(
+    component: str | None, allowed_chars: typing.Container[str]
+) -> str | None:
    """Percent-encodes a URI component without reapplying
    onto an already percent-encoded component.
    """
    if component is None:
        return component

-    component = six.ensure_text(component)
+    component = to_str(component)

    # Normalize existing percent-encoded bytes.
    # Try to see if the component we're encoding is already percent-encoded
    # so we can skip all '%' characters but still encode all others.
-    component, percent_encodings = PERCENT_RE.subn(
+    component, percent_encodings = _PERCENT_RE.subn(
        lambda match: match.group(0).upper(), component
    )

@ -228,7 +247,7 @@ def _encode_invalid_chars(component, allowed_chars, encoding="utf-8"):
    encoded_component = bytearray()

    for i in range(0, len(uri_bytes)):
-        # Will return a single character bytestring on both Python 2 & 3
+        # Will return a single character bytestring
        byte = uri_bytes[i : i + 1]
        byte_ord = ord(byte)
        if (is_percent_encoded and byte == b"%") or (
@ -238,10 +257,10 @@ def _encode_invalid_chars(component, allowed_chars, encoding="utf-8"):
            continue
        encoded_component.extend(b"%" + (hex(byte_ord)[2:].encode().zfill(2).upper()))

-    return encoded_component.decode(encoding)
+    return encoded_component.decode()


-def _remove_path_dot_segments(path):
+def _remove_path_dot_segments(path: str) -> str:
    # See http://tools.ietf.org/html/rfc3986#section-5.2.4 for pseudo-code
    segments = path.split("/")  # Turn the path into a list of segments
    output = []  # Initialize the variable to use to store output
@ -251,7 +270,7 @@ def _remove_path_dot_segments(path):
        if segment == ".":
            continue
        # Anything other than '..', should be appended to the output
-        elif segment != "..":
+        if segment != "..":
            output.append(segment)
        # In this case segment == '..', if we can, we should pop the last
        # element
@ -271,18 +290,25 @@ def _remove_path_dot_segments(path):
    return "/".join(output)


-def _normalize_host(host, scheme):
-    if host:
-        if isinstance(host, six.binary_type):
-            host = six.ensure_str(host)
+@typing.overload
+def _normalize_host(host: None, scheme: str | None) -> None:
+    ...

-        if scheme in NORMALIZABLE_SCHEMES:
-            is_ipv6 = IPV6_ADDRZ_RE.match(host)
+
+@typing.overload
+def _normalize_host(host: str, scheme: str | None) -> str:
+    ...
+
+
+def _normalize_host(host: str | None, scheme: str | None) -> str | None:
+    if host:
+        if scheme in _NORMALIZABLE_SCHEMES:
+            is_ipv6 = _IPV6_ADDRZ_RE.match(host)
            if is_ipv6:
                # IPv6 hosts of the form 'a::b%zone' are encoded in a URL as
                # such per RFC 6874: 'a::b%25zone'. Unquote the ZoneID
                # separator as necessary to return a valid RFC 4007 scoped IP.
-                match = ZONE_ID_RE.search(host)
+                match = _ZONE_ID_RE.search(host)
                if match:
                    start, end = match.span(1)
                    zone_id = host[start:end]
@ -291,46 +317,56 @@ def _normalize_host(host, scheme):
                        zone_id = zone_id[3:]
                    else:
                        zone_id = zone_id[1:]
-                    zone_id = "%" + _encode_invalid_chars(zone_id, UNRESERVED_CHARS)
-                    return host[:start].lower() + zone_id + host[end:]
+                    zone_id = _encode_invalid_chars(zone_id, _UNRESERVED_CHARS)
+                    return f"{host[:start].lower()}%{zone_id}{host[end:]}"
                else:
                    return host.lower()
-            elif not IPV4_RE.match(host):
-                return six.ensure_str(
-                    b".".join([_idna_encode(label) for label in host.split(".")])
+            elif not _IPV4_RE.match(host):
+                return to_str(
+                    b".".join([_idna_encode(label) for label in host.split(".")]),
+                    "ascii",
                )
    return host


-def _idna_encode(name):
-    if name and any(ord(x) >= 128 for x in name):
+def _idna_encode(name: str) -> bytes:
+    if not name.isascii():
        try:
            import idna
        except ImportError:
-            six.raise_from(
-                LocationParseError("Unable to parse URL without the 'idna' module"),
-                None,
-            )
+            raise LocationParseError(
+                "Unable to parse URL without the 'idna' module"
+            ) from None
+
        try:
            return idna.encode(name.lower(), strict=True, std3_rules=True)
        except idna.IDNAError:
-            six.raise_from(
-                LocationParseError(u"Name '%s' is not a valid IDNA label" % name), None
-            )
+            raise LocationParseError(
+                f"Name '{name}' is not a valid IDNA label"
+            ) from None
+
    return name.lower().encode("ascii")


-def _encode_target(target):
-    """Percent-encodes a request target so that there are no invalid characters"""
-    path, query = TARGET_RE.match(target).groups()
-    target = _encode_invalid_chars(path, PATH_CHARS)
-    query = _encode_invalid_chars(query, QUERY_CHARS)
+def _encode_target(target: str) -> str:
+    """Percent-encodes a request target so that there are no invalid characters
+
+    Pre-condition for this function is that 'target' must start with '/'.
+    If that is the case then _TARGET_RE will always produce a match.
+    """
+    match = _TARGET_RE.match(target)
+    if not match:  # Defensive:
+        raise LocationParseError(f"{target!r} is not a valid request URI")
+
+    path, query = match.groups()
+    encoded_target = _encode_invalid_chars(path, _PATH_CHARS)
    if query is not None:
-        target += "?" + query
-    return target
+        query = _encode_invalid_chars(query, _QUERY_CHARS)
+        encoded_target += "?" + query
+    return encoded_target


-def parse_url(url):
+def parse_url(url: str) -> Url:
    """
    Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is
    performed to parse incomplete urls. Fields not provided will be None.
@ -341,28 +377,44 @@ def parse_url(url):

    :param str url: URL to parse into a :class:`.Url` namedtuple.

-    Partly backwards-compatible with :mod:`urlparse`.
+    Partly backwards-compatible with :mod:`urllib.parse`.

-    Example::
+    Example:

-        >>> parse_url('http://google.com/mail/')
-        Url(scheme='http', host='google.com', port=None, path='/mail/', ...)
-        >>> parse_url('google.com:80')
-        Url(scheme=None, host='google.com', port=80, path=None, ...)
-        >>> parse_url('/foo?bar')
-        Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)
+    .. code-block:: python
+
+        import urllib3
+
+        print( urllib3.util.parse_url('http://google.com/mail/'))
+        # Url(scheme='http', host='google.com', port=None, path='/mail/', ...)
+
+        print( urllib3.util.parse_url('google.com:80'))
+        # Url(scheme=None, host='google.com', port=80, path=None, ...)
+
+        print( urllib3.util.parse_url('/foo?bar'))
+        # Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)
    """
    if not url:
        # Empty
        return Url()

    source_url = url
-    if not SCHEME_RE.search(url):
+    if not _SCHEME_RE.search(url):
        url = "//" + url

+    scheme: str | None
+    authority: str | None
+    auth: str | None
+    host: str | None
+    port: str | None
+    port_int: int | None
+    path: str | None
+    query: str | None
+    fragment: str | None
+
    try:
-        scheme, authority, path, query, fragment = URI_RE.match(url).groups()
-        normalize_uri = scheme is None or scheme.lower() in NORMALIZABLE_SCHEMES
+        scheme, authority, path, query, fragment = _URI_RE.match(url).groups()  # type: ignore[union-attr]
+        normalize_uri = scheme is None or scheme.lower() in _NORMALIZABLE_SCHEMES

        if scheme:
            scheme = scheme.lower()
@ -370,31 +422,33 @@ def parse_url(url):
        if authority:
            auth, _, host_port = authority.rpartition("@")
            auth = auth or None
-            host, port = _HOST_PORT_RE.match(host_port).groups()
+            host, port = _HOST_PORT_RE.match(host_port).groups()  # type: ignore[union-attr]
            if auth and normalize_uri:
-                auth = _encode_invalid_chars(auth, USERINFO_CHARS)
+                auth = _encode_invalid_chars(auth, _USERINFO_CHARS)
            if port == "":
                port = None
        else:
            auth, host, port = None, None, None

        if port is not None:
-            port = int(port)
-            if not (0 <= port <= 65535):
+            port_int = int(port)
+            if not (0 <= port_int <= 65535):
                raise LocationParseError(url)
+        else:
+            port_int = None

        host = _normalize_host(host, scheme)

        if normalize_uri and path:
            path = _remove_path_dot_segments(path)
-            path = _encode_invalid_chars(path, PATH_CHARS)
+            path = _encode_invalid_chars(path, _PATH_CHARS)
        if normalize_uri and query:
-            query = _encode_invalid_chars(query, QUERY_CHARS)
+            query = _encode_invalid_chars(query, _QUERY_CHARS)
        if normalize_uri and fragment:
-            fragment = _encode_invalid_chars(fragment, FRAGMENT_CHARS)
+            fragment = _encode_invalid_chars(fragment, _FRAGMENT_CHARS)

-    except (ValueError, AttributeError):
-        return six.raise_from(LocationParseError(source_url), None)
+    except (ValueError, AttributeError) as e:
+        raise LocationParseError(source_url) from e

    # For the sake of backwards compatibility we put empty
    # string values for path if there are any defined values
@ -406,30 +460,12 @@ def parse_url(url):
        else:
            path = None

-    # Ensure that each part of the URL is a `str` for
-    # backwards compatibility.
-    if isinstance(url, six.text_type):
-        ensure_func = six.ensure_text
-    else:
-        ensure_func = six.ensure_str
-
-    def ensure_type(x):
-        return x if x is None else ensure_func(x)
-
    return Url(
-        scheme=ensure_type(scheme),
-        auth=ensure_type(auth),
-        host=ensure_type(host),
-        port=port,
-        path=ensure_type(path),
-        query=ensure_type(query),
-        fragment=ensure_type(fragment),
+        scheme=scheme,
+        auth=auth,
+        host=host,
+        port=port_int,
+        path=path,
+        query=query,
+        fragment=fragment,
    )
-
-
-def get_host(url):
-    """
-    Deprecated. Use :func:`parse_url` instead.
-    """
-    p = parse_url(url)
-    return p.scheme or "http", p.hostname, p.port