From 08e08293fecb901f058d93178f706f07ae86c673 Mon Sep 17 00:00:00 2001 From: JonnyWong16 <9099342+JonnyWong16@users.noreply.github.com> Date: Sun, 24 Jan 2021 21:16:27 -0800 Subject: [PATCH] Update requests to 2.25.1 --- lib/requests/__init__.py | 62 +++++++++----- lib/requests/__version__.py | 8 +- lib/requests/adapters.py | 42 +++++---- lib/requests/api.py | 29 ++++--- lib/requests/auth.py | 20 ++++- lib/requests/compat.py | 5 +- lib/requests/cookies.py | 41 +++++---- lib/requests/exceptions.py | 13 +-- lib/requests/help.py | 5 +- lib/requests/hooks.py | 4 +- lib/requests/models.py | 62 ++++++++------ lib/requests/sessions.py | 128 +++++++++++++++++++--------- lib/requests/status_codes.py | 42 +++++++-- lib/requests/structures.py | 10 +-- lib/requests/utils.py | 160 +++++++++++++++++++++++++++-------- 15 files changed, 429 insertions(+), 202 deletions(-) diff --git a/lib/requests/__init__.py b/lib/requests/__init__.py index 268e7dcc..f8f94295 100644 --- a/lib/requests/__init__.py +++ b/lib/requests/__init__.py @@ -9,32 +9,32 @@ Requests HTTP Library ~~~~~~~~~~~~~~~~~~~~~ -Requests is an HTTP library, written in Python, for human beings. Basic GET -usage: +Requests is an HTTP library, written in Python, for human beings. +Basic GET usage: >>> import requests >>> r = requests.get('https://www.python.org') >>> r.status_code 200 - >>> 'Python is a programming language' in r.content + >>> b'Python is a programming language' in r.content True ... or POST: >>> payload = dict(key1='value1', key2='value2') - >>> r = requests.post('http://httpbin.org/post', data=payload) + >>> r = requests.post('https://httpbin.org/post', data=payload) >>> print(r.text) { ... "form": { - "key2": "value2", - "key1": "value1" + "key1": "value1", + "key2": "value2" }, ... } The other HTTP methods are supported - see `requests.api`. Full documentation -is at . +is at . :copyright: (c) 2017 by Kenneth Reitz. :license: Apache 2.0, see LICENSE for more details. @@ -57,32 +57,53 @@ def check_compatibility(urllib3_version, chardet_version): # Check urllib3 for compatibility. major, minor, patch = urllib3_version # noqa: F811 major, minor, patch = int(major), int(minor), int(patch) - # urllib3 >= 1.21.1, <= 1.22 + # urllib3 >= 1.21.1, <= 1.26 assert major == 1 assert minor >= 21 - assert minor <= 22 + assert minor <= 26 # Check chardet for compatibility. major, minor, patch = chardet_version.split('.')[:3] major, minor, patch = int(major), int(minor), int(patch) - # chardet >= 3.0.2, < 3.1.0 - assert major == 3 - assert minor < 1 - assert patch >= 2 + # chardet >= 3.0.2, < 5.0.0 + assert (3, 0, 2) <= (major, minor, patch) < (5, 0, 0) +def _check_cryptography(cryptography_version): + # cryptography < 1.3.4 + try: + cryptography_version = list(map(int, cryptography_version.split('.'))) + except ValueError: + return + + if cryptography_version < [1, 3, 4]: + warning = 'Old version of cryptography ({}) may cause slowdown.'.format(cryptography_version) + warnings.warn(warning, RequestsDependencyWarning) + # Check imported dependencies for compatibility. try: check_compatibility(urllib3.__version__, chardet.__version__) except (AssertionError, ValueError): - warnings.warn("urllib3 ({0}) or chardet ({1}) doesn't match a supported " + warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported " "version!".format(urllib3.__version__, chardet.__version__), RequestsDependencyWarning) -# Attempt to enable urllib3's SNI support, if possible +# Attempt to enable urllib3's fallback for SNI support +# if the standard library doesn't support SNI or the +# 'ssl' library isn't available. try: - from urllib3.contrib import pyopenssl - pyopenssl.inject_into_urllib3() + try: + import ssl + except ImportError: + ssl = None + + if not getattr(ssl, "HAS_SNI", False): + from urllib3.contrib import pyopenssl + pyopenssl.inject_into_urllib3() + + # Check cryptography version + from cryptography import __version__ as cryptography_version + _check_cryptography(cryptography_version) except ImportError: pass @@ -108,12 +129,7 @@ from .exceptions import ( # Set default logging handler to avoid "No handler found" warnings. import logging -try: # Python 2.7+ - from logging import NullHandler -except ImportError: - class NullHandler(logging.Handler): - def emit(self, record): - pass +from logging import NullHandler logging.getLogger(__name__).addHandler(NullHandler()) diff --git a/lib/requests/__version__.py b/lib/requests/__version__.py index dc33eef6..1267488d 100644 --- a/lib/requests/__version__.py +++ b/lib/requests/__version__.py @@ -4,11 +4,11 @@ __title__ = 'requests' __description__ = 'Python HTTP for Humans.' -__url__ = 'http://python-requests.org' -__version__ = '2.18.4' -__build__ = 0x021804 +__url__ = 'https://requests.readthedocs.io' +__version__ = '2.25.1' +__build__ = 0x022501 __author__ = 'Kenneth Reitz' __author_email__ = 'me@kennethreitz.org' __license__ = 'Apache 2.0' -__copyright__ = 'Copyright 2017 Kenneth Reitz' +__copyright__ = 'Copyright 2020 Kenneth Reitz' __cake__ = u'\u2728 \U0001f370 \u2728' diff --git a/lib/requests/adapters.py b/lib/requests/adapters.py index 00f8792b..fa4d9b3c 100644 --- a/lib/requests/adapters.py +++ b/lib/requests/adapters.py @@ -13,6 +13,7 @@ import socket from urllib3.poolmanager import PoolManager, proxy_from_url from urllib3.response import HTTPResponse +from urllib3.util import parse_url from urllib3.util import Timeout as TimeoutSauce from urllib3.util.retry import Retry from urllib3.exceptions import ClosedPoolError @@ -25,16 +26,18 @@ from urllib3.exceptions import ProtocolError from urllib3.exceptions import ReadTimeoutError from urllib3.exceptions import SSLError as _SSLError from urllib3.exceptions import ResponseError +from urllib3.exceptions import LocationValueError from .models import Response from .compat import urlparse, basestring -from .utils import (DEFAULT_CA_BUNDLE_PATH, get_encoding_from_headers, - prepend_scheme_if_needed, get_auth_from_url, urldefragauth, - select_proxy) +from .utils import (DEFAULT_CA_BUNDLE_PATH, extract_zipped_paths, + get_encoding_from_headers, prepend_scheme_if_needed, + get_auth_from_url, urldefragauth, select_proxy) from .structures import CaseInsensitiveDict from .cookies import extract_cookies_to_jar from .exceptions import (ConnectionError, ConnectTimeout, ReadTimeout, SSLError, - ProxyError, RetryError, InvalidSchema) + ProxyError, RetryError, InvalidSchema, InvalidProxyURL, + InvalidURL) from .auth import _basic_auth_str try: @@ -126,8 +129,7 @@ class HTTPAdapter(BaseAdapter): self.init_poolmanager(pool_connections, pool_maxsize, block=pool_block) def __getstate__(self): - return dict((attr, getattr(self, attr, None)) for attr in - self.__attrs__) + return {attr: getattr(self, attr, None) for attr in self.__attrs__} def __setstate__(self, state): # Can't handle by adding 'proxy_manager' to self.__attrs__ because @@ -219,11 +221,11 @@ class HTTPAdapter(BaseAdapter): cert_loc = verify if not cert_loc: - cert_loc = DEFAULT_CA_BUNDLE_PATH + cert_loc = extract_zipped_paths(DEFAULT_CA_BUNDLE_PATH) if not cert_loc or not os.path.exists(cert_loc): raise IOError("Could not find a suitable TLS CA certificate bundle, " - "invalid path: {0}".format(cert_loc)) + "invalid path: {}".format(cert_loc)) conn.cert_reqs = 'CERT_REQUIRED' @@ -245,10 +247,10 @@ class HTTPAdapter(BaseAdapter): conn.key_file = None if conn.cert_file and not os.path.exists(conn.cert_file): raise IOError("Could not find the TLS certificate file, " - "invalid path: {0}".format(conn.cert_file)) + "invalid path: {}".format(conn.cert_file)) if conn.key_file and not os.path.exists(conn.key_file): raise IOError("Could not find the TLS key file, " - "invalid path: {0}".format(conn.key_file)) + "invalid path: {}".format(conn.key_file)) def build_response(self, req, resp): """Builds a :class:`Response ` object from a urllib3 @@ -300,6 +302,10 @@ class HTTPAdapter(BaseAdapter): if proxy: proxy = prepend_scheme_if_needed(proxy, 'http') + proxy_url = parse_url(proxy) + if not proxy_url.host: + raise InvalidProxyURL("Please check proxy URL. It is malformed" + " and could be missing the host.") proxy_manager = self.proxy_manager_for(proxy) conn = proxy_manager.connection_from_url(url) else: @@ -373,7 +379,7 @@ class HTTPAdapter(BaseAdapter): when subclassing the :class:`HTTPAdapter `. - :param proxies: The url of the proxy being used for this request. + :param proxy: The url of the proxy being used for this request. :rtype: dict """ headers = {} @@ -402,11 +408,14 @@ class HTTPAdapter(BaseAdapter): :rtype: requests.Response """ - conn = self.get_connection(request.url, proxies) + try: + conn = self.get_connection(request.url, proxies) + except LocationValueError as e: + raise InvalidURL(e, request=request) self.cert_verify(conn, request.url, verify, cert) url = self.request_url(request, proxies) - self.add_headers(request) + self.add_headers(request, stream=stream, timeout=timeout, verify=verify, cert=cert, proxies=proxies) chunked = not (request.body is None or 'Content-Length' in request.headers) @@ -416,7 +425,7 @@ class HTTPAdapter(BaseAdapter): timeout = TimeoutSauce(connect=connect, read=read) except ValueError as e: # this may raise a string formatting error. - err = ("Invalid timeout {0}. Pass a (connect, read) " + err = ("Invalid timeout {}. Pass a (connect, read) " "timeout tuple, or a single float to set " "both timeouts to the same value".format(timeout)) raise ValueError(err) @@ -466,11 +475,10 @@ class HTTPAdapter(BaseAdapter): # Receive the response from the server try: - # For Python 2.7+ versions, use buffering of HTTP - # responses + # For Python 2.7, use buffering of HTTP responses r = low_conn.getresponse(buffering=True) except TypeError: - # For compatibility with Python 2.6 versions and back + # For compatibility with Python 3.3+ r = low_conn.getresponse() resp = HTTPResponse.from_httplib( diff --git a/lib/requests/api.py b/lib/requests/api.py index bc2115c1..e978e203 100644 --- a/lib/requests/api.py +++ b/lib/requests/api.py @@ -16,11 +16,13 @@ from . import sessions def request(method, url, **kwargs): """Constructs and sends a :class:`Request `. - :param method: method for the new :class:`Request` object. + :param method: method for the new :class:`Request` object: ``GET``, ``OPTIONS``, ``HEAD``, ``POST``, ``PUT``, ``PATCH``, or ``DELETE``. :param url: URL for the new :class:`Request` object. - :param params: (optional) Dictionary or bytes to be sent in the query string for the :class:`Request`. - :param data: (optional) Dictionary or list of tuples ``[(key, value)]`` (will be form-encoded), bytes, or file-like object to send in the body of the :class:`Request`. - :param json: (optional) json data to send in the body of the :class:`Request`. + :param params: (optional) Dictionary, list of tuples or bytes to send + in the query string for the :class:`Request`. + :param data: (optional) Dictionary, list of tuples, bytes, or file-like + object to send in the body of the :class:`Request`. + :param json: (optional) A JSON serializable Python object to send in the body of the :class:`Request`. :param headers: (optional) Dictionary of HTTP Headers to send with the :class:`Request`. :param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`. :param files: (optional) Dictionary of ``'name': file-like-objects`` (or ``{'name': file-tuple}``) for multipart encoding upload. @@ -47,7 +49,8 @@ def request(method, url, **kwargs): Usage:: >>> import requests - >>> req = requests.request('GET', 'http://httpbin.org/get') + >>> req = requests.request('GET', 'https://httpbin.org/get') + >>> req """ @@ -62,7 +65,8 @@ def get(url, params=None, **kwargs): r"""Sends a GET request. :param url: URL for the new :class:`Request` object. - :param params: (optional) Dictionary or bytes to be sent in the query string for the :class:`Request`. + :param params: (optional) Dictionary, list of tuples or bytes to send + in the query string for the :class:`Request`. :param \*\*kwargs: Optional arguments that ``request`` takes. :return: :class:`Response ` object :rtype: requests.Response @@ -89,7 +93,9 @@ def head(url, **kwargs): r"""Sends a HEAD request. :param url: URL for the new :class:`Request` object. - :param \*\*kwargs: Optional arguments that ``request`` takes. + :param \*\*kwargs: Optional arguments that ``request`` takes. If + `allow_redirects` is not provided, it will be set to `False` (as + opposed to the default :meth:`request` behavior). :return: :class:`Response ` object :rtype: requests.Response """ @@ -102,7 +108,8 @@ def post(url, data=None, json=None, **kwargs): r"""Sends a POST request. :param url: URL for the new :class:`Request` object. - :param data: (optional) Dictionary (will be form-encoded), bytes, or file-like object to send in the body of the :class:`Request`. + :param data: (optional) Dictionary, list of tuples, bytes, or file-like + object to send in the body of the :class:`Request`. :param json: (optional) json data to send in the body of the :class:`Request`. :param \*\*kwargs: Optional arguments that ``request`` takes. :return: :class:`Response ` object @@ -116,7 +123,8 @@ def put(url, data=None, **kwargs): r"""Sends a PUT request. :param url: URL for the new :class:`Request` object. - :param data: (optional) Dictionary (will be form-encoded), bytes, or file-like object to send in the body of the :class:`Request`. + :param data: (optional) Dictionary, list of tuples, bytes, or file-like + object to send in the body of the :class:`Request`. :param json: (optional) json data to send in the body of the :class:`Request`. :param \*\*kwargs: Optional arguments that ``request`` takes. :return: :class:`Response ` object @@ -130,7 +138,8 @@ def patch(url, data=None, **kwargs): r"""Sends a PATCH request. :param url: URL for the new :class:`Request` object. - :param data: (optional) Dictionary (will be form-encoded), bytes, or file-like object to send in the body of the :class:`Request`. + :param data: (optional) Dictionary, list of tuples, bytes, or file-like + object to send in the body of the :class:`Request`. :param json: (optional) json data to send in the body of the :class:`Request`. :param \*\*kwargs: Optional arguments that ``request`` takes. :return: :class:`Response ` object diff --git a/lib/requests/auth.py b/lib/requests/auth.py index 1a182dff..eeface39 100644 --- a/lib/requests/auth.py +++ b/lib/requests/auth.py @@ -38,7 +38,7 @@ def _basic_auth_str(username, password): if not isinstance(username, basestring): warnings.warn( "Non-string usernames will no longer be supported in Requests " - "3.0.0. Please convert the object you've passed in ({0!r}) to " + "3.0.0. Please convert the object you've passed in ({!r}) to " "a string or bytes object in the near future to avoid " "problems.".format(username), category=DeprecationWarning, @@ -48,9 +48,9 @@ def _basic_auth_str(username, password): if not isinstance(password, basestring): warnings.warn( "Non-string passwords will no longer be supported in Requests " - "3.0.0. Please convert the object you've passed in ({0!r}) to " + "3.0.0. Please convert the object you've passed in ({!r}) to " "a string or bytes object in the near future to avoid " - "problems.".format(password), + "problems.".format(type(password)), category=DeprecationWarning, ) password = str(password) @@ -153,6 +153,18 @@ class HTTPDigestAuth(AuthBase): x = x.encode('utf-8') return hashlib.sha1(x).hexdigest() hash_utf8 = sha_utf8 + elif _algorithm == 'SHA-256': + def sha256_utf8(x): + if isinstance(x, str): + x = x.encode('utf-8') + return hashlib.sha256(x).hexdigest() + hash_utf8 = sha256_utf8 + elif _algorithm == 'SHA-512': + def sha512_utf8(x): + if isinstance(x, str): + x = x.encode('utf-8') + return hashlib.sha512(x).hexdigest() + hash_utf8 = sha512_utf8 KD = lambda s, d: hash_utf8("%s:%s" % (s, d)) @@ -227,7 +239,7 @@ class HTTPDigestAuth(AuthBase): """ # If response is not 4xx, do not auth - # See https://github.com/requests/requests/issues/3772 + # See https://github.com/psf/requests/issues/3772 if not 400 <= r.status_code < 500: self._thread_local.num_401_calls = 1 return r diff --git a/lib/requests/compat.py b/lib/requests/compat.py index f417cfd8..5de0769f 100644 --- a/lib/requests/compat.py +++ b/lib/requests/compat.py @@ -43,8 +43,9 @@ if is_py2: import cookielib from Cookie import Morsel from StringIO import StringIO + # Keep OrderedDict for backwards compatibility. + from collections import Callable, Mapping, MutableMapping, OrderedDict - from urllib3.packages.ordered_dict import OrderedDict builtin_str = str bytes = str @@ -59,7 +60,9 @@ elif is_py3: from http import cookiejar as cookielib from http.cookies import Morsel from io import StringIO + # Keep OrderedDict for backwards compatibility. from collections import OrderedDict + from collections.abc import Callable, Mapping, MutableMapping builtin_str = str str = str diff --git a/lib/requests/cookies.py b/lib/requests/cookies.py index ab3c88b9..56fccd9c 100644 --- a/lib/requests/cookies.py +++ b/lib/requests/cookies.py @@ -12,10 +12,9 @@ requests.utils imports from here, so be careful with imports. import copy import time import calendar -import collections from ._internal_utils import to_native_string -from .compat import cookielib, urlparse, urlunparse, Morsel +from .compat import cookielib, urlparse, urlunparse, Morsel, MutableMapping try: import threading @@ -169,7 +168,7 @@ class CookieConflictError(RuntimeError): """ -class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping): +class RequestsCookieJar(cookielib.CookieJar, MutableMapping): """Compatibility class; is a cookielib.CookieJar, but exposes a dict interface. @@ -415,9 +414,14 @@ class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping): def copy(self): """Return a copy of this RequestsCookieJar.""" new_cj = RequestsCookieJar() + new_cj.set_policy(self.get_policy()) new_cj.update(self) return new_cj + def get_policy(self): + """Return the CookiePolicy instance used.""" + return self._policy + def _copy_cookie_jar(jar): if jar is None: @@ -440,20 +444,21 @@ def create_cookie(name, value, **kwargs): By default, the pair of `name` and `value` will be set for the domain '' and sent on every request (this is sometimes called a "supercookie"). """ - result = dict( - version=0, - name=name, - value=value, - port=None, - domain='', - path='/', - secure=False, - expires=None, - discard=True, - comment=None, - comment_url=None, - rest={'HttpOnly': None}, - rfc2109=False,) + result = { + 'version': 0, + 'name': name, + 'value': value, + 'port': None, + 'domain': '', + 'path': '/', + 'secure': False, + 'expires': None, + 'discard': True, + 'comment': None, + 'comment_url': None, + 'rest': {'HttpOnly': None}, + 'rfc2109': False, + } badargs = set(kwargs) - set(result) if badargs: @@ -507,6 +512,7 @@ def cookiejar_from_dict(cookie_dict, cookiejar=None, overwrite=True): :param cookiejar: (optional) A cookiejar to add the cookies to. :param overwrite: (optional) If False, will not replace cookies already in the jar with new ones. + :rtype: CookieJar """ if cookiejar is None: cookiejar = RequestsCookieJar() @@ -525,6 +531,7 @@ def merge_cookies(cookiejar, cookies): :param cookiejar: CookieJar object to add the cookies to. :param cookies: Dictionary or CookieJar object to be added. + :rtype: CookieJar """ if not isinstance(cookiejar, cookielib.CookieJar): raise ValueError('You can only merge into CookieJar') diff --git a/lib/requests/exceptions.py b/lib/requests/exceptions.py index be7eaed6..0e9c820c 100644 --- a/lib/requests/exceptions.py +++ b/lib/requests/exceptions.py @@ -85,16 +85,20 @@ class InvalidHeader(RequestException, ValueError): """The header value provided was somehow invalid.""" +class InvalidProxyURL(InvalidURL): + """The proxy URL provided is invalid.""" + + class ChunkedEncodingError(RequestException): """The server declared chunked encoding but sent an invalid chunk.""" class ContentDecodingError(RequestException, BaseHTTPError): - """Failed to decode response content""" + """Failed to decode response content.""" class StreamConsumedError(RequestException, TypeError): - """The content for this response was already consumed""" + """The content for this response was already consumed.""" class RetryError(RequestException): @@ -102,21 +106,18 @@ class RetryError(RequestException): class UnrewindableBodyError(RequestException): - """Requests encountered an error when trying to rewind a body""" + """Requests encountered an error when trying to rewind a body.""" # Warnings class RequestsWarning(Warning): """Base warning for Requests.""" - pass class FileModeWarning(RequestsWarning, DeprecationWarning): """A file was opened in text mode, but Requests determined its binary length.""" - pass class RequestsDependencyWarning(RequestsWarning): """An imported dependency doesn't match the expected version range.""" - pass diff --git a/lib/requests/help.py b/lib/requests/help.py index 5440ee61..e53d35ef 100644 --- a/lib/requests/help.py +++ b/lib/requests/help.py @@ -13,7 +13,7 @@ import chardet from . import __version__ as requests_version try: - from .packages.urllib3.contrib import pyopenssl + from urllib3.contrib import pyopenssl except ImportError: pyopenssl = None OpenSSL = None @@ -89,8 +89,7 @@ def info(): 'version': getattr(idna, '__version__', ''), } - # OPENSSL_VERSION_NUMBER doesn't exist in the Python 2.6 ssl module. - system_ssl = getattr(ssl, 'OPENSSL_VERSION_NUMBER', None) + system_ssl = ssl.OPENSSL_VERSION_NUMBER system_ssl_info = { 'version': '%x' % system_ssl if system_ssl is not None else '' } diff --git a/lib/requests/hooks.py b/lib/requests/hooks.py index 32b32de7..7a51f212 100644 --- a/lib/requests/hooks.py +++ b/lib/requests/hooks.py @@ -15,14 +15,14 @@ HOOKS = ['response'] def default_hooks(): - return dict((event, []) for event in HOOKS) + return {event: [] for event in HOOKS} # TODO: response is the only one def dispatch_hook(key, hooks, hook_data, **kwargs): """Dispatches a hook dictionary on a given piece of data.""" - hooks = hooks or dict() + hooks = hooks or {} hooks = hooks.get(key) if hooks: if hasattr(hooks, '__call__'): diff --git a/lib/requests/models.py b/lib/requests/models.py index 4041cac3..ec2edc20 100644 --- a/lib/requests/models.py +++ b/lib/requests/models.py @@ -7,13 +7,12 @@ requests.models This module contains the primary objects that power Requests. """ -import collections import datetime import sys # Import encoding now, to avoid implicit import later. # Implicit import within threads may cause LookupError when standard library is in a ZIP, -# such as in Embedded Python. See https://github.com/requests/requests/issues/3578. +# such as in Embedded Python. See https://github.com/psf/requests/issues/3578. import encodings.idna from urllib3.fields import RequestField @@ -37,6 +36,7 @@ from .utils import ( stream_decode_response_unicode, to_key_val_list, parse_header_links, iter_slices, guess_json_utf, super_len, check_header_validity) from .compat import ( + Callable, Mapping, cookielib, urlunparse, urlsplit, urlencode, str, bytes, is_py2, chardet, builtin_str, basestring) from .compat import json as complexjson @@ -155,8 +155,12 @@ class RequestEncodingMixin(object): if isinstance(fp, (str, bytes, bytearray)): fdata = fp - else: + elif hasattr(fp, 'read'): fdata = fp.read() + elif fp is None: + continue + else: + fdata = fp rf = RequestField(name=k, data=fdata, filename=fn, headers=fh) rf.make_multipart(content_type=ft) @@ -174,10 +178,10 @@ class RequestHooksMixin(object): if event not in self.hooks: raise ValueError('Unsupported event specified, with event name "%s"' % (event)) - if isinstance(hook, collections.Callable): + if isinstance(hook, Callable): self.hooks[event].append(hook) elif hasattr(hook, '__iter__'): - self.hooks[event].extend(h for h in hook if isinstance(h, collections.Callable)) + self.hooks[event].extend(h for h in hook if isinstance(h, Callable)) def deregister_hook(self, event, hook): """Deregister a previously registered hook. @@ -200,9 +204,13 @@ class Request(RequestHooksMixin): :param url: URL to send. :param headers: dictionary of headers to send. :param files: dictionary of {filename: fileobject} files to multipart upload. - :param data: the body to attach to the request. If a dictionary is provided, form-encoding will take place. + :param data: the body to attach to the request. If a dictionary or + list of tuples ``[(key, value)]`` is provided, form-encoding will + take place. :param json: json for the body to attach to the request (if files or data is not specified). - :param params: dictionary of URL parameters to append to the URL. + :param params: URL parameters to append to the URL. If a dictionary or + list of tuples ``[(key, value)]`` is provided, form-encoding will + take place. :param auth: Auth handler or (user, pass) tuple. :param cookies: dictionary or CookieJar of cookies to attach to this request. :param hooks: dictionary of callback hooks, for internal usage. @@ -210,7 +218,7 @@ class Request(RequestHooksMixin): Usage:: >>> import requests - >>> req = requests.Request('GET', 'http://httpbin.org/get') + >>> req = requests.Request('GET', 'https://httpbin.org/get') >>> req.prepare() """ @@ -265,13 +273,16 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): """The fully mutable :class:`PreparedRequest ` object, containing the exact bytes that will be sent to the server. - Generated from either a :class:`Request ` object or manually. + Instances are generated from a :class:`Request ` object, and + should not be instantiated manually; doing so may produce undesirable + effects. Usage:: >>> import requests - >>> req = requests.Request('GET', 'http://httpbin.org/get') + >>> req = requests.Request('GET', 'https://httpbin.org/get') >>> r = req.prepare() + >>> r >>> s = requests.Session() @@ -350,7 +361,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): #: We're unable to blindly call unicode/str functions #: as this will include the bytestring indicator (b'') #: on python 3.x. - #: https://github.com/requests/requests/pull/2238 + #: https://github.com/psf/requests/pull/2238 if isinstance(url, bytes): url = url.decode('utf8') else: @@ -461,15 +472,15 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): is_stream = all([ hasattr(data, '__iter__'), - not isinstance(data, (basestring, list, tuple, collections.Mapping)) + not isinstance(data, (basestring, list, tuple, Mapping)) ]) - try: - length = super_len(data) - except (TypeError, AttributeError, UnsupportedOperation): - length = None - if is_stream: + try: + length = super_len(data) + except (TypeError, AttributeError, UnsupportedOperation): + length = None + body = data if getattr(body, 'tell', None) is not None: @@ -600,7 +611,7 @@ class Response(object): #: File-like object representation of response (for advanced usage). #: Use of ``raw`` requires that ``stream=True`` be set on the request. - # This requirement does not apply for use internally to Requests. + #: This requirement does not apply for use internally to Requests. self.raw = None #: Final URL location of Response. @@ -644,10 +655,7 @@ class Response(object): if not self._content_consumed: self.content - return dict( - (attr, getattr(self, attr, None)) - for attr in self.__attrs__ - ) + return {attr: getattr(self, attr, None) for attr in self.__attrs__} def __setstate__(self, state): for name, value in state.items(): @@ -686,11 +694,11 @@ class Response(object): @property def ok(self): - """Returns True if :attr:`status_code` is less than 400. + """Returns True if :attr:`status_code` is less than 400, False if not. This attribute checks if the status code of the response is between 400 and 600 to see if there was a client error or a server error. If - the status code, is between 200 and 400, this will return True. This + the status code is between 200 and 400, this will return True. This is **not** a check to see if the response code is ``200 OK``. """ try: @@ -776,7 +784,7 @@ class Response(object): return chunks - def iter_lines(self, chunk_size=ITER_CHUNK_SIZE, decode_unicode=None, delimiter=None): + def iter_lines(self, chunk_size=ITER_CHUNK_SIZE, decode_unicode=False, delimiter=None): """Iterates over the response data, one line at a time. When stream=True is set on the request, this avoids reading the content at once into memory for large responses. @@ -820,7 +828,7 @@ class Response(object): if self.status_code == 0 or self.raw is None: self._content = None else: - self._content = bytes().join(self.iter_content(CONTENT_CHUNK_SIZE)) or bytes() + self._content = b''.join(self.iter_content(CONTENT_CHUNK_SIZE)) or b'' self._content_consumed = True # don't need to release the connection; that's been handled by urllib3 @@ -910,7 +918,7 @@ class Response(object): return l def raise_for_status(self): - """Raises stored :class:`HTTPError`, if one occurred.""" + """Raises :class:`HTTPError`, if one occurred.""" http_error_msg = '' if isinstance(self.reason, bytes): diff --git a/lib/requests/sessions.py b/lib/requests/sessions.py index 6570e733..45ab8a5d 100644 --- a/lib/requests/sessions.py +++ b/lib/requests/sessions.py @@ -1,26 +1,26 @@ # -*- coding: utf-8 -*- """ -requests.session -~~~~~~~~~~~~~~~~ +requests.sessions +~~~~~~~~~~~~~~~~~ This module provides a Session object to manage and persist settings across requests (cookies, auth, proxies). """ import os -import platform +import sys import time -from collections import Mapping from datetime import timedelta +from collections import OrderedDict from .auth import _basic_auth_str -from .compat import cookielib, is_py3, OrderedDict, urljoin, urlparse +from .compat import cookielib, is_py3, urljoin, urlparse, Mapping from .cookies import ( cookiejar_from_dict, extract_cookies_to_jar, RequestsCookieJar, merge_cookies) from .models import Request, PreparedRequest, DEFAULT_REDIRECT_LIMIT from .hooks import default_hooks, dispatch_hook from ._internal_utils import to_native_string -from .utils import to_key_val_list, default_headers +from .utils import to_key_val_list, default_headers, DEFAULT_PORTS from .exceptions import ( TooManyRedirects, InvalidSchema, ChunkedEncodingError, ContentDecodingError) @@ -38,8 +38,8 @@ from .status_codes import codes from .models import REDIRECT_STATI # Preferred clock, based on which one is more accurate on a given system. -if platform.system() == 'Windows': - try: # Python 3.3+ +if sys.platform == 'win32': + try: # Python 3.4+ preferred_clock = time.perf_counter except AttributeError: # Earlier than Python 3. preferred_clock = time.clock @@ -116,6 +116,31 @@ class SessionRedirectMixin(object): return to_native_string(location, 'utf8') return None + def should_strip_auth(self, old_url, new_url): + """Decide whether Authorization header should be removed when redirecting""" + old_parsed = urlparse(old_url) + new_parsed = urlparse(new_url) + if old_parsed.hostname != new_parsed.hostname: + return True + # Special case: allow http -> https redirect when using the standard + # ports. This isn't specified by RFC 7235, but is kept to avoid + # breaking backwards compatibility with older versions of requests + # that allowed any redirects on the same host. + if (old_parsed.scheme == 'http' and old_parsed.port in (80, None) + and new_parsed.scheme == 'https' and new_parsed.port in (443, None)): + return False + + # Handle default port usage corresponding to scheme. + changed_port = old_parsed.port != new_parsed.port + changed_scheme = old_parsed.scheme != new_parsed.scheme + default_port = (DEFAULT_PORTS.get(old_parsed.scheme, None), None) + if (not changed_scheme and old_parsed.port in default_port + and new_parsed.port in default_port): + return False + + # Standard case: root URI must match + return changed_port or changed_scheme + def resolve_redirects(self, resp, req, stream=False, timeout=None, verify=True, cert=None, proxies=None, yield_requests=False, **adapter_kwargs): """Receives a Response. Returns a generator of Responses or Requests.""" @@ -123,6 +148,7 @@ class SessionRedirectMixin(object): hist = [] # keep track of history url = self.get_redirect_target(resp) + previous_fragment = urlparse(req.url).fragment while url: prepared_request = req.copy() @@ -137,7 +163,7 @@ class SessionRedirectMixin(object): resp.raw.read(decode_content=False) if len(resp.history) >= self.max_redirects: - raise TooManyRedirects('Exceeded %s redirects.' % self.max_redirects, response=resp) + raise TooManyRedirects('Exceeded {} redirects.'.format(self.max_redirects), response=resp) # Release the connection back into the pool. resp.close() @@ -145,10 +171,14 @@ class SessionRedirectMixin(object): # Handle redirection without scheme (see: RFC 1808 Section 4) if url.startswith('//'): parsed_rurl = urlparse(resp.url) - url = '%s:%s' % (to_native_string(parsed_rurl.scheme), url) + url = ':'.join([to_native_string(parsed_rurl.scheme), url]) - # The scheme should be lower case... + # Normalize url case and attach previous fragment if needed (RFC 7231 7.1.2) parsed = urlparse(url) + if parsed.fragment == '' and previous_fragment: + parsed = parsed._replace(fragment=previous_fragment) + elif parsed.fragment: + previous_fragment = parsed.fragment url = parsed.geturl() # Facilitate relative 'location' headers, as allowed by RFC 7231. @@ -163,19 +193,16 @@ class SessionRedirectMixin(object): self.rebuild_method(prepared_request, resp) - # https://github.com/requests/requests/issues/1084 + # https://github.com/psf/requests/issues/1084 if resp.status_code not in (codes.temporary_redirect, codes.permanent_redirect): - # https://github.com/requests/requests/issues/3490 + # https://github.com/psf/requests/issues/3490 purged_headers = ('Content-Length', 'Content-Type', 'Transfer-Encoding') for header in purged_headers: prepared_request.headers.pop(header, None) prepared_request.body = None headers = prepared_request.headers - try: - del headers['Cookie'] - except KeyError: - pass + headers.pop('Cookie', None) # Extract any cookies sent on the response to the cookiejar # in the new request. Because we've mutated our copied prepared @@ -232,21 +259,16 @@ class SessionRedirectMixin(object): headers = prepared_request.headers url = prepared_request.url - if 'Authorization' in headers: + if 'Authorization' in headers and self.should_strip_auth(response.request.url, url): # If we get redirected to a new host, we should strip out any # authentication headers. - original_parsed = urlparse(response.request.url) - redirect_parsed = urlparse(url) - - if (original_parsed.hostname != redirect_parsed.hostname): - del headers['Authorization'] + del headers['Authorization'] # .netrc might have more auth for us on our new host. new_auth = get_netrc_auth(url) if self.trust_env else None if new_auth is not None: prepared_request.prepare_auth(new_auth) - return def rebuild_proxies(self, prepared_request, proxies): """This method re-evaluates the proxy configuration by considering the @@ -295,7 +317,7 @@ class SessionRedirectMixin(object): """ method = prepared_request.method - # http://tools.ietf.org/html/rfc7231#section-6.4.4 + # https://tools.ietf.org/html/rfc7231#section-6.4.4 if response.status_code == codes.see_other and method != 'HEAD': method = 'GET' @@ -321,19 +343,19 @@ class Session(SessionRedirectMixin): >>> import requests >>> s = requests.Session() - >>> s.get('http://httpbin.org/get') + >>> s.get('https://httpbin.org/get') Or as a context manager:: >>> with requests.Session() as s: - >>> s.get('http://httpbin.org/get') + ... s.get('https://httpbin.org/get') """ __attrs__ = [ 'headers', 'cookies', 'auth', 'proxies', 'hooks', 'params', 'verify', - 'cert', 'prefetch', 'adapters', 'stream', 'trust_env', + 'cert', 'adapters', 'stream', 'trust_env', 'max_redirects', ] @@ -365,6 +387,13 @@ class Session(SessionRedirectMixin): self.stream = False #: SSL Verification default. + #: Defaults to `True`, requiring requests to verify the TLS certificate at the + #: remote end. + #: If verify is set to `False`, requests will accept any TLS certificate + #: presented by the server, and will ignore hostname mismatches and/or + #: expired certificates, which will make your application vulnerable to + #: man-in-the-middle (MitM) attacks. + #: Only set this to `False` for testing. self.verify = True #: SSL client certificate default, if String, path to ssl client @@ -449,8 +478,8 @@ class Session(SessionRedirectMixin): :param url: URL for the new :class:`Request` object. :param params: (optional) Dictionary or bytes to be sent in the query string for the :class:`Request`. - :param data: (optional) Dictionary, bytes, or file-like object to send - in the body of the :class:`Request`. + :param data: (optional) Dictionary, list of tuples, bytes, or file-like + object to send in the body of the :class:`Request`. :param json: (optional) json to send in the body of the :class:`Request`. :param headers: (optional) Dictionary of HTTP Headers to send with the @@ -473,7 +502,12 @@ class Session(SessionRedirectMixin): content. Defaults to ``False``. :param verify: (optional) Either a boolean, in which case it controls whether we verify the server's TLS certificate, or a string, in which case it must be a path - to a CA bundle to use. Defaults to ``True``. + to a CA bundle to use. Defaults to ``True``. When set to + ``False``, requests will accept any TLS certificate presented by + the server, and will ignore hostname mismatches and/or expired + certificates, which will make your application vulnerable to + man-in-the-middle (MitM) attacks. Setting verify to ``False`` + may be useful during local development or testing. :param cert: (optional) if String, path to ssl client cert file (.pem). If Tuple, ('cert', 'key') pair. :rtype: requests.Response @@ -546,7 +580,8 @@ class Session(SessionRedirectMixin): r"""Sends a POST request. Returns :class:`Response` object. :param url: URL for the new :class:`Request` object. - :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. + :param data: (optional) Dictionary, list of tuples, bytes, or file-like + object to send in the body of the :class:`Request`. :param json: (optional) json to send in the body of the :class:`Request`. :param \*\*kwargs: Optional arguments that ``request`` takes. :rtype: requests.Response @@ -558,7 +593,8 @@ class Session(SessionRedirectMixin): r"""Sends a PUT request. Returns :class:`Response` object. :param url: URL for the new :class:`Request` object. - :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. + :param data: (optional) Dictionary, list of tuples, bytes, or file-like + object to send in the body of the :class:`Request`. :param \*\*kwargs: Optional arguments that ``request`` takes. :rtype: requests.Response """ @@ -569,7 +605,8 @@ class Session(SessionRedirectMixin): r"""Sends a PATCH request. Returns :class:`Response` object. :param url: URL for the new :class:`Request` object. - :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. + :param data: (optional) Dictionary, list of tuples, bytes, or file-like + object to send in the body of the :class:`Request`. :param \*\*kwargs: Optional arguments that ``request`` takes. :rtype: requests.Response """ @@ -633,11 +670,13 @@ class Session(SessionRedirectMixin): extract_cookies_to_jar(self.cookies, request, r.raw) - # Redirect resolving generator. - gen = self.resolve_redirects(r, request, **kwargs) - # Resolve redirects if allowed. - history = [resp for resp in gen] if allow_redirects else [] + if allow_redirects: + # Redirect resolving generator. + gen = self.resolve_redirects(r, request, **kwargs) + history = [resp for resp in gen] + else: + history = [] # Shuffle things around if there's history. if history: @@ -696,11 +735,11 @@ class Session(SessionRedirectMixin): """ for (prefix, adapter) in self.adapters.items(): - if url.lower().startswith(prefix): + if url.lower().startswith(prefix.lower()): return adapter # Nothing matches :-/ - raise InvalidSchema("No connection adapters were found for '%s'" % url) + raise InvalidSchema("No connection adapters were found for {!r}".format(url)) def close(self): """Closes all adapters and as such the session""" @@ -719,7 +758,7 @@ class Session(SessionRedirectMixin): self.adapters[key] = self.adapters.pop(key) def __getstate__(self): - state = dict((attr, getattr(self, attr, None)) for attr in self.__attrs__) + state = {attr: getattr(self, attr, None) for attr in self.__attrs__} return state def __setstate__(self, state): @@ -731,7 +770,12 @@ def session(): """ Returns a :class:`Session` for context-management. + .. deprecated:: 1.0.0 + + This method has been deprecated since version 1.0.0 and is only kept for + backwards compatibility. New code should use :class:`~requests.sessions.Session` + to create a session. This may be removed at a future date. + :rtype: Session """ - return Session() diff --git a/lib/requests/status_codes.py b/lib/requests/status_codes.py index dee89190..d80a7cd4 100644 --- a/lib/requests/status_codes.py +++ b/lib/requests/status_codes.py @@ -1,5 +1,25 @@ # -*- coding: utf-8 -*- +r""" +The ``codes`` object defines a mapping from common names for HTTP statuses +to their numerical codes, accessible either as attributes or as dictionary +items. + +Example:: + + >>> import requests + >>> requests.codes['temporary_redirect'] + 307 + >>> requests.codes.teapot + 418 + >>> requests.codes['\o/'] + 200 + +Some codes have multiple names, and both upper- and lower-case versions of +the names are allowed. For example, ``codes.ok``, ``codes.OK``, and +``codes.okay`` all correspond to the HTTP status code 200. +""" + from .structures import LookupDict _codes = { @@ -84,8 +104,20 @@ _codes = { codes = LookupDict(name='status_codes') -for code, titles in _codes.items(): - for title in titles: - setattr(codes, title, code) - if not title.startswith(('\\', '/')): - setattr(codes, title.upper(), code) +def _init(): + for code, titles in _codes.items(): + for title in titles: + setattr(codes, title, code) + if not title.startswith(('\\', '/')): + setattr(codes, title.upper(), code) + + def doc(code): + names = ', '.join('``%s``' % n for n in _codes[code]) + return '* %d: %s' % (code, names) + + global __doc__ + __doc__ = (__doc__ + '\n' + + '\n'.join(doc(code) for code in sorted(_codes)) + if __doc__ is not None else None) + +_init() diff --git a/lib/requests/structures.py b/lib/requests/structures.py index 05d2b3f5..8ee0ba7a 100644 --- a/lib/requests/structures.py +++ b/lib/requests/structures.py @@ -7,16 +7,16 @@ requests.structures Data structures that power Requests. """ -import collections +from collections import OrderedDict -from .compat import OrderedDict +from .compat import Mapping, MutableMapping -class CaseInsensitiveDict(collections.MutableMapping): +class CaseInsensitiveDict(MutableMapping): """A case-insensitive ``dict``-like object. Implements all methods and operations of - ``collections.MutableMapping`` as well as dict's ``copy``. Also + ``MutableMapping`` as well as dict's ``copy``. Also provides ``lower_items``. All keys are expected to be strings. The structure remembers the @@ -71,7 +71,7 @@ class CaseInsensitiveDict(collections.MutableMapping): ) def __eq__(self, other): - if isinstance(other, collections.Mapping): + if isinstance(other, Mapping): other = CaseInsensitiveDict(other) else: return NotImplemented diff --git a/lib/requests/utils.py b/lib/requests/utils.py index 5c47de98..db67938e 100644 --- a/lib/requests/utils.py +++ b/lib/requests/utils.py @@ -8,17 +8,18 @@ This module provides utility functions that are used within Requests that are also useful for external consumption. """ -import cgi import codecs -import collections import contextlib import io import os -import platform import re import socket import struct +import sys +import tempfile import warnings +import zipfile +from collections import OrderedDict from .__version__ import __version__ from . import certs @@ -26,9 +27,9 @@ from . import certs from ._internal_utils import to_native_string from .compat import parse_http_list as _parse_list_header from .compat import ( - quote, urlparse, bytes, str, OrderedDict, unquote, getproxies, + quote, urlparse, bytes, str, unquote, getproxies, proxy_bypass, urlunparse, basestring, integer_types, is_py3, - proxy_bypass_environment, getproxies_environment) + proxy_bypass_environment, getproxies_environment, Mapping) from .cookies import cookiejar_from_dict from .structures import CaseInsensitiveDict from .exceptions import ( @@ -38,20 +39,28 @@ NETRC_FILES = ('.netrc', '_netrc') DEFAULT_CA_BUNDLE_PATH = certs.where() +DEFAULT_PORTS = {'http': 80, 'https': 443} -if platform.system() == 'Windows': + +if sys.platform == 'win32': # provide a proxy_bypass version on Windows without DNS lookups def proxy_bypass_registry(host): - if is_py3: - import winreg - else: - import _winreg as winreg + try: + if is_py3: + import winreg + else: + import _winreg as winreg + except ImportError: + return False + try: internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER, r'Software\Microsoft\Windows\CurrentVersion\Internet Settings') - proxyEnable = winreg.QueryValueEx(internetSettings, - 'ProxyEnable')[0] + # ProxyEnable could be REG_SZ or REG_DWORD, normalizing it + proxyEnable = int(winreg.QueryValueEx(internetSettings, + 'ProxyEnable')[0]) + # ProxyOverride is almost always a string proxyOverride = winreg.QueryValueEx(internetSettings, 'ProxyOverride')[0] except OSError: @@ -160,18 +169,24 @@ def super_len(o): def get_netrc_auth(url, raise_errors=False): """Returns the Requests tuple auth for a given url from netrc.""" + netrc_file = os.environ.get('NETRC') + if netrc_file is not None: + netrc_locations = (netrc_file,) + else: + netrc_locations = ('~/{}'.format(f) for f in NETRC_FILES) + try: from netrc import netrc, NetrcParseError netrc_path = None - for f in NETRC_FILES: + for f in netrc_locations: try: - loc = os.path.expanduser('~/{0}'.format(f)) + loc = os.path.expanduser(f) except KeyError: # os.path.expanduser can fail when $HOME is undefined and - # getpwuid fails. See http://bugs.python.org/issue20164 & - # https://github.com/requests/requests/issues/1846 + # getpwuid fails. See https://bugs.python.org/issue20164 & + # https://github.com/psf/requests/issues/1846 return if os.path.exists(loc): @@ -203,7 +218,7 @@ def get_netrc_auth(url, raise_errors=False): if raise_errors: raise - # AppEngine hackiness. + # App Engine hackiness. except (ImportError, AttributeError): pass @@ -216,6 +231,38 @@ def guess_filename(obj): return os.path.basename(name) +def extract_zipped_paths(path): + """Replace nonexistent paths that look like they refer to a member of a zip + archive with the location of an extracted copy of the target, or else + just return the provided path unchanged. + """ + if os.path.exists(path): + # this is already a valid path, no need to do anything further + return path + + # find the first valid part of the provided path and treat that as a zip archive + # assume the rest of the path is the name of a member in the archive + archive, member = os.path.split(path) + while archive and not os.path.exists(archive): + archive, prefix = os.path.split(archive) + member = '/'.join([prefix, member]) + + if not zipfile.is_zipfile(archive): + return path + + zip_file = zipfile.ZipFile(archive) + if member not in zip_file.namelist(): + return path + + # we have a valid zip archive and a valid member of that archive + tmp = tempfile.gettempdir() + extracted_path = os.path.join(tmp, *member.split('/')) + if not os.path.exists(extracted_path): + extracted_path = zip_file.extract(member, path=tmp) + + return extracted_path + + def from_key_val_list(value): """Take an object and test to see if it can be represented as a dictionary. Unless it can not be represented as such, return an @@ -226,7 +273,9 @@ def from_key_val_list(value): >>> from_key_val_list([('key', 'val')]) OrderedDict([('key', 'val')]) >>> from_key_val_list('string') - ValueError: need more than 1 value to unpack + Traceback (most recent call last): + ... + ValueError: cannot encode objects that are not 2-tuples >>> from_key_val_list({'key': 'val'}) OrderedDict([('key', 'val')]) @@ -252,7 +301,9 @@ def to_key_val_list(value): >>> to_key_val_list({'key': 'val'}) [('key', 'val')] >>> to_key_val_list('string') - ValueError: cannot encode objects that are not 2-tuples. + Traceback (most recent call last): + ... + ValueError: cannot encode objects that are not 2-tuples :rtype: list """ @@ -262,7 +313,7 @@ def to_key_val_list(value): if isinstance(value, (str, bytes, bool, int)): raise ValueError('cannot encode objects that are not 2-tuples') - if isinstance(value, collections.Mapping): + if isinstance(value, Mapping): value = value.items() return list(value) @@ -407,6 +458,31 @@ def get_encodings_from_content(content): xml_re.findall(content)) +def _parse_content_type_header(header): + """Returns content type and parameters from given header + + :param header: string + :return: tuple containing content type and dictionary of + parameters + """ + + tokens = header.split(';') + content_type, params = tokens[0].strip(), tokens[1:] + params_dict = {} + items_to_strip = "\"' " + + for param in params: + param = param.strip() + if param: + key, value = param, True + index_of_equals = param.find("=") + if index_of_equals != -1: + key = param[:index_of_equals].strip(items_to_strip) + value = param[index_of_equals + 1:].strip(items_to_strip) + params_dict[key.lower()] = value + return content_type, params_dict + + def get_encoding_from_headers(headers): """Returns encodings from given HTTP Header Dict. @@ -419,7 +495,7 @@ def get_encoding_from_headers(headers): if not content_type: return None - content_type, params = cgi.parse_header(content_type) + content_type, params = _parse_content_type_header(content_type) if 'charset' in params: return params['charset'].strip("'\"") @@ -427,6 +503,10 @@ def get_encoding_from_headers(headers): if 'text' in content_type: return 'ISO-8859-1' + if 'application/json' in content_type: + # Assume UTF-8 based on RFC 4627: https://www.ietf.org/rfc/rfc4627.txt since the charset was unset + return 'utf-8' + def stream_decode_response_unicode(iterator, r): """Stream decodes a iterator.""" @@ -632,6 +712,8 @@ def should_bypass_proxies(url, no_proxy): :rtype: bool """ + # Prioritize lowercase environment variables over uppercase + # to keep a consistent behaviour with other http projects (curl, wget). get_proxy = lambda k: os.environ.get(k) or os.environ.get(k.upper()) # First check whether no_proxy is defined. If it is, check that the URL @@ -639,41 +721,43 @@ def should_bypass_proxies(url, no_proxy): no_proxy_arg = no_proxy if no_proxy is None: no_proxy = get_proxy('no_proxy') - netloc = urlparse(url).netloc + parsed = urlparse(url) + + if parsed.hostname is None: + # URLs don't always have hostnames, e.g. file:/// urls. + return True if no_proxy: # We need to check whether we match here. We need to see if we match - # the end of the netloc, both with and without the port. + # the end of the hostname, both with and without the port. no_proxy = ( host for host in no_proxy.replace(' ', '').split(',') if host ) - ip = netloc.split(':')[0] - if is_ipv4_address(ip): + if is_ipv4_address(parsed.hostname): for proxy_ip in no_proxy: if is_valid_cidr(proxy_ip): - if address_in_network(ip, proxy_ip): + if address_in_network(parsed.hostname, proxy_ip): return True - elif ip == proxy_ip: + elif parsed.hostname == proxy_ip: # If no_proxy ip was defined in plain IP notation instead of cidr notation & # matches the IP of the index return True else: + host_with_port = parsed.hostname + if parsed.port: + host_with_port += ':{}'.format(parsed.port) + for host in no_proxy: - if netloc.endswith(host) or netloc.split(':')[0].endswith(host): + if parsed.hostname.endswith(host) or host_with_port.endswith(host): # The URL does match something in no_proxy, so we don't want # to apply the proxies on this URL. return True - # If the system proxy settings indicate that this URL should be bypassed, - # don't proxy. - # The proxy_bypass function is incredibly buggy on OS X in early versions - # of Python 2.6, so allow this call to fail. Only catch the specific - # exceptions we've seen, though: this call failing in other ways can reveal - # legitimate problems. with set_environ('no_proxy', no_proxy_arg): + # parsed.hostname can be `None` in cases such as a file URI. try: - bypass = proxy_bypass(netloc) + bypass = proxy_bypass(parsed.hostname) except (TypeError, socket.gaierror): bypass = False @@ -743,7 +827,7 @@ def default_headers(): def parse_header_links(value): - """Return a dict of parsed link headers proxies. + """Return a list of parsed link headers proxies. i.e. Link: ; rel=front; type="image/jpeg",; rel=back;type="image/jpeg" @@ -754,6 +838,10 @@ def parse_header_links(value): replace_chars = ' \'"' + value = value.strip(replace_chars) + if not value: + return links + for val in re.split(', *<', value): try: url, params = val.split(';', 1)