Add urllib3 1.22

This commit is contained in:
JonnyWong16 2017-12-26 15:57:27 -08:00
commit ffcb1144b6
36 changed files with 9802 additions and 0 deletions

View file

@ -0,0 +1,54 @@
from __future__ import absolute_import
# For backwards compatibility, provide imports that used to be here.
from .connection import is_connection_dropped
from .request import make_headers
from .response import is_fp_closed
from .ssl_ import (
SSLContext,
HAS_SNI,
IS_PYOPENSSL,
IS_SECURETRANSPORT,
assert_fingerprint,
resolve_cert_reqs,
resolve_ssl_version,
ssl_wrap_socket,
)
from .timeout import (
current_time,
Timeout,
)
from .retry import Retry
from .url import (
get_host,
parse_url,
split_first,
Url,
)
from .wait import (
wait_for_read,
wait_for_write
)
__all__ = (
'HAS_SNI',
'IS_PYOPENSSL',
'IS_SECURETRANSPORT',
'SSLContext',
'Retry',
'Timeout',
'Url',
'assert_fingerprint',
'current_time',
'is_connection_dropped',
'is_fp_closed',
'get_host',
'parse_url',
'make_headers',
'resolve_cert_reqs',
'resolve_ssl_version',
'split_first',
'ssl_wrap_socket',
'wait_for_read',
'wait_for_write'
)

View file

@ -0,0 +1,130 @@
from __future__ import absolute_import
import socket
from .wait import wait_for_read
from .selectors import HAS_SELECT, SelectorError
def is_connection_dropped(conn): # Platform-specific
"""
Returns True if the connection is dropped and should be closed.
:param conn:
:class:`httplib.HTTPConnection` object.
Note: For platforms like AppEngine, this will always return ``False`` to
let the platform handle connection recycling transparently for us.
"""
sock = getattr(conn, 'sock', False)
if sock is False: # Platform-specific: AppEngine
return False
if sock is None: # Connection already closed (such as by httplib).
return True
if not HAS_SELECT:
return False
try:
return bool(wait_for_read(sock, timeout=0.0))
except SelectorError:
return True
# This function is copied from socket.py in the Python 2.7 standard
# library test suite. Added to its signature is only `socket_options`.
# One additional modification is that we avoid binding to IPv6 servers
# discovered in DNS if the system doesn't have IPv6 functionality.
def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
source_address=None, socket_options=None):
"""Connect to *address* and return the socket object.
Convenience function. Connect to *address* (a 2-tuple ``(host,
port)``) and return the socket object. Passing the optional
*timeout* parameter will set the timeout on the socket instance
before attempting to connect. If no *timeout* is supplied, the
global default timeout setting returned by :func:`getdefaulttimeout`
is used. If *source_address* is set it must be a tuple of (host, port)
for the socket to bind as a source address before making the connection.
An host of '' or port 0 tells the OS to use the default.
"""
host, port = address
if host.startswith('['):
host = host.strip('[]')
err = None
# Using the value from allowed_gai_family() in the context of getaddrinfo lets
# us select whether to work with IPv4 DNS records, IPv6 records, or both.
# The original create_connection function always returns all records.
family = allowed_gai_family()
for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
af, socktype, proto, canonname, sa = res
sock = None
try:
sock = socket.socket(af, socktype, proto)
# If provided, set socket level options before connecting.
_set_socket_options(sock, socket_options)
if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
sock.settimeout(timeout)
if source_address:
sock.bind(source_address)
sock.connect(sa)
return sock
except socket.error as e:
err = e
if sock is not None:
sock.close()
sock = None
if err is not None:
raise err
raise socket.error("getaddrinfo returns an empty list")
def _set_socket_options(sock, options):
if options is None:
return
for opt in options:
sock.setsockopt(*opt)
def allowed_gai_family():
"""This function is designed to work in the context of
getaddrinfo, where family=socket.AF_UNSPEC is the default and
will perform a DNS search for both IPv6 and IPv4 records."""
family = socket.AF_INET
if HAS_IPV6:
family = socket.AF_UNSPEC
return family
def _has_ipv6(host):
""" Returns True if the system can bind an IPv6 address. """
sock = None
has_ipv6 = False
if socket.has_ipv6:
# has_ipv6 returns true if cPython was compiled with IPv6 support.
# It does not tell us if the system has IPv6 support enabled. To
# determine that we must bind to an IPv6 address.
# https://github.com/shazow/urllib3/pull/611
# https://bugs.python.org/issue658327
try:
sock = socket.socket(socket.AF_INET6)
sock.bind((host, 0))
has_ipv6 = True
except Exception:
pass
if sock:
sock.close()
return has_ipv6
HAS_IPV6 = _has_ipv6('::1')

118
lib/urllib3/util/request.py Normal file
View file

@ -0,0 +1,118 @@
from __future__ import absolute_import
from base64 import b64encode
from ..packages.six import b, integer_types
from ..exceptions import UnrewindableBodyError
ACCEPT_ENCODING = 'gzip,deflate'
_FAILEDTELL = object()
def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
basic_auth=None, proxy_basic_auth=None, disable_cache=None):
"""
Shortcuts for generating request headers.
:param keep_alive:
If ``True``, adds 'connection: keep-alive' header.
:param accept_encoding:
Can be a boolean, list, or string.
``True`` translates to 'gzip,deflate'.
List will get joined by comma.
String will be used as provided.
:param user_agent:
String representing the user-agent you want, such as
"python-urllib3/0.6"
:param basic_auth:
Colon-separated username:password string for 'authorization: basic ...'
auth header.
:param proxy_basic_auth:
Colon-separated username:password string for 'proxy-authorization: basic ...'
auth header.
:param disable_cache:
If ``True``, adds 'cache-control: no-cache' header.
Example::
>>> make_headers(keep_alive=True, user_agent="Batman/1.0")
{'connection': 'keep-alive', 'user-agent': 'Batman/1.0'}
>>> make_headers(accept_encoding=True)
{'accept-encoding': 'gzip,deflate'}
"""
headers = {}
if accept_encoding:
if isinstance(accept_encoding, str):
pass
elif isinstance(accept_encoding, list):
accept_encoding = ','.join(accept_encoding)
else:
accept_encoding = ACCEPT_ENCODING
headers['accept-encoding'] = accept_encoding
if user_agent:
headers['user-agent'] = user_agent
if keep_alive:
headers['connection'] = 'keep-alive'
if basic_auth:
headers['authorization'] = 'Basic ' + \
b64encode(b(basic_auth)).decode('utf-8')
if proxy_basic_auth:
headers['proxy-authorization'] = 'Basic ' + \
b64encode(b(proxy_basic_auth)).decode('utf-8')
if disable_cache:
headers['cache-control'] = 'no-cache'
return headers
def set_file_position(body, pos):
"""
If a position is provided, move file to that point.
Otherwise, we'll attempt to record a position for future use.
"""
if pos is not None:
rewind_body(body, pos)
elif getattr(body, 'tell', None) is not None:
try:
pos = body.tell()
except (IOError, OSError):
# This differentiates from None, allowing us to catch
# a failed `tell()` later when trying to rewind the body.
pos = _FAILEDTELL
return pos
def rewind_body(body, body_pos):
"""
Attempt to rewind body to a certain position.
Primarily used for request redirects and retries.
:param body:
File-like object that supports seek.
:param int pos:
Position to seek to in file.
"""
body_seek = getattr(body, 'seek', None)
if body_seek is not None and isinstance(body_pos, integer_types):
try:
body_seek(body_pos)
except (IOError, OSError):
raise UnrewindableBodyError("An error occurred when rewinding request "
"body for redirect/retry.")
elif body_pos is _FAILEDTELL:
raise UnrewindableBodyError("Unable to record file position for rewinding "
"request body during a redirect/retry.")
else:
raise ValueError("body_pos must be of type integer, "
"instead it was %s." % type(body_pos))

View file

@ -0,0 +1,81 @@
from __future__ import absolute_import
from ..packages.six.moves import http_client as httplib
from ..exceptions import HeaderParsingError
def is_fp_closed(obj):
"""
Checks whether a given file-like object is closed.
:param obj:
The file-like object to check.
"""
try:
# Check `isclosed()` first, in case Python3 doesn't set `closed`.
# GH Issue #928
return obj.isclosed()
except AttributeError:
pass
try:
# Check via the official file-like-object way.
return obj.closed
except AttributeError:
pass
try:
# Check if the object is a container for another file-like object that
# gets released on exhaustion (e.g. HTTPResponse).
return obj.fp is None
except AttributeError:
pass
raise ValueError("Unable to determine whether fp is closed.")
def assert_header_parsing(headers):
"""
Asserts whether all headers have been successfully parsed.
Extracts encountered errors from the result of parsing headers.
Only works on Python 3.
:param headers: Headers to verify.
:type headers: `httplib.HTTPMessage`.
:raises urllib3.exceptions.HeaderParsingError:
If parsing errors are found.
"""
# This will fail silently if we pass in the wrong kind of parameter.
# To make debugging easier add an explicit check.
if not isinstance(headers, httplib.HTTPMessage):
raise TypeError('expected httplib.Message, got {0}.'.format(
type(headers)))
defects = getattr(headers, 'defects', None)
get_payload = getattr(headers, 'get_payload', None)
unparsed_data = None
if get_payload: # Platform-specific: Python 3.
unparsed_data = get_payload()
if defects or unparsed_data:
raise HeaderParsingError(defects=defects, unparsed_data=unparsed_data)
def is_response_to_head(response):
"""
Checks whether the request of a response has been a HEAD-request.
Handles the quirks of AppEngine.
:param conn:
:type conn: :class:`httplib.HTTPResponse`
"""
# FIXME: Can we do this somehow without accessing private httplib _method?
method = response._method
if isinstance(method, int): # Platform-specific: Appengine
return method == 3
return method.upper() == 'HEAD'

401
lib/urllib3/util/retry.py Normal file
View file

@ -0,0 +1,401 @@
from __future__ import absolute_import
import time
import logging
from collections import namedtuple
from itertools import takewhile
import email
import re
from ..exceptions import (
ConnectTimeoutError,
MaxRetryError,
ProtocolError,
ReadTimeoutError,
ResponseError,
InvalidHeader,
)
from ..packages import six
log = logging.getLogger(__name__)
# Data structure for representing the metadata of requests that result in a retry.
RequestHistory = namedtuple('RequestHistory', ["method", "url", "error",
"status", "redirect_location"])
class Retry(object):
""" Retry configuration.
Each retry attempt will create a new Retry object with updated values, so
they can be safely reused.
Retries can be defined as a default for a pool::
retries = Retry(connect=5, read=2, redirect=5)
http = PoolManager(retries=retries)
response = http.request('GET', 'http://example.com/')
Or per-request (which overrides the default for the pool)::
response = http.request('GET', 'http://example.com/', retries=Retry(10))
Retries can be disabled by passing ``False``::
response = http.request('GET', 'http://example.com/', retries=False)
Errors will be wrapped in :class:`~urllib3.exceptions.MaxRetryError` unless
retries are disabled, in which case the causing exception will be raised.
:param int total:
Total number of retries to allow. Takes precedence over other counts.
Set to ``None`` to remove this constraint and fall back on other
counts. It's a good idea to set this to some sensibly-high value to
account for unexpected edge cases and avoid infinite retry loops.
Set to ``0`` to fail on the first retry.
Set to ``False`` to disable and imply ``raise_on_redirect=False``.
:param int connect:
How many connection-related errors to retry on.
These are errors raised before the request is sent to the remote server,
which we assume has not triggered the server to process the request.
Set to ``0`` to fail on the first retry of this type.
:param int read:
How many times to retry on read errors.
These errors are raised after the request was sent to the server, so the
request may have side-effects.
Set to ``0`` to fail on the first retry of this type.
:param int redirect:
How many redirects to perform. Limit this to avoid infinite redirect
loops.
A redirect is a HTTP response with a status code 301, 302, 303, 307 or
308.
Set to ``0`` to fail on the first retry of this type.
Set to ``False`` to disable and imply ``raise_on_redirect=False``.
:param int status:
How many times to retry on bad status codes.
These are retries made on responses, where status code matches
``status_forcelist``.
Set to ``0`` to fail on the first retry of this type.
:param iterable method_whitelist:
Set of uppercased HTTP method verbs that we should retry on.
By default, we only retry on methods which are considered to be
idempotent (multiple requests with the same parameters end with the
same state). See :attr:`Retry.DEFAULT_METHOD_WHITELIST`.
Set to a ``False`` value to retry on any verb.
:param iterable status_forcelist:
A set of integer HTTP status codes that we should force a retry on.
A retry is initiated if the request method is in ``method_whitelist``
and the response status code is in ``status_forcelist``.
By default, this is disabled with ``None``.
:param float backoff_factor:
A backoff factor to apply between attempts after the second try
(most errors are resolved immediately by a second try without a
delay). urllib3 will sleep for::
{backoff factor} * (2 ^ ({number of total retries} - 1))
seconds. If the backoff_factor is 0.1, then :func:`.sleep` will sleep
for [0.0s, 0.2s, 0.4s, ...] between retries. It will never be longer
than :attr:`Retry.BACKOFF_MAX`.
By default, backoff is disabled (set to 0).
:param bool raise_on_redirect: Whether, if the number of redirects is
exhausted, to raise a MaxRetryError, or to return a response with a
response code in the 3xx range.
:param bool raise_on_status: Similar meaning to ``raise_on_redirect``:
whether we should raise an exception, or return a response,
if status falls in ``status_forcelist`` range and retries have
been exhausted.
:param tuple history: The history of the request encountered during
each call to :meth:`~Retry.increment`. The list is in the order
the requests occurred. Each list item is of class :class:`RequestHistory`.
:param bool respect_retry_after_header:
Whether to respect Retry-After header on status codes defined as
:attr:`Retry.RETRY_AFTER_STATUS_CODES` or not.
"""
DEFAULT_METHOD_WHITELIST = frozenset([
'HEAD', 'GET', 'PUT', 'DELETE', 'OPTIONS', 'TRACE'])
RETRY_AFTER_STATUS_CODES = frozenset([413, 429, 503])
#: Maximum backoff time.
BACKOFF_MAX = 120
def __init__(self, total=10, connect=None, read=None, redirect=None, status=None,
method_whitelist=DEFAULT_METHOD_WHITELIST, status_forcelist=None,
backoff_factor=0, raise_on_redirect=True, raise_on_status=True,
history=None, respect_retry_after_header=True):
self.total = total
self.connect = connect
self.read = read
self.status = status
if redirect is False or total is False:
redirect = 0
raise_on_redirect = False
self.redirect = redirect
self.status_forcelist = status_forcelist or set()
self.method_whitelist = method_whitelist
self.backoff_factor = backoff_factor
self.raise_on_redirect = raise_on_redirect
self.raise_on_status = raise_on_status
self.history = history or tuple()
self.respect_retry_after_header = respect_retry_after_header
def new(self, **kw):
params = dict(
total=self.total,
connect=self.connect, read=self.read, redirect=self.redirect, status=self.status,
method_whitelist=self.method_whitelist,
status_forcelist=self.status_forcelist,
backoff_factor=self.backoff_factor,
raise_on_redirect=self.raise_on_redirect,
raise_on_status=self.raise_on_status,
history=self.history,
)
params.update(kw)
return type(self)(**params)
@classmethod
def from_int(cls, retries, redirect=True, default=None):
""" Backwards-compatibility for the old retries format."""
if retries is None:
retries = default if default is not None else cls.DEFAULT
if isinstance(retries, Retry):
return retries
redirect = bool(redirect) and None
new_retries = cls(retries, redirect=redirect)
log.debug("Converted retries value: %r -> %r", retries, new_retries)
return new_retries
def get_backoff_time(self):
""" Formula for computing the current backoff
:rtype: float
"""
# We want to consider only the last consecutive errors sequence (Ignore redirects).
consecutive_errors_len = len(list(takewhile(lambda x: x.redirect_location is None,
reversed(self.history))))
if consecutive_errors_len <= 1:
return 0
backoff_value = self.backoff_factor * (2 ** (consecutive_errors_len - 1))
return min(self.BACKOFF_MAX, backoff_value)
def parse_retry_after(self, retry_after):
# Whitespace: https://tools.ietf.org/html/rfc7230#section-3.2.4
if re.match(r"^\s*[0-9]+\s*$", retry_after):
seconds = int(retry_after)
else:
retry_date_tuple = email.utils.parsedate(retry_after)
if retry_date_tuple is None:
raise InvalidHeader("Invalid Retry-After header: %s" % retry_after)
retry_date = time.mktime(retry_date_tuple)
seconds = retry_date - time.time()
if seconds < 0:
seconds = 0
return seconds
def get_retry_after(self, response):
""" Get the value of Retry-After in seconds. """
retry_after = response.getheader("Retry-After")
if retry_after is None:
return None
return self.parse_retry_after(retry_after)
def sleep_for_retry(self, response=None):
retry_after = self.get_retry_after(response)
if retry_after:
time.sleep(retry_after)
return True
return False
def _sleep_backoff(self):
backoff = self.get_backoff_time()
if backoff <= 0:
return
time.sleep(backoff)
def sleep(self, response=None):
""" Sleep between retry attempts.
This method will respect a server's ``Retry-After`` response header
and sleep the duration of the time requested. If that is not present, it
will use an exponential backoff. By default, the backoff factor is 0 and
this method will return immediately.
"""
if response:
slept = self.sleep_for_retry(response)
if slept:
return
self._sleep_backoff()
def _is_connection_error(self, err):
""" Errors when we're fairly sure that the server did not receive the
request, so it should be safe to retry.
"""
return isinstance(err, ConnectTimeoutError)
def _is_read_error(self, err):
""" Errors that occur after the request has been started, so we should
assume that the server began processing it.
"""
return isinstance(err, (ReadTimeoutError, ProtocolError))
def _is_method_retryable(self, method):
""" Checks if a given HTTP method should be retried upon, depending if
it is included on the method whitelist.
"""
if self.method_whitelist and method.upper() not in self.method_whitelist:
return False
return True
def is_retry(self, method, status_code, has_retry_after=False):
""" Is this method/status code retryable? (Based on whitelists and control
variables such as the number of total retries to allow, whether to
respect the Retry-After header, whether this header is present, and
whether the returned status code is on the list of status codes to
be retried upon on the presence of the aforementioned header)
"""
if not self._is_method_retryable(method):
return False
if self.status_forcelist and status_code in self.status_forcelist:
return True
return (self.total and self.respect_retry_after_header and
has_retry_after and (status_code in self.RETRY_AFTER_STATUS_CODES))
def is_exhausted(self):
""" Are we out of retries? """
retry_counts = (self.total, self.connect, self.read, self.redirect, self.status)
retry_counts = list(filter(None, retry_counts))
if not retry_counts:
return False
return min(retry_counts) < 0
def increment(self, method=None, url=None, response=None, error=None,
_pool=None, _stacktrace=None):
""" Return a new Retry object with incremented retry counters.
:param response: A response object, or None, if the server did not
return a response.
:type response: :class:`~urllib3.response.HTTPResponse`
:param Exception error: An error encountered during the request, or
None if the response was received successfully.
:return: A new ``Retry`` object.
"""
if self.total is False and error:
# Disabled, indicate to re-raise the error.
raise six.reraise(type(error), error, _stacktrace)
total = self.total
if total is not None:
total -= 1
connect = self.connect
read = self.read
redirect = self.redirect
status_count = self.status
cause = 'unknown'
status = None
redirect_location = None
if error and self._is_connection_error(error):
# Connect retry?
if connect is False:
raise six.reraise(type(error), error, _stacktrace)
elif connect is not None:
connect -= 1
elif error and self._is_read_error(error):
# Read retry?
if read is False or not self._is_method_retryable(method):
raise six.reraise(type(error), error, _stacktrace)
elif read is not None:
read -= 1
elif response and response.get_redirect_location():
# Redirect retry?
if redirect is not None:
redirect -= 1
cause = 'too many redirects'
redirect_location = response.get_redirect_location()
status = response.status
else:
# Incrementing because of a server error like a 500 in
# status_forcelist and a the given method is in the whitelist
cause = ResponseError.GENERIC_ERROR
if response and response.status:
if status_count is not None:
status_count -= 1
cause = ResponseError.SPECIFIC_ERROR.format(
status_code=response.status)
status = response.status
history = self.history + (RequestHistory(method, url, error, status, redirect_location),)
new_retry = self.new(
total=total,
connect=connect, read=read, redirect=redirect, status=status_count,
history=history)
if new_retry.is_exhausted():
raise MaxRetryError(_pool, url, error or ResponseError(cause))
log.debug("Incremented Retry for (url='%s'): %r", url, new_retry)
return new_retry
def __repr__(self):
return ('{cls.__name__}(total={self.total}, connect={self.connect}, '
'read={self.read}, redirect={self.redirect}, status={self.status})').format(
cls=type(self), self=self)
# For backwards compatibility (equivalent to pre-v1.9):
Retry.DEFAULT = Retry(3)

View file

@ -0,0 +1,581 @@
# Backport of selectors.py from Python 3.5+ to support Python < 3.4
# Also has the behavior specified in PEP 475 which is to retry syscalls
# in the case of an EINTR error. This module is required because selectors34
# does not follow this behavior and instead returns that no dile descriptor
# events have occurred rather than retry the syscall. The decision to drop
# support for select.devpoll is made to maintain 100% test coverage.
import errno
import math
import select
import socket
import sys
import time
from collections import namedtuple, Mapping
try:
monotonic = time.monotonic
except (AttributeError, ImportError): # Python 3.3<
monotonic = time.time
EVENT_READ = (1 << 0)
EVENT_WRITE = (1 << 1)
HAS_SELECT = True # Variable that shows whether the platform has a selector.
_SYSCALL_SENTINEL = object() # Sentinel in case a system call returns None.
_DEFAULT_SELECTOR = None
class SelectorError(Exception):
def __init__(self, errcode):
super(SelectorError, self).__init__()
self.errno = errcode
def __repr__(self):
return "<SelectorError errno={0}>".format(self.errno)
def __str__(self):
return self.__repr__()
def _fileobj_to_fd(fileobj):
""" Return a file descriptor from a file object. If
given an integer will simply return that integer back. """
if isinstance(fileobj, int):
fd = fileobj
else:
try:
fd = int(fileobj.fileno())
except (AttributeError, TypeError, ValueError):
raise ValueError("Invalid file object: {0!r}".format(fileobj))
if fd < 0:
raise ValueError("Invalid file descriptor: {0}".format(fd))
return fd
# Determine which function to use to wrap system calls because Python 3.5+
# already handles the case when system calls are interrupted.
if sys.version_info >= (3, 5):
def _syscall_wrapper(func, _, *args, **kwargs):
""" This is the short-circuit version of the below logic
because in Python 3.5+ all system calls automatically restart
and recalculate their timeouts. """
try:
return func(*args, **kwargs)
except (OSError, IOError, select.error) as e:
errcode = None
if hasattr(e, "errno"):
errcode = e.errno
raise SelectorError(errcode)
else:
def _syscall_wrapper(func, recalc_timeout, *args, **kwargs):
""" Wrapper function for syscalls that could fail due to EINTR.
All functions should be retried if there is time left in the timeout
in accordance with PEP 475. """
timeout = kwargs.get("timeout", None)
if timeout is None:
expires = None
recalc_timeout = False
else:
timeout = float(timeout)
if timeout < 0.0: # Timeout less than 0 treated as no timeout.
expires = None
else:
expires = monotonic() + timeout
args = list(args)
if recalc_timeout and "timeout" not in kwargs:
raise ValueError(
"Timeout must be in args or kwargs to be recalculated")
result = _SYSCALL_SENTINEL
while result is _SYSCALL_SENTINEL:
try:
result = func(*args, **kwargs)
# OSError is thrown by select.select
# IOError is thrown by select.epoll.poll
# select.error is thrown by select.poll.poll
# Aren't we thankful for Python 3.x rework for exceptions?
except (OSError, IOError, select.error) as e:
# select.error wasn't a subclass of OSError in the past.
errcode = None
if hasattr(e, "errno"):
errcode = e.errno
elif hasattr(e, "args"):
errcode = e.args[0]
# Also test for the Windows equivalent of EINTR.
is_interrupt = (errcode == errno.EINTR or (hasattr(errno, "WSAEINTR") and
errcode == errno.WSAEINTR))
if is_interrupt:
if expires is not None:
current_time = monotonic()
if current_time > expires:
raise OSError(errno=errno.ETIMEDOUT)
if recalc_timeout:
if "timeout" in kwargs:
kwargs["timeout"] = expires - current_time
continue
if errcode:
raise SelectorError(errcode)
else:
raise
return result
SelectorKey = namedtuple('SelectorKey', ['fileobj', 'fd', 'events', 'data'])
class _SelectorMapping(Mapping):
""" Mapping of file objects to selector keys """
def __init__(self, selector):
self._selector = selector
def __len__(self):
return len(self._selector._fd_to_key)
def __getitem__(self, fileobj):
try:
fd = self._selector._fileobj_lookup(fileobj)
return self._selector._fd_to_key[fd]
except KeyError:
raise KeyError("{0!r} is not registered.".format(fileobj))
def __iter__(self):
return iter(self._selector._fd_to_key)
class BaseSelector(object):
""" Abstract Selector class
A selector supports registering file objects to be monitored
for specific I/O events.
A file object is a file descriptor or any object with a
`fileno()` method. An arbitrary object can be attached to the
file object which can be used for example to store context info,
a callback, etc.
A selector can use various implementations (select(), poll(), epoll(),
and kqueue()) depending on the platform. The 'DefaultSelector' class uses
the most efficient implementation for the current platform.
"""
def __init__(self):
# Maps file descriptors to keys.
self._fd_to_key = {}
# Read-only mapping returned by get_map()
self._map = _SelectorMapping(self)
def _fileobj_lookup(self, fileobj):
""" Return a file descriptor from a file object.
This wraps _fileobj_to_fd() to do an exhaustive
search in case the object is invalid but we still
have it in our map. Used by unregister() so we can
unregister an object that was previously registered
even if it is closed. It is also used by _SelectorMapping
"""
try:
return _fileobj_to_fd(fileobj)
except ValueError:
# Search through all our mapped keys.
for key in self._fd_to_key.values():
if key.fileobj is fileobj:
return key.fd
# Raise ValueError after all.
raise
def register(self, fileobj, events, data=None):
""" Register a file object for a set of events to monitor. """
if (not events) or (events & ~(EVENT_READ | EVENT_WRITE)):
raise ValueError("Invalid events: {0!r}".format(events))
key = SelectorKey(fileobj, self._fileobj_lookup(fileobj), events, data)
if key.fd in self._fd_to_key:
raise KeyError("{0!r} (FD {1}) is already registered"
.format(fileobj, key.fd))
self._fd_to_key[key.fd] = key
return key
def unregister(self, fileobj):
""" Unregister a file object from being monitored. """
try:
key = self._fd_to_key.pop(self._fileobj_lookup(fileobj))
except KeyError:
raise KeyError("{0!r} is not registered".format(fileobj))
# Getting the fileno of a closed socket on Windows errors with EBADF.
except socket.error as e: # Platform-specific: Windows.
if e.errno != errno.EBADF:
raise
else:
for key in self._fd_to_key.values():
if key.fileobj is fileobj:
self._fd_to_key.pop(key.fd)
break
else:
raise KeyError("{0!r} is not registered".format(fileobj))
return key
def modify(self, fileobj, events, data=None):
""" Change a registered file object monitored events and data. """
# NOTE: Some subclasses optimize this operation even further.
try:
key = self._fd_to_key[self._fileobj_lookup(fileobj)]
except KeyError:
raise KeyError("{0!r} is not registered".format(fileobj))
if events != key.events:
self.unregister(fileobj)
key = self.register(fileobj, events, data)
elif data != key.data:
# Use a shortcut to update the data.
key = key._replace(data=data)
self._fd_to_key[key.fd] = key
return key
def select(self, timeout=None):
""" Perform the actual selection until some monitored file objects
are ready or the timeout expires. """
raise NotImplementedError()
def close(self):
""" Close the selector. This must be called to ensure that all
underlying resources are freed. """
self._fd_to_key.clear()
self._map = None
def get_key(self, fileobj):
""" Return the key associated with a registered file object. """
mapping = self.get_map()
if mapping is None:
raise RuntimeError("Selector is closed")
try:
return mapping[fileobj]
except KeyError:
raise KeyError("{0!r} is not registered".format(fileobj))
def get_map(self):
""" Return a mapping of file objects to selector keys """
return self._map
def _key_from_fd(self, fd):
""" Return the key associated to a given file descriptor
Return None if it is not found. """
try:
return self._fd_to_key[fd]
except KeyError:
return None
def __enter__(self):
return self
def __exit__(self, *args):
self.close()
# Almost all platforms have select.select()
if hasattr(select, "select"):
class SelectSelector(BaseSelector):
""" Select-based selector. """
def __init__(self):
super(SelectSelector, self).__init__()
self._readers = set()
self._writers = set()
def register(self, fileobj, events, data=None):
key = super(SelectSelector, self).register(fileobj, events, data)
if events & EVENT_READ:
self._readers.add(key.fd)
if events & EVENT_WRITE:
self._writers.add(key.fd)
return key
def unregister(self, fileobj):
key = super(SelectSelector, self).unregister(fileobj)
self._readers.discard(key.fd)
self._writers.discard(key.fd)
return key
def _select(self, r, w, timeout=None):
""" Wrapper for select.select because timeout is a positional arg """
return select.select(r, w, [], timeout)
def select(self, timeout=None):
# Selecting on empty lists on Windows errors out.
if not len(self._readers) and not len(self._writers):
return []
timeout = None if timeout is None else max(timeout, 0.0)
ready = []
r, w, _ = _syscall_wrapper(self._select, True, self._readers,
self._writers, timeout)
r = set(r)
w = set(w)
for fd in r | w:
events = 0
if fd in r:
events |= EVENT_READ
if fd in w:
events |= EVENT_WRITE
key = self._key_from_fd(fd)
if key:
ready.append((key, events & key.events))
return ready
if hasattr(select, "poll"):
class PollSelector(BaseSelector):
""" Poll-based selector """
def __init__(self):
super(PollSelector, self).__init__()
self._poll = select.poll()
def register(self, fileobj, events, data=None):
key = super(PollSelector, self).register(fileobj, events, data)
event_mask = 0
if events & EVENT_READ:
event_mask |= select.POLLIN
if events & EVENT_WRITE:
event_mask |= select.POLLOUT
self._poll.register(key.fd, event_mask)
return key
def unregister(self, fileobj):
key = super(PollSelector, self).unregister(fileobj)
self._poll.unregister(key.fd)
return key
def _wrap_poll(self, timeout=None):
""" Wrapper function for select.poll.poll() so that
_syscall_wrapper can work with only seconds. """
if timeout is not None:
if timeout <= 0:
timeout = 0
else:
# select.poll.poll() has a resolution of 1 millisecond,
# round away from zero to wait *at least* timeout seconds.
timeout = math.ceil(timeout * 1e3)
result = self._poll.poll(timeout)
return result
def select(self, timeout=None):
ready = []
fd_events = _syscall_wrapper(self._wrap_poll, True, timeout=timeout)
for fd, event_mask in fd_events:
events = 0
if event_mask & ~select.POLLIN:
events |= EVENT_WRITE
if event_mask & ~select.POLLOUT:
events |= EVENT_READ
key = self._key_from_fd(fd)
if key:
ready.append((key, events & key.events))
return ready
if hasattr(select, "epoll"):
class EpollSelector(BaseSelector):
""" Epoll-based selector """
def __init__(self):
super(EpollSelector, self).__init__()
self._epoll = select.epoll()
def fileno(self):
return self._epoll.fileno()
def register(self, fileobj, events, data=None):
key = super(EpollSelector, self).register(fileobj, events, data)
events_mask = 0
if events & EVENT_READ:
events_mask |= select.EPOLLIN
if events & EVENT_WRITE:
events_mask |= select.EPOLLOUT
_syscall_wrapper(self._epoll.register, False, key.fd, events_mask)
return key
def unregister(self, fileobj):
key = super(EpollSelector, self).unregister(fileobj)
try:
_syscall_wrapper(self._epoll.unregister, False, key.fd)
except SelectorError:
# This can occur when the fd was closed since registry.
pass
return key
def select(self, timeout=None):
if timeout is not None:
if timeout <= 0:
timeout = 0.0
else:
# select.epoll.poll() has a resolution of 1 millisecond
# but luckily takes seconds so we don't need a wrapper
# like PollSelector. Just for better rounding.
timeout = math.ceil(timeout * 1e3) * 1e-3
timeout = float(timeout)
else:
timeout = -1.0 # epoll.poll() must have a float.
# We always want at least 1 to ensure that select can be called
# with no file descriptors registered. Otherwise will fail.
max_events = max(len(self._fd_to_key), 1)
ready = []
fd_events = _syscall_wrapper(self._epoll.poll, True,
timeout=timeout,
maxevents=max_events)
for fd, event_mask in fd_events:
events = 0
if event_mask & ~select.EPOLLIN:
events |= EVENT_WRITE
if event_mask & ~select.EPOLLOUT:
events |= EVENT_READ
key = self._key_from_fd(fd)
if key:
ready.append((key, events & key.events))
return ready
def close(self):
self._epoll.close()
super(EpollSelector, self).close()
if hasattr(select, "kqueue"):
class KqueueSelector(BaseSelector):
""" Kqueue / Kevent-based selector """
def __init__(self):
super(KqueueSelector, self).__init__()
self._kqueue = select.kqueue()
def fileno(self):
return self._kqueue.fileno()
def register(self, fileobj, events, data=None):
key = super(KqueueSelector, self).register(fileobj, events, data)
if events & EVENT_READ:
kevent = select.kevent(key.fd,
select.KQ_FILTER_READ,
select.KQ_EV_ADD)
_syscall_wrapper(self._kqueue.control, False, [kevent], 0, 0)
if events & EVENT_WRITE:
kevent = select.kevent(key.fd,
select.KQ_FILTER_WRITE,
select.KQ_EV_ADD)
_syscall_wrapper(self._kqueue.control, False, [kevent], 0, 0)
return key
def unregister(self, fileobj):
key = super(KqueueSelector, self).unregister(fileobj)
if key.events & EVENT_READ:
kevent = select.kevent(key.fd,
select.KQ_FILTER_READ,
select.KQ_EV_DELETE)
try:
_syscall_wrapper(self._kqueue.control, False, [kevent], 0, 0)
except SelectorError:
pass
if key.events & EVENT_WRITE:
kevent = select.kevent(key.fd,
select.KQ_FILTER_WRITE,
select.KQ_EV_DELETE)
try:
_syscall_wrapper(self._kqueue.control, False, [kevent], 0, 0)
except SelectorError:
pass
return key
def select(self, timeout=None):
if timeout is not None:
timeout = max(timeout, 0)
max_events = len(self._fd_to_key) * 2
ready_fds = {}
kevent_list = _syscall_wrapper(self._kqueue.control, True,
None, max_events, timeout)
for kevent in kevent_list:
fd = kevent.ident
event_mask = kevent.filter
events = 0
if event_mask == select.KQ_FILTER_READ:
events |= EVENT_READ
if event_mask == select.KQ_FILTER_WRITE:
events |= EVENT_WRITE
key = self._key_from_fd(fd)
if key:
if key.fd not in ready_fds:
ready_fds[key.fd] = (key, events & key.events)
else:
old_events = ready_fds[key.fd][1]
ready_fds[key.fd] = (key, (events | old_events) & key.events)
return list(ready_fds.values())
def close(self):
self._kqueue.close()
super(KqueueSelector, self).close()
if not hasattr(select, 'select'): # Platform-specific: AppEngine
HAS_SELECT = False
def _can_allocate(struct):
""" Checks that select structs can be allocated by the underlying
operating system, not just advertised by the select module. We don't
check select() because we'll be hopeful that most platforms that
don't have it available will not advertise it. (ie: GAE) """
try:
# select.poll() objects won't fail until used.
if struct == 'poll':
p = select.poll()
p.poll(0)
# All others will fail on allocation.
else:
getattr(select, struct)().close()
return True
except (OSError, AttributeError) as e:
return False
# Choose the best implementation, roughly:
# kqueue == epoll > poll > select. Devpoll not supported. (See above)
# select() also can't accept a FD > FD_SETSIZE (usually around 1024)
def DefaultSelector():
""" This function serves as a first call for DefaultSelector to
detect if the select module is being monkey-patched incorrectly
by eventlet, greenlet, and preserve proper behavior. """
global _DEFAULT_SELECTOR
if _DEFAULT_SELECTOR is None:
if _can_allocate('kqueue'):
_DEFAULT_SELECTOR = KqueueSelector
elif _can_allocate('epoll'):
_DEFAULT_SELECTOR = EpollSelector
elif _can_allocate('poll'):
_DEFAULT_SELECTOR = PollSelector
elif hasattr(select, 'select'):
_DEFAULT_SELECTOR = SelectSelector
else: # Platform-specific: AppEngine
raise ValueError('Platform does not have a selector')
return _DEFAULT_SELECTOR()

341
lib/urllib3/util/ssl_.py Normal file
View file

@ -0,0 +1,341 @@
from __future__ import absolute_import
import errno
import warnings
import hmac
from binascii import hexlify, unhexlify
from hashlib import md5, sha1, sha256
from ..exceptions import SSLError, InsecurePlatformWarning, SNIMissingWarning
SSLContext = None
HAS_SNI = False
IS_PYOPENSSL = False
IS_SECURETRANSPORT = False
# Maps the length of a digest to a possible hash function producing this digest
HASHFUNC_MAP = {
32: md5,
40: sha1,
64: sha256,
}
def _const_compare_digest_backport(a, b):
"""
Compare two digests of equal length in constant time.
The digests must be of type str/bytes.
Returns True if the digests match, and False otherwise.
"""
result = abs(len(a) - len(b))
for l, r in zip(bytearray(a), bytearray(b)):
result |= l ^ r
return result == 0
_const_compare_digest = getattr(hmac, 'compare_digest',
_const_compare_digest_backport)
try: # Test for SSL features
import ssl
from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23
from ssl import HAS_SNI # Has SNI?
except ImportError:
pass
try:
from ssl import OP_NO_SSLv2, OP_NO_SSLv3, OP_NO_COMPRESSION
except ImportError:
OP_NO_SSLv2, OP_NO_SSLv3 = 0x1000000, 0x2000000
OP_NO_COMPRESSION = 0x20000
# A secure default.
# Sources for more information on TLS ciphers:
#
# - https://wiki.mozilla.org/Security/Server_Side_TLS
# - https://www.ssllabs.com/projects/best-practices/index.html
# - https://hynek.me/articles/hardening-your-web-servers-ssl-ciphers/
#
# The general intent is:
# - Prefer TLS 1.3 cipher suites
# - prefer cipher suites that offer perfect forward secrecy (DHE/ECDHE),
# - prefer ECDHE over DHE for better performance,
# - prefer any AES-GCM and ChaCha20 over any AES-CBC for better performance and
# security,
# - prefer AES-GCM over ChaCha20 because hardware-accelerated AES is common,
# - disable NULL authentication, MD5 MACs and DSS for security reasons.
DEFAULT_CIPHERS = ':'.join([
'TLS13-AES-256-GCM-SHA384',
'TLS13-CHACHA20-POLY1305-SHA256',
'TLS13-AES-128-GCM-SHA256',
'ECDH+AESGCM',
'ECDH+CHACHA20',
'DH+AESGCM',
'DH+CHACHA20',
'ECDH+AES256',
'DH+AES256',
'ECDH+AES128',
'DH+AES',
'RSA+AESGCM',
'RSA+AES',
'!aNULL',
'!eNULL',
'!MD5',
])
try:
from ssl import SSLContext # Modern SSL?
except ImportError:
import sys
class SSLContext(object): # Platform-specific: Python 2 & 3.1
supports_set_ciphers = ((2, 7) <= sys.version_info < (3,) or
(3, 2) <= sys.version_info)
def __init__(self, protocol_version):
self.protocol = protocol_version
# Use default values from a real SSLContext
self.check_hostname = False
self.verify_mode = ssl.CERT_NONE
self.ca_certs = None
self.options = 0
self.certfile = None
self.keyfile = None
self.ciphers = None
def load_cert_chain(self, certfile, keyfile):
self.certfile = certfile
self.keyfile = keyfile
def load_verify_locations(self, cafile=None, capath=None):
self.ca_certs = cafile
if capath is not None:
raise SSLError("CA directories not supported in older Pythons")
def set_ciphers(self, cipher_suite):
if not self.supports_set_ciphers:
raise TypeError(
'Your version of Python does not support setting '
'a custom cipher suite. Please upgrade to Python '
'2.7, 3.2, or later if you need this functionality.'
)
self.ciphers = cipher_suite
def wrap_socket(self, socket, server_hostname=None, server_side=False):
warnings.warn(
'A true SSLContext object is not available. This prevents '
'urllib3 from configuring SSL appropriately and may cause '
'certain SSL connections to fail. You can upgrade to a newer '
'version of Python to solve this. For more information, see '
'https://urllib3.readthedocs.io/en/latest/advanced-usage.html'
'#ssl-warnings',
InsecurePlatformWarning
)
kwargs = {
'keyfile': self.keyfile,
'certfile': self.certfile,
'ca_certs': self.ca_certs,
'cert_reqs': self.verify_mode,
'ssl_version': self.protocol,
'server_side': server_side,
}
if self.supports_set_ciphers: # Platform-specific: Python 2.7+
return wrap_socket(socket, ciphers=self.ciphers, **kwargs)
else: # Platform-specific: Python 2.6
return wrap_socket(socket, **kwargs)
def assert_fingerprint(cert, fingerprint):
"""
Checks if given fingerprint matches the supplied certificate.
:param cert:
Certificate as bytes object.
:param fingerprint:
Fingerprint as string of hexdigits, can be interspersed by colons.
"""
fingerprint = fingerprint.replace(':', '').lower()
digest_length = len(fingerprint)
hashfunc = HASHFUNC_MAP.get(digest_length)
if not hashfunc:
raise SSLError(
'Fingerprint of invalid length: {0}'.format(fingerprint))
# We need encode() here for py32; works on py2 and p33.
fingerprint_bytes = unhexlify(fingerprint.encode())
cert_digest = hashfunc(cert).digest()
if not _const_compare_digest(cert_digest, fingerprint_bytes):
raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".'
.format(fingerprint, hexlify(cert_digest)))
def resolve_cert_reqs(candidate):
"""
Resolves the argument to a numeric constant, which can be passed to
the wrap_socket function/method from the ssl module.
Defaults to :data:`ssl.CERT_NONE`.
If given a string it is assumed to be the name of the constant in the
:mod:`ssl` module or its abbrevation.
(So you can specify `REQUIRED` instead of `CERT_REQUIRED`.
If it's neither `None` nor a string we assume it is already the numeric
constant which can directly be passed to wrap_socket.
"""
if candidate is None:
return CERT_NONE
if isinstance(candidate, str):
res = getattr(ssl, candidate, None)
if res is None:
res = getattr(ssl, 'CERT_' + candidate)
return res
return candidate
def resolve_ssl_version(candidate):
"""
like resolve_cert_reqs
"""
if candidate is None:
return PROTOCOL_SSLv23
if isinstance(candidate, str):
res = getattr(ssl, candidate, None)
if res is None:
res = getattr(ssl, 'PROTOCOL_' + candidate)
return res
return candidate
def create_urllib3_context(ssl_version=None, cert_reqs=None,
options=None, ciphers=None):
"""All arguments have the same meaning as ``ssl_wrap_socket``.
By default, this function does a lot of the same work that
``ssl.create_default_context`` does on Python 3.4+. It:
- Disables SSLv2, SSLv3, and compression
- Sets a restricted set of server ciphers
If you wish to enable SSLv3, you can do::
from urllib3.util import ssl_
context = ssl_.create_urllib3_context()
context.options &= ~ssl_.OP_NO_SSLv3
You can do the same to enable compression (substituting ``COMPRESSION``
for ``SSLv3`` in the last line above).
:param ssl_version:
The desired protocol version to use. This will default to
PROTOCOL_SSLv23 which will negotiate the highest protocol that both
the server and your installation of OpenSSL support.
:param cert_reqs:
Whether to require the certificate verification. This defaults to
``ssl.CERT_REQUIRED``.
:param options:
Specific OpenSSL options. These default to ``ssl.OP_NO_SSLv2``,
``ssl.OP_NO_SSLv3``, ``ssl.OP_NO_COMPRESSION``.
:param ciphers:
Which cipher suites to allow the server to select.
:returns:
Constructed SSLContext object with specified options
:rtype: SSLContext
"""
context = SSLContext(ssl_version or ssl.PROTOCOL_SSLv23)
# Setting the default here, as we may have no ssl module on import
cert_reqs = ssl.CERT_REQUIRED if cert_reqs is None else cert_reqs
if options is None:
options = 0
# SSLv2 is easily broken and is considered harmful and dangerous
options |= OP_NO_SSLv2
# SSLv3 has several problems and is now dangerous
options |= OP_NO_SSLv3
# Disable compression to prevent CRIME attacks for OpenSSL 1.0+
# (issue #309)
options |= OP_NO_COMPRESSION
context.options |= options
if getattr(context, 'supports_set_ciphers', True): # Platform-specific: Python 2.6
context.set_ciphers(ciphers or DEFAULT_CIPHERS)
context.verify_mode = cert_reqs
if getattr(context, 'check_hostname', None) is not None: # Platform-specific: Python 3.2
# We do our own verification, including fingerprints and alternative
# hostnames. So disable it here
context.check_hostname = False
return context
def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
ca_certs=None, server_hostname=None,
ssl_version=None, ciphers=None, ssl_context=None,
ca_cert_dir=None):
"""
All arguments except for server_hostname, ssl_context, and ca_cert_dir have
the same meaning as they do when using :func:`ssl.wrap_socket`.
:param server_hostname:
When SNI is supported, the expected hostname of the certificate
:param ssl_context:
A pre-made :class:`SSLContext` object. If none is provided, one will
be created using :func:`create_urllib3_context`.
:param ciphers:
A string of ciphers we wish the client to support. This is not
supported on Python 2.6 as the ssl module does not support it.
:param ca_cert_dir:
A directory containing CA certificates in multiple separate files, as
supported by OpenSSL's -CApath flag or the capath argument to
SSLContext.load_verify_locations().
"""
context = ssl_context
if context is None:
# Note: This branch of code and all the variables in it are no longer
# used by urllib3 itself. We should consider deprecating and removing
# this code.
context = create_urllib3_context(ssl_version, cert_reqs,
ciphers=ciphers)
if ca_certs or ca_cert_dir:
try:
context.load_verify_locations(ca_certs, ca_cert_dir)
except IOError as e: # Platform-specific: Python 2.6, 2.7, 3.2
raise SSLError(e)
# Py33 raises FileNotFoundError which subclasses OSError
# These are not equivalent unless we check the errno attribute
except OSError as e: # Platform-specific: Python 3.3 and beyond
if e.errno == errno.ENOENT:
raise SSLError(e)
raise
elif getattr(context, 'load_default_certs', None) is not None:
# try to load OS default certs; works well on Windows (require Python3.4+)
context.load_default_certs()
if certfile:
context.load_cert_chain(certfile, keyfile)
if HAS_SNI: # Platform-specific: OpenSSL with enabled SNI
return context.wrap_socket(sock, server_hostname=server_hostname)
warnings.warn(
'An HTTPS request has been made, but the SNI (Subject Name '
'Indication) extension to TLS is not available on this platform. '
'This may cause the server to present an incorrect TLS '
'certificate, which can cause validation failures. You can upgrade to '
'a newer version of Python to solve this. For more information, see '
'https://urllib3.readthedocs.io/en/latest/advanced-usage.html'
'#ssl-warnings',
SNIMissingWarning
)
return context.wrap_socket(sock)

242
lib/urllib3/util/timeout.py Normal file
View file

@ -0,0 +1,242 @@
from __future__ import absolute_import
# The default socket timeout, used by httplib to indicate that no timeout was
# specified by the user
from socket import _GLOBAL_DEFAULT_TIMEOUT
import time
from ..exceptions import TimeoutStateError
# A sentinel value to indicate that no timeout was specified by the user in
# urllib3
_Default = object()
# Use time.monotonic if available.
current_time = getattr(time, "monotonic", time.time)
class Timeout(object):
""" Timeout configuration.
Timeouts can be defined as a default for a pool::
timeout = Timeout(connect=2.0, read=7.0)
http = PoolManager(timeout=timeout)
response = http.request('GET', 'http://example.com/')
Or per-request (which overrides the default for the pool)::
response = http.request('GET', 'http://example.com/', timeout=Timeout(10))
Timeouts can be disabled by setting all the parameters to ``None``::
no_timeout = Timeout(connect=None, read=None)
response = http.request('GET', 'http://example.com/, timeout=no_timeout)
:param total:
This combines the connect and read timeouts into one; the read timeout
will be set to the time leftover from the connect attempt. In the
event that both a connect timeout and a total are specified, or a read
timeout and a total are specified, the shorter timeout will be applied.
Defaults to None.
:type total: integer, float, or None
:param connect:
The maximum amount of time to wait for a connection attempt to a server
to succeed. Omitting the parameter will default the connect timeout to
the system default, probably `the global default timeout in socket.py
<http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_.
None will set an infinite timeout for connection attempts.
:type connect: integer, float, or None
:param read:
The maximum amount of time to wait between consecutive
read operations for a response from the server. Omitting
the parameter will default the read timeout to the system
default, probably `the global default timeout in socket.py
<http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_.
None will set an infinite timeout.
:type read: integer, float, or None
.. note::
Many factors can affect the total amount of time for urllib3 to return
an HTTP response.
For example, Python's DNS resolver does not obey the timeout specified
on the socket. Other factors that can affect total request time include
high CPU load, high swap, the program running at a low priority level,
or other behaviors.
In addition, the read and total timeouts only measure the time between
read operations on the socket connecting the client and the server,
not the total amount of time for the request to return a complete
response. For most requests, the timeout is raised because the server
has not sent the first byte in the specified time. This is not always
the case; if a server streams one byte every fifteen seconds, a timeout
of 20 seconds will not trigger, even though the request will take
several minutes to complete.
If your goal is to cut off any request after a set amount of wall clock
time, consider having a second "watcher" thread to cut off a slow
request.
"""
#: A sentinel object representing the default timeout value
DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT
def __init__(self, total=None, connect=_Default, read=_Default):
self._connect = self._validate_timeout(connect, 'connect')
self._read = self._validate_timeout(read, 'read')
self.total = self._validate_timeout(total, 'total')
self._start_connect = None
def __str__(self):
return '%s(connect=%r, read=%r, total=%r)' % (
type(self).__name__, self._connect, self._read, self.total)
@classmethod
def _validate_timeout(cls, value, name):
""" Check that a timeout attribute is valid.
:param value: The timeout value to validate
:param name: The name of the timeout attribute to validate. This is
used to specify in error messages.
:return: The validated and casted version of the given value.
:raises ValueError: If it is a numeric value less than or equal to
zero, or the type is not an integer, float, or None.
"""
if value is _Default:
return cls.DEFAULT_TIMEOUT
if value is None or value is cls.DEFAULT_TIMEOUT:
return value
if isinstance(value, bool):
raise ValueError("Timeout cannot be a boolean value. It must "
"be an int, float or None.")
try:
float(value)
except (TypeError, ValueError):
raise ValueError("Timeout value %s was %s, but it must be an "
"int, float or None." % (name, value))
try:
if value <= 0:
raise ValueError("Attempted to set %s timeout to %s, but the "
"timeout cannot be set to a value less "
"than or equal to 0." % (name, value))
except TypeError: # Python 3
raise ValueError("Timeout value %s was %s, but it must be an "
"int, float or None." % (name, value))
return value
@classmethod
def from_float(cls, timeout):
""" Create a new Timeout from a legacy timeout value.
The timeout value used by httplib.py sets the same timeout on the
connect(), and recv() socket requests. This creates a :class:`Timeout`
object that sets the individual timeouts to the ``timeout`` value
passed to this function.
:param timeout: The legacy timeout value.
:type timeout: integer, float, sentinel default object, or None
:return: Timeout object
:rtype: :class:`Timeout`
"""
return Timeout(read=timeout, connect=timeout)
def clone(self):
""" Create a copy of the timeout object
Timeout properties are stored per-pool but each request needs a fresh
Timeout object to ensure each one has its own start/stop configured.
:return: a copy of the timeout object
:rtype: :class:`Timeout`
"""
# We can't use copy.deepcopy because that will also create a new object
# for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to
# detect the user default.
return Timeout(connect=self._connect, read=self._read,
total=self.total)
def start_connect(self):
""" Start the timeout clock, used during a connect() attempt
:raises urllib3.exceptions.TimeoutStateError: if you attempt
to start a timer that has been started already.
"""
if self._start_connect is not None:
raise TimeoutStateError("Timeout timer has already been started.")
self._start_connect = current_time()
return self._start_connect
def get_connect_duration(self):
""" Gets the time elapsed since the call to :meth:`start_connect`.
:return: Elapsed time.
:rtype: float
:raises urllib3.exceptions.TimeoutStateError: if you attempt
to get duration for a timer that hasn't been started.
"""
if self._start_connect is None:
raise TimeoutStateError("Can't get connect duration for timer "
"that has not started.")
return current_time() - self._start_connect
@property
def connect_timeout(self):
""" Get the value to use when setting a connection timeout.
This will be a positive float or integer, the value None
(never timeout), or the default system timeout.
:return: Connect timeout.
:rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None
"""
if self.total is None:
return self._connect
if self._connect is None or self._connect is self.DEFAULT_TIMEOUT:
return self.total
return min(self._connect, self.total)
@property
def read_timeout(self):
""" Get the value for the read timeout.
This assumes some time has elapsed in the connection timeout and
computes the read timeout appropriately.
If self.total is set, the read timeout is dependent on the amount of
time taken by the connect timeout. If the connection time has not been
established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be
raised.
:return: Value to use for the read timeout.
:rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None
:raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect`
has not yet been called on this object.
"""
if (self.total is not None and
self.total is not self.DEFAULT_TIMEOUT and
self._read is not None and
self._read is not self.DEFAULT_TIMEOUT):
# In case the connect timeout has not yet been established.
if self._start_connect is None:
return self._read
return max(0, min(self.total - self.get_connect_duration(),
self._read))
elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT:
return max(0, self.total - self.get_connect_duration())
else:
return self._read

230
lib/urllib3/util/url.py Normal file
View file

@ -0,0 +1,230 @@
from __future__ import absolute_import
from collections import namedtuple
from ..exceptions import LocationParseError
url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment']
# We only want to normalize urls with an HTTP(S) scheme.
# urllib3 infers URLs without a scheme (None) to be http.
NORMALIZABLE_SCHEMES = ('http', 'https', None)
class Url(namedtuple('Url', url_attrs)):
"""
Datastructure for representing an HTTP URL. Used as a return value for
:func:`parse_url`. Both the scheme and host are normalized as they are
both case-insensitive according to RFC 3986.
"""
__slots__ = ()
def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None,
query=None, fragment=None):
if path and not path.startswith('/'):
path = '/' + path
if scheme:
scheme = scheme.lower()
if host and scheme in NORMALIZABLE_SCHEMES:
host = host.lower()
return super(Url, cls).__new__(cls, scheme, auth, host, port, path,
query, fragment)
@property
def hostname(self):
"""For backwards-compatibility with urlparse. We're nice like that."""
return self.host
@property
def request_uri(self):
"""Absolute path including the query string."""
uri = self.path or '/'
if self.query is not None:
uri += '?' + self.query
return uri
@property
def netloc(self):
"""Network location including host and port"""
if self.port:
return '%s:%d' % (self.host, self.port)
return self.host
@property
def url(self):
"""
Convert self into a url
This function should more or less round-trip with :func:`.parse_url`. The
returned url may not be exactly the same as the url inputted to
:func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls
with a blank port will have : removed).
Example: ::
>>> U = parse_url('http://google.com/mail/')
>>> U.url
'http://google.com/mail/'
>>> Url('http', 'username:password', 'host.com', 80,
... '/path', 'query', 'fragment').url
'http://username:password@host.com:80/path?query#fragment'
"""
scheme, auth, host, port, path, query, fragment = self
url = ''
# We use "is not None" we want things to happen with empty strings (or 0 port)
if scheme is not None:
url += scheme + '://'
if auth is not None:
url += auth + '@'
if host is not None:
url += host
if port is not None:
url += ':' + str(port)
if path is not None:
url += path
if query is not None:
url += '?' + query
if fragment is not None:
url += '#' + fragment
return url
def __str__(self):
return self.url
def split_first(s, delims):
"""
Given a string and an iterable of delimiters, split on the first found
delimiter. Return two split parts and the matched delimiter.
If not found, then the first part is the full input string.
Example::
>>> split_first('foo/bar?baz', '?/=')
('foo', 'bar?baz', '/')
>>> split_first('foo/bar?baz', '123')
('foo/bar?baz', '', None)
Scales linearly with number of delims. Not ideal for large number of delims.
"""
min_idx = None
min_delim = None
for d in delims:
idx = s.find(d)
if idx < 0:
continue
if min_idx is None or idx < min_idx:
min_idx = idx
min_delim = d
if min_idx is None or min_idx < 0:
return s, '', None
return s[:min_idx], s[min_idx + 1:], min_delim
def parse_url(url):
"""
Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is
performed to parse incomplete urls. Fields not provided will be None.
Partly backwards-compatible with :mod:`urlparse`.
Example::
>>> parse_url('http://google.com/mail/')
Url(scheme='http', host='google.com', port=None, path='/mail/', ...)
>>> parse_url('google.com:80')
Url(scheme=None, host='google.com', port=80, path=None, ...)
>>> parse_url('/foo?bar')
Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)
"""
# While this code has overlap with stdlib's urlparse, it is much
# simplified for our needs and less annoying.
# Additionally, this implementations does silly things to be optimal
# on CPython.
if not url:
# Empty
return Url()
scheme = None
auth = None
host = None
port = None
path = None
fragment = None
query = None
# Scheme
if '://' in url:
scheme, url = url.split('://', 1)
# Find the earliest Authority Terminator
# (http://tools.ietf.org/html/rfc3986#section-3.2)
url, path_, delim = split_first(url, ['/', '?', '#'])
if delim:
# Reassemble the path
path = delim + path_
# Auth
if '@' in url:
# Last '@' denotes end of auth part
auth, url = url.rsplit('@', 1)
# IPv6
if url and url[0] == '[':
host, url = url.split(']', 1)
host += ']'
# Port
if ':' in url:
_host, port = url.split(':', 1)
if not host:
host = _host
if port:
# If given, ports must be integers. No whitespace, no plus or
# minus prefixes, no non-integer digits such as ^2 (superscript).
if not port.isdigit():
raise LocationParseError(url)
try:
port = int(port)
except ValueError:
raise LocationParseError(url)
else:
# Blank ports are cool, too. (rfc3986#section-3.2.3)
port = None
elif not host and url:
host = url
if not path:
return Url(scheme, auth, host, port, path, query, fragment)
# Fragment
if '#' in path:
path, fragment = path.split('#', 1)
# Query
if '?' in path:
path, query = path.split('?', 1)
return Url(scheme, auth, host, port, path, query, fragment)
def get_host(url):
"""
Deprecated. Use :func:`parse_url` instead.
"""
p = parse_url(url)
return p.scheme or 'http', p.hostname, p.port

40
lib/urllib3/util/wait.py Normal file
View file

@ -0,0 +1,40 @@
from .selectors import (
HAS_SELECT,
DefaultSelector,
EVENT_READ,
EVENT_WRITE
)
def _wait_for_io_events(socks, events, timeout=None):
""" Waits for IO events to be available from a list of sockets
or optionally a single socket if passed in. Returns a list of
sockets that can be interacted with immediately. """
if not HAS_SELECT:
raise ValueError('Platform does not have a selector')
if not isinstance(socks, list):
# Probably just a single socket.
if hasattr(socks, "fileno"):
socks = [socks]
# Otherwise it might be a non-list iterable.
else:
socks = list(socks)
with DefaultSelector() as selector:
for sock in socks:
selector.register(sock, events)
return [key[0].fileobj for key in
selector.select(timeout) if key[1] & events]
def wait_for_read(socks, timeout=None):
""" Waits for reading to be available from a list of sockets
or optionally a single socket if passed in. Returns a list of
sockets that can be read from immediately. """
return _wait_for_io_events(socks, EVENT_READ, timeout)
def wait_for_write(socks, timeout=None):
""" Waits for writing to be available from a list of sockets
or optionally a single socket if passed in. Returns a list of
sockets that can be written to immediately. """
return _wait_for_io_events(socks, EVENT_WRITE, timeout)