Merge branch 'nightly' into dependabot/pip/nightly/tempora-5.5.0

This commit is contained in:
JonnyWong16 2023-08-23 21:43:14 -07:00 committed by GitHub
commit af75759186
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
122 changed files with 7064 additions and 5942 deletions

View file

@ -15,7 +15,7 @@ documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
"""
__author__ = "Leonard Richardson (leonardr@segfault.org)"
__version__ = "4.11.2"
__version__ = "4.12.2"
__copyright__ = "Copyright (c) 2004-2023 Leonard Richardson"
# Use of this source code is governed by the MIT license.
__license__ = "MIT"
@ -38,11 +38,13 @@ from .builder import (
builder_registry,
ParserRejectedMarkup,
XMLParsedAsHTMLWarning,
HTMLParserTreeBuilder
)
from .dammit import UnicodeDammit
from .element import (
CData,
Comment,
CSS,
DEFAULT_OUTPUT_ENCODING,
Declaration,
Doctype,
@ -348,26 +350,50 @@ class BeautifulSoup(Tag):
self.markup = None
self.builder.soup = None
def __copy__(self):
"""Copy a BeautifulSoup object by converting the document to a string and parsing it again."""
copy = type(self)(
self.encode('utf-8'), builder=self.builder, from_encoding='utf-8'
)
def _clone(self):
"""Create a new BeautifulSoup object with the same TreeBuilder,
but not associated with any markup.
# Although we encoded the tree to UTF-8, that may not have
# been the encoding of the original markup. Set the copy's
# .original_encoding to reflect the original object's
# .original_encoding.
copy.original_encoding = self.original_encoding
return copy
This is the first step of the deepcopy process.
"""
clone = type(self)("", None, self.builder)
# Keep track of the encoding of the original document,
# since we won't be parsing it again.
clone.original_encoding = self.original_encoding
return clone
def __getstate__(self):
# Frequently a tree builder can't be pickled.
d = dict(self.__dict__)
if 'builder' in d and d['builder'] is not None and not self.builder.picklable:
d['builder'] = None
d['builder'] = type(self.builder)
# Store the contents as a Unicode string.
d['contents'] = []
d['markup'] = self.decode()
# If _most_recent_element is present, it's a Tag object left
# over from initial parse. It might not be picklable and we
# don't need it.
if '_most_recent_element' in d:
del d['_most_recent_element']
return d
def __setstate__(self, state):
    """Restore this object from a previously saved attribute dictionary.

    The dictionary becomes this object's ``__dict__``. A builder that
    was stored as a class is instantiated; a builder that is missing
    (falsy) is replaced with an HTMLParserTreeBuilder. The builder is
    then pointed back at this object and reset() and _feed() are
    called (presumably to rebuild the tree from the stored markup --
    confirm against those methods).

    :param state: The attribute dictionary to restore from.
    :return: The same `state` dictionary that was passed in.
    """
    self.__dict__ = state
    saved_builder = self.builder
    if isinstance(saved_builder, type):
        # Only the builder's class was stored; make a fresh instance.
        self.builder = saved_builder()
    elif not saved_builder:
        # There is no record of which builder produced this parse
        # tree, so fall back to one that is always available.
        self.builder = HTMLParserTreeBuilder()
    self.builder.soup = self
    self.reset()
    self._feed()
    return state
@classmethod
def _decode_markup(cls, markup):
"""Ensure `markup` is bytes so it's safe to send into warnings.warn.
@ -468,6 +494,7 @@ class BeautifulSoup(Tag):
self.open_tag_counter = Counter()
self.preserve_whitespace_tag_stack = []
self.string_container_stack = []
self._most_recent_element = None
self.pushTag(self)
def new_tag(self, name, namespace=None, nsprefix=None, attrs={},
@ -749,7 +776,7 @@ class BeautifulSoup(Tag):
def decode(self, pretty_print=False,
eventual_encoding=DEFAULT_OUTPUT_ENCODING,
formatter="minimal"):
formatter="minimal", iterator=None):
"""Returns a string or Unicode representation of the parse tree
as an HTML or XML document.
@ -776,7 +803,7 @@ class BeautifulSoup(Tag):
else:
indent_level = 0
return prefix + super(BeautifulSoup, self).decode(
indent_level, eventual_encoding, formatter)
indent_level, eventual_encoding, formatter, iterator)
# Aliases to make it easier to get started quickly, e.g. 'from bs4 import _soup'
_s = BeautifulSoup

View file

@ -24,6 +24,7 @@ from bs4.dammit import EntitySubstitution, UnicodeDammit
from bs4.builder import (
DetectsXMLParsedAsHTML,
ParserRejectedMarkup,
HTML,
HTMLTreeBuilder,
STRICT,
@ -70,6 +71,22 @@ class BeautifulSoupHTMLParser(HTMLParser, DetectsXMLParsedAsHTML):
self._initialize_xml_detector()
def error(self, message):
    """Signal that html.parser cannot parse the current markup.

    :param message: A description of the problem.
    :raise ParserRejectedMarkup: Always, carrying `message`.
    """
    # NOTE: This method has to exist for as long as Python 3.9 is
    # supported. HTMLParser dropped the corresponding code in 3.5,
    # but ParserBase kept calling it until 3.10.
    # https://github.com/python/cpython/issues/76025
    #
    # An earlier implementation downgraded the error to a warning,
    # but in every case discovered that just made HTMLParser crash
    # straight afterwards with a less helpful message. Raising here
    # makes it clearer that html.parser simply can't parse this
    # markup. The 3.10 implementation behaves the same way, except
    # that it raises AssertionError instead of calling a method. (That
    # error is caught and wrapped in a ParserRejectedMarkup.)
    raise ParserRejectedMarkup(message)
def handle_startendtag(self, name, attrs):
"""Handle an incoming empty-element tag.
@ -359,6 +376,12 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
args, kwargs = self.parser_args
parser = BeautifulSoupHTMLParser(*args, **kwargs)
parser.soup = self.soup
try:
parser.feed(markup)
except AssertionError as e:
# html.parser raises AssertionError in rare cases to
# indicate a fatal problem with the markup, especially
# when there's an error in the doctype declaration.
raise ParserRejectedMarkup(e)
parser.close()
parser.already_closed_empty_element = []

280
lib/bs4/css.py Normal file
View file

@ -0,0 +1,280 @@
"""Integration code for CSS selectors using Soup Sieve (pypi: soupsieve)."""
import warnings
try:
    import soupsieve
except ImportError:
    soupsieve = None
    warnings.warn(
        'The soupsieve package is not installed. CSS selectors cannot be used.'
    )


class CSS(object):
    """A proxy object against the soupsieve library, to simplify its
    CSS selector API.

    Acquire this object through the .css attribute on the
    BeautifulSoup object, or on the Tag you want to use as the
    starting point for a CSS selector.

    The main advantage of doing this is that the tag to be selected
    against doesn't need to be explicitly specified in the function
    calls, since it's already scoped to a tag.
    """

    def __init__(self, tag, api=soupsieve):
        """Constructor.

        You don't need to instantiate this class yourself; instead,
        access the .css attribute on the BeautifulSoup object, or on
        the Tag you want to use as the starting point for your CSS
        selector.

        :param tag: All CSS selectors will use this as their starting
         point.

        :param api: A plug-in replacement for the soupsieve module,
         designed mainly for use in tests.

        :raise NotImplementedError: If `api` is None, which is the
         default when the soupsieve package is not installed.
        """
        if api is None:
            raise NotImplementedError(
                "Cannot execute CSS selectors because the soupsieve package is not installed."
            )
        self.api = api
        self.tag = tag

    def escape(self, ident):
        """Escape a CSS identifier.

        This is a simple wrapper around soupsieve.escape(). See the
        documentation for that function for more information.

        :param ident: The identifier to escape.
        :return: Whatever the API's escape() function returns.
        :raise NotImplementedError: If this object has no API to
         delegate to.
        """
        # Check the API this object was actually given rather than the
        # module-level soupsieve import; a plug-in API must keep
        # working even when soupsieve itself is not installed.
        if self.api is None:
            raise NotImplementedError(
                "Cannot escape CSS identifiers because the soupsieve package is not installed."
            )
        return self.api.escape(ident)

    def _ns(self, ns, select):
        """Normalize a dictionary of namespaces.

        :param ns: The namespaces passed in by the caller, or None.
        :param select: The selector the namespaces will be used with.
        :return: `ns` if it was provided or if `select` is a
         precompiled selector; otherwise the namespaces recorded on
         the tag.
        """
        # A precompiled pattern already has a namespace context
        # compiled in, which cannot be replaced, so the tag's
        # namespaces are only substituted for uncompiled selectors.
        if not isinstance(select, self.api.SoupSieve) and ns is None:
            ns = self.tag._namespaces
        return ns

    def _rs(self, results):
        """Normalize a list of results to a ResultSet.

        A ResultSet is more consistent with the rest of Beautiful
        Soup's API, and ResultSet.__getattr__ has a helpful error
        message if you try to treat a list of results as a single
        result (a common mistake).

        :param results: The results returned by the selector API.
        :return: A ResultSet wrapping `results`.
        """
        # Import here to avoid circular import
        from bs4.element import ResultSet
        return ResultSet(None, results)

    def compile(self, select, namespaces=None, flags=0, **kwargs):
        """Pre-compile a selector and return the compiled object.

        :param select: A CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
           used in the CSS selector to namespace URIs. By default,
           Beautiful Soup will use the prefixes it encountered while
           parsing the document.

        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.compile() method.

        :param kwargs: Keyword arguments to be passed into Soup Sieve's
           soupsieve.compile() method.

        :return: A precompiled selector object.
        :rtype: soupsieve.SoupSieve
        """
        return self.api.compile(
            select, self._ns(namespaces, select), flags, **kwargs
        )

    def select_one(self, select, namespaces=None, flags=0, **kwargs):
        """Perform a CSS selection operation on the current Tag and return the
        first result.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.select_one()
        method.

        :param select: A CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
           used in the CSS selector to namespace URIs. By default,
           Beautiful Soup will use the prefixes it encountered while
           parsing the document.

        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.select_one() method.

        :param kwargs: Keyword arguments to be passed into Soup Sieve's
           soupsieve.select_one() method.

        :return: A Tag, or None if the selector has no match.
        :rtype: bs4.element.Tag
        """
        return self.api.select_one(
            select, self.tag, self._ns(namespaces, select), flags, **kwargs
        )

    def select(self, select, namespaces=None, limit=0, flags=0, **kwargs):
        """Perform a CSS selection operation on the current Tag.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.select()
        method.

        :param select: A string containing a CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.

        :param limit: After finding this number of results, stop looking.
            None is treated the same as 0.

        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.select() method.

        :param kwargs: Keyword arguments to be passed into Soup Sieve's
            soupsieve.select() method.

        :return: A ResultSet of Tag objects.
        :rtype: bs4.element.ResultSet
        """
        if limit is None:
            limit = 0

        return self._rs(
            self.api.select(
                select, self.tag, self._ns(namespaces, select), limit, flags,
                **kwargs
            )
        )

    def iselect(self, select, namespaces=None, limit=0, flags=0, **kwargs):
        """Perform a CSS selection operation on the current Tag.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.iselect()
        method. It is the same as select(), but it returns a generator
        instead of a list.

        :param select: A string containing a CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.

        :param limit: After finding this number of results, stop looking.
            None is treated the same as 0.

        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.iselect() method.

        :param kwargs: Keyword arguments to be passed into Soup Sieve's
            soupsieve.iselect() method.

        :return: A generator
        :rtype: types.GeneratorType
        """
        # Accept limit=None exactly as select() does, instead of
        # handing None to an API that expects an integer.
        if limit is None:
            limit = 0

        return self.api.iselect(
            select, self.tag, self._ns(namespaces, select), limit, flags, **kwargs
        )

    def closest(self, select, namespaces=None, flags=0, **kwargs):
        """Find the Tag closest to this one that matches the given selector.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.closest()
        method.

        :param select: A string containing a CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.

        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.closest() method.

        :param kwargs: Keyword arguments to be passed into Soup Sieve's
            soupsieve.closest() method.

        :return: A Tag, or None if there is no match.
        :rtype: bs4.Tag
        """
        return self.api.closest(
            select, self.tag, self._ns(namespaces, select), flags, **kwargs
        )

    def match(self, select, namespaces=None, flags=0, **kwargs):
        """Check whether this Tag matches the given CSS selector.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.match()
        method.

        :param select: A CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.

        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.match() method.

        :param kwargs: Keyword arguments to be passed into Soup Sieve's
            soupsieve.match() method.

        :return: True if this Tag matches the selector; False otherwise.
        :rtype: bool
        """
        return self.api.match(
            select, self.tag, self._ns(namespaces, select), flags, **kwargs
        )

    def filter(self, select, namespaces=None, flags=0, **kwargs):
        """Filter this Tag's direct children based on the given CSS selector.

        This uses the Soup Sieve library. It works the same way as
        passing this Tag into that library's soupsieve.filter()
        method. For more information, see the documentation for
        soupsieve.filter().

        :param select: A CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.

        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.filter() method.

        :param kwargs: Keyword arguments to be passed into Soup Sieve's
            soupsieve.filter() method.

        :return: A ResultSet of Tag objects.
        :rtype: bs4.element.ResultSet
        """
        return self._rs(
            self.api.filter(
                select, self.tag, self._ns(namespaces, select), flags, **kwargs
            )
        )

View file

@ -59,21 +59,6 @@ def diagnose(data):
if hasattr(data, 'read'):
data = data.read()
elif data.startswith("http:") or data.startswith("https:"):
print(('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data))
print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.")
return
else:
try:
if os.path.exists(data):
print(('"%s" looks like a filename. Reading data from the file.' % data))
with open(data) as fp:
data = fp.read()
except ValueError:
# This can happen on some platforms when the 'filename' is
# too long. Assume it's data and not a filename.
pass
print("")
for parser in basic_parsers:
print(("Trying to parse your markup with %s" % parser))

View file

@ -8,14 +8,8 @@ except ImportError as e:
import re
import sys
import warnings
try:
import soupsieve
except ImportError as e:
soupsieve = None
warnings.warn(
'The soupsieve package is not installed. CSS selectors cannot be used.'
)
from bs4.css import CSS
from bs4.formatter import (
Formatter,
HTMLFormatter,
@ -154,6 +148,11 @@ class PageElement(object):
NavigableString, Tag, etc. are all subclasses of PageElement.
"""
# In general, we can't tell just by looking at an element whether
# it's contained in an XML document or an HTML document. But for
# Tags (q.v.) we can store this information at parse time.
known_xml = None
def setup(self, parent=None, previous_element=None, next_element=None,
previous_sibling=None, next_sibling=None):
"""Sets up the initial relations between this element and
@ -941,11 +940,6 @@ class NavigableString(str, PageElement):
PREFIX = ''
SUFFIX = ''
# We can't tell just by looking at a string whether it's contained
# in an XML document or an HTML document.
known_xml = None
def __new__(cls, value):
"""Create a new NavigableString.
@ -961,12 +955,22 @@ class NavigableString(str, PageElement):
u.setup()
return u
def __copy__(self):
def __deepcopy__(self, memo, recursive=False):
"""A copy of a NavigableString has the same contents and class
as the original, but it is not connected to the parse tree.
:param recursive: This parameter is ignored; it's only defined
so that NavigableString.__deepcopy__ implements the same
signature as Tag.__deepcopy__.
"""
return type(self)(self)
def __copy__(self):
"""A copy of a NavigableString can only be a deep copy, because
only one PageElement can occupy a given place in a parse tree.
"""
return self.__deepcopy__({})
def __getnewargs__(self):
return (str(self),)
@ -1311,10 +1315,46 @@ class Tag(PageElement):
parserClass = _alias("parser_class") # BS3
def __copy__(self):
"""A copy of a Tag is a new Tag, unconnected to the parse tree.
def __deepcopy__(self, memo, recursive=True):
    """Return a new Tag, unconnected to the parse tree, whose
    contents are copies of this Tag's contents.

    :param memo: Passed through unchanged to the __deepcopy__ of
       every descendant; this method does not otherwise read or
       update it.
    :param recursive: If False, only the bare clone produced by
       _clone() is returned and the descendants are not copied.
    :return: The cloned Tag.
    """
    clone = self._clone()
    if not recursive:
        return clone

    # Rebuild the subtree iteratively rather than through recursive
    # function calls. The last entry of `open_tags` is the clone
    # that incoming copies are currently appended to.
    open_tags = [clone]
    for event, element in self._event_stream(self.descendants):
        if event is Tag.END_ELEMENT_EVENT:
            # This tag just closed; stop appending copies to it.
            open_tags.pop()
            continue
        copied = element.__deepcopy__(memo, recursive=False)
        # Attach the copy to its parent's .contents.
        open_tags[-1].append(copied)
        if event is Tag.START_ELEMENT_EVENT:
            # The copy becomes the parent of whatever comes next,
            # until its own end event arrives.
            open_tags.append(copied)
    return clone
def __copy__(self):
"""A copy of a Tag must always be a deep copy, because a Tag's
children can only have one parent at a time.
"""
return self.__deepcopy__({})
def _clone(self):
"""Create a new Tag just like this one, but with no
contents and unattached to any parse tree.
This is the first step in the deepcopy process.
"""
clone = type(self)(
None, self.builder, self.name, self.namespace,
self.prefix, self.attrs, is_xml=self._is_xml,
@ -1326,8 +1366,6 @@ class Tag(PageElement):
)
for attr in ('can_be_empty_element', 'hidden'):
setattr(clone, attr, getattr(self, attr))
for child in self.contents:
clone.append(child.__copy__())
return clone
@property
@ -1650,28 +1688,178 @@ class Tag(PageElement):
def decode(self, indent_level=None,
eventual_encoding=DEFAULT_OUTPUT_ENCODING,
formatter="minimal"):
"""Render a Unicode representation of this PageElement and its
contents.
:param indent_level: Each line of the rendering will be
indented this many spaces. Used internally in
recursive calls while pretty-printing.
:param eventual_encoding: The tag is destined to be
encoded into this encoding. This method is _not_
responsible for performing that encoding. This information
is passed in so that it can be substituted in if the
document contains a <META> tag that mentions the document's
encoding.
:param formatter: A Formatter object, or a string naming one of
the standard formatters.
"""
formatter="minimal",
iterator=None):
pieces = []
# First off, turn a non-Formatter `formatter` into a Formatter
# object. This will stop the lookup from happening over and
# over again.
if not isinstance(formatter, Formatter):
formatter = self.formatter_for_name(formatter)
if indent_level is True:
indent_level = 0
# The currently active tag that put us into string literal
# mode. Until this element is closed, children will be treated
# as string literals and not pretty-printed. String literal
# mode is turned on immediately after this tag begins, and
# turned off immediately before it's closed. This means there
# will be whitespace before and after the tag itself.
string_literal_tag = None
for event, element in self._event_stream(iterator):
if event in (Tag.START_ELEMENT_EVENT, Tag.EMPTY_ELEMENT_EVENT):
piece = element._format_tag(
eventual_encoding, formatter, opening=True
)
elif event is Tag.END_ELEMENT_EVENT:
piece = element._format_tag(
eventual_encoding, formatter, opening=False
)
if indent_level is not None:
indent_level -= 1
else:
piece = element.output_ready(formatter)
# Now we need to apply the 'prettiness' -- extra
# whitespace before and/or after this tag. This can get
# complicated because certain tags, like <pre> and
# <script>, can't be prettified, since adding whitespace would
# change the meaning of the content.
# The default behavior is to add whitespace before and
# after an element when string literal mode is off, and to
# leave things as they are when string literal mode is on.
if string_literal_tag:
indent_before = indent_after = False
else:
indent_before = indent_after = True
# The only time the behavior is more complex than that is
# when we encounter an opening or closing tag that might
# put us into or out of string literal mode.
if (event is Tag.START_ELEMENT_EVENT
and not string_literal_tag
and not element._should_pretty_print()):
# We are about to enter string literal mode. Add
# whitespace before this tag, but not after. We
# will stay in string literal mode until this tag
# is closed.
indent_before = True
indent_after = False
string_literal_tag = element
elif (event is Tag.END_ELEMENT_EVENT
and element is string_literal_tag):
# We are about to exit string literal mode by closing
# the tag that sent us into that mode. Add whitespace
# after this tag, but not before.
indent_before = False
indent_after = True
string_literal_tag = None
# Now we know whether to add whitespace before and/or
# after this element.
if indent_level is not None:
if (indent_before or indent_after):
if isinstance(element, NavigableString):
piece = piece.strip()
if piece:
piece = self._indent_string(
piece, indent_level, formatter,
indent_before, indent_after
)
if event == Tag.START_ELEMENT_EVENT:
indent_level += 1
pieces.append(piece)
return "".join(pieces)
# Names for the different events yielded by _event_stream
START_ELEMENT_EVENT = object()
END_ELEMENT_EVENT = object()
EMPTY_ELEMENT_EVENT = object()
STRING_ELEMENT_EVENT = object()
def _event_stream(self, iterator=None):
    """Yield (event, element) pairs from which the nested structure
    of this element can be reconstructed.

    This makes it possible to walk the tree (e.g. when formatting it
    as a string) without recursive method calls. The idea resembles
    the SAX API, but this is a simpler interface meant for internal
    use: the events are the Tag.*_EVENT constants and the payloads
    are Tags and other Beautiful Soup objects.

    :param iterator: An alternate iterator to use when traversing
      the tree. When it is omitted (or falsy),
      self.self_and_descendants is used.
    :yield: 2-tuples of an event constant and the element it
      applies to.
    """
    open_tags = []
    elements = iterator or self.self_and_descendants
    for node in elements:
        # Any tag still on the stack that is not this node's parent
        # must have closed before this node appeared.
        while open_tags and node.parent != open_tags[-1]:
            yield Tag.END_ELEMENT_EVENT, open_tags.pop()

        if not isinstance(node, Tag):
            yield Tag.STRING_ELEMENT_EVENT, node
        elif node.is_empty_element:
            yield Tag.EMPTY_ELEMENT_EVENT, node
        else:
            yield Tag.START_ELEMENT_EVENT, node
            # The tag stays open until an element with a different
            # parent shows up or the iterator runs out.
            open_tags.append(node)

    # Close whatever is still open, innermost tag first.
    while open_tags:
        yield Tag.END_ELEMENT_EVENT, open_tags.pop()
def _indent_string(self, s, indent_level, formatter,
indent_before, indent_after):
"""Add indentation whitespace before and/or after a string.
:param s: The string to amend with whitespace.
:param indent_level: The indentation level; affects how much
whitespace goes before the string.
:param indent_before: Whether or not to add whitespace
before the string.
:param indent_after: Whether or not to add whitespace
(a newline) after the string.
"""
space_before = ''
if indent_before and indent_level:
space_before = (formatter.indent * indent_level)
space_after = ''
if indent_after:
space_after = "\n"
return space_before + s + space_after
def _format_tag(self, eventual_encoding, formatter, opening):
# A tag starts with the < character (see below).
# Then the / character, if this is a closing tag.
closing_slash = ''
if not opening:
closing_slash = '/'
# Then an optional namespace prefix.
prefix = ''
if self.prefix:
prefix = self.prefix + ":"
# Then a list of attribute values, if this is an opening tag.
attribute_string = ''
if opening:
attributes = formatter.attributes(self)
attrs = []
for key, val in attributes:
@ -1693,63 +1881,19 @@ class Tag(PageElement):
str(key) + '='
+ formatter.quoted_attribute_value(text))
attrs.append(decoded)
close = ''
closeTag = ''
prefix = ''
if self.prefix:
prefix = self.prefix + ":"
if self.is_empty_element:
close = formatter.void_element_close_prefix or ''
else:
closeTag = '</%s%s>' % (prefix, self.name)
pretty_print = self._should_pretty_print(indent_level)
space = ''
indent_space = ''
if indent_level is not None:
indent_space = (formatter.indent * (indent_level - 1))
if pretty_print:
space = indent_space
indent_contents = indent_level + 1
else:
indent_contents = None
contents = self.decode_contents(
indent_contents, eventual_encoding, formatter
)
if self.hidden:
# This is the 'document root' object.
s = contents
else:
s = []
attribute_string = ''
if attrs:
attribute_string = ' ' + ' '.join(attrs)
if indent_level is not None:
# Even if this particular tag is not pretty-printed,
# we should indent up to the start of the tag.
s.append(indent_space)
s.append('<%s%s%s%s>' % (
prefix, self.name, attribute_string, close))
if pretty_print:
s.append("\n")
s.append(contents)
if pretty_print and contents and contents[-1] != "\n":
s.append("\n")
if pretty_print and closeTag:
s.append(space)
s.append(closeTag)
if indent_level is not None and closeTag and self.next_sibling:
# Even if this particular tag is not pretty-printed,
# we're now done with the tag, and we should add a
# newline if appropriate.
s.append("\n")
s = ''.join(s)
return s
def _should_pretty_print(self, indent_level):
# Then an optional closing slash (for a void element in an
# XML document).
void_element_closing_slash = ''
if self.is_empty_element:
void_element_closing_slash = formatter.void_element_close_prefix or ''
# Put it all together.
return '<' + closing_slash + prefix + self.name + attribute_string + void_element_closing_slash + '>'
def _should_pretty_print(self, indent_level=1):
"""Should this tag be pretty-printed?
Most of them should, but some (such as <pre> in HTML
@ -1800,32 +1944,8 @@ class Tag(PageElement):
the standard Formatters.
"""
# First off, turn a string formatter into a Formatter object. This
# will stop the lookup from happening over and over again.
if not isinstance(formatter, Formatter):
formatter = self.formatter_for_name(formatter)
pretty_print = (indent_level is not None)
s = []
for c in self:
text = None
if isinstance(c, NavigableString):
text = c.output_ready(formatter)
elif isinstance(c, Tag):
s.append(c.decode(indent_level, eventual_encoding,
formatter))
preserve_whitespace = (
self.preserve_whitespace_tags and self.name in self.preserve_whitespace_tags
)
if text and indent_level and not preserve_whitespace:
text = text.strip()
if text:
if pretty_print and not preserve_whitespace:
s.append(formatter.indent * (indent_level - 1))
s.append(text)
if pretty_print and not preserve_whitespace:
s.append("\n")
return ''.join(s)
return self.decode(indent_level, eventual_encoding, formatter,
iterator=self.descendants)
def encode_contents(
self, indent_level=None, encoding=DEFAULT_OUTPUT_ENCODING,
@ -1922,6 +2042,18 @@ class Tag(PageElement):
# return iter() to make the purpose of the method clear
return iter(self.contents) # XXX This seems to be untested.
@property
def self_and_descendants(self):
    """Iterate over this PageElement followed by everything yielded
    by its .descendants.

    The element itself is left out when it is marked as hidden.

    :yield: A sequence of PageElements.
    """
    if not self.hidden:
        yield self
    yield from self.descendants
@property
def descendants(self):
"""Iterate over all children of this PageElement in a
@ -1948,16 +2080,13 @@ class Tag(PageElement):
Beautiful Soup will use the prefixes it encountered while
parsing the document.
:param kwargs: Keyword arguments to be passed into SoupSieve's
:param kwargs: Keyword arguments to be passed into Soup Sieve's
soupsieve.select() method.
:return: A Tag.
:rtype: bs4.element.Tag
"""
value = self.select(selector, namespaces, 1, **kwargs)
if value:
return value[0]
return None
return self.css.select_one(selector, namespaces, **kwargs)
def select(self, selector, namespaces=None, limit=None, **kwargs):
"""Perform a CSS selection operation on the current element.
@ -1979,21 +2108,12 @@ class Tag(PageElement):
:return: A ResultSet of Tags.
:rtype: bs4.element.ResultSet
"""
if namespaces is None:
namespaces = self._namespaces
return self.css.select(selector, namespaces, limit, **kwargs)
if limit is None:
limit = 0
if soupsieve is None:
raise NotImplementedError(
"Cannot execute CSS selectors because the soupsieve package is not installed."
)
results = soupsieve.select(selector, self, namespaces, limit, **kwargs)
# We do this because it's more consistent and because
# ResultSet.__getattr__ has a helpful error message.
return ResultSet(None, results)
@property
def css(self):
    """Return a new CSS object scoped to this element, giving access
    to the CSS selector API.
    """
    selector_api = CSS(self)
    return selector_api
# Old names for backwards compatibility
def childGenerator(self):

View file

@ -298,37 +298,11 @@ class TreeBuilderSmokeTest(object):
)
assert soup.a['class'] == ['a', 'b', 'c']
def test_fuzzed_input(self):
# This test centralizes in one place the various fuzz tests
# for Beautiful Soup created by the oss-fuzz project.
# These strings superficially resemble markup, but they
# generally can't be parsed into anything. The best we can
# hope for is that parsing these strings won't crash the
# parser.
#
# n.b. This markup is commented out because these fuzz tests
# _do_ crash the parser. However the crashes are due to bugs
# in html.parser, not Beautiful Soup -- otherwise I'd fix the
# bugs!
bad_markup = [
# https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=28873
# https://github.com/guidovranken/python-library-fuzzers/blob/master/corp-html/519e5b4269a01185a0d5e76295251921da2f0700
# https://bugs.python.org/issue37747
#
#b'\n<![\xff\xfe\xfe\xcd\x00',
#https://github.com/guidovranken/python-library-fuzzers/blob/master/corp-html/de32aa55785be29bbc72a1a8e06b00611fb3d9f8
# https://bugs.python.org/issue34480
#
#b'<![n\x00'
]
for markup in bad_markup:
with warnings.catch_warnings(record=False):
def test_invalid_doctype(self):
markup = '<![if word]>content<![endif]>'
markup = '<!DOCTYPE html]ff>'
soup = self.soup(markup)
class HTMLTreeBuilderSmokeTest(TreeBuilderSmokeTest):
"""A basic test of a treebuilder's competence.
@ -577,8 +551,8 @@ Hello, world!
"""Whitespace must be preserved in <pre> and <textarea> tags,
even if that would mean not prettifying the markup.
"""
pre_markup = "<pre> </pre>"
textarea_markup = "<textarea> woo\nwoo </textarea>"
pre_markup = "<pre>a z</pre>\n"
textarea_markup = "<textarea> woo\nwoo </textarea>\n"
self.assert_soup(pre_markup)
self.assert_soup(textarea_markup)
@ -589,7 +563,7 @@ Hello, world!
assert soup.textarea.prettify() == textarea_markup
soup = self.soup("<textarea></textarea>")
assert soup.textarea.prettify() == "<textarea></textarea>"
assert soup.textarea.prettify() == "<textarea></textarea>\n"
def test_nested_inline_elements(self):
"""Inline elements can be nested indefinitely."""

View file

@ -0,0 +1 @@
˙<!DOCTyPEV PUBLIC'''Đ'

View file

@ -0,0 +1 @@
)<a><math><TR><a><mI><a><p><a>

View file

@ -0,0 +1 @@
-<math><sElect><mi><sElect><sElect>

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1 @@
ñ<table><svg><html>

487
lib/bs4/tests/test_css.py Normal file
View file

@ -0,0 +1,487 @@
import pytest
import types
from unittest.mock import MagicMock
from bs4 import (
CSS,
BeautifulSoup,
ResultSet,
)
from . import (
SoupTest,
SOUP_SIEVE_PRESENT,
)
if SOUP_SIEVE_PRESENT:
from soupsieve import SelectorSyntaxError
@pytest.mark.skipif(not SOUP_SIEVE_PRESENT, reason="Soup Sieve not installed")
class TestCSSSelectors(SoupTest):
    """Test basic CSS selector functionality.

    This functionality is implemented in soupsieve, which has a much
    more comprehensive test suite, so this is basically an extra check
    that soupsieve works as expected.
    """

    # Fixture document shared by every test; parsed afresh in setup_method().
    HTML = """
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<title>The title</title>
<link rel="stylesheet" href="blah.css" type="text/css" id="l1">
</head>
<body>
<custom-dashed-tag class="dashed" id="dash1">Hello there.</custom-dashed-tag>
<div id="main" class="fancy">
<div id="inner">
<h1 id="header1">An H1</h1>
<p>Some text</p>
<p class="onep" id="p1">Some more text</p>
<h2 id="header2">An H2</h2>
<p class="class1 class2 class3" id="pmulti">Another</p>
<a href="http://bob.example.org/" rel="friend met" id="bob">Bob</a>
<h2 id="header3">Another H2</h2>
<a id="me" href="http://simonwillison.net/" rel="me">me</a>
<span class="s1">
<a href="#" id="s1a1">span1a1</a>
<a href="#" id="s1a2">span1a2 <span id="s1a2s1">test</span></a>
<span class="span2">
<a href="#" id="s2a1">span2a1</a>
</span>
<span class="span3"></span>
<custom-dashed-tag class="dashed" id="dash2"/>
<div data-tag="dashedvalue" id="data1"/>
</span>
</div>
<x id="xid">
<z id="zida"/>
<z id="zidab"/>
<z id="zidac"/>
</x>
<y id="yid">
<z id="zidb"/>
</y>
<p lang="en" id="lang-en">English</p>
<p lang="en-gb" id="lang-en-gb">English UK</p>
<p lang="en-us" id="lang-en-us">English US</p>
<p lang="fr" id="lang-fr">French</p>
</div>
<div id="footer">
</div>
"""

    def setup_method(self):
        """Parse the HTML fixture with html.parser before each test."""
        self.soup = BeautifulSoup(self.HTML, 'html.parser')

    def assert_selects(self, selector, expected_ids, **kwargs):
        """Assert that `selector` matches exactly the tags with the given ids.

        :param selector: A CSS selector, passed to self.soup.select().
        :param expected_ids: The `id` attribute values of the tags that
            should be selected; order is irrelevant.
        :param kwargs: Extra keyword arguments for select(), e.g. `limit`.
        """
        results = self.soup.select(selector, **kwargs)
        assert isinstance(results, ResultSet)
        # Compare sorted copies so the caller's list is never reordered
        # as a side effect of making the assertion.
        el_ids = sorted(el['id'] for el in results)
        expected_ids = sorted(expected_ids)
        assert expected_ids == el_ids, "Selector %s, expected [%s], got [%s]" % (
            selector, ', '.join(expected_ids), ', '.join(el_ids)
        )

    # Alternate spelling of the same helper.
    assertSelect = assert_selects

    def assert_select_multiple(self, *tests):
        """Run assert_selects() on each (selector, expected_ids) pair."""
        for selector, expected_ids in tests:
            self.assert_selects(selector, expected_ids)

    def test_precompiled(self):
        """A selector compiled via .css.compile() works with select()
        and select_one()."""
        sel = self.soup.css.compile('div')

        els = self.soup.select(sel)
        assert len(els) == 4
        for div in els:
            assert div.name == 'div'

        el = self.soup.select_one(sel)
        assert 'main' == el['id']

    def test_one_tag_one(self):
        els = self.soup.select('title')
        assert len(els) == 1
        assert els[0].name == 'title'
        assert els[0].contents == ['The title']

    def test_one_tag_many(self):
        els = self.soup.select('div')
        assert len(els) == 4
        for div in els:
            assert div.name == 'div'

        el = self.soup.select_one('div')
        assert 'main' == el['id']

    def test_select_one_returns_none_if_no_match(self):
        match = self.soup.select_one('nonexistenttag')
        assert match is None

    def test_tag_in_tag_one(self):
        self.assert_selects('div div', ['inner', 'data1'])

    def test_tag_in_tag_many(self):
        for selector in ('html div', 'html body div', 'body div'):
            self.assert_selects(selector, ['data1', 'main', 'inner', 'footer'])

    def test_limit(self):
        """The `limit` keyword caps the number of results returned."""
        self.assert_selects('html div', ['main'], limit=1)
        self.assert_selects('html body div', ['inner', 'main'], limit=2)
        self.assert_selects('body div', ['data1', 'main', 'inner', 'footer'],
                            limit=10)

    def test_tag_no_match(self):
        assert len(self.soup.select('del')) == 0

    def test_invalid_tag(self):
        with pytest.raises(SelectorSyntaxError):
            self.soup.select('tag%t')

    def test_select_dashed_tag_ids(self):
        self.assert_selects('custom-dashed-tag', ['dash1', 'dash2'])

    def test_select_dashed_by_id(self):
        dashed = self.soup.select('custom-dashed-tag[id=\"dash2\"]')
        assert dashed[0].name == 'custom-dashed-tag'
        assert dashed[0]['id'] == 'dash2'

    def test_dashed_tag_text(self):
        assert self.soup.select('body > custom-dashed-tag')[0].text == 'Hello there.'

    def test_select_dashed_matches_find_all(self):
        assert self.soup.select('custom-dashed-tag') == self.soup.find_all('custom-dashed-tag')

    def test_header_tags(self):
        self.assert_select_multiple(
            ('h1', ['header1']),
            ('h2', ['header2', 'header3']),
        )

    def test_class_one(self):
        for selector in ('.onep', 'p.onep', 'html p.onep'):
            els = self.soup.select(selector)
            assert len(els) == 1
            assert els[0].name == 'p'
            assert els[0]['class'] == ['onep']

    def test_class_mismatched_tag(self):
        els = self.soup.select('div.onep')
        assert len(els) == 0

    def test_one_id(self):
        for selector in ('div#inner', '#inner', 'div div#inner'):
            self.assert_selects(selector, ['inner'])

    def test_bad_id(self):
        els = self.soup.select('#doesnotexist')
        assert len(els) == 0

    def test_items_in_id(self):
        els = self.soup.select('div#inner p')
        assert len(els) == 3
        for el in els:
            assert el.name == 'p'
        assert els[1]['class'] == ['onep']
        assert not els[0].has_attr('class')

    def test_a_bunch_of_emptys(self):
        for selector in ('div#main del', 'div#main div.oops', 'div div#main'):
            assert len(self.soup.select(selector)) == 0

    def test_multi_class_support(self):
        for selector in ('.class1', 'p.class1', '.class2', 'p.class2',
                         '.class3', 'p.class3', 'html p.class2', 'div#inner .class2'):
            self.assert_selects(selector, ['pmulti'])

    def test_multi_class_selection(self):
        for selector in ('.class1.class3', '.class3.class2',
                         '.class1.class2.class3'):
            self.assert_selects(selector, ['pmulti'])

    def test_child_selector(self):
        self.assert_selects('.s1 > a', ['s1a1', 's1a2'])
        self.assert_selects('.s1 > a span', ['s1a2s1'])

    def test_child_selector_id(self):
        self.assert_selects('.s1 > a#s1a2 span', ['s1a2s1'])

    def test_attribute_equals(self):
        self.assert_select_multiple(
            ('p[class="onep"]', ['p1']),
            ('p[id="p1"]', ['p1']),
            ('[class="onep"]', ['p1']),
            ('[id="p1"]', ['p1']),
            ('link[rel="stylesheet"]', ['l1']),
            ('link[type="text/css"]', ['l1']),
            ('link[href="blah.css"]', ['l1']),
            ('link[href="no-blah.css"]', []),
            ('[rel="stylesheet"]', ['l1']),
            ('[type="text/css"]', ['l1']),
            ('[href="blah.css"]', ['l1']),
            ('[href="no-blah.css"]', []),
            ('p[href="no-blah.css"]', []),
            ('[href="no-blah.css"]', []),
        )

    def test_attribute_tilde(self):
        self.assert_select_multiple(
            ('p[class~="class1"]', ['pmulti']),
            ('p[class~="class2"]', ['pmulti']),
            ('p[class~="class3"]', ['pmulti']),
            ('[class~="class1"]', ['pmulti']),
            ('[class~="class2"]', ['pmulti']),
            ('[class~="class3"]', ['pmulti']),
            ('a[rel~="friend"]', ['bob']),
            ('a[rel~="met"]', ['bob']),
            ('[rel~="friend"]', ['bob']),
            ('[rel~="met"]', ['bob']),
        )

    def test_attribute_startswith(self):
        self.assert_select_multiple(
            ('[rel^="style"]', ['l1']),
            ('link[rel^="style"]', ['l1']),
            ('notlink[rel^="notstyle"]', []),
            ('[rel^="notstyle"]', []),
            ('link[rel^="notstyle"]', []),
            ('link[href^="bla"]', ['l1']),
            ('a[href^="http://"]', ['bob', 'me']),
            ('[href^="http://"]', ['bob', 'me']),
            ('[id^="p"]', ['pmulti', 'p1']),
            ('[id^="m"]', ['me', 'main']),
            ('div[id^="m"]', ['main']),
            ('a[id^="m"]', ['me']),
            ('div[data-tag^="dashed"]', ['data1'])
        )

    def test_attribute_endswith(self):
        self.assert_select_multiple(
            ('[href$=".css"]', ['l1']),
            ('link[href$=".css"]', ['l1']),
            ('link[id$="1"]', ['l1']),
            ('[id$="1"]', ['data1', 'l1', 'p1', 'header1', 's1a1', 's2a1', 's1a2s1', 'dash1']),
            ('div[id$="1"]', ['data1']),
            ('[id$="noending"]', []),
        )

    def test_attribute_contains(self):
        self.assert_select_multiple(
            # From test_attribute_startswith
            ('[rel*="style"]', ['l1']),
            ('link[rel*="style"]', ['l1']),
            ('notlink[rel*="notstyle"]', []),
            ('[rel*="notstyle"]', []),
            ('link[rel*="notstyle"]', []),
            ('link[href*="bla"]', ['l1']),
            ('[href*="http://"]', ['bob', 'me']),
            ('[id*="p"]', ['pmulti', 'p1']),
            ('div[id*="m"]', ['main']),
            ('a[id*="m"]', ['me']),
            # From test_attribute_endswith
            ('[href*=".css"]', ['l1']),
            ('link[href*=".css"]', ['l1']),
            ('link[id*="1"]', ['l1']),
            ('[id*="1"]', ['data1', 'l1', 'p1', 'header1', 's1a1', 's1a2', 's2a1', 's1a2s1', 'dash1']),
            ('div[id*="1"]', ['data1']),
            ('[id*="noending"]', []),
            # New for this test
            ('[href*="."]', ['bob', 'me', 'l1']),
            ('a[href*="."]', ['bob', 'me']),
            ('link[href*="."]', ['l1']),
            ('div[id*="n"]', ['main', 'inner']),
            ('div[id*="nn"]', ['inner']),
            ('div[data-tag*="edval"]', ['data1'])
        )

    def test_attribute_exact_or_hypen(self):
        self.assert_select_multiple(
            ('p[lang|="en"]', ['lang-en', 'lang-en-gb', 'lang-en-us']),
            ('[lang|="en"]', ['lang-en', 'lang-en-gb', 'lang-en-us']),
            ('p[lang|="fr"]', ['lang-fr']),
            ('p[lang|="gb"]', []),
        )

    def test_attribute_exists(self):
        self.assert_select_multiple(
            ('[rel]', ['l1', 'bob', 'me']),
            ('link[rel]', ['l1']),
            ('a[rel]', ['bob', 'me']),
            ('[lang]', ['lang-en', 'lang-en-gb', 'lang-en-us', 'lang-fr']),
            ('p[class]', ['p1', 'pmulti']),
            ('[blah]', []),
            ('p[blah]', []),
            ('div[data-tag]', ['data1'])
        )

    def test_quoted_space_in_selector_name(self):
        html = """<div style="display: wrong">nope</div>
<div style="display: right">yes</div>
"""
        soup = BeautifulSoup(html, 'html.parser')
        [chosen] = soup.select('div[style="display: right"]')
        assert "yes" == chosen.string

    def test_unsupported_pseudoclass(self):
        with pytest.raises(NotImplementedError):
            self.soup.select("a:no-such-pseudoclass")

        with pytest.raises(SelectorSyntaxError):
            self.soup.select("a:nth-of-type(a)")

    def test_nth_of_type(self):
        # Try to select first paragraph
        els = self.soup.select('div#inner p:nth-of-type(1)')
        assert len(els) == 1
        assert els[0].string == 'Some text'

        # Try to select third paragraph
        els = self.soup.select('div#inner p:nth-of-type(3)')
        assert len(els) == 1
        assert els[0].string == 'Another'

        # Try to select (non-existent!) fourth paragraph
        els = self.soup.select('div#inner p:nth-of-type(4)')
        assert len(els) == 0

        # Zero will select no tags.
        els = self.soup.select('div p:nth-of-type(0)')
        assert len(els) == 0

    def test_nth_of_type_direct_descendant(self):
        els = self.soup.select('div#inner > p:nth-of-type(1)')
        assert len(els) == 1
        assert els[0].string == 'Some text'

    def test_id_child_selector_nth_of_type(self):
        self.assert_selects('#inner > p:nth-of-type(2)', ['p1'])

    def test_select_on_element(self):
        # Other tests operate on the tree; this operates on an element
        # within the tree.
        inner = self.soup.find("div", id="main")
        selected = inner.select("div")
        # The <div id="inner"> tag was selected. The <div id="footer">
        # tag was not.
        self.assert_selects_ids(selected, ['inner', 'data1'])

    def test_overspecified_child_id(self):
        self.assert_selects(".fancy #inner", ['inner'])
        self.assert_selects(".normal #inner", [])

    def test_adjacent_sibling_selector(self):
        self.assert_selects('#p1 + h2', ['header2'])
        self.assert_selects('#p1 + h2 + p', ['pmulti'])
        self.assert_selects('#p1 + #header2 + .class1', ['pmulti'])
        assert [] == self.soup.select('#p1 + p')

    def test_general_sibling_selector(self):
        self.assert_selects('#p1 ~ h2', ['header2', 'header3'])
        self.assert_selects('#p1 ~ #header2', ['header2'])
        self.assert_selects('#p1 ~ h2 + a', ['me'])
        self.assert_selects('#p1 ~ h2 + [rel="me"]', ['me'])
        assert [] == self.soup.select('#inner ~ h2')

    def test_dangling_combinator(self):
        with pytest.raises(SelectorSyntaxError):
            self.soup.select('h1 >')

    def test_sibling_combinator_wont_select_same_tag_twice(self):
        self.assert_selects('p[lang] ~ p', ['lang-en-gb', 'lang-en-us', 'lang-fr'])

    # Test the selector grouping operator (the comma)
    def test_multiple_select(self):
        self.assert_selects('x, y', ['xid', 'yid'])

    def test_multiple_select_with_no_space(self):
        self.assert_selects('x,y', ['xid', 'yid'])

    def test_multiple_select_with_more_space(self):
        # Several spaces after the comma, so this case differs from
        # test_multiple_select and matches its name.
        self.assert_selects('x,    y', ['xid', 'yid'])

    def test_multiple_select_duplicated(self):
        self.assert_selects('x, x', ['xid'])

    def test_multiple_select_sibling(self):
        self.assert_selects('x, y ~ p[lang=fr]', ['xid', 'lang-fr'])

    def test_multiple_select_tag_and_direct_descendant(self):
        self.assert_selects('x, y > z', ['xid', 'zidb'])

    def test_multiple_select_direct_descendant_and_tags(self):
        self.assert_selects('div > x, y, z', ['xid', 'yid', 'zida', 'zidb', 'zidab', 'zidac'])

    def test_multiple_select_indirect_descendant(self):
        self.assert_selects('div x,y, z', ['xid', 'yid', 'zida', 'zidb', 'zidab', 'zidac'])

    def test_invalid_multiple_select(self):
        with pytest.raises(SelectorSyntaxError):
            self.soup.select(',x, y')
        with pytest.raises(SelectorSyntaxError):
            self.soup.select('x,,y')

    def test_multiple_select_attrs(self):
        self.assert_selects('p[lang=en], p[lang=en-gb]', ['lang-en', 'lang-en-gb'])

    def test_multiple_select_ids(self):
        self.assert_selects('x, y > z[id=zida], z[id=zidab], z[id=zidb]', ['xid', 'zidb', 'zidab'])

    def test_multiple_select_nested(self):
        self.assert_selects('body > div > x, y > z', ['xid', 'zidb'])

    def test_select_duplicate_elements(self):
        # When markup contains duplicate elements, a multiple select
        # will find all of them.
        markup = '<div class="c1"/><div class="c2"/><div class="c1"/>'
        soup = BeautifulSoup(markup, 'html.parser')
        selected = soup.select(".c1, .c2")
        assert 3 == len(selected)

        # Verify that find_all finds the same elements, though because
        # of an implementation detail it finds them in a different
        # order.
        for element in soup.find_all(class_=['c1', 'c2']):
            assert element in selected

    def test_closest(self):
        inner = self.soup.find("div", id="inner")
        closest = inner.css.closest("div[id=main]")
        assert closest == self.soup.find("div", id="main")

    def test_match(self):
        inner = self.soup.find("div", id="inner")
        main = self.soup.find("div", id="main")
        assert inner.css.match("div[id=main]") == False
        assert main.css.match("div[id=main]") == True

    def test_iselect(self):
        gen = self.soup.css.iselect("h2")
        assert isinstance(gen, types.GeneratorType)
        [header2, header3] = gen
        assert header2['id'] == 'header2'
        assert header3['id'] == 'header3'

    def test_filter(self):
        inner = self.soup.find("div", id="inner")
        # Check the result that was fetched rather than repeating the query.
        results = inner.css.filter("h2")
        assert len(results) == 2

        results = inner.css.filter("h2[id=header3]")
        assert isinstance(results, ResultSet)
        [result] = results
        assert result['id'] == 'header3'

    def test_escape(self):
        m = self.soup.css.escape
        assert m(".foo#bar") == '\\.foo\\#bar'
        assert m("()[]{}") == '\\(\\)\\[\\]\\{\\}'
        assert m(".foo") == self.soup.css.escape(".foo")

View file

@ -80,20 +80,20 @@ class TestFormatter(SoupTest):
@pytest.mark.parametrize(
"indent,expect",
[
(None, '<a>\n<b>\ntext\n</b>\n</a>'),
(-1, '<a>\n<b>\ntext\n</b>\n</a>'),
(0, '<a>\n<b>\ntext\n</b>\n</a>'),
("", '<a>\n<b>\ntext\n</b>\n</a>'),
(None, '<a>\n<b>\ntext\n</b>\n</a>\n'),
(-1, '<a>\n<b>\ntext\n</b>\n</a>\n'),
(0, '<a>\n<b>\ntext\n</b>\n</a>\n'),
("", '<a>\n<b>\ntext\n</b>\n</a>\n'),
(1, '<a>\n <b>\n text\n </b>\n</a>'),
(2, '<a>\n <b>\n text\n </b>\n</a>'),
(1, '<a>\n <b>\n text\n </b>\n</a>\n'),
(2, '<a>\n <b>\n text\n </b>\n</a>\n'),
("\t", '<a>\n\t<b>\n\t\ttext\n\t</b>\n</a>'),
('abc', '<a>\nabc<b>\nabcabctext\nabc</b>\n</a>'),
("\t", '<a>\n\t<b>\n\t\ttext\n\t</b>\n</a>\n'),
('abc', '<a>\nabc<b>\nabcabctext\nabc</b>\n</a>\n'),
# Some invalid inputs -- the default behavior is used.
(object(), '<a>\n <b>\n text\n </b>\n</a>'),
(b'bytes', '<a>\n <b>\n text\n </b>\n</a>'),
(object(), '<a>\n <b>\n text\n </b>\n</a>\n'),
(b'bytes', '<a>\n <b>\n text\n </b>\n</a>\n'),
]
)
def test_indent(self, indent, expect):

View file

@ -0,0 +1,91 @@
"""This file contains test cases reported by third parties using
fuzzing tools, primarily from Google's oss-fuzz project. Some of these
represent real problems with Beautiful Soup, but many are problems in
libraries that Beautiful Soup depends on, and many of the test cases
represent different ways of triggering the same problem.
Grouping these test cases together makes it easy to see which test
cases represent the same problem, and puts the test cases in close
proximity to code that can trigger the problems.
"""
import os
import pytest
from bs4 import (
BeautifulSoup,
ParserRejectedMarkup,
)
class TestFuzz(object):
    """Regression tests built from markup files reported by fuzzers.

    Each test is parametrized with the base name of a file that
    __markup() loads from the 'fuzz' directory next to this module.
    """

    # Test case markup files from fuzzers are given this extension so
    # they can be included in builds.
    TESTCASE_SUFFIX = ".testcase"

    # This class of error has been fixed by catching a less helpful
    # exception from html.parser and raising ParserRejectedMarkup
    # instead.
    @pytest.mark.parametrize(
        "filename", [
            "clusterfuzz-testcase-minimized-bs4_fuzzer-5703933063462912",
        ]
    )
    def test_rejected_markup(self, filename):
        markup = self.__markup(filename)
        with pytest.raises(ParserRejectedMarkup):
            BeautifulSoup(markup, 'html.parser')

    # This class of error has to do with very deeply nested documents
    # which overflow the Python call stack when the tree is converted
    # to a string. This is an issue with Beautiful Soup which was fixed
    # as part of [bug=1471755].
    @pytest.mark.parametrize(
        "filename", [
            "clusterfuzz-testcase-minimized-bs4_fuzzer-5984173902397440",
            "clusterfuzz-testcase-minimized-bs4_fuzzer-5167584867909632",
            "clusterfuzz-testcase-minimized-bs4_fuzzer-6124268085182464",
            "clusterfuzz-testcase-minimized-bs4_fuzzer-6450958476902400",
        ]
    )
    def test_deeply_nested_document(self, filename):
        # Parsing the document and encoding it back to a string is
        # sufficient to demonstrate that the overflow problem has
        # been fixed.
        markup = self.__markup(filename)
        BeautifulSoup(markup, 'html.parser').encode()

    # This class of error represents problems with html5lib's parser,
    # not Beautiful Soup. I use
    # https://github.com/html5lib/html5lib-python/issues/568 to notify
    # the html5lib developers of these issues.
    @pytest.mark.skip("html5lib problems")
    @pytest.mark.parametrize(
        "filename", [
            # b"""ÿ<!DOCTyPEV PUBLIC'''Ð'"""
            "clusterfuzz-testcase-minimized-bs4_fuzzer-4818336571064320",

            # b')<a><math><TR><a><mI><a><p><a>'
            "clusterfuzz-testcase-minimized-bs4_fuzzer-4999465949331456",

            # b'-<math><sElect><mi><sElect><sElect>'
            "clusterfuzz-testcase-minimized-bs4_fuzzer-5843991618256896",

            # b'ñ<table><svg><html>'
            "clusterfuzz-testcase-minimized-bs4_fuzzer-6241471367348224",

            # <TABLE>, some ^@ characters, some <math> tags.
            "clusterfuzz-testcase-minimized-bs4_fuzzer-6600557255327744",

            # Nested table
            "crash-0d306a50c8ed8bcd0785b67000fcd5dea1d33f08"
        ]
    )
    def test_html5lib_parse_errors(self, filename):
        markup = self.__markup(filename)
        print(BeautifulSoup(markup, 'html5lib').encode())

    def __markup(self, filename):
        """Return the raw bytes of a fuzzer test case.

        :param filename: Name of a file in the 'fuzz' directory beside
            this module; TESTCASE_SUFFIX is appended if it is missing.
        :return: The file's contents as a bytestring.
        """
        if not filename.endswith(self.TESTCASE_SUFFIX):
            filename += self.TESTCASE_SUFFIX
        this_dir = os.path.split(__file__)[0]
        path = os.path.join(this_dir, 'fuzz', filename)
        # Use a context manager so the handle is closed deterministically
        # instead of being left for the garbage collector.
        with open(path, 'rb') as infile:
            return infile.read()

View file

@ -3,9 +3,11 @@ trees."""
from pdb import set_trace
import pickle
import pytest
import warnings
from bs4.builder import (
HTMLParserTreeBuilder,
ParserRejectedMarkup,
XMLParsedAsHTMLWarning,
)
from bs4.builder._htmlparser import BeautifulSoupHTMLParser
@ -15,6 +17,28 @@ class TestHTMLParserTreeBuilder(SoupTest, HTMLTreeBuilderSmokeTest):
default_builder = HTMLParserTreeBuilder
def test_rejected_input(self):
    """Every document in bad_markup must raise ParserRejectedMarkup."""
    # Python's html.parser will occasionally reject markup,
    # especially when there is a problem with the initial DOCTYPE
    # declaration. Different versions of Python sound the alarm in
    # different ways, but Beautiful Soup consistently raises
    # errors as ParserRejectedMarkup exceptions.
    bad_markup = [
        # https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=28873
        # https://github.com/guidovranken/python-library-fuzzers/blob/master/corp-html/519e5b4269a01185a0d5e76295251921da2f0700
        # https://github.com/python/cpython/issues/81928
        b'\n<![\xff\xfe\xfe\xcd\x00',

        #https://github.com/guidovranken/python-library-fuzzers/blob/master/corp-html/de32aa55785be29bbc72a1a8e06b00611fb3d9f8
        # https://github.com/python/cpython/issues/78661
        #
        b'<![n\x00',
        b"<![UNKNOWN[]]>",
    ]
    for markup in bad_markup:
        with pytest.raises(ParserRejectedMarkup):
            # Only the exception matters; the parse result is not kept.
            self.soup(markup)
def test_namespaced_system_doctype(self):
# html.parser can't handle namespaced doctypes, so skip this one.
pass

View file

@ -189,13 +189,15 @@ class TestLXMLXMLTreeBuilder(SoupTest, XMLTreeBuilderSmokeTest):
assert soup.find('prefix:tag3').name == 'tag3'
assert soup.subtag.find('prefix:tag3').name == 'tag3'
def test_pickle_removes_builder(self):
# The lxml TreeBuilder is not picklable, so it won't be
# preserved in a pickle/unpickle operation.
def test_pickle_restores_builder(self):
    """Unpickling a document gives it a fresh builder of the same class."""
    # The lxml TreeBuilder is not picklable, so when unpickling
    # a document created with it, a new TreeBuilder of the
    # appropriate class is created.
    soup = self.soup("<a>some markup</a>")
    assert isinstance(soup.builder, self.default_builder)
    pickled = pickle.dumps(soup)
    unpickled = pickle.loads(pickled)

    assert "some markup" == unpickled.a.string
    # The restored builder is a different object from the original...
    assert unpickled.builder != soup.builder
    # ...but an instance of the same TreeBuilder class. (The older
    # `builder is None` expectation contradicted this and is dropped.)
    assert isinstance(unpickled.builder, self.default_builder)

View file

@ -2,20 +2,18 @@
import copy
import pickle
import pytest
import sys
from bs4 import BeautifulSoup
from bs4.element import (
Comment,
ResultSet,
SoupStrainer,
)
from . import (
SoupTest,
SOUP_SIEVE_PRESENT,
)
if SOUP_SIEVE_PRESENT:
from soupsieve import SelectorSyntaxError
class TestEncoding(SoupTest):
"""Test the ability to encode objects into strings."""
@ -52,9 +50,20 @@ class TestEncoding(SoupTest):
encoding="utf8"
)
def test_encode_deeply_nested_document(self):
    """Encode a document nested one level deeper than the recursion limit.

    If encoding relied on recursive function calls, a tree this deep
    would overflow the Python interpreter stack.
    """
    depth = sys.getrecursionlimit() + 1
    document = self.soup("<span>" * depth)
    # Every opening tag must survive the round trip.
    span_count = document.encode().count(b"<span>")
    assert depth == span_count
def test_deprecated_renderContents(self):
    """renderContents() still works and yields UTF-8 encoded bytes."""
    document = self.soup("<b>\N{SNOWMAN}</b>")
    # Calling it on the whole document must simply not raise.
    document.renderContents()
    expected = "\N{SNOWMAN}".encode("utf8")
    assert expected == document.b.renderContents()
def test_repr(self):
@ -159,7 +168,31 @@ class TestFormatters(SoupTest):
soup = self.soup("<div> foo <pre> \tbar\n \n </pre> baz <textarea> eee\nfff\t</textarea></div>")
# Everything outside the <pre> tag is reformatted, but everything
# inside is left alone.
assert '<div>\n foo\n <pre> \tbar\n \n </pre>\n baz\n <textarea> eee\nfff\t</textarea>\n</div>' == soup.div.prettify()
assert '<div>\n foo\n <pre> \tbar\n \n </pre>\n baz\n <textarea> eee\nfff\t</textarea>\n</div>\n' == soup.div.prettify()
def test_prettify_handles_nested_string_literal_tags(self):
    """prettify() leaves everything inside an outer <pre> untouched,
    even when another <pre> is opened and closed inside it."""
    # Most of this markup is inside a <pre> tag, so prettify()
    # only does three things to it:
    # 1. Add a newline and a space between the <div> and the <pre>
    # 2. Add a newline after the </pre>
    # 3. Add a newline at the end.
    #
    # The contents of the <pre> tag are left completely alone.  In
    # particular, we don't start adding whitespace again once we
    # encounter the first </pre> tag, because we know it's not
    # the one that put us into string literal mode.
    markup = """<div><pre><code>some
<script><pre>code</pre></script> for you
</code></pre></div>"""

    # Note the single leading space before <pre>: that is the
    # one-level indent described in point 1 above.
    expect = """<div>
 <pre><code>some
<script><pre>code</pre></script> for you
</code></pre>
</div>
"""
    soup = self.soup(markup)
    assert expect == soup.div.prettify()
def test_prettify_accepts_formatter_function(self):
soup = BeautifulSoup("<html><body>foo</body></html>", 'html.parser')
@ -216,429 +249,6 @@ class TestFormatters(SoupTest):
assert soup.contents[0].name == 'pre'
@pytest.mark.skipif(not SOUP_SIEVE_PRESENT, reason="Soup Sieve not installed")
class TestCSSSelectors(SoupTest):
"""Test basic CSS selector functionality.
This functionality is implemented in soupsieve, which has a much
more comprehensive test suite, so this is basically an extra check
that soupsieve works as expected.
"""
HTML = """
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<title>The title</title>
<link rel="stylesheet" href="blah.css" type="text/css" id="l1">
</head>
<body>
<custom-dashed-tag class="dashed" id="dash1">Hello there.</custom-dashed-tag>
<div id="main" class="fancy">
<div id="inner">
<h1 id="header1">An H1</h1>
<p>Some text</p>
<p class="onep" id="p1">Some more text</p>
<h2 id="header2">An H2</h2>
<p class="class1 class2 class3" id="pmulti">Another</p>
<a href="http://bob.example.org/" rel="friend met" id="bob">Bob</a>
<h2 id="header3">Another H2</h2>
<a id="me" href="http://simonwillison.net/" rel="me">me</a>
<span class="s1">
<a href="#" id="s1a1">span1a1</a>
<a href="#" id="s1a2">span1a2 <span id="s1a2s1">test</span></a>
<span class="span2">
<a href="#" id="s2a1">span2a1</a>
</span>
<span class="span3"></span>
<custom-dashed-tag class="dashed" id="dash2"/>
<div data-tag="dashedvalue" id="data1"/>
</span>
</div>
<x id="xid">
<z id="zida"/>
<z id="zidab"/>
<z id="zidac"/>
</x>
<y id="yid">
<z id="zidb"/>
</y>
<p lang="en" id="lang-en">English</p>
<p lang="en-gb" id="lang-en-gb">English UK</p>
<p lang="en-us" id="lang-en-us">English US</p>
<p lang="fr" id="lang-fr">French</p>
</div>
<div id="footer">
</div>
"""
def setup_method(self):
self.soup = BeautifulSoup(self.HTML, 'html.parser')
def assert_selects(self, selector, expected_ids, **kwargs):
el_ids = [el['id'] for el in self.soup.select(selector, **kwargs)]
el_ids.sort()
expected_ids.sort()
assert expected_ids == el_ids, "Selector %s, expected [%s], got [%s]" % (
selector, ', '.join(expected_ids), ', '.join(el_ids)
)
assertSelect = assert_selects
def assert_select_multiple(self, *tests):
for selector, expected_ids in tests:
self.assert_selects(selector, expected_ids)
def test_one_tag_one(self):
els = self.soup.select('title')
assert len(els) == 1
assert els[0].name == 'title'
assert els[0].contents == ['The title']
def test_one_tag_many(self):
els = self.soup.select('div')
assert len(els) == 4
for div in els:
assert div.name == 'div'
el = self.soup.select_one('div')
assert 'main' == el['id']
def test_select_one_returns_none_if_no_match(self):
match = self.soup.select_one('nonexistenttag')
assert None == match
def test_tag_in_tag_one(self):
els = self.soup.select('div div')
self.assert_selects('div div', ['inner', 'data1'])
def test_tag_in_tag_many(self):
for selector in ('html div', 'html body div', 'body div'):
self.assert_selects(selector, ['data1', 'main', 'inner', 'footer'])
def test_limit(self):
self.assert_selects('html div', ['main'], limit=1)
self.assert_selects('html body div', ['inner', 'main'], limit=2)
self.assert_selects('body div', ['data1', 'main', 'inner', 'footer'],
limit=10)
def test_tag_no_match(self):
assert len(self.soup.select('del')) == 0
def test_invalid_tag(self):
with pytest.raises(SelectorSyntaxError):
self.soup.select('tag%t')
def test_select_dashed_tag_ids(self):
self.assert_selects('custom-dashed-tag', ['dash1', 'dash2'])
def test_select_dashed_by_id(self):
dashed = self.soup.select('custom-dashed-tag[id=\"dash2\"]')
assert dashed[0].name == 'custom-dashed-tag'
assert dashed[0]['id'] == 'dash2'
def test_dashed_tag_text(self):
assert self.soup.select('body > custom-dashed-tag')[0].text == 'Hello there.'
def test_select_dashed_matches_find_all(self):
assert self.soup.select('custom-dashed-tag') == self.soup.find_all('custom-dashed-tag')
def test_header_tags(self):
self.assert_select_multiple(
('h1', ['header1']),
('h2', ['header2', 'header3']),
)
def test_class_one(self):
for selector in ('.onep', 'p.onep', 'html p.onep'):
els = self.soup.select(selector)
assert len(els) == 1
assert els[0].name == 'p'
assert els[0]['class'] == ['onep']
def test_class_mismatched_tag(self):
els = self.soup.select('div.onep')
assert len(els) == 0
def test_one_id(self):
for selector in ('div#inner', '#inner', 'div div#inner'):
self.assert_selects(selector, ['inner'])
def test_bad_id(self):
els = self.soup.select('#doesnotexist')
assert len(els) == 0
def test_items_in_id(self):
els = self.soup.select('div#inner p')
assert len(els) == 3
for el in els:
assert el.name == 'p'
assert els[1]['class'] == ['onep']
assert not els[0].has_attr('class')
def test_a_bunch_of_emptys(self):
for selector in ('div#main del', 'div#main div.oops', 'div div#main'):
assert len(self.soup.select(selector)) == 0
def test_multi_class_support(self):
for selector in ('.class1', 'p.class1', '.class2', 'p.class2',
'.class3', 'p.class3', 'html p.class2', 'div#inner .class2'):
self.assert_selects(selector, ['pmulti'])
def test_multi_class_selection(self):
for selector in ('.class1.class3', '.class3.class2',
'.class1.class2.class3'):
self.assert_selects(selector, ['pmulti'])
def test_child_selector(self):
self.assert_selects('.s1 > a', ['s1a1', 's1a2'])
self.assert_selects('.s1 > a span', ['s1a2s1'])
def test_child_selector_id(self):
self.assert_selects('.s1 > a#s1a2 span', ['s1a2s1'])
def test_attribute_equals(self):
self.assert_select_multiple(
('p[class="onep"]', ['p1']),
('p[id="p1"]', ['p1']),
('[class="onep"]', ['p1']),
('[id="p1"]', ['p1']),
('link[rel="stylesheet"]', ['l1']),
('link[type="text/css"]', ['l1']),
('link[href="blah.css"]', ['l1']),
('link[href="no-blah.css"]', []),
('[rel="stylesheet"]', ['l1']),
('[type="text/css"]', ['l1']),
('[href="blah.css"]', ['l1']),
('[href="no-blah.css"]', []),
('p[href="no-blah.css"]', []),
('[href="no-blah.css"]', []),
)
def test_attribute_tilde(self):
self.assert_select_multiple(
('p[class~="class1"]', ['pmulti']),
('p[class~="class2"]', ['pmulti']),
('p[class~="class3"]', ['pmulti']),
('[class~="class1"]', ['pmulti']),
('[class~="class2"]', ['pmulti']),
('[class~="class3"]', ['pmulti']),
('a[rel~="friend"]', ['bob']),
('a[rel~="met"]', ['bob']),
('[rel~="friend"]', ['bob']),
('[rel~="met"]', ['bob']),
)
def test_attribute_startswith(self):
self.assert_select_multiple(
('[rel^="style"]', ['l1']),
('link[rel^="style"]', ['l1']),
('notlink[rel^="notstyle"]', []),
('[rel^="notstyle"]', []),
('link[rel^="notstyle"]', []),
('link[href^="bla"]', ['l1']),
('a[href^="http://"]', ['bob', 'me']),
('[href^="http://"]', ['bob', 'me']),
('[id^="p"]', ['pmulti', 'p1']),
('[id^="m"]', ['me', 'main']),
('div[id^="m"]', ['main']),
('a[id^="m"]', ['me']),
('div[data-tag^="dashed"]', ['data1'])
)
def test_attribute_endswith(self):
self.assert_select_multiple(
('[href$=".css"]', ['l1']),
('link[href$=".css"]', ['l1']),
('link[id$="1"]', ['l1']),
('[id$="1"]', ['data1', 'l1', 'p1', 'header1', 's1a1', 's2a1', 's1a2s1', 'dash1']),
('div[id$="1"]', ['data1']),
('[id$="noending"]', []),
)
def test_attribute_contains(self):
self.assert_select_multiple(
# From test_attribute_startswith
('[rel*="style"]', ['l1']),
('link[rel*="style"]', ['l1']),
('notlink[rel*="notstyle"]', []),
('[rel*="notstyle"]', []),
('link[rel*="notstyle"]', []),
('link[href*="bla"]', ['l1']),
('[href*="http://"]', ['bob', 'me']),
('[id*="p"]', ['pmulti', 'p1']),
('div[id*="m"]', ['main']),
('a[id*="m"]', ['me']),
# From test_attribute_endswith
('[href*=".css"]', ['l1']),
('link[href*=".css"]', ['l1']),
('link[id*="1"]', ['l1']),
('[id*="1"]', ['data1', 'l1', 'p1', 'header1', 's1a1', 's1a2', 's2a1', 's1a2s1', 'dash1']),
('div[id*="1"]', ['data1']),
('[id*="noending"]', []),
# New for this test
('[href*="."]', ['bob', 'me', 'l1']),
('a[href*="."]', ['bob', 'me']),
('link[href*="."]', ['l1']),
('div[id*="n"]', ['main', 'inner']),
('div[id*="nn"]', ['inner']),
('div[data-tag*="edval"]', ['data1'])
)
def test_attribute_exact_or_hypen(self):
self.assert_select_multiple(
('p[lang|="en"]', ['lang-en', 'lang-en-gb', 'lang-en-us']),
('[lang|="en"]', ['lang-en', 'lang-en-gb', 'lang-en-us']),
('p[lang|="fr"]', ['lang-fr']),
('p[lang|="gb"]', []),
)
def test_attribute_exists(self):
self.assert_select_multiple(
('[rel]', ['l1', 'bob', 'me']),
('link[rel]', ['l1']),
('a[rel]', ['bob', 'me']),
('[lang]', ['lang-en', 'lang-en-gb', 'lang-en-us', 'lang-fr']),
('p[class]', ['p1', 'pmulti']),
('[blah]', []),
('p[blah]', []),
('div[data-tag]', ['data1'])
)
def test_quoted_space_in_selector_name(self):
html = """<div style="display: wrong">nope</div>
<div style="display: right">yes</div>
"""
soup = BeautifulSoup(html, 'html.parser')
[chosen] = soup.select('div[style="display: right"]')
assert "yes" == chosen.string
def test_unsupported_pseudoclass(self):
with pytest.raises(NotImplementedError):
self.soup.select("a:no-such-pseudoclass")
with pytest.raises(SelectorSyntaxError):
self.soup.select("a:nth-of-type(a)")
def test_nth_of_type(self):
# Try to select first paragraph
els = self.soup.select('div#inner p:nth-of-type(1)')
assert len(els) == 1
assert els[0].string == 'Some text'
# Try to select third paragraph
els = self.soup.select('div#inner p:nth-of-type(3)')
assert len(els) == 1
assert els[0].string == 'Another'
# Try to select (non-existent!) fourth paragraph
els = self.soup.select('div#inner p:nth-of-type(4)')
assert len(els) == 0
# Zero will select no tags.
els = self.soup.select('div p:nth-of-type(0)')
assert len(els) == 0
def test_nth_of_type_direct_descendant(self):
    """``:nth-of-type(1)`` combined with the ``>`` combinator yields one paragraph."""
    paragraphs = self.soup.select('div#inner > p:nth-of-type(1)')
    assert len(paragraphs) == 1
    first_paragraph = paragraphs[0]
    assert first_paragraph.string == 'Some text'
def test_id_child_selector_nth_of_type(self):
    """An id selector followed by ``> p:nth-of-type(2)`` is expected to select ``p1``."""
    selector = '#inner > p:nth-of-type(2)'
    expected_ids = ['p1']
    self.assert_selects(selector, expected_ids)
def test_select_on_element(self):
    """Call select() on a single element rather than on the whole tree."""
    # The other tests run select() against the tree; this one starts
    # from an element inside the tree.
    main_div = self.soup.find("div", id="main")
    nested_divs = main_div.select("div")
    # <div id="inner"> is expected in the result; <div id="footer"> is not.
    self.assert_selects_ids(nested_divs, ['inner', 'data1'])
def test_overspecified_child_id(self):
    """An id selector qualified by an ancestor class: ``.fancy`` matches, ``.normal`` does not."""
    expectations = (
        (".fancy #inner", ['inner']),
        (".normal #inner", []),
    )
    for selector, expected_ids in expectations:
        self.assert_selects(selector, expected_ids)
def test_adjacent_sibling_selector(self):
    """Exercise the ``+`` combinator, including a chain and a non-matching case."""
    expectations = (
        ('#p1 + h2', ['header2']),
        ('#p1 + h2 + p', ['pmulti']),
        ('#p1 + #header2 + .class1', ['pmulti']),
    )
    for selector, expected_ids in expectations:
        self.assert_selects(selector, expected_ids)
    # Selecting a <p> directly after #p1 is expected to come back empty.
    no_match = self.soup.select('#p1 + p')
    assert [] == no_match
def test_general_sibling_selector(self):
    """Exercise the ``~`` combinator, alone and mixed with ``+``."""
    expectations = (
        ('#p1 ~ h2', ['header2', 'header3']),
        ('#p1 ~ #header2', ['header2']),
        ('#p1 ~ h2 + a', ['me']),
        ('#p1 ~ h2 + [rel="me"]', ['me']),
    )
    for selector, expected_ids in expectations:
        self.assert_selects(selector, expected_ids)
    # Selecting an <h2> sibling after #inner is expected to come back empty.
    no_match = self.soup.select('#inner ~ h2')
    assert [] == no_match
def test_dangling_combinator(self):
    """A selector that ends in a combinator raises SelectorSyntaxError."""
    incomplete_selector = 'h1 >'
    with pytest.raises(SelectorSyntaxError):
        self.soup.select(incomplete_selector)
def test_sibling_combinator_wont_select_same_tag_twice(self):
    """``p[lang] ~ p`` is expected to list each following sibling once."""
    selector = 'p[lang] ~ p'
    expected_ids = ['lang-en-gb', 'lang-en-us', 'lang-fr']
    self.assert_selects(selector, expected_ids)
# Test the selector grouping operator (the comma)
def test_multiple_select(self):
    """The selector group ``x, y`` is expected to select ``xid`` and ``yid``."""
    selector = 'x, y'
    expected_ids = ['xid', 'yid']
    self.assert_selects(selector, expected_ids)
def test_multiple_select_with_no_space(self):
    """A selector group with no whitespace after the comma still selects both tags."""
    selector = 'x,y'
    expected_ids = ['xid', 'yid']
    self.assert_selects(selector, expected_ids)
def test_multiple_select_with_more_space(self):
    """A selector group with extra whitespace after the comma still selects both tags."""
    # The selector must contain more than one space after the comma;
    # with a single space this test would merely repeat
    # test_multiple_select and never exercise the extra-whitespace case.
    self.assert_selects('x,    y', ['xid', 'yid'])
def test_multiple_select_duplicated(self):
    """Repeating the same selector in a group is expected to select ``xid`` once."""
    selector = 'x, x'
    expected_ids = ['xid']
    self.assert_selects(selector, expected_ids)
def test_multiple_select_sibling(self):
    """A selector group may combine a plain tag with a ``~`` sibling selector."""
    selector = 'x, y ~ p[lang=fr]'
    expected_ids = ['xid', 'lang-fr']
    self.assert_selects(selector, expected_ids)
def test_multiple_select_tag_and_direct_descendant(self):
    """A selector group may combine a plain tag with a ``>`` child selector."""
    selector = 'x, y > z'
    expected_ids = ['xid', 'zidb']
    self.assert_selects(selector, expected_ids)
def test_multiple_select_direct_descendant_and_tags(self):
    """A selector group may start with a ``>`` child selector followed by plain tags."""
    selector = 'div > x, y, z'
    expected_ids = ['xid', 'yid', 'zida', 'zidb', 'zidab', 'zidac']
    self.assert_selects(selector, expected_ids)
def test_multiple_select_indirect_descendant(self):
    """A selector group may start with a descendant selector followed by plain tags."""
    selector = 'div x,y, z'
    expected_ids = ['xid', 'yid', 'zida', 'zidb', 'zidab', 'zidac']
    self.assert_selects(selector, expected_ids)
def test_invalid_multiple_select(self):
    """A leading comma or an empty group member raises SelectorSyntaxError."""
    malformed_groups = (',x, y', 'x,,y')
    for selector in malformed_groups:
        with pytest.raises(SelectorSyntaxError):
            self.soup.select(selector)
def test_multiple_select_attrs(self):
    """A selector group may consist of attribute-value selectors."""
    selector = 'p[lang=en], p[lang=en-gb]'
    expected_ids = ['lang-en', 'lang-en-gb']
    self.assert_selects(selector, expected_ids)
def test_multiple_select_ids(self):
    """A selector group may mix a plain tag, a child selector and ``[id=...]`` selectors."""
    selector = 'x, y > z[id=zida], z[id=zidab], z[id=zidb]'
    expected_ids = ['xid', 'zidb', 'zidab']
    self.assert_selects(selector, expected_ids)
def test_multiple_select_nested(self):
    """Each member of a selector group may itself be a chain of ``>`` combinators."""
    selector = 'body > div > x, y > z'
    expected_ids = ['xid', 'zidb']
    self.assert_selects(selector, expected_ids)
def test_select_duplicate_elements(self):
    """A grouped selector finds every matching element, even identical-looking ones."""
    # Two of the three <div> tags in this markup are duplicates of each
    # other; a multiple select is expected to return all three.
    document = BeautifulSoup(
        '<div class="c1"/><div class="c2"/><div class="c1"/>', 'html.parser'
    )
    matches = document.select(".c1, .c2")
    assert 3 == len(matches)

    # find_all locates the same elements, but an implementation detail
    # makes it return them in a different order, so only membership is
    # checked here.
    assert all(
        tag in matches for tag in document.find_all(class_=['c1', 'c2'])
    )
class TestPersistence(SoupTest):
"Testing features like pickle and deepcopy."
@ -674,6 +284,18 @@ class TestPersistence(SoupTest):
copied = copy.deepcopy(self.tree)
assert copied.decode() == self.tree.decode()
def test_copy_deeply_nested_document(self):
    """copy() and deepcopy() must handle nesting deeper than the recursion limit."""
    # If either operation relied on recursive function calls, a document
    # nested one level past the interpreter's recursion limit would
    # overflow the Python stack.
    depth = sys.getrecursionlimit() + 1
    document = self.soup("<span>" * depth)

    # Only successful completion matters; the copies are not inspected.
    copy.copy(document)
    copy.deepcopy(document)
def test_copy_preserves_encoding(self):
soup = BeautifulSoup(b'<p>&nbsp;</p>', 'html.parser')
encoding = soup.original_encoding

View file

@ -24,6 +24,7 @@ from bs4.builder import (
from bs4.element import (
Comment,
SoupStrainer,
PYTHON_SPECIFIC_ENCODINGS,
Tag,
NavigableString,
)
@ -210,6 +211,47 @@ class TestConstructor(SoupTest):
assert [] == soup.string_container_stack
class TestOutput(SoupTest):
    """Tests for decoding, encoding and prettifying a BeautifulSoup object."""

    @pytest.mark.parametrize(
        "eventual_encoding,actual_encoding", [
            ("utf-8", "utf-8"),
            ("utf-16", "utf-16"),
        ]
    )
    def test_decode_xml_declaration(self, eventual_encoding, actual_encoding):
        """decode() names the eventual encoding in the XML declaration."""
        # Most of the time, calling decode() on an XML document will
        # give you a document declaration that mentions the encoding
        # you intend to use when encoding the document as a
        # bytestring.
        soup = self.soup("<tag></tag>")
        soup.is_xml = True
        assert (f'<?xml version="1.0" encoding="{actual_encoding}"?>\n<tag></tag>'
                == soup.decode(eventual_encoding=eventual_encoding))

    @pytest.mark.parametrize(
        # list() copies the collection directly; an identity
        # comprehension added nothing.
        "eventual_encoding", list(PYTHON_SPECIFIC_ENCODINGS) + [None]
    )
    def test_decode_xml_declaration_with_missing_or_python_internal_eventual_encoding(self, eventual_encoding):
        """decode() omits the encoding from the XML declaration in these cases."""
        # But if you pass a Python internal encoding into decode(), or
        # omit the eventual_encoding altogether, the document
        # declaration won't mention any particular encoding.
        soup = BeautifulSoup("<tag></tag>", "html.parser")
        soup.is_xml = True
        # Plain string literal: there is nothing to interpolate here.
        assert ('<?xml version="1.0"?>\n<tag></tag>'
                == soup.decode(eventual_encoding=eventual_encoding))

    def test(self):
        """The Tag output methods that go through decode() still work on a soup."""
        # BeautifulSoup subclasses Tag and extends the decode() method.
        # Make sure the other Tag methods which call decode() call
        # it correctly.
        soup = self.soup("<tag></tag>")
        assert b"<tag></tag>" == soup.encode(encoding="utf-8")
        assert b"<tag></tag>" == soup.encode_contents(encoding="utf-8")
        assert "<tag></tag>" == soup.decode_contents()
        assert "<tag>\n</tag>\n" == soup.prettify()
class TestWarnings(SoupTest):
# Note that some of the tests in this class create BeautifulSoup
# objects directly rather than using self.soup(). That's

View file

@ -1,4 +1,4 @@
from .core import contents, where
__all__ = ["contents", "where"]
__version__ = "2022.12.07"
__version__ = "2023.07.22"

View file

@ -791,34 +791,6 @@ uLjbvrW5KfnaNwUASZQDhETnv0Mxz3WLJdH0pmT1kvarBes96aULNmLazAZfNou2
XjG4Kvte9nHfRCaexOYNkbQudZWAUWpLMKawYqGT8ZvYzsRjdT9ZR7E=
-----END CERTIFICATE-----
# Issuer: CN=Hongkong Post Root CA 1 O=Hongkong Post
# Subject: CN=Hongkong Post Root CA 1 O=Hongkong Post
# Label: "Hongkong Post Root CA 1"
# Serial: 1000
# MD5 Fingerprint: a8:0d:6f:39:78:b9:43:6d:77:42:6d:98:5a:cc:23:ca
# SHA1 Fingerprint: d6:da:a8:20:8d:09:d2:15:4d:24:b5:2f:cb:34:6e:b2:58:b2:8a:58
# SHA256 Fingerprint: f9:e6:7d:33:6c:51:00:2a:c0:54:c6:32:02:2d:66:dd:a2:e7:e3:ff:f1:0a:d0:61:ed:31:d8:bb:b4:10:cf:b2
-----BEGIN CERTIFICATE-----
MIIDMDCCAhigAwIBAgICA+gwDQYJKoZIhvcNAQEFBQAwRzELMAkGA1UEBhMCSEsx
FjAUBgNVBAoTDUhvbmdrb25nIFBvc3QxIDAeBgNVBAMTF0hvbmdrb25nIFBvc3Qg
Um9vdCBDQSAxMB4XDTAzMDUxNTA1MTMxNFoXDTIzMDUxNTA0NTIyOVowRzELMAkG
A1UEBhMCSEsxFjAUBgNVBAoTDUhvbmdrb25nIFBvc3QxIDAeBgNVBAMTF0hvbmdr
b25nIFBvc3QgUm9vdCBDQSAxMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKC
AQEArP84tulmAknjorThkPlAj3n54r15/gK97iSSHSL22oVyaf7XPwnU3ZG1ApzQ
jVrhVcNQhrkpJsLj2aDxaQMoIIBFIi1WpztUlVYiWR8o3x8gPW2iNr4joLFutbEn
PzlTCeqrauh0ssJlXI6/fMN4hM2eFvz1Lk8gKgifd/PFHsSaUmYeSF7jEAaPIpjh
ZY4bXSNmO7ilMlHIhqqhqZ5/dpTCpmy3QfDVyAY45tQM4vM7TG1QjMSDJ8EThFk9
nnV0ttgCXjqQesBCNnLsak3c78QA3xMYV18meMjWCnl3v/evt3a5pQuEF10Q6m/h
q5URX208o1xNg1vysxmKgIsLhwIDAQABoyYwJDASBgNVHRMBAf8ECDAGAQH/AgED
MA4GA1UdDwEB/wQEAwIBxjANBgkqhkiG9w0BAQUFAAOCAQEADkbVPK7ih9legYsC
mEEIjEy82tvuJxuC52pF7BaLT4Wg87JwvVqWuspube5Gi27nKi6Wsxkz67SfqLI3
7piol7Yutmcn1KZJ/RyTZXaeQi/cImyaT/JaFTmxcdcrUehtHJjA2Sr0oYJ71clB
oiMBdDhViw+5LmeiIAQ32pwL0xch4I+XeTRvhEgCIDMb5jREn5Fw9IBehEPCKdJs
EhTkYY2sEJCehFC78JZvRZ+K88psT/oROhUVRsPNH4NbLUES7VBnQRM9IauUiqpO
fMGx+6fWtScvl6tu4B3i0RwsH0Ti/L6RoZz71ilTc4afU9hDDl3WY4JxHYB0yvbi
AmvZWg==
-----END CERTIFICATE-----
# Issuer: CN=SecureSign RootCA11 O=Japan Certification Services, Inc.
# Subject: CN=SecureSign RootCA11 O=Japan Certification Services, Inc.
# Label: "SecureSign RootCA11"
@ -1676,50 +1648,6 @@ HL/EVlP6Y2XQ8xwOFvVrhlhNGNTkDY6lnVuR3HYkUD/GKvvZt5y11ubQ2egZixVx
SK236thZiNSQvxaz2emsWWFUyBy6ysHK4bkgTI86k4mloMy/0/Z1pHWWbVY=
-----END CERTIFICATE-----
# Issuer: CN=E-Tugra Certification Authority O=E-Tu\u011fra EBG Bili\u015fim Teknolojileri ve Hizmetleri A.\u015e. OU=E-Tugra Sertifikasyon Merkezi
# Subject: CN=E-Tugra Certification Authority O=E-Tu\u011fra EBG Bili\u015fim Teknolojileri ve Hizmetleri A.\u015e. OU=E-Tugra Sertifikasyon Merkezi
# Label: "E-Tugra Certification Authority"
# Serial: 7667447206703254355
# MD5 Fingerprint: b8:a1:03:63:b0:bd:21:71:70:8a:6f:13:3a:bb:79:49
# SHA1 Fingerprint: 51:c6:e7:08:49:06:6e:f3:92:d4:5c:a0:0d:6d:a3:62:8f:c3:52:39
# SHA256 Fingerprint: b0:bf:d5:2b:b0:d7:d9:bd:92:bf:5d:4d:c1:3d:a2:55:c0:2c:54:2f:37:83:65:ea:89:39:11:f5:5e:55:f2:3c
-----BEGIN CERTIFICATE-----
MIIGSzCCBDOgAwIBAgIIamg+nFGby1MwDQYJKoZIhvcNAQELBQAwgbIxCzAJBgNV
BAYTAlRSMQ8wDQYDVQQHDAZBbmthcmExQDA+BgNVBAoMN0UtVHXEn3JhIEVCRyBC
aWxpxZ9pbSBUZWtub2xvamlsZXJpIHZlIEhpem1ldGxlcmkgQS7Fni4xJjAkBgNV
BAsMHUUtVHVncmEgU2VydGlmaWthc3lvbiBNZXJrZXppMSgwJgYDVQQDDB9FLVR1
Z3JhIENlcnRpZmljYXRpb24gQXV0aG9yaXR5MB4XDTEzMDMwNTEyMDk0OFoXDTIz
MDMwMzEyMDk0OFowgbIxCzAJBgNVBAYTAlRSMQ8wDQYDVQQHDAZBbmthcmExQDA+
BgNVBAoMN0UtVHXEn3JhIEVCRyBCaWxpxZ9pbSBUZWtub2xvamlsZXJpIHZlIEhp
em1ldGxlcmkgQS7Fni4xJjAkBgNVBAsMHUUtVHVncmEgU2VydGlmaWthc3lvbiBN
ZXJrZXppMSgwJgYDVQQDDB9FLVR1Z3JhIENlcnRpZmljYXRpb24gQXV0aG9yaXR5
MIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEA4vU/kwVRHoViVF56C/UY
B4Oufq9899SKa6VjQzm5S/fDxmSJPZQuVIBSOTkHS0vdhQd2h8y/L5VMzH2nPbxH
D5hw+IyFHnSOkm0bQNGZDbt1bsipa5rAhDGvykPL6ys06I+XawGb1Q5KCKpbknSF
Q9OArqGIW66z6l7LFpp3RMih9lRozt6Plyu6W0ACDGQXwLWTzeHxE2bODHnv0ZEo
q1+gElIwcxmOj+GMB6LDu0rw6h8VqO4lzKRG+Bsi77MOQ7osJLjFLFzUHPhdZL3D
k14opz8n8Y4e0ypQBaNV2cvnOVPAmJ6MVGKLJrD3fY185MaeZkJVgkfnsliNZvcH
fC425lAcP9tDJMW/hkd5s3kc91r0E+xs+D/iWR+V7kI+ua2oMoVJl0b+SzGPWsut
dEcf6ZG33ygEIqDUD13ieU/qbIWGvaimzuT6w+Gzrt48Ue7LE3wBf4QOXVGUnhMM
ti6lTPk5cDZvlsouDERVxcr6XQKj39ZkjFqzAQqptQpHF//vkUAqjqFGOjGY5RH8
zLtJVor8udBhmm9lbObDyz51Sf6Pp+KJxWfXnUYTTjF2OySznhFlhqt/7x3U+Lzn
rFpct1pHXFXOVbQicVtbC/DP3KBhZOqp12gKY6fgDT+gr9Oq0n7vUaDmUStVkhUX
U8u3Zg5mTPj5dUyQ5xJwx0UCAwEAAaNjMGEwHQYDVR0OBBYEFC7j27JJ0JxUeVz6
Jyr+zE7S6E5UMA8GA1UdEwEB/wQFMAMBAf8wHwYDVR0jBBgwFoAULuPbsknQnFR5
XPonKv7MTtLoTlQwDgYDVR0PAQH/BAQDAgEGMA0GCSqGSIb3DQEBCwUAA4ICAQAF
Nzr0TbdF4kV1JI+2d1LoHNgQk2Xz8lkGpD4eKexd0dCrfOAKkEh47U6YA5n+KGCR
HTAduGN8qOY1tfrTYXbm1gdLymmasoR6d5NFFxWfJNCYExL/u6Au/U5Mh/jOXKqY
GwXgAEZKgoClM4so3O0409/lPun++1ndYYRP0lSWE2ETPo+Aab6TR7U1Q9Jauz1c
77NCR807VRMGsAnb/WP2OogKmW9+4c4bU2pEZiNRCHu8W1Ki/QY3OEBhj0qWuJA3
+GbHeJAAFS6LrVE1Uweoa2iu+U48BybNCAVwzDk/dr2l02cmAYamU9JgO3xDf1WK
vJUawSg5TB9D0pH0clmKuVb8P7Sd2nCcdlqMQ1DujjByTd//SffGqWfZbawCEeI6
FiWnWAjLb1NBnEg4R2gz0dfHj9R0IdTDBZB6/86WiLEVKV0jq9BgoRJP3vQXzTLl
yb/IQ639Lo7xr+L0mPoSHyDYwKcMhcWQ9DstliaxLL5Mq+ux0orJ23gTDx4JnW2P
AJ8C2sH6H3p6CcRK5ogql5+Ji/03X186zjhZhkuvcQu02PJwT58yE+Owp1fl2tpD
y4Q08ijE6m30Ku/Ba3ba+367hTzSU8JNvnHhRdH9I2cNE3X7z2VnIp2usAnRCf8d
NL/+I5c30jn6PQ0GC7TbO6Orb1wdtn7os4I07QZcJA==
-----END CERTIFICATE-----
# Issuer: CN=T-TeleSec GlobalRoot Class 2 O=T-Systems Enterprise Services GmbH OU=T-Systems Trust Center
# Subject: CN=T-TeleSec GlobalRoot Class 2 O=T-Systems Enterprise Services GmbH OU=T-Systems Trust Center
# Label: "T-TeleSec GlobalRoot Class 2"
@ -4397,73 +4325,6 @@ ut6Dacpps6kFtZaSF4fC0urQe87YQVt8rgIwRt7qy12a7DLCZRawTDBcMPPaTnOG
BtjOiQRINzf43TNRnXCve1XYAS59BWQOhriR
-----END CERTIFICATE-----
# Issuer: CN=E-Tugra Global Root CA RSA v3 O=E-Tugra EBG A.S. OU=E-Tugra Trust Center
# Subject: CN=E-Tugra Global Root CA RSA v3 O=E-Tugra EBG A.S. OU=E-Tugra Trust Center
# Label: "E-Tugra Global Root CA RSA v3"
# Serial: 75951268308633135324246244059508261641472512052
# MD5 Fingerprint: 22:be:10:f6:c2:f8:03:88:73:5f:33:29:47:28:47:a4
# SHA1 Fingerprint: e9:a8:5d:22:14:52:1c:5b:aa:0a:b4:be:24:6a:23:8a:c9:ba:e2:a9
# SHA256 Fingerprint: ef:66:b0:b1:0a:3c:db:9f:2e:36:48:c7:6b:d2:af:18:ea:d2:bf:e6:f1:17:65:5e:28:c4:06:0d:a1:a3:f4:c2
-----BEGIN CERTIFICATE-----
MIIF8zCCA9ugAwIBAgIUDU3FzRYilZYIfrgLfxUGNPt5EDQwDQYJKoZIhvcNAQEL
BQAwgYAxCzAJBgNVBAYTAlRSMQ8wDQYDVQQHEwZBbmthcmExGTAXBgNVBAoTEEUt
VHVncmEgRUJHIEEuUy4xHTAbBgNVBAsTFEUtVHVncmEgVHJ1c3QgQ2VudGVyMSYw
JAYDVQQDEx1FLVR1Z3JhIEdsb2JhbCBSb290IENBIFJTQSB2MzAeFw0yMDAzMTgw
OTA3MTdaFw00NTAzMTIwOTA3MTdaMIGAMQswCQYDVQQGEwJUUjEPMA0GA1UEBxMG
QW5rYXJhMRkwFwYDVQQKExBFLVR1Z3JhIEVCRyBBLlMuMR0wGwYDVQQLExRFLVR1
Z3JhIFRydXN0IENlbnRlcjEmMCQGA1UEAxMdRS1UdWdyYSBHbG9iYWwgUm9vdCBD
QSBSU0EgdjMwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQCiZvCJt3J7
7gnJY9LTQ91ew6aEOErxjYG7FL1H6EAX8z3DeEVypi6Q3po61CBxyryfHUuXCscx
uj7X/iWpKo429NEvx7epXTPcMHD4QGxLsqYxYdE0PD0xesevxKenhOGXpOhL9hd8
7jwH7eKKV9y2+/hDJVDqJ4GohryPUkqWOmAalrv9c/SF/YP9f4RtNGx/ardLAQO/
rWm31zLZ9Vdq6YaCPqVmMbMWPcLzJmAy01IesGykNz709a/r4d+ABs8qQedmCeFL
l+d3vSFtKbZnwy1+7dZ5ZdHPOrbRsV5WYVB6Ws5OUDGAA5hH5+QYfERaxqSzO8bG
wzrwbMOLyKSRBfP12baqBqG3q+Sx6iEUXIOk/P+2UNOMEiaZdnDpwA+mdPy70Bt4
znKS4iicvObpCdg604nmvi533wEKb5b25Y08TVJ2Glbhc34XrD2tbKNSEhhw5oBO
M/J+JjKsBY04pOZ2PJ8QaQ5tndLBeSBrW88zjdGUdjXnXVXHt6woq0bM5zshtQoK
5EpZ3IE1S0SVEgpnpaH/WwAH0sDM+T/8nzPyAPiMbIedBi3x7+PmBvrFZhNb/FAH
nnGGstpvdDDPk1Po3CLW3iAfYY2jLqN4MpBs3KwytQXk9TwzDdbgh3cXTJ2w2Amo
DVf3RIXwyAS+XF1a4xeOVGNpf0l0ZAWMowIDAQABo2MwYTAPBgNVHRMBAf8EBTAD
AQH/MB8GA1UdIwQYMBaAFLK0ruYt9ybVqnUtdkvAG1Mh0EjvMB0GA1UdDgQWBBSy
tK7mLfcm1ap1LXZLwBtTIdBI7zAOBgNVHQ8BAf8EBAMCAQYwDQYJKoZIhvcNAQEL
BQADggIBAImocn+M684uGMQQgC0QDP/7FM0E4BQ8Tpr7nym/Ip5XuYJzEmMmtcyQ
6dIqKe6cLcwsmb5FJ+Sxce3kOJUxQfJ9emN438o2Fi+CiJ+8EUdPdk3ILY7r3y18
Tjvarvbj2l0Upq7ohUSdBm6O++96SmotKygY/r+QLHUWnw/qln0F7psTpURs+APQ
3SPh/QMSEgj0GDSz4DcLdxEBSL9htLX4GdnLTeqjjO/98Aa1bZL0SmFQhO3sSdPk
vmjmLuMxC1QLGpLWgti2omU8ZgT5Vdps+9u1FGZNlIM7zR6mK7L+d0CGq+ffCsn9
9t2HVhjYsCxVYJb6CH5SkPVLpi6HfMsg2wY+oF0Dd32iPBMbKaITVaA9FCKvb7jQ
mhty3QUBjYZgv6Rn7rWlDdF/5horYmbDB7rnoEgcOMPpRfunf/ztAmgayncSd6YA
VSgU7NbHEqIbZULpkejLPoeJVF3Zr52XnGnnCv8PWniLYypMfUeUP95L6VPQMPHF
9p5J3zugkaOj/s1YzOrfr28oO6Bpm4/srK4rVJ2bBLFHIK+WEj5jlB0E5y67hscM
moi/dkfv97ALl2bSRM9gUgfh1SxKOidhd8rXj+eHDjD/DLsE4mHDosiXYY60MGo8
bcIHX0pzLz/5FooBZu+6kcpSV3uu1OYP3Qt6f4ueJiDPO++BcYNZ
-----END CERTIFICATE-----
# Issuer: CN=E-Tugra Global Root CA ECC v3 O=E-Tugra EBG A.S. OU=E-Tugra Trust Center
# Subject: CN=E-Tugra Global Root CA ECC v3 O=E-Tugra EBG A.S. OU=E-Tugra Trust Center
# Label: "E-Tugra Global Root CA ECC v3"
# Serial: 218504919822255052842371958738296604628416471745
# MD5 Fingerprint: 46:bc:81:bb:f1:b5:1e:f7:4b:96:bc:14:e2:e7:27:64
# SHA1 Fingerprint: 8a:2f:af:57:53:b1:b0:e6:a1:04:ec:5b:6a:69:71:6d:f6:1c:e2:84
# SHA256 Fingerprint: 87:3f:46:85:fa:7f:56:36:25:25:2e:6d:36:bc:d7:f1:6f:c2:49:51:f2:64:e4:7e:1b:95:4f:49:08:cd:ca:13
-----BEGIN CERTIFICATE-----
MIICpTCCAiqgAwIBAgIUJkYZdzHhT28oNt45UYbm1JeIIsEwCgYIKoZIzj0EAwMw
gYAxCzAJBgNVBAYTAlRSMQ8wDQYDVQQHEwZBbmthcmExGTAXBgNVBAoTEEUtVHVn
cmEgRUJHIEEuUy4xHTAbBgNVBAsTFEUtVHVncmEgVHJ1c3QgQ2VudGVyMSYwJAYD
VQQDEx1FLVR1Z3JhIEdsb2JhbCBSb290IENBIEVDQyB2MzAeFw0yMDAzMTgwOTQ2
NThaFw00NTAzMTIwOTQ2NThaMIGAMQswCQYDVQQGEwJUUjEPMA0GA1UEBxMGQW5r
YXJhMRkwFwYDVQQKExBFLVR1Z3JhIEVCRyBBLlMuMR0wGwYDVQQLExRFLVR1Z3Jh
IFRydXN0IENlbnRlcjEmMCQGA1UEAxMdRS1UdWdyYSBHbG9iYWwgUm9vdCBDQSBF
Q0MgdjMwdjAQBgcqhkjOPQIBBgUrgQQAIgNiAASOmCm/xxAeJ9urA8woLNheSBkQ
KczLWYHMjLiSF4mDKpL2w6QdTGLVn9agRtwcvHbB40fQWxPa56WzZkjnIZpKT4YK
fWzqTTKACrJ6CZtpS5iB4i7sAnCWH/31Rs7K3IKjYzBhMA8GA1UdEwEB/wQFMAMB
Af8wHwYDVR0jBBgwFoAU/4Ixcj75xGZsrTie0bBRiKWQzPUwHQYDVR0OBBYEFP+C
MXI++cRmbK04ntGwUYilkMz1MA4GA1UdDwEB/wQEAwIBBjAKBggqhkjOPQQDAwNp
ADBmAjEA5gVYaWHlLcoNy/EZCL3W/VGSGn5jVASQkZo1kTmZ+gepZpO6yGjUij/6
7W4WAie3AjEA3VoXK3YdZUKWpqxdinlW2Iob35reX8dQj7FbcQwm32pAAOwzkSFx
vmjkI6TZraE3
-----END CERTIFICATE-----
# Issuer: CN=Security Communication RootCA3 O=SECOM Trust Systems CO.,LTD.
# Subject: CN=Security Communication RootCA3 O=SECOM Trust Systems CO.,LTD.
# Label: "Security Communication RootCA3"
@ -4525,3 +4386,250 @@ BAMCAQYwDwYDVR0TAQH/BAUwAwEB/zAKBggqhkjOPQQDAwNoADBlAjAVXUI9/Lbu
9zuxNuie9sRGKEkz0FhDKmMpzE2xtHqiuQ04pV1IKv3LsnNdo4gIxwwCMQDAqy0O
be0YottT6SXbVQjgUMzfRGEWgqtJsLKB7HOHeLRMsmIbEvoWTSVLY70eN9k=
-----END CERTIFICATE-----
# Issuer: CN=BJCA Global Root CA1 O=BEIJING CERTIFICATE AUTHORITY
# Subject: CN=BJCA Global Root CA1 O=BEIJING CERTIFICATE AUTHORITY
# Label: "BJCA Global Root CA1"
# Serial: 113562791157148395269083148143378328608
# MD5 Fingerprint: 42:32:99:76:43:33:36:24:35:07:82:9b:28:f9:d0:90
# SHA1 Fingerprint: d5:ec:8d:7b:4c:ba:79:f4:e7:e8:cb:9d:6b:ae:77:83:10:03:21:6a
# SHA256 Fingerprint: f3:89:6f:88:fe:7c:0a:88:27:66:a7:fa:6a:d2:74:9f:b5:7a:7f:3e:98:fb:76:9c:1f:a7:b0:9c:2c:44:d5:ae
-----BEGIN CERTIFICATE-----
MIIFdDCCA1ygAwIBAgIQVW9l47TZkGobCdFsPsBsIDANBgkqhkiG9w0BAQsFADBU
MQswCQYDVQQGEwJDTjEmMCQGA1UECgwdQkVJSklORyBDRVJUSUZJQ0FURSBBVVRI
T1JJVFkxHTAbBgNVBAMMFEJKQ0EgR2xvYmFsIFJvb3QgQ0ExMB4XDTE5MTIxOTAz
MTYxN1oXDTQ0MTIxMjAzMTYxN1owVDELMAkGA1UEBhMCQ04xJjAkBgNVBAoMHUJF
SUpJTkcgQ0VSVElGSUNBVEUgQVVUSE9SSVRZMR0wGwYDVQQDDBRCSkNBIEdsb2Jh
bCBSb290IENBMTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAPFmCL3Z
xRVhy4QEQaVpN3cdwbB7+sN3SJATcmTRuHyQNZ0YeYjjlwE8R4HyDqKYDZ4/N+AZ
spDyRhySsTphzvq3Rp4Dhtczbu33RYx2N95ulpH3134rhxfVizXuhJFyV9xgw8O5
58dnJCNPYwpj9mZ9S1WnP3hkSWkSl+BMDdMJoDIwOvqfwPKcxRIqLhy1BDPapDgR
at7GGPZHOiJBhyL8xIkoVNiMpTAK+BcWyqw3/XmnkRd4OJmtWO2y3syJfQOcs4ll
5+M7sSKGjwZteAf9kRJ/sGsciQ35uMt0WwfCyPQ10WRjeulumijWML3mG90Vr4Tq
nMfK9Q7q8l0ph49pczm+LiRvRSGsxdRpJQaDrXpIhRMsDQa4bHlW/KNnMoH1V6XK
V0Jp6VwkYe/iMBhORJhVb3rCk9gZtt58R4oRTklH2yiUAguUSiz5EtBP6DF+bHq/
pj+bOT0CFqMYs2esWz8sgytnOYFcuX6U1WTdno9uruh8W7TXakdI136z1C2OVnZO
z2nxbkRs1CTqjSShGL+9V/6pmTW12xB3uD1IutbB5/EjPtffhZ0nPNRAvQoMvfXn
jSXWgXSHRtQpdaJCbPdzied9v3pKH9MiyRVVz99vfFXQpIsHETdfg6YmV6YBW37+
WGgHqel62bno/1Afq8K0wM7o6v0PvY1NuLxxAgMBAAGjQjBAMB0GA1UdDgQWBBTF
7+3M2I0hxkjk49cULqcWk+WYATAPBgNVHRMBAf8EBTADAQH/MA4GA1UdDwEB/wQE
AwIBBjANBgkqhkiG9w0BAQsFAAOCAgEAUoKsITQfI/Ki2Pm4rzc2IInRNwPWaZ+4
YRC6ojGYWUfo0Q0lHhVBDOAqVdVXUsv45Mdpox1NcQJeXyFFYEhcCY5JEMEE3Kli
awLwQ8hOnThJdMkycFRtwUf8jrQ2ntScvd0g1lPJGKm1Vrl2i5VnZu69mP6u775u
+2D2/VnGKhs/I0qUJDAnyIm860Qkmss9vk/Ves6OF8tiwdneHg56/0OGNFK8YT88
X7vZdrRTvJez/opMEi4r89fO4aL/3Xtw+zuhTaRjAv04l5U/BXCga99igUOLtFkN
SoxUnMW7gZ/NfaXvCyUeOiDbHPwfmGcCCtRzRBPbUYQaVQNW4AB+dAb/OMRyHdOo
P2gxXdMJxy6MW2Pg6Nwe0uxhHvLe5e/2mXZgLR6UcnHGCyoyx5JO1UbXHfmpGQrI
+pXObSOYqgs4rZpWDW+N8TEAiMEXnM0ZNjX+VVOg4DwzX5Ze4jLp3zO7Bkqp2IRz
znfSxqxx4VyjHQy7Ct9f4qNx2No3WqB4K/TUfet27fJhcKVlmtOJNBir+3I+17Q9
eVzYH6Eze9mCUAyTF6ps3MKCuwJXNq+YJyo5UOGwifUll35HaBC07HPKs5fRJNz2
YqAo07WjuGS3iGJCz51TzZm+ZGiPTx4SSPfSKcOYKMryMguTjClPPGAyzQWWYezy
r/6zcCwupvI=
-----END CERTIFICATE-----
# Issuer: CN=BJCA Global Root CA2 O=BEIJING CERTIFICATE AUTHORITY
# Subject: CN=BJCA Global Root CA2 O=BEIJING CERTIFICATE AUTHORITY
# Label: "BJCA Global Root CA2"
# Serial: 58605626836079930195615843123109055211
# MD5 Fingerprint: 5e:0a:f6:47:5f:a6:14:e8:11:01:95:3f:4d:01:eb:3c
# SHA1 Fingerprint: f4:27:86:eb:6e:b8:6d:88:31:67:02:fb:ba:66:a4:53:00:aa:7a:a6
# SHA256 Fingerprint: 57:4d:f6:93:1e:27:80:39:66:7b:72:0a:fd:c1:60:0f:c2:7e:b6:6d:d3:09:29:79:fb:73:85:64:87:21:28:82
-----BEGIN CERTIFICATE-----
MIICJTCCAaugAwIBAgIQLBcIfWQqwP6FGFkGz7RK6zAKBggqhkjOPQQDAzBUMQsw
CQYDVQQGEwJDTjEmMCQGA1UECgwdQkVJSklORyBDRVJUSUZJQ0FURSBBVVRIT1JJ
VFkxHTAbBgNVBAMMFEJKQ0EgR2xvYmFsIFJvb3QgQ0EyMB4XDTE5MTIxOTAzMTgy
MVoXDTQ0MTIxMjAzMTgyMVowVDELMAkGA1UEBhMCQ04xJjAkBgNVBAoMHUJFSUpJ
TkcgQ0VSVElGSUNBVEUgQVVUSE9SSVRZMR0wGwYDVQQDDBRCSkNBIEdsb2JhbCBS
b290IENBMjB2MBAGByqGSM49AgEGBSuBBAAiA2IABJ3LgJGNU2e1uVCxA/jlSR9B
IgmwUVJY1is0j8USRhTFiy8shP8sbqjV8QnjAyEUxEM9fMEsxEtqSs3ph+B99iK+
+kpRuDCK/eHeGBIK9ke35xe/J4rUQUyWPGCWwf0VHKNCMEAwHQYDVR0OBBYEFNJK
sVF/BvDRgh9Obl+rg/xI1LCRMA8GA1UdEwEB/wQFMAMBAf8wDgYDVR0PAQH/BAQD
AgEGMAoGCCqGSM49BAMDA2gAMGUCMBq8W9f+qdJUDkpd0m2xQNz0Q9XSSpkZElaA
94M04TVOSG0ED1cxMDAtsaqdAzjbBgIxAMvMh1PLet8gUXOQwKhbYdDFUDn9hf7B
43j4ptZLvZuHjw/l1lOWqzzIQNph91Oj9w==
-----END CERTIFICATE-----
# Issuer: CN=Sectigo Public Server Authentication Root E46 O=Sectigo Limited
# Subject: CN=Sectigo Public Server Authentication Root E46 O=Sectigo Limited
# Label: "Sectigo Public Server Authentication Root E46"
# Serial: 88989738453351742415770396670917916916
# MD5 Fingerprint: 28:23:f8:b2:98:5c:37:16:3b:3e:46:13:4e:b0:b3:01
# SHA1 Fingerprint: ec:8a:39:6c:40:f0:2e:bc:42:75:d4:9f:ab:1c:1a:5b:67:be:d2:9a
# SHA256 Fingerprint: c9:0f:26:f0:fb:1b:40:18:b2:22:27:51:9b:5c:a2:b5:3e:2c:a5:b3:be:5c:f1:8e:fe:1b:ef:47:38:0c:53:83
-----BEGIN CERTIFICATE-----
MIICOjCCAcGgAwIBAgIQQvLM2htpN0RfFf51KBC49DAKBggqhkjOPQQDAzBfMQsw
CQYDVQQGEwJHQjEYMBYGA1UEChMPU2VjdGlnbyBMaW1pdGVkMTYwNAYDVQQDEy1T
ZWN0aWdvIFB1YmxpYyBTZXJ2ZXIgQXV0aGVudGljYXRpb24gUm9vdCBFNDYwHhcN
MjEwMzIyMDAwMDAwWhcNNDYwMzIxMjM1OTU5WjBfMQswCQYDVQQGEwJHQjEYMBYG
A1UEChMPU2VjdGlnbyBMaW1pdGVkMTYwNAYDVQQDEy1TZWN0aWdvIFB1YmxpYyBT
ZXJ2ZXIgQXV0aGVudGljYXRpb24gUm9vdCBFNDYwdjAQBgcqhkjOPQIBBgUrgQQA
IgNiAAR2+pmpbiDt+dd34wc7qNs9Xzjoq1WmVk/WSOrsfy2qw7LFeeyZYX8QeccC
WvkEN/U0NSt3zn8gj1KjAIns1aeibVvjS5KToID1AZTc8GgHHs3u/iVStSBDHBv+
6xnOQ6OjQjBAMB0GA1UdDgQWBBTRItpMWfFLXyY4qp3W7usNw/upYTAOBgNVHQ8B
Af8EBAMCAYYwDwYDVR0TAQH/BAUwAwEB/zAKBggqhkjOPQQDAwNnADBkAjAn7qRa
qCG76UeXlImldCBteU/IvZNeWBj7LRoAasm4PdCkT0RHlAFWovgzJQxC36oCMB3q
4S6ILuH5px0CMk7yn2xVdOOurvulGu7t0vzCAxHrRVxgED1cf5kDW21USAGKcw==
-----END CERTIFICATE-----
# Issuer: CN=Sectigo Public Server Authentication Root R46 O=Sectigo Limited
# Subject: CN=Sectigo Public Server Authentication Root R46 O=Sectigo Limited
# Label: "Sectigo Public Server Authentication Root R46"
# Serial: 156256931880233212765902055439220583700
# MD5 Fingerprint: 32:10:09:52:00:d5:7e:6c:43:df:15:c0:b1:16:93:e5
# SHA1 Fingerprint: ad:98:f9:f3:e4:7d:75:3b:65:d4:82:b3:a4:52:17:bb:6e:f5:e4:38
# SHA256 Fingerprint: 7b:b6:47:a6:2a:ee:ac:88:bf:25:7a:a5:22:d0:1f:fe:a3:95:e0:ab:45:c7:3f:93:f6:56:54:ec:38:f2:5a:06
-----BEGIN CERTIFICATE-----
MIIFijCCA3KgAwIBAgIQdY39i658BwD6qSWn4cetFDANBgkqhkiG9w0BAQwFADBf
MQswCQYDVQQGEwJHQjEYMBYGA1UEChMPU2VjdGlnbyBMaW1pdGVkMTYwNAYDVQQD
Ey1TZWN0aWdvIFB1YmxpYyBTZXJ2ZXIgQXV0aGVudGljYXRpb24gUm9vdCBSNDYw
HhcNMjEwMzIyMDAwMDAwWhcNNDYwMzIxMjM1OTU5WjBfMQswCQYDVQQGEwJHQjEY
MBYGA1UEChMPU2VjdGlnbyBMaW1pdGVkMTYwNAYDVQQDEy1TZWN0aWdvIFB1Ymxp
YyBTZXJ2ZXIgQXV0aGVudGljYXRpb24gUm9vdCBSNDYwggIiMA0GCSqGSIb3DQEB
AQUAA4ICDwAwggIKAoICAQCTvtU2UnXYASOgHEdCSe5jtrch/cSV1UgrJnwUUxDa
ef0rty2k1Cz66jLdScK5vQ9IPXtamFSvnl0xdE8H/FAh3aTPaE8bEmNtJZlMKpnz
SDBh+oF8HqcIStw+KxwfGExxqjWMrfhu6DtK2eWUAtaJhBOqbchPM8xQljeSM9xf
iOefVNlI8JhD1mb9nxc4Q8UBUQvX4yMPFF1bFOdLvt30yNoDN9HWOaEhUTCDsG3X
ME6WW5HwcCSrv0WBZEMNvSE6Lzzpng3LILVCJ8zab5vuZDCQOc2TZYEhMbUjUDM3
IuM47fgxMMxF/mL50V0yeUKH32rMVhlATc6qu/m1dkmU8Sf4kaWD5QazYw6A3OAS
VYCmO2a0OYctyPDQ0RTp5A1NDvZdV3LFOxxHVp3i1fuBYYzMTYCQNFu31xR13NgE
SJ/AwSiItOkcyqex8Va3e0lMWeUgFaiEAin6OJRpmkkGj80feRQXEgyDet4fsZfu
+Zd4KKTIRJLpfSYFplhym3kT2BFfrsU4YjRosoYwjviQYZ4ybPUHNs2iTG7sijbt
8uaZFURww3y8nDnAtOFr94MlI1fZEoDlSfB1D++N6xybVCi0ITz8fAr/73trdf+L
HaAZBav6+CuBQug4urv7qv094PPK306Xlynt8xhW6aWWrL3DkJiy4Pmi1KZHQ3xt
zwIDAQABo0IwQDAdBgNVHQ4EFgQUVnNYZJX5khqwEioEYnmhQBWIIUkwDgYDVR0P
AQH/BAQDAgGGMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQEMBQADggIBAC9c
mTz8Bl6MlC5w6tIyMY208FHVvArzZJ8HXtXBc2hkeqK5Duj5XYUtqDdFqij0lgVQ
YKlJfp/imTYpE0RHap1VIDzYm/EDMrraQKFz6oOht0SmDpkBm+S8f74TlH7Kph52
gDY9hAaLMyZlbcp+nv4fjFg4exqDsQ+8FxG75gbMY/qB8oFM2gsQa6H61SilzwZA
Fv97fRheORKkU55+MkIQpiGRqRxOF3yEvJ+M0ejf5lG5Nkc/kLnHvALcWxxPDkjB
JYOcCj+esQMzEhonrPcibCTRAUH4WAP+JWgiH5paPHxsnnVI84HxZmduTILA7rpX
DhjvLpr3Etiga+kFpaHpaPi8TD8SHkXoUsCjvxInebnMMTzD9joiFgOgyY9mpFui
TdaBJQbpdqQACj7LzTWb4OE4y2BThihCQRxEV+ioratF4yUQvNs+ZUH7G6aXD+u5
dHn5HrwdVw1Hr8Mvn4dGp+smWg9WY7ViYG4A++MnESLn/pmPNPW56MORcr3Ywx65
LvKRRFHQV80MNNVIIb/bE/FmJUNS0nAiNs2fxBx1IK1jcmMGDw4nztJqDby1ORrp
0XZ60Vzk50lJLVU3aPAaOpg+VBeHVOmmJ1CJeyAvP/+/oYtKR5j/K3tJPsMpRmAY
QqszKbrAKbkTidOIijlBO8n9pu0f9GBj39ItVQGL
-----END CERTIFICATE-----
# Issuer: CN=SSL.com TLS RSA Root CA 2022 O=SSL Corporation
# Subject: CN=SSL.com TLS RSA Root CA 2022 O=SSL Corporation
# Label: "SSL.com TLS RSA Root CA 2022"
# Serial: 148535279242832292258835760425842727825
# MD5 Fingerprint: d8:4e:c6:59:30:d8:fe:a0:d6:7a:5a:2c:2c:69:78:da
# SHA1 Fingerprint: ec:2c:83:40:72:af:26:95:10:ff:0e:f2:03:ee:31:70:f6:78:9d:ca
# SHA256 Fingerprint: 8f:af:7d:2e:2c:b4:70:9b:b8:e0:b3:36:66:bf:75:a5:dd:45:b5:de:48:0f:8e:a8:d4:bf:e6:be:bc:17:f2:ed
-----BEGIN CERTIFICATE-----
MIIFiTCCA3GgAwIBAgIQb77arXO9CEDii02+1PdbkTANBgkqhkiG9w0BAQsFADBO
MQswCQYDVQQGEwJVUzEYMBYGA1UECgwPU1NMIENvcnBvcmF0aW9uMSUwIwYDVQQD
DBxTU0wuY29tIFRMUyBSU0EgUm9vdCBDQSAyMDIyMB4XDTIyMDgyNTE2MzQyMloX
DTQ2MDgxOTE2MzQyMVowTjELMAkGA1UEBhMCVVMxGDAWBgNVBAoMD1NTTCBDb3Jw
b3JhdGlvbjElMCMGA1UEAwwcU1NMLmNvbSBUTFMgUlNBIFJvb3QgQ0EgMjAyMjCC
AiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBANCkCXJPQIgSYT41I57u9nTP
L3tYPc48DRAokC+X94xI2KDYJbFMsBFMF3NQ0CJKY7uB0ylu1bUJPiYYf7ISf5OY
t6/wNr/y7hienDtSxUcZXXTzZGbVXcdotL8bHAajvI9AI7YexoS9UcQbOcGV0ins
S657Lb85/bRi3pZ7QcacoOAGcvvwB5cJOYF0r/c0WRFXCsJbwST0MXMwgsadugL3
PnxEX4MN8/HdIGkWCVDi1FW24IBydm5MR7d1VVm0U3TZlMZBrViKMWYPHqIbKUBO
L9975hYsLfy/7PO0+r4Y9ptJ1O4Fbtk085zx7AGL0SDGD6C1vBdOSHtRwvzpXGk3
R2azaPgVKPC506QVzFpPulJwoxJF3ca6TvvC0PeoUidtbnm1jPx7jMEWTO6Af77w
dr5BUxIzrlo4QqvXDz5BjXYHMtWrifZOZ9mxQnUjbvPNQrL8VfVThxc7wDNY8VLS
+YCk8OjwO4s4zKTGkH8PnP2L0aPP2oOnaclQNtVcBdIKQXTbYxE3waWglksejBYS
d66UNHsef8JmAOSqg+qKkK3ONkRN0VHpvB/zagX9wHQfJRlAUW7qglFA35u5CCoG
AtUjHBPW6dvbxrB6y3snm/vg1UYk7RBLY0ulBY+6uB0rpvqR4pJSvezrZ5dtmi2f
gTIFZzL7SAg/2SW4BCUvAgMBAAGjYzBhMA8GA1UdEwEB/wQFMAMBAf8wHwYDVR0j
BBgwFoAU+y437uOEeicuzRk1sTN8/9REQrkwHQYDVR0OBBYEFPsuN+7jhHonLs0Z
NbEzfP/UREK5MA4GA1UdDwEB/wQEAwIBhjANBgkqhkiG9w0BAQsFAAOCAgEAjYlt
hEUY8U+zoO9opMAdrDC8Z2awms22qyIZZtM7QbUQnRC6cm4pJCAcAZli05bg4vsM
QtfhWsSWTVTNj8pDU/0quOr4ZcoBwq1gaAafORpR2eCNJvkLTqVTJXojpBzOCBvf
R4iyrT7gJ4eLSYwfqUdYe5byiB0YrrPRpgqU+tvT5TgKa3kSM/tKWTcWQA673vWJ
DPFs0/dRa1419dvAJuoSc06pkZCmF8NsLzjUo3KUQyxi4U5cMj29TH0ZR6LDSeeW
P4+a0zvkEdiLA9z2tmBVGKaBUfPhqBVq6+AL8BQx1rmMRTqoENjwuSfr98t67wVy
lrXEj5ZzxOhWc5y8aVFjvO9nHEMaX3cZHxj4HCUp+UmZKbaSPaKDN7EgkaibMOlq
bLQjk2UEqxHzDh1TJElTHaE/nUiSEeJ9DU/1172iWD54nR4fK/4huxoTtrEoZP2w
AgDHbICivRZQIA9ygV/MlP+7mea6kMvq+cYMwq7FGc4zoWtcu358NFcXrfA/rs3q
r5nsLFR+jM4uElZI7xc7P0peYNLcdDa8pUNjyw9bowJWCZ4kLOGGgYz+qxcs+sji
Mho6/4UIyYOf8kpIEFR3N+2ivEC+5BB09+Rbu7nzifmPQdjH5FCQNYA+HLhNkNPU
98OwoX6EyneSMSy4kLGCenROmxMmtNVQZlR4rmA=
-----END CERTIFICATE-----
# Issuer: CN=SSL.com TLS ECC Root CA 2022 O=SSL Corporation
# Subject: CN=SSL.com TLS ECC Root CA 2022 O=SSL Corporation
# Label: "SSL.com TLS ECC Root CA 2022"
# Serial: 26605119622390491762507526719404364228
# MD5 Fingerprint: 99:d7:5c:f1:51:36:cc:e9:ce:d9:19:2e:77:71:56:c5
# SHA1 Fingerprint: 9f:5f:d9:1a:54:6d:f5:0c:71:f0:ee:7a:bd:17:49:98:84:73:e2:39
# SHA256 Fingerprint: c3:2f:fd:9f:46:f9:36:d1:6c:36:73:99:09:59:43:4b:9a:d6:0a:af:bb:9e:7c:f3:36:54:f1:44:cc:1b:a1:43
-----BEGIN CERTIFICATE-----
MIICOjCCAcCgAwIBAgIQFAP1q/s3ixdAW+JDsqXRxDAKBggqhkjOPQQDAzBOMQsw
CQYDVQQGEwJVUzEYMBYGA1UECgwPU1NMIENvcnBvcmF0aW9uMSUwIwYDVQQDDBxT
U0wuY29tIFRMUyBFQ0MgUm9vdCBDQSAyMDIyMB4XDTIyMDgyNTE2MzM0OFoXDTQ2
MDgxOTE2MzM0N1owTjELMAkGA1UEBhMCVVMxGDAWBgNVBAoMD1NTTCBDb3Jwb3Jh
dGlvbjElMCMGA1UEAwwcU1NMLmNvbSBUTFMgRUNDIFJvb3QgQ0EgMjAyMjB2MBAG
ByqGSM49AgEGBSuBBAAiA2IABEUpNXP6wrgjzhR9qLFNoFs27iosU8NgCTWyJGYm
acCzldZdkkAZDsalE3D07xJRKF3nzL35PIXBz5SQySvOkkJYWWf9lCcQZIxPBLFN
SeR7T5v15wj4A4j3p8OSSxlUgaNjMGEwDwYDVR0TAQH/BAUwAwEB/zAfBgNVHSME
GDAWgBSJjy+j6CugFFR781a4Jl9nOAuc0DAdBgNVHQ4EFgQUiY8vo+groBRUe/NW
uCZfZzgLnNAwDgYDVR0PAQH/BAQDAgGGMAoGCCqGSM49BAMDA2gAMGUCMFXjIlbp
15IkWE8elDIPDAI2wv2sdDJO4fscgIijzPvX6yv/N33w7deedWo1dlJF4AIxAMeN
b0Igj762TVntd00pxCAgRWSGOlDGxK0tk/UYfXLtqc/ErFc2KAhl3zx5Zn6g6g==
-----END CERTIFICATE-----
# Issuer: CN=Atos TrustedRoot Root CA ECC TLS 2021 O=Atos
# Subject: CN=Atos TrustedRoot Root CA ECC TLS 2021 O=Atos
# Label: "Atos TrustedRoot Root CA ECC TLS 2021"
# Serial: 81873346711060652204712539181482831616
# MD5 Fingerprint: 16:9f:ad:f1:70:ad:79:d6:ed:29:b4:d1:c5:79:70:a8
# SHA1 Fingerprint: 9e:bc:75:10:42:b3:02:f3:81:f4:f7:30:62:d4:8f:c3:a7:51:b2:dd
# SHA256 Fingerprint: b2:fa:e5:3e:14:cc:d7:ab:92:12:06:47:01:ae:27:9c:1d:89:88:fa:cb:77:5f:a8:a0:08:91:4e:66:39:88:a8
-----BEGIN CERTIFICATE-----
MIICFTCCAZugAwIBAgIQPZg7pmY9kGP3fiZXOATvADAKBggqhkjOPQQDAzBMMS4w
LAYDVQQDDCVBdG9zIFRydXN0ZWRSb290IFJvb3QgQ0EgRUNDIFRMUyAyMDIxMQ0w
CwYDVQQKDARBdG9zMQswCQYDVQQGEwJERTAeFw0yMTA0MjIwOTI2MjNaFw00MTA0
MTcwOTI2MjJaMEwxLjAsBgNVBAMMJUF0b3MgVHJ1c3RlZFJvb3QgUm9vdCBDQSBF
Q0MgVExTIDIwMjExDTALBgNVBAoMBEF0b3MxCzAJBgNVBAYTAkRFMHYwEAYHKoZI
zj0CAQYFK4EEACIDYgAEloZYKDcKZ9Cg3iQZGeHkBQcfl+3oZIK59sRxUM6KDP/X
tXa7oWyTbIOiaG6l2b4siJVBzV3dscqDY4PMwL502eCdpO5KTlbgmClBk1IQ1SQ4
AjJn8ZQSb+/Xxd4u/RmAo0IwQDAPBgNVHRMBAf8EBTADAQH/MB0GA1UdDgQWBBR2
KCXWfeBmmnoJsmo7jjPXNtNPojAOBgNVHQ8BAf8EBAMCAYYwCgYIKoZIzj0EAwMD
aAAwZQIwW5kp85wxtolrbNa9d+F851F+uDrNozZffPc8dz7kUK2o59JZDCaOMDtu
CCrCp1rIAjEAmeMM56PDr9NJLkaCI2ZdyQAUEv049OGYa3cpetskz2VAv9LcjBHo
9H1/IISpQuQo
-----END CERTIFICATE-----
# Issuer: CN=Atos TrustedRoot Root CA RSA TLS 2021 O=Atos
# Subject: CN=Atos TrustedRoot Root CA RSA TLS 2021 O=Atos
# Label: "Atos TrustedRoot Root CA RSA TLS 2021"
# Serial: 111436099570196163832749341232207667876
# MD5 Fingerprint: d4:d3:46:b8:9a:c0:9c:76:5d:9e:3a:c3:b9:99:31:d2
# SHA1 Fingerprint: 18:52:3b:0d:06:37:e4:d6:3a:df:23:e4:98:fb:5b:16:fb:86:74:48
# SHA256 Fingerprint: 81:a9:08:8e:a5:9f:b3:64:c5:48:a6:f8:55:59:09:9b:6f:04:05:ef:bf:18:e5:32:4e:c9:f4:57:ba:00:11:2f
-----BEGIN CERTIFICATE-----
MIIFZDCCA0ygAwIBAgIQU9XP5hmTC/srBRLYwiqipDANBgkqhkiG9w0BAQwFADBM
MS4wLAYDVQQDDCVBdG9zIFRydXN0ZWRSb290IFJvb3QgQ0EgUlNBIFRMUyAyMDIx
MQ0wCwYDVQQKDARBdG9zMQswCQYDVQQGEwJERTAeFw0yMTA0MjIwOTIxMTBaFw00
MTA0MTcwOTIxMDlaMEwxLjAsBgNVBAMMJUF0b3MgVHJ1c3RlZFJvb3QgUm9vdCBD
QSBSU0EgVExTIDIwMjExDTALBgNVBAoMBEF0b3MxCzAJBgNVBAYTAkRFMIICIjAN
BgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEAtoAOxHm9BYx9sKOdTSJNy/BBl01Z
4NH+VoyX8te9j2y3I49f1cTYQcvyAh5x5en2XssIKl4w8i1mx4QbZFc4nXUtVsYv
Ye+W/CBGvevUez8/fEc4BKkbqlLfEzfTFRVOvV98r61jx3ncCHvVoOX3W3WsgFWZ
kmGbzSoXfduP9LVq6hdKZChmFSlsAvFr1bqjM9xaZ6cF4r9lthawEO3NUDPJcFDs
GY6wx/J0W2tExn2WuZgIWWbeKQGb9Cpt0xU6kGpn8bRrZtkh68rZYnxGEFzedUln
nkL5/nWpo63/dgpnQOPF943HhZpZnmKaau1Fh5hnstVKPNe0OwANwI8f4UDErmwh
3El+fsqyjW22v5MvoVw+j8rtgI5Y4dtXz4U2OLJxpAmMkokIiEjxQGMYsluMWuPD
0xeqqxmjLBvk1cbiZnrXghmmOxYsL3GHX0WelXOTwkKBIROW1527k2gV+p2kHYzy
geBYBr3JtuP2iV2J+axEoctr+hbxx1A9JNr3w+SH1VbxT5Aw+kUJWdo0zuATHAR8
ANSbhqRAvNncTFd+rrcztl524WWLZt+NyteYr842mIycg5kDcPOvdO3GDjbnvezB
c6eUWsuSZIKmAMFwoW4sKeFYV+xafJlrJaSQOoD0IJ2azsct+bJLKZWD6TWNp0lI
pw9MGZHQ9b8Q4HECAwEAAaNCMEAwDwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4EFgQU
dEmZ0f+0emhFdcN+tNzMzjkz2ggwDgYDVR0PAQH/BAQDAgGGMA0GCSqGSIb3DQEB
DAUAA4ICAQAjQ1MkYlxt/T7Cz1UAbMVWiLkO3TriJQ2VSpfKgInuKs1l+NsW4AmS
4BjHeJi78+xCUvuppILXTdiK/ORO/auQxDh1MoSf/7OwKwIzNsAQkG8dnK/haZPs
o0UvFJ/1TCplQ3IM98P4lYsU84UgYt1UU90s3BiVaU+DR3BAM1h3Egyi61IxHkzJ
qM7F78PRreBrAwA0JrRUITWXAdxfG/F851X6LWh3e9NpzNMOa7pNdkTWwhWaJuyw
xfW70Xp0wmzNxbVe9kzmWy2B27O3Opee7c9GslA9hGCZcbUztVdF5kJHdWoOsAgM
rr3e97sPWD2PAzHoPYJQyi9eDF20l74gNAf0xBLh7tew2VktafcxBPTy+av5EzH4
AXcOPUIjJsyacmdRIXrMPIWo6iFqO9taPKU0nprALN+AnCng33eU0aKAQv9qTFsR
0PXNor6uzFFcw9VUewyu1rkGd4Di7wcaaMxZUa1+XGdrudviB0JbuAEFWDlN5LuY
o7Ey7Nmj1m+UI/87tyll5gfp77YZ6ufCOB0yiJA8EytuzO+rdwY0d4RPcuSBhPm5
dDTedk+SKlOxJTnbPP/lPqYO5Wue/9vsL3SD3460s6neFE3/MaNFcyT6lSnMEpcE
oji2jbDwN/zIIX8/syQbPYtuzE2wFg2WHYMfRsCbvUOZ58SWLs5fyQ==
-----END CERTIFICATE-----

View file

@ -21,7 +21,7 @@ at <https://github.com/Ousret/charset_normalizer>.
"""
import logging
from .api import from_bytes, from_fp, from_path
from .api import from_bytes, from_fp, from_path, is_binary
from .legacy import detect
from .models import CharsetMatch, CharsetMatches
from .utils import set_logging_handler
@ -31,6 +31,7 @@ __all__ = (
"from_fp",
"from_path",
"from_bytes",
"is_binary",
"detect",
"CharsetMatch",
"CharsetMatches",

View file

@ -1,6 +1,6 @@
import logging
from os import PathLike
from typing import Any, BinaryIO, List, Optional, Set
from typing import BinaryIO, List, Optional, Set, Union
from .cd import (
coherence_ratio,
@ -31,7 +31,7 @@ explain_handler.setFormatter(
def from_bytes(
sequences: bytes,
sequences: Union[bytes, bytearray],
steps: int = 5,
chunk_size: int = 512,
threshold: float = 0.2,
@ -40,6 +40,7 @@ def from_bytes(
preemptive_behaviour: bool = True,
explain: bool = False,
language_threshold: float = 0.1,
enable_fallback: bool = True,
) -> CharsetMatches:
"""
Given a raw bytes sequence, return the best possibles charset usable to render str objects.
@ -361,7 +362,8 @@ def from_bytes(
)
# Preparing those fallbacks in case we got nothing.
if (
encoding_iana in ["ascii", "utf_8", specified_encoding]
enable_fallback
and encoding_iana in ["ascii", "utf_8", specified_encoding]
and not lazy_str_hard_failure
):
fallback_entry = CharsetMatch(
@ -507,6 +509,7 @@ def from_fp(
preemptive_behaviour: bool = True,
explain: bool = False,
language_threshold: float = 0.1,
enable_fallback: bool = True,
) -> CharsetMatches:
"""
Same thing than the function from_bytes but using a file pointer that is already ready.
@ -522,11 +525,12 @@ def from_fp(
preemptive_behaviour,
explain,
language_threshold,
enable_fallback,
)
def from_path(
path: "PathLike[Any]",
path: Union[str, bytes, PathLike], # type: ignore[type-arg]
steps: int = 5,
chunk_size: int = 512,
threshold: float = 0.20,
@ -535,6 +539,7 @@ def from_path(
preemptive_behaviour: bool = True,
explain: bool = False,
language_threshold: float = 0.1,
enable_fallback: bool = True,
) -> CharsetMatches:
"""
Same thing than the function from_bytes but with one extra step. Opening and reading given file path in binary mode.
@ -551,4 +556,71 @@ def from_path(
preemptive_behaviour,
explain,
language_threshold,
enable_fallback,
)
def is_binary(
    fp_or_path_or_payload: Union[PathLike, str, BinaryIO, bytes],  # type: ignore[type-arg]
    steps: int = 5,
    chunk_size: int = 512,
    threshold: float = 0.20,
    cp_isolation: Optional[List[str]] = None,
    cp_exclusion: Optional[List[str]] = None,
    preemptive_behaviour: bool = True,
    explain: bool = False,
    language_threshold: float = 0.1,
    enable_fallback: bool = False,
) -> bool:
    """
    Tell whether the given input (a path, a bytes payload, or an open binary
    file) looks like binary content, i.e. content that is not text.

    The input is run through the matching detector (from_path, from_bytes or
    from_fp) with the keyword arguments given here; the input is reported as
    binary when that detector returns no guess at all. Unlike the detectors
    themselves, enable_fallback defaults to False here so that
    ASCII-compatible content that is unlikely to be a string is not accepted
    through a fallback match.
    """
    # Every detector takes the same tuning options, so collect them once.
    detection_options = dict(
        steps=steps,
        chunk_size=chunk_size,
        threshold=threshold,
        cp_isolation=cp_isolation,
        cp_exclusion=cp_exclusion,
        preemptive_behaviour=preemptive_behaviour,
        explain=explain,
        language_threshold=language_threshold,
        enable_fallback=enable_fallback,
    )

    # Pick the detector from the kind of input: path-like, in-memory payload,
    # or (anything else) an already opened file pointer.
    if isinstance(fp_or_path_or_payload, (str, PathLike)):
        detector = from_path
    elif isinstance(fp_or_path_or_payload, (bytes, bytearray)):
        detector = from_bytes
    else:
        detector = from_fp

    # An empty result means no charset could plausibly decode the content.
    return not detector(fp_or_path_or_payload, **detection_options)

View file

@ -294,12 +294,23 @@ class SuperWeirdWordPlugin(MessDetectorPlugin):
if buffer_length >= 4:
if self._buffer_accent_count / buffer_length > 0.34:
self._is_current_word_bad = True
# Word/Buffer ending with a upper case accentuated letter are so rare,
# Word/Buffer ending with an upper case accentuated letter are so rare,
# that we will consider them all as suspicious. Same weight as foreign_long suspicious.
if is_accentuated(self._buffer[-1]) and self._buffer[-1].isupper():
self._foreign_long_count += 1
self._is_current_word_bad = True
if buffer_length >= 24 and self._foreign_long_watch:
camel_case_dst = [
i
for c, i in zip(self._buffer, range(0, buffer_length))
if c.isupper()
]
probable_camel_cased: bool = False
if camel_case_dst and (len(camel_case_dst) / buffer_length <= 0.3):
probable_camel_cased = True
if not probable_camel_cased:
self._foreign_long_count += 1
self._is_current_word_bad = True

View file

@ -120,12 +120,12 @@ def is_emoticon(character: str) -> bool:
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_separator(character: str) -> bool:
if character.isspace() or character in {"", "+", ",", ";", "<", ">"}:
if character.isspace() or character in {"", "+", "<", ">"}:
return True
character_category: str = unicodedata.category(character)
return "Z" in character_category
return "Z" in character_category or character_category in {"Po", "Pd", "Pc"}
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)

View file

@ -2,5 +2,5 @@
Expose version
"""
__version__ = "3.1.0"
__version__ = "3.2.0"
VERSION = __version__.split(".")

View file

@ -6,7 +6,7 @@ __title__ = "packaging"
__summary__ = "Core utilities for Python packages"
__uri__ = "https://github.com/pypa/packaging"
__version__ = "23.0"
__version__ = "23.1"
__author__ = "Donald Stufft and individual contributors"
__email__ = "donald@stufft.io"

View file

@ -14,6 +14,8 @@ EF_ARM_ABI_VER5 = 0x05000000
EF_ARM_ABI_FLOAT_HARD = 0x00000400
# `os.PathLike` not a generic type until Python 3.9, so sticking with `str`
# as the type for `path` until then.
@contextlib.contextmanager
def _parse_elf(path: str) -> Generator[Optional[ELFFile], None, None]:
try:

View file

@ -163,7 +163,11 @@ def _parse_extras(tokenizer: Tokenizer) -> List[str]:
if not tokenizer.check("LEFT_BRACKET", peek=True):
return []
with tokenizer.enclosing_tokens("LEFT_BRACKET", "RIGHT_BRACKET"):
with tokenizer.enclosing_tokens(
"LEFT_BRACKET",
"RIGHT_BRACKET",
around="extras",
):
tokenizer.consume("WS")
extras = _parse_extras_list(tokenizer)
tokenizer.consume("WS")
@ -203,7 +207,11 @@ def _parse_specifier(tokenizer: Tokenizer) -> str:
specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS
| WS? version_many WS?
"""
with tokenizer.enclosing_tokens("LEFT_PARENTHESIS", "RIGHT_PARENTHESIS"):
with tokenizer.enclosing_tokens(
"LEFT_PARENTHESIS",
"RIGHT_PARENTHESIS",
around="version specifier",
):
tokenizer.consume("WS")
parsed_specifiers = _parse_version_many(tokenizer)
tokenizer.consume("WS")
@ -217,7 +225,20 @@ def _parse_version_many(tokenizer: Tokenizer) -> str:
"""
parsed_specifiers = ""
while tokenizer.check("SPECIFIER"):
span_start = tokenizer.position
parsed_specifiers += tokenizer.read().text
if tokenizer.check("VERSION_PREFIX_TRAIL", peek=True):
tokenizer.raise_syntax_error(
".* suffix can only be used with `==` or `!=` operators",
span_start=span_start,
span_end=tokenizer.position + 1,
)
if tokenizer.check("VERSION_LOCAL_LABEL_TRAIL", peek=True):
tokenizer.raise_syntax_error(
"Local version label can only be used with `==` or `!=` operators",
span_start=span_start,
span_end=tokenizer.position,
)
tokenizer.consume("WS")
if not tokenizer.check("COMMA"):
break
@ -254,7 +275,11 @@ def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom:
tokenizer.consume("WS")
if tokenizer.check("LEFT_PARENTHESIS", peek=True):
with tokenizer.enclosing_tokens("LEFT_PARENTHESIS", "RIGHT_PARENTHESIS"):
with tokenizer.enclosing_tokens(
"LEFT_PARENTHESIS",
"RIGHT_PARENTHESIS",
around="marker expression",
):
tokenizer.consume("WS")
marker: MarkerAtom = _parse_marker(tokenizer)
tokenizer.consume("WS")

View file

@ -78,6 +78,8 @@ DEFAULT_RULES: "Dict[str, Union[str, re.Pattern[str]]]" = {
"AT": r"\@",
"URL": r"[^ \t]+",
"IDENTIFIER": r"\b[a-zA-Z0-9][a-zA-Z0-9._-]*\b",
"VERSION_PREFIX_TRAIL": r"\.\*",
"VERSION_LOCAL_LABEL_TRAIL": r"\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*",
"WS": r"[ \t]+",
"END": r"$",
}
@ -167,21 +169,23 @@ class Tokenizer:
)
@contextlib.contextmanager
def enclosing_tokens(self, open_token: str, close_token: str) -> Iterator[bool]:
def enclosing_tokens(
self, open_token: str, close_token: str, *, around: str
) -> Iterator[None]:
if self.check(open_token):
open_position = self.position
self.read()
else:
open_position = None
yield open_position is not None
yield
if open_position is None:
return
if not self.check(close_token):
self.raise_syntax_error(
f"Expected closing {close_token}",
f"Expected matching {close_token} for {open_token}, after {around}",
span_start=open_position,
)

View file

@ -8,7 +8,14 @@ import platform
import sys
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
from ._parser import MarkerAtom, MarkerList, Op, Value, Variable, parse_marker
from ._parser import (
MarkerAtom,
MarkerList,
Op,
Value,
Variable,
parse_marker as _parse_marker,
)
from ._tokenizer import ParserSyntaxError
from .specifiers import InvalidSpecifier, Specifier
from .utils import canonicalize_name
@ -189,7 +196,7 @@ class Marker:
# packaging.requirements.Requirement. If any additional logic is
# added here, make sure to mirror/adapt Requirement.
try:
self._markers = _normalize_extra_values(parse_marker(marker))
self._markers = _normalize_extra_values(_parse_marker(marker))
# The attribute `_markers` can be described in terms of a recursive type:
# MarkerList = List[Union[Tuple[Node, ...], str, MarkerList]]
#

408
lib/packaging/metadata.py Normal file
View file

@ -0,0 +1,408 @@
import email.feedparser
import email.header
import email.message
import email.parser
import email.policy
import sys
import typing
from typing import Dict, List, Optional, Tuple, Union, cast
if sys.version_info >= (3, 8): # pragma: no cover
from typing import TypedDict
else: # pragma: no cover
if typing.TYPE_CHECKING:
from typing_extensions import TypedDict
else:
try:
from typing_extensions import TypedDict
except ImportError:
class TypedDict:
def __init_subclass__(*_args, **_kwargs):
pass
# The RawMetadata class attempts to make as few assumptions about the underlying
# serialization formats as possible. The idea is that as long as a serialization
# format offers some very basic primitives in *some* way, then we can support
# serializing to and from that format.
class RawMetadata(TypedDict, total=False):
    """A dictionary of raw core metadata.

    Each field in core metadata maps to a key of this dictionary (when data is
    provided). The key is lower-case and underscores are used instead of dashes
    compared to the equivalent core metadata field. Any core metadata field that
    can be specified multiple times or can hold multiple values in a single
    field have a key with a plural name.

    Core metadata fields that can be specified multiple times are stored as a
    list or dict depending on which is appropriate for the field. Any fields
    which hold multiple values in a single field are stored as a list.
    """

    # Metadata 1.0 - PEP 241
    metadata_version: str
    name: str
    version: str
    platforms: List[str]
    summary: str
    description: str
    keywords: List[str]
    home_page: str
    author: str
    author_email: str
    license: str

    # Metadata 1.1 - PEP 314
    supported_platforms: List[str]
    download_url: str
    classifiers: List[str]
    requires: List[str]
    provides: List[str]
    obsoletes: List[str]

    # Metadata 1.2 - PEP 345
    maintainer: str
    maintainer_email: str
    requires_dist: List[str]
    provides_dist: List[str]
    obsoletes_dist: List[str]
    requires_python: str
    requires_external: List[str]
    project_urls: Dict[str, str]

    # Metadata 2.0
    # PEP 426 attempted to completely revamp the metadata format
    # but got stuck without ever being able to build consensus on
    # it and ultimately ended up withdrawn.
    #
    # However, a number of tools had started emitting METADATA with
    # `2.0` Metadata-Version, so for historical reasons, this version
    # was skipped.

    # Metadata 2.1 - PEP 566
    description_content_type: str
    provides_extra: List[str]

    # Metadata 2.2 - PEP 643
    dynamic: List[str]

    # Metadata 2.3 - PEP 685
    # No new fields were added in PEP 685, just some edge cases were
    # tightened up to provide better interoperability.
# RawMetadata keys whose value is a single string. parse_email() only accepts
# such a field when exactly one header value was found, and then stores that
# one value unwrapped from its list.
_STRING_FIELDS = {
    "author",
    "author_email",
    "description",
    "description_content_type",
    "download_url",
    "home_page",
    "license",
    "maintainer",
    "maintainer_email",
    "metadata_version",
    "name",
    "requires_python",
    "summary",
    "version",
}

# RawMetadata keys whose value is a list of strings. parse_email() stores the
# list of collected header values for these as-is.
_LIST_STRING_FIELDS = {
    "classifiers",
    "dynamic",
    "obsoletes",
    "obsoletes_dist",
    "platforms",
    "provides",
    "provides_dist",
    "provides_extra",
    "requires",
    "requires_dist",
    "requires_external",
    "supported_platforms",
}
def _parse_keywords(data: str) -> List[str]:
"""Split a string of comma-separate keyboards into a list of keywords."""
return [k.strip() for k in data.split(",")]
def _parse_project_urls(data: List[str]) -> Dict[str, str]:
"""Parse a list of label/URL string pairings separated by a comma."""
urls = {}
for pair in data:
# Our logic is slightly tricky here as we want to try and do
# *something* reasonable with malformed data.
#
# The main thing that we have to worry about, is data that does
# not have a ',' at all to split the label from the Value. There
# isn't a singular right answer here, and we will fail validation
# later on (if the caller is validating) so it doesn't *really*
# matter, but since the missing value has to be an empty str
# and our return value is dict[str, str], if we let the key
# be the missing value, then they'd have multiple '' values that
# overwrite each other in a accumulating dict.
#
# The other potentional issue is that it's possible to have the
# same label multiple times in the metadata, with no solid "right"
# answer with what to do in that case. As such, we'll do the only
# thing we can, which is treat the field as unparseable and add it
# to our list of unparsed fields.
parts = [p.strip() for p in pair.split(",", 1)]
parts.extend([""] * (max(0, 2 - len(parts)))) # Ensure 2 items
# TODO: The spec doesn't say anything about if the keys should be
# considered case sensitive or not... logically they should
# be case-preserving and case-insensitive, but doing that
# would open up more cases where we might have duplicate
# entries.
label, url = parts
if label in urls:
# The label already exists in our set of urls, so this field
# is unparseable, and we can just add the whole thing to our
# unparseable data and stop processing it.
raise KeyError("duplicate labels in project urls")
urls[label] = url
return urls
def _get_payload(msg: email.message.Message, source: Union[bytes, str]) -> str:
"""Get the body of the message."""
# If our source is a str, then our caller has managed encodings for us,
# and we don't need to deal with it.
if isinstance(source, str):
payload: str = msg.get_payload()
return payload
# If our source is a bytes, then we're managing the encoding and we need
# to deal with it.
else:
bpayload: bytes = msg.get_payload(decode=True)
try:
return bpayload.decode("utf8", "strict")
except UnicodeDecodeError:
raise ValueError("payload in an invalid encoding")
# The various parse_FORMAT functions here are intended to be as lenient as
# possible in their parsing, while still returning a correctly typed
# RawMetadata.
#
# To aid in this, we also generally want to do as little touching of the
# data as possible, except where there are possibly some historic holdovers
# that make valid data awkward to work with.
#
# While this is a lower level, intermediate format than our ``Metadata``
# class, some light touch ups can make a massive difference in usability.
# Map METADATA fields to RawMetadata.
#
# Keys are the email header names in lower case (parse_email() lower-cases
# each header name before looking it up here); values are the corresponding
# RawMetadata keys, with underscores instead of dashes and a plural name for
# the fields that can hold several values.
_EMAIL_TO_RAW_MAPPING = {
    "author": "author",
    "author-email": "author_email",
    "classifier": "classifiers",
    "description": "description",
    "description-content-type": "description_content_type",
    "download-url": "download_url",
    "dynamic": "dynamic",
    "home-page": "home_page",
    "keywords": "keywords",
    "license": "license",
    "maintainer": "maintainer",
    "maintainer-email": "maintainer_email",
    "metadata-version": "metadata_version",
    "name": "name",
    "obsoletes": "obsoletes",
    "obsoletes-dist": "obsoletes_dist",
    "platform": "platforms",
    "project-url": "project_urls",
    "provides": "provides",
    "provides-dist": "provides_dist",
    "provides-extra": "provides_extra",
    "requires": "requires",
    "requires-dist": "requires_dist",
    "requires-external": "requires_external",
    "requires-python": "requires_python",
    "summary": "summary",
    "supported-platform": "supported_platforms",
    "version": "version",
}
def parse_email(data: Union[bytes, str]) -> Tuple[RawMetadata, Dict[str, List[str]]]:
    """Parse a distribution's metadata.

    This function returns a two-item tuple of dicts. The first dict is of
    recognized fields from the core metadata specification. Fields that can be
    parsed and translated into Python's built-in types are converted
    appropriately. All other fields are left as-is. Fields that are allowed to
    appear multiple times are stored as lists.

    The second dict contains all other fields from the metadata. This includes
    any unrecognized fields. It also includes any fields which are expected to
    be parsed into a built-in type but were not formatted appropriately. Finally,
    any fields that are expected to appear only once but are repeated are
    included in this dict.
    """
    raw: Dict[str, Union[str, List[str], Dict[str, str]]] = {}
    unparsed: Dict[str, List[str]] = {}

    # Use the parser that matches the input type; both use the compat32 policy.
    if isinstance(data, str):
        parsed = email.parser.Parser(policy=email.policy.compat32).parsestr(data)
    else:
        parsed = email.parser.BytesParser(policy=email.policy.compat32).parsebytes(data)

    # We have to wrap parsed.keys() in a set, because in the case of multiple
    # values for a key (a list), the key will appear multiple times in the
    # list of keys, but we're avoiding that by using get_all().
    for name in frozenset(parsed.keys()):
        # Header names in RFC are case insensitive, so we'll normalize to all
        # lower case to make comparisons easier.
        name = name.lower()

        # We use get_all() here, even for fields that aren't multiple use,
        # because otherwise someone could have e.g. two Name fields, and we
        # would just silently ignore it rather than doing something about it.
        headers = parsed.get_all(name)

        # The way the email module works when parsing bytes is that it
        # unconditionally decodes the bytes as ascii using the surrogateescape
        # handler. When you pull that data back out (such as with get_all() ),
        # it looks to see if the str has any surrogate escapes, and if it does
        # it wraps it in a Header object instead of returning the string.
        #
        # As such, we'll look for those Header objects, and fix up the encoding.
        value = []
        # Flag if we have run into any issues processing the headers, thus
        # signalling that the data belongs in 'unparsed'.
        valid_encoding = True
        for h in headers:
            # It's unclear if this can return more types than just a Header or
            # a str, so we'll just assert here to make sure.
            assert isinstance(h, (email.header.Header, str))

            # If it's a header object, we need to do our little dance to get
            # the real data out of it. In cases where there is invalid data
            # we're going to end up with mojibake, but there's no obvious, good
            # way around that without reimplementing parts of the Header object
            # ourselves.
            #
            # That should be fine since, if mojibake happens, this key is
            # going into the unparsed dict anyways.
            if isinstance(h, email.header.Header):
                # The Header object stores its data as chunks, and each chunk
                # can be independently encoded, so we'll need to check each
                # of them.
                chunks: List[Tuple[bytes, Optional[str]]] = []
                for bin, encoding in email.header.decode_header(h):
                    try:
                        bin.decode("utf8", "strict")
                    except UnicodeDecodeError:
                        # Enable mojibake.
                        encoding = "latin1"
                        valid_encoding = False
                    else:
                        encoding = "utf8"
                    chunks.append((bin, encoding))

                # Turn our chunks back into a Header object, then let that
                # Header object do the right thing to turn them into a
                # string for us.
                value.append(str(email.header.make_header(chunks)))
            # This is already a string, so just add it.
            else:
                value.append(h)

        # We've processed all of our values to get them into a list of str,
        # but we may have mojibake data, in which case this is an unparsed
        # field.
        if not valid_encoding:
            unparsed[name] = value
            continue

        raw_name = _EMAIL_TO_RAW_MAPPING.get(name)
        if raw_name is None:
            # This is a bit of a weird situation, we've encountered a key that
            # we don't know what it means, so we don't know whether it's meant
            # to be a list or not.
            #
            # Since we can't really tell one way or another, we'll just leave it
            # as a list, even though it may be a single item list, because that's
            # what makes the most sense for email headers.
            unparsed[name] = value
            continue

        # If this is one of our string fields, then we'll check to see if our
        # value is a list of a single item. If it is then we'll assume that
        # it was emitted as a single string, and unwrap the str from inside
        # the list.
        #
        # If it's any other kind of data, then we haven't the faintest clue
        # what we should parse it as, and we have to just add it to our list
        # of unparsed stuff.
        if raw_name in _STRING_FIELDS and len(value) == 1:
            raw[raw_name] = value[0]
        # If this is one of our list of string fields, then we can just assign
        # the value, since email *only* has strings, and our get_all() call
        # above ensures that this is a list.
        elif raw_name in _LIST_STRING_FIELDS:
            raw[raw_name] = value
        # Special Case: Keywords
        # The keywords field is implemented in the metadata spec as a str,
        # but it conceptually is a list of strings, and is serialized using
        # ", ".join(keywords), so we'll do some light data massaging to turn
        # this into what it logically is.
        elif raw_name == "keywords" and len(value) == 1:
            raw[raw_name] = _parse_keywords(value[0])
        # Special Case: Project-URL
        # The project urls is implemented in the metadata spec as a list of
        # specially-formatted strings that represent a key and a value, which
        # is fundamentally a mapping, however the email format doesn't support
        # mappings in a sane way, so it was crammed into a list of strings
        # instead.
        #
        # We will do a little light data massaging to turn this into a map as
        # it logically should be.
        elif raw_name == "project_urls":
            try:
                raw[raw_name] = _parse_project_urls(value)
            except KeyError:
                # _parse_project_urls signals duplicate labels with KeyError.
                unparsed[name] = value
        # Nothing that we've done has managed to parse this, so we'll just
        # throw it in our unparseable data and move on.
        else:
            unparsed[name] = value

    # We need to support getting the Description from the message payload in
    # addition to getting it from the headers. This does mean, though, there
    # is the possibility of it being set both ways, in which case we put both
    # in 'unparsed' since we don't know which is right.
    try:
        payload = _get_payload(parsed, data)
    except ValueError:
        # The body could not be decoded; keep it, undecoded, under 'unparsed'.
        unparsed.setdefault("description", []).append(
            parsed.get_payload(decode=isinstance(data, bytes))
        )
    else:
        if payload:
            # Check to see if we've already got a description, if so then both
            # it, and this body move to unparseable.
            if "description" in raw:
                description_header = cast(str, raw.pop("description"))
                unparsed.setdefault("description", []).extend(
                    [description_header, payload]
                )
            elif "description" in unparsed:
                unparsed["description"].append(payload)
            else:
                raw["description"] = payload

    # We need to cast our `raw` to a metadata, because a TypedDict only supports
    # literal key names, but we're computing our key names on purpose, but the
    # way this function is implemented, our `TypedDict` can only have valid key
    # names.
    return cast(RawMetadata, raw), unparsed

View file

@ -5,7 +5,7 @@
import urllib.parse
from typing import Any, List, Optional, Set
from ._parser import parse_requirement
from ._parser import parse_requirement as _parse_requirement
from ._tokenizer import ParserSyntaxError
from .markers import Marker, _normalize_extra_values
from .specifiers import SpecifierSet
@ -32,7 +32,7 @@ class Requirement:
def __init__(self, requirement_string: str) -> None:
try:
parsed = parse_requirement(requirement_string)
parsed = _parse_requirement(requirement_string)
except ParserSyntaxError as e:
raise InvalidRequirement(str(e)) from e

View file

@ -252,7 +252,8 @@ class Specifier(BaseSpecifier):
# Store whether or not this Specifier should accept prereleases
self._prereleases = prereleases
@property
# https://github.com/python/mypy/pull/13475#pullrequestreview-1079784515
@property # type: ignore[override]
def prereleases(self) -> bool:
# If there is an explicit prereleases set for this, then we'll just
# blindly use that.
@ -398,7 +399,9 @@ class Specifier(BaseSpecifier):
# We need special logic to handle prefix matching
if spec.endswith(".*"):
# In the case of prefix matching we want to ignore local segment.
normalized_prospective = canonicalize_version(prospective.public)
normalized_prospective = canonicalize_version(
prospective.public, strip_trailing_zero=False
)
# Get the normalized version string ignoring the trailing .*
normalized_spec = canonicalize_version(spec[:-2], strip_trailing_zero=False)
# Split the spec out by dots, and pretend that there is an implicit

View file

@ -111,7 +111,7 @@ def parse_tag(tag: str) -> FrozenSet[Tag]:
def _get_config_var(name: str, warn: bool = False) -> Union[int, str, None]:
value = sysconfig.get_config_var(name)
value: Union[int, str, None] = sysconfig.get_config_var(name)
if value is None and warn:
logger.debug(
"Config variable '%s' is unset, Python ABI tag may be incorrect", name
@ -120,7 +120,7 @@ def _get_config_var(name: str, warn: bool = False) -> Union[int, str, None]:
def _normalize_string(string: str) -> str:
return string.replace(".", "_").replace("-", "_")
return string.replace(".", "_").replace("-", "_").replace(" ", "_")
def _abi3_applies(python_version: PythonVersion) -> bool:

View file

@ -10,7 +10,7 @@
import collections
import itertools
import re
from typing import Callable, Optional, SupportsInt, Tuple, Union
from typing import Any, Callable, Optional, SupportsInt, Tuple, Union
from ._structures import Infinity, InfinityType, NegativeInfinity, NegativeInfinityType
@ -63,7 +63,7 @@ class InvalidVersion(ValueError):
class _BaseVersion:
_key: CmpKey
_key: Tuple[Any, ...]
def __hash__(self) -> int:
return hash(self._key)
@ -179,6 +179,7 @@ class Version(_BaseVersion):
"""
_regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE)
_key: CmpKey
def __init__(self, version: str) -> None:
"""Initialize a Version object.

View file

@ -66,10 +66,10 @@ def check_compatibility(urllib3_version, chardet_version, charset_normalizer_ver
# Check urllib3 for compatibility.
major, minor, patch = urllib3_version # noqa: F811
major, minor, patch = int(major), int(minor), int(patch)
# urllib3 >= 1.21.1, <= 1.26
assert major == 1
# urllib3 >= 1.21.1
assert major >= 1
if major == 1:
assert minor >= 21
assert minor <= 26
# Check charset_normalizer for compatibility.
if chardet_version:

View file

@ -5,8 +5,8 @@
__title__ = "requests"
__description__ = "Python HTTP for Humans."
__url__ = "https://requests.readthedocs.io"
__version__ = "2.28.2"
__build__ = 0x022802
__version__ = "2.31.0"
__build__ = 0x023100
__author__ = "Kenneth Reitz"
__author_email__ = "me@kennethreitz.org"
__license__ = "Apache 2.0"

View file

@ -14,9 +14,11 @@ _VALID_HEADER_NAME_RE_STR = re.compile(r"^[^:\s][^:\r\n]*$")
_VALID_HEADER_VALUE_RE_BYTE = re.compile(rb"^\S[^\r\n]*$|^$")
_VALID_HEADER_VALUE_RE_STR = re.compile(r"^\S[^\r\n]*$|^$")
_HEADER_VALIDATORS_STR = (_VALID_HEADER_NAME_RE_STR, _VALID_HEADER_VALUE_RE_STR)
_HEADER_VALIDATORS_BYTE = (_VALID_HEADER_NAME_RE_BYTE, _VALID_HEADER_VALUE_RE_BYTE)
HEADER_VALIDATORS = {
bytes: (_VALID_HEADER_NAME_RE_BYTE, _VALID_HEADER_VALUE_RE_BYTE),
str: (_VALID_HEADER_NAME_RE_STR, _VALID_HEADER_VALUE_RE_STR),
bytes: _HEADER_VALIDATORS_BYTE,
str: _HEADER_VALIDATORS_STR,
}

View file

@ -22,7 +22,6 @@ from urllib3.exceptions import ProxyError as _ProxyError
from urllib3.exceptions import ReadTimeoutError, ResponseError
from urllib3.exceptions import SSLError as _SSLError
from urllib3.poolmanager import PoolManager, proxy_from_url
from urllib3.response import HTTPResponse
from urllib3.util import Timeout as TimeoutSauce
from urllib3.util import parse_url
from urllib3.util.retry import Retry
@ -194,7 +193,6 @@ class HTTPAdapter(BaseAdapter):
num_pools=connections,
maxsize=maxsize,
block=block,
strict=True,
**pool_kwargs,
)
@ -485,7 +483,6 @@ class HTTPAdapter(BaseAdapter):
timeout = TimeoutSauce(connect=timeout, read=timeout)
try:
if not chunked:
resp = conn.urlopen(
method=request.method,
url=url,
@ -497,52 +494,9 @@ class HTTPAdapter(BaseAdapter):
decode_content=False,
retries=self.max_retries,
timeout=timeout,
chunked=chunked,
)
# Send the request.
else:
if hasattr(conn, "proxy_pool"):
conn = conn.proxy_pool
low_conn = conn._get_conn(timeout=DEFAULT_POOL_TIMEOUT)
try:
skip_host = "Host" in request.headers
low_conn.putrequest(
request.method,
url,
skip_accept_encoding=True,
skip_host=skip_host,
)
for header, value in request.headers.items():
low_conn.putheader(header, value)
low_conn.endheaders()
for i in request.body:
low_conn.send(hex(len(i))[2:].encode("utf-8"))
low_conn.send(b"\r\n")
low_conn.send(i)
low_conn.send(b"\r\n")
low_conn.send(b"0\r\n\r\n")
# Receive the response from the server
r = low_conn.getresponse()
resp = HTTPResponse.from_httplib(
r,
pool=conn,
connection=low_conn,
preload_content=False,
decode_content=False,
)
except Exception:
# If we hit any problems here, clean up the connection.
# Then, raise so that we can handle the actual exception.
low_conn.close()
raise
except (ProtocolError, OSError) as err:
raise ConnectionError(err, request=request)

View file

@ -106,7 +106,7 @@ def post(url, data=None, json=None, **kwargs):
:param url: URL for the new :class:`Request` object.
:param data: (optional) Dictionary, list of tuples, bytes, or file-like
object to send in the body of the :class:`Request`.
:param json: (optional) json data to send in the body of the :class:`Request`.
:param json: (optional) A JSON serializable Python object to send in the body of the :class:`Request`.
:param \*\*kwargs: Optional arguments that ``request`` takes.
:return: :class:`Response <Response>` object
:rtype: requests.Response
@ -121,7 +121,7 @@ def put(url, data=None, **kwargs):
:param url: URL for the new :class:`Request` object.
:param data: (optional) Dictionary, list of tuples, bytes, or file-like
object to send in the body of the :class:`Request`.
:param json: (optional) json data to send in the body of the :class:`Request`.
:param json: (optional) A JSON serializable Python object to send in the body of the :class:`Request`.
:param \*\*kwargs: Optional arguments that ``request`` takes.
:return: :class:`Response <Response>` object
:rtype: requests.Response
@ -136,7 +136,7 @@ def patch(url, data=None, **kwargs):
:param url: URL for the new :class:`Request` object.
:param data: (optional) Dictionary, list of tuples, bytes, or file-like
object to send in the body of the :class:`Request`.
:param json: (optional) json data to send in the body of the :class:`Request`.
:param json: (optional) A JSON serializable Python object to send in the body of the :class:`Request`.
:param \*\*kwargs: Optional arguments that ``request`` takes.
:return: :class:`Response <Response>` object
:rtype: requests.Response

View file

@ -324,7 +324,9 @@ class SessionRedirectMixin:
except KeyError:
username, password = None, None
if username and password:
# urllib3 handles proxy authorization for us in the standard adapter.
# Avoid appending this to TLS tunneled requests where it may be leaked.
if not scheme.startswith('https') and username and password:
headers["Proxy-Authorization"] = _basic_auth_str(username, password)
return new_proxies

View file

@ -25,7 +25,12 @@ from . import certs
from .__version__ import __version__
# to_native_string is unused here, but imported here for backwards compatibility
from ._internal_utils import HEADER_VALIDATORS, to_native_string # noqa: F401
from ._internal_utils import ( # noqa: F401
_HEADER_VALIDATORS_BYTE,
_HEADER_VALIDATORS_STR,
HEADER_VALIDATORS,
to_native_string,
)
from .compat import (
Mapping,
basestring,
@ -1031,20 +1036,23 @@ def check_header_validity(header):
:param header: tuple, in the format (name, value).
"""
name, value = header
_validate_header_part(header, name, 0)
_validate_header_part(header, value, 1)
for part in header:
if type(part) not in HEADER_VALIDATORS:
def _validate_header_part(header, header_part, header_validator_index):
if isinstance(header_part, str):
validator = _HEADER_VALIDATORS_STR[header_validator_index]
elif isinstance(header_part, bytes):
validator = _HEADER_VALIDATORS_BYTE[header_validator_index]
else:
raise InvalidHeader(
f"Header part ({part!r}) from {{{name!r}: {value!r}}} must be "
f"of type str or bytes, not {type(part)}"
f"Header part ({header_part!r}) from {header} "
f"must be of type str or bytes, not {type(header_part)}"
)
_validate_header_part(name, "name", HEADER_VALIDATORS[type(name)][0])
_validate_header_part(value, "value", HEADER_VALIDATORS[type(value)][1])
def _validate_header_part(header_part, header_kind, validator):
if not validator.match(header_part):
header_kind = "name" if header_validator_index == 0 else "value"
raise InvalidHeader(
f"Invalid leading whitespace, reserved character(s), or return"
f"character(s) in header {header_kind}: {header_part!r}"

View file

@ -118,7 +118,7 @@ Serializing multiple objects to JSON lines (newline-delimited JSON)::
"""
from __future__ import absolute_import
__version__ = '3.18.3'
__version__ = '3.19.1'
__all__ = [
'dump', 'dumps', 'load', 'loads',
'JSONDecoder', 'JSONDecodeError', 'JSONEncoder',
@ -149,28 +149,10 @@ def _import_c_make_encoder():
except ImportError:
return None
_default_encoder = JSONEncoder(
skipkeys=False,
ensure_ascii=True,
check_circular=True,
allow_nan=True,
indent=None,
separators=None,
encoding='utf-8',
default=None,
use_decimal=True,
namedtuple_as_object=True,
tuple_as_array=True,
iterable_as_array=False,
bigint_as_string=False,
item_sort_key=None,
for_json=False,
ignore_nan=False,
int_as_string_bitcount=None,
)
_default_encoder = JSONEncoder()
def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
allow_nan=True, cls=None, indent=None, separators=None,
allow_nan=False, cls=None, indent=None, separators=None,
encoding='utf-8', default=None, use_decimal=True,
namedtuple_as_object=True, tuple_as_array=True,
bigint_as_string=False, sort_keys=False, item_sort_key=None,
@ -187,10 +169,10 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
contain non-ASCII characters, so long as they do not need to be escaped
by JSON. When it is true, all non-ASCII characters are escaped.
If *allow_nan* is false, then it will be a ``ValueError`` to
serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``)
in strict compliance of the original JSON specification, instead of using
the JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). See
If *allow_nan* is true (default: ``False``), then out of range ``float``
values (``nan``, ``inf``, ``-inf``) will be serialized to
their JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``)
instead of raising a ValueError. See
*ignore_nan* for ECMA-262 compliant behavior.
If *indent* is a string, then JSON array elements and object members
@ -258,7 +240,7 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
"""
# cached encoder
if (not skipkeys and ensure_ascii and
check_circular and allow_nan and
check_circular and not allow_nan and
cls is None and indent is None and separators is None and
encoding == 'utf-8' and default is None and use_decimal
and namedtuple_as_object and tuple_as_array and not iterable_as_array
@ -292,7 +274,7 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
allow_nan=True, cls=None, indent=None, separators=None,
allow_nan=False, cls=None, indent=None, separators=None,
encoding='utf-8', default=None, use_decimal=True,
namedtuple_as_object=True, tuple_as_array=True,
bigint_as_string=False, sort_keys=False, item_sort_key=None,
@ -312,10 +294,11 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
for container types will be skipped and a circular reference will
result in an ``OverflowError`` (or worse).
If ``allow_nan`` is false, then it will be a ``ValueError`` to
serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in
strict compliance of the JSON specification, instead of using the
JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
If *allow_nan* is true (default: ``False``), then out of range ``float``
values (``nan``, ``inf``, ``-inf``) will be serialized to
their JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``)
instead of raising a ValueError. See
*ignore_nan* for ECMA-262 compliant behavior.
If ``indent`` is a string, then JSON array elements and object members
will be pretty-printed with a newline followed by that string repeated
@ -383,7 +366,7 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
"""
# cached encoder
if (not skipkeys and ensure_ascii and
check_circular and allow_nan and
check_circular and not allow_nan and
cls is None and indent is None and separators is None and
encoding == 'utf-8' and default is None and use_decimal
and namedtuple_as_object and tuple_as_array and not iterable_as_array
@ -412,14 +395,12 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
**kw).encode(obj)
_default_decoder = JSONDecoder(encoding=None, object_hook=None,
object_pairs_hook=None)
_default_decoder = JSONDecoder()
def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, object_pairs_hook=None,
use_decimal=False, namedtuple_as_object=True, tuple_as_array=True,
**kw):
use_decimal=False, allow_nan=False, **kw):
"""Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
a JSON document as `str` or `bytes`) to a Python object.
@ -451,14 +432,18 @@ def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
``int(num_str)``. This can be used to use another datatype or parser
for JSON integers (e.g. :class:`float`).
*parse_constant*, if specified, will be called with one of the
following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This
can be used to raise an exception if invalid JSON numbers are
encountered.
*allow_nan*, if True (default false), will allow the parser to
accept the non-standard floats ``NaN``, ``Infinity``, and ``-Infinity``
and enable the use of the deprecated *parse_constant*.
If *use_decimal* is true (default: ``False``) then it implies
parse_float=decimal.Decimal for parity with ``dump``.
*parse_constant*, if specified, will be
called with one of the following strings: ``'-Infinity'``,
``'Infinity'``, ``'NaN'``. It is not recommended to use this feature,
as it is rare to parse non-compliant JSON containing these values.
To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead
of subclassing whenever possible.
@ -468,12 +453,12 @@ def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
encoding=encoding, cls=cls, object_hook=object_hook,
parse_float=parse_float, parse_int=parse_int,
parse_constant=parse_constant, object_pairs_hook=object_pairs_hook,
use_decimal=use_decimal, **kw)
use_decimal=use_decimal, allow_nan=allow_nan, **kw)
def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, object_pairs_hook=None,
use_decimal=False, **kw):
use_decimal=False, allow_nan=False, **kw):
"""Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON
document) to a Python object.
@ -505,14 +490,18 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
``int(num_str)``. This can be used to use another datatype or parser
for JSON integers (e.g. :class:`float`).
*parse_constant*, if specified, will be called with one of the
following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This
can be used to raise an exception if invalid JSON numbers are
encountered.
*allow_nan*, if True (default false), will allow the parser to
accept the non-standard floats ``NaN``, ``Infinity``, and ``-Infinity``
and enable the use of the deprecated *parse_constant*.
If *use_decimal* is true (default: ``False``) then it implies
parse_float=decimal.Decimal for parity with ``dump``.
*parse_constant*, if specified, will be
called with one of the following strings: ``'-Infinity'``,
``'Infinity'``, ``'NaN'``. It is not recommended to use this feature,
as it is rare to parse non-compliant JSON containing these values.
To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead
of subclassing whenever possible.
@ -521,7 +510,7 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
if (cls is None and encoding is None and object_hook is None and
parse_int is None and parse_float is None and
parse_constant is None and object_pairs_hook is None
and not use_decimal and not kw):
and not use_decimal and not allow_nan and not kw):
return _default_decoder.decode(s)
if cls is None:
cls = JSONDecoder
@ -539,6 +528,8 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
if parse_float is not None:
raise TypeError("use_decimal=True implies parse_float=Decimal")
kw['parse_float'] = Decimal
if allow_nan:
kw['allow_nan'] = True
return cls(encoding=encoding, **kw).decode(s)
@ -560,22 +551,9 @@ def _toggle_speedups(enabled):
scan.make_scanner = scan.py_make_scanner
dec.make_scanner = scan.make_scanner
global _default_decoder
_default_decoder = JSONDecoder(
encoding=None,
object_hook=None,
object_pairs_hook=None,
)
_default_decoder = JSONDecoder()
global _default_encoder
_default_encoder = JSONEncoder(
skipkeys=False,
ensure_ascii=True,
check_circular=True,
allow_nan=True,
indent=None,
separators=None,
encoding='utf-8',
default=None,
)
_default_encoder = JSONEncoder()
def simple_first(kv):
"""Helper function to pass to item_sort_key to sort simple

View file

@ -46,9 +46,35 @@ BACKSLASH = {
DEFAULT_ENCODING = "utf-8"
if hasattr(sys, 'get_int_max_str_digits'):
    # This interpreter exposes the integer string conversion limit itself,
    # so the builtin is used unchanged.
    bounded_int = int
else:
    def bounded_int(s, INT_MAX_STR_DIGITS=4300):
        """Backport of the integer string length conversion limitation

        https://docs.python.org/3/library/stdtypes.html#int-max-str-digits

        Return ``int(s)`` after checking that *s* holds no more than
        *INT_MAX_STR_DIGITS* digits; raise ``ValueError`` otherwise.
        A single leading sign character is not counted towards the limit,
        in line with the documented behaviour of the builtin limit.
        """
        digits = len(s)
        # Only the digits are limited; "-" or "+" in front of them is free.
        if s[:1] in ('-', '+'):
            digits -= 1
        if digits > INT_MAX_STR_DIGITS:
            raise ValueError("Exceeds the limit (%s) for integer string conversion: value has %s digits" % (INT_MAX_STR_DIGITS, digits))
        return int(s)
def scan_four_digit_hex(s, end, _m=re.compile(r'^[0-9a-fA-F]{4}$').match):
    """Read the four hexadecimal digits found at s[end:end + 4].

    Returns a ``(value, next_index)`` pair where ``value`` is the integer
    the digits encode and ``next_index`` is ``end + 4``.

    Raises JSONDecodeError, reported at index ``end - 2``, when the slice
    is not exactly four hex digits.
    """
    digits = s[end:end + 4]
    if _m(digits):
        try:
            value = int(digits, 16)
        except ValueError:
            # Kept as a safety net; the pattern above already restricts
            # the slice to hex digits.
            raise JSONDecodeError("Invalid \\uXXXX escape sequence", s, end - 2)
        return value, end + 4
    raise JSONDecodeError("Invalid \\uXXXX escape sequence", s, end - 2)
def py_scanstring(s, end, encoding=None, strict=True,
_b=BACKSLASH, _m=STRINGCHUNK.match, _join=u''.join,
_PY3=PY3, _maxunicode=sys.maxunicode):
_PY3=PY3, _maxunicode=sys.maxunicode,
_scan_four_digit_hex=scan_four_digit_hex):
"""Scan the string s for a JSON string. End is the index of the
character in s after the quote that started the JSON string.
Unescapes all valid JSON string escape sequences and raises ValueError
@ -67,6 +93,7 @@ def py_scanstring(s, end, encoding=None, strict=True,
if chunk is None:
raise JSONDecodeError(
"Unterminated string starting at", s, begin)
prev_end = end
end = chunk.end()
content, terminator = chunk.groups()
# Content contains zero or more unescaped string characters
@ -81,7 +108,7 @@ def py_scanstring(s, end, encoding=None, strict=True,
elif terminator != '\\':
if strict:
msg = "Invalid control character %r at"
raise JSONDecodeError(msg, s, end)
raise JSONDecodeError(msg, s, prev_end)
else:
_append(terminator)
continue
@ -100,35 +127,18 @@ def py_scanstring(s, end, encoding=None, strict=True,
end += 1
else:
# Unicode escape sequence
msg = "Invalid \\uXXXX escape sequence"
esc = s[end + 1:end + 5]
escX = esc[1:2]
if len(esc) != 4 or escX == 'x' or escX == 'X':
raise JSONDecodeError(msg, s, end - 1)
try:
uni = int(esc, 16)
except ValueError:
raise JSONDecodeError(msg, s, end - 1)
if uni < 0 or uni > _maxunicode:
raise JSONDecodeError(msg, s, end - 1)
end += 5
uni, end = _scan_four_digit_hex(s, end + 1)
# Check for surrogate pair on UCS-4 systems
# Note that this will join high/low surrogate pairs
# but will also pass unpaired surrogates through
if (_maxunicode > 65535 and
uni & 0xfc00 == 0xd800 and
s[end:end + 2] == '\\u'):
esc2 = s[end + 2:end + 6]
escX = esc2[1:2]
if len(esc2) == 4 and not (escX == 'x' or escX == 'X'):
try:
uni2 = int(esc2, 16)
except ValueError:
raise JSONDecodeError(msg, s, end)
uni2, end2 = _scan_four_digit_hex(s, end + 2)
if uni2 & 0xfc00 == 0xdc00:
uni = 0x10000 + (((uni - 0xd800) << 10) |
(uni2 - 0xdc00))
end += 6
end = end2
char = unichr(uni)
# Append the unescaped character
_append(char)
@ -169,7 +179,7 @@ def JSONObject(state, encoding, strict, scan_once, object_hook,
return pairs, end + 1
elif nextchar != '"':
raise JSONDecodeError(
"Expecting property name enclosed in double quotes",
"Expecting property name enclosed in double quotes or '}'",
s, end)
end += 1
while True:
@ -296,14 +306,15 @@ class JSONDecoder(object):
| null | None |
+---------------+-------------------+
It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
When allow_nan=True, it also understands
``NaN``, ``Infinity``, and ``-Infinity`` as
their corresponding ``float`` values, which is outside the JSON spec.
"""
def __init__(self, encoding=None, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, strict=True,
object_pairs_hook=None):
object_pairs_hook=None, allow_nan=False):
"""
*encoding* determines the encoding used to interpret any
:class:`str` objects decoded by this instance (``'utf-8'`` by
@ -336,10 +347,13 @@ class JSONDecoder(object):
``int(num_str)``. This can be used to use another datatype or parser
for JSON integers (e.g. :class:`float`).
*parse_constant*, if specified, will be called with one of the
following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This
can be used to raise an exception if invalid JSON numbers are
encountered.
*allow_nan*, if True (default false), will allow the parser to
accept the non-standard floats ``NaN``, ``Infinity``, and ``-Infinity``.
*parse_constant*, if specified, will be
called with one of the following strings: ``'-Infinity'``,
``'Infinity'``, ``'NaN'``. It is not recommended to use this feature,
as it is rare to parse non-compliant JSON containing these values.
*strict* controls the parser's behavior when it encounters an
invalid control character in a string. The default setting of
@ -353,8 +367,8 @@ class JSONDecoder(object):
self.object_hook = object_hook
self.object_pairs_hook = object_pairs_hook
self.parse_float = parse_float or float
self.parse_int = parse_int or int
self.parse_constant = parse_constant or _CONSTANTS.__getitem__
self.parse_int = parse_int or bounded_int
self.parse_constant = parse_constant or (allow_nan and _CONSTANTS.__getitem__ or None)
self.strict = strict
self.parse_object = JSONObject
self.parse_array = JSONArray

View file

@ -5,7 +5,7 @@ import re
from operator import itemgetter
# Do not import Decimal directly to avoid reload issues
import decimal
from .compat import unichr, binary_type, text_type, string_types, integer_types, PY3
from .compat import binary_type, text_type, string_types, integer_types, PY3
def _import_speedups():
try:
from . import _speedups
@ -140,7 +140,7 @@ class JSONEncoder(object):
key_separator = ': '
def __init__(self, skipkeys=False, ensure_ascii=True,
check_circular=True, allow_nan=True, sort_keys=False,
check_circular=True, allow_nan=False, sort_keys=False,
indent=None, separators=None, encoding='utf-8', default=None,
use_decimal=True, namedtuple_as_object=True,
tuple_as_array=True, bigint_as_string=False,
@ -161,10 +161,11 @@ class JSONEncoder(object):
prevent an infinite recursion (which would cause an OverflowError).
Otherwise, no such check takes place.
If allow_nan is true, then NaN, Infinity, and -Infinity will be
encoded as such. This behavior is not JSON specification compliant,
but is consistent with most JavaScript based encoders and decoders.
Otherwise, it will be a ValueError to encode such floats.
If allow_nan is true (default: False), then out of range float
values (nan, inf, -inf) will be serialized to
their JavaScript equivalents (NaN, Infinity, -Infinity)
instead of raising a ValueError. See
ignore_nan for ECMA-262 compliant behavior.
If sort_keys is true, then the output of dictionaries will be
sorted by key; this is useful for regression tests to ensure
@ -294,7 +295,7 @@ class JSONEncoder(object):
# This doesn't pass the iterator directly to ''.join() because the
# exceptions aren't as detailed. The list call should be roughly
# equivalent to the PySequence_Fast that ''.join() would do.
chunks = self.iterencode(o, _one_shot=True)
chunks = self.iterencode(o)
if not isinstance(chunks, (list, tuple)):
chunks = list(chunks)
if self.ensure_ascii:
@ -302,7 +303,7 @@ class JSONEncoder(object):
else:
return u''.join(chunks)
def iterencode(self, o, _one_shot=False):
def iterencode(self, o):
"""Encode the given object and yield each string
representation as available.
@ -356,8 +357,7 @@ class JSONEncoder(object):
key_memo = {}
int_as_string_bitcount = (
53 if self.bigint_as_string else self.int_as_string_bitcount)
if (_one_shot and c_make_encoder is not None
and self.indent is None):
if (c_make_encoder is not None and self.indent is None):
_iterencode = c_make_encoder(
markers, self.default, _encoder, self.indent,
self.key_separator, self.item_separator, self.sort_keys,
@ -370,7 +370,7 @@ class JSONEncoder(object):
_iterencode = _make_iterencode(
markers, self.default, _encoder, self.indent, floatstr,
self.key_separator, self.item_separator, self.sort_keys,
self.skipkeys, _one_shot, self.use_decimal,
self.skipkeys, self.use_decimal,
self.namedtuple_as_object, self.tuple_as_array,
int_as_string_bitcount,
self.item_sort_key, self.encoding, self.for_json,
@ -398,14 +398,14 @@ class JSONEncoderForHTML(JSONEncoder):
def encode(self, o):
# Override JSONEncoder.encode because it has hacks for
# performance that make things more complicated.
chunks = self.iterencode(o, True)
chunks = self.iterencode(o)
if self.ensure_ascii:
return ''.join(chunks)
else:
return u''.join(chunks)
def iterencode(self, o, _one_shot=False):
chunks = super(JSONEncoderForHTML, self).iterencode(o, _one_shot)
def iterencode(self, o):
chunks = super(JSONEncoderForHTML, self).iterencode(o)
for chunk in chunks:
chunk = chunk.replace('&', '\\u0026')
chunk = chunk.replace('<', '\\u003c')
@ -419,7 +419,7 @@ class JSONEncoderForHTML(JSONEncoder):
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
_key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
_key_separator, _item_separator, _sort_keys, _skipkeys,
_use_decimal, _namedtuple_as_object, _tuple_as_array,
_int_as_string_bitcount, _item_sort_key,
_encoding,_for_json,

View file

@ -60,11 +60,11 @@ def py_make_scanner(context):
else:
res = parse_int(integer)
return res, m.end()
elif nextchar == 'N' and string[idx:idx + 3] == 'NaN':
elif parse_constant and nextchar == 'N' and string[idx:idx + 3] == 'NaN':
return parse_constant('NaN'), idx + 3
elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity':
elif parse_constant and nextchar == 'I' and string[idx:idx + 8] == 'Infinity':
return parse_constant('Infinity'), idx + 8
elif nextchar == '-' and string[idx:idx + 9] == '-Infinity':
elif parse_constant and nextchar == '-' and string[idx:idx + 9] == '-Infinity':
return parse_constant('-Infinity'), idx + 9
else:
raise JSONDecodeError(errmsg, string, idx)

View file

@ -2,6 +2,7 @@ from __future__ import absolute_import
import decimal
from unittest import TestCase
import sys
import simplejson as json
from simplejson.compat import StringIO, b, binary_type
from simplejson import OrderedDict
@ -117,3 +118,10 @@ class TestDecode(TestCase):
diff = id(x) - id(y)
self.assertRaises(ValueError, j.scan_once, y, diff)
self.assertRaises(ValueError, j.raw_decode, y, i)
def test_bounded_int(self):
    """Integers at the digit limit parse; one digit more raises ValueError."""
    # SJ-PT-23-03, limit quadratic number parsing per Python 3.11
    # Interpreters without the limit API fall back to 4300.
    read_limit = getattr(sys, 'get_int_max_str_digits', lambda: 4300)
    digit_limit = read_limit()
    at_limit = '1' + '0' * (digit_limit - 1)
    over_limit = at_limit + '0'
    self.assertEqual(json.loads(at_limit), int(at_limit))
    self.assertRaises(ValueError, json.loads, over_limit)

View file

@ -145,7 +145,7 @@ class TestFail(TestCase):
('["spam', 'Unterminated string starting at', 1),
('["spam"', "Expecting ',' delimiter", 7),
('["spam",', 'Expecting value', 8),
('{', 'Expecting property name enclosed in double quotes', 1),
('{', "Expecting property name enclosed in double quotes or '}'", 1),
('{"', 'Unterminated string starting at', 1),
('{"spam', 'Unterminated string starting at', 1),
('{"spam"', "Expecting ':' delimiter", 7),
@ -156,6 +156,8 @@ class TestFail(TestCase):
('"', 'Unterminated string starting at', 0),
('"spam', 'Unterminated string starting at', 0),
('[,', "Expecting value", 1),
('--', 'Expecting value', 0),
('"\x18d', "Invalid control character %r", 1),
]
for data, msg, idx in test_cases:
try:

View file

@ -7,9 +7,9 @@ from simplejson.decoder import NaN, PosInf, NegInf
class TestFloat(TestCase):
def test_degenerates_allow(self):
for inf in (PosInf, NegInf):
self.assertEqual(json.loads(json.dumps(inf)), inf)
self.assertEqual(json.loads(json.dumps(inf, allow_nan=True), allow_nan=True), inf)
# Python 2.5 doesn't have math.isnan
nan = json.loads(json.dumps(NaN))
nan = json.loads(json.dumps(NaN, allow_nan=True), allow_nan=True)
self.assertTrue((0 + nan) != nan)
def test_degenerates_ignore(self):
@ -19,6 +19,9 @@ class TestFloat(TestCase):
def test_degenerates_deny(self):
for f in (PosInf, NegInf, NaN):
self.assertRaises(ValueError, json.dumps, f, allow_nan=False)
for s in ('Infinity', '-Infinity', 'NaN'):
self.assertRaises(ValueError, json.loads, s, allow_nan=False)
self.assertRaises(ValueError, json.loads, s)
def test_floats(self):
for num in [1617161771.7650001, math.pi, math.pi**100,

View file

@ -132,7 +132,9 @@ class TestScanString(TestCase):
self.assertRaises(ValueError,
scanstring, '\\ud834\\x0123"', 0, None, True)
self.assertRaises(json.JSONDecodeError, scanstring, "\\u-123", 0, None, True)
self.assertRaises(json.JSONDecodeError, scanstring, '\\u-123"', 0, None, True)
# SJ-PT-23-01: Invalid Handling of Broken Unicode Escape Sequences
self.assertRaises(json.JSONDecodeError, scanstring, '\\u EDD"', 0, None, True)
def test_issue3623(self):
self.assertRaises(ValueError, json.decoder.scanstring, "xxx", 1,

View file

@ -32,7 +32,7 @@ from . import css_match as cm
from . import css_types as ct
from .util import DEBUG, SelectorSyntaxError # noqa: F401
import bs4 # type: ignore[import]
from typing import Optional, Any, Iterator, Iterable
from typing import Any, Iterator, Iterable
__all__ = (
'DEBUG', 'SelectorSyntaxError', 'SoupSieve',
@ -45,10 +45,10 @@ SoupSieve = cm.SoupSieve
def compile( # noqa: A001
pattern: str,
namespaces: Optional[dict[str, str]] = None,
namespaces: dict[str, str] | None = None,
flags: int = 0,
*,
custom: Optional[dict[str, str]] = None,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> cm.SoupSieve:
"""Compile CSS pattern."""
@ -79,10 +79,10 @@ def purge() -> None:
def closest(
select: str,
tag: 'bs4.Tag',
namespaces: Optional[dict[str, str]] = None,
namespaces: dict[str, str] | None = None,
flags: int = 0,
*,
custom: Optional[dict[str, str]] = None,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> 'bs4.Tag':
"""Match closest ancestor."""
@ -93,10 +93,10 @@ def closest(
def match(
select: str,
tag: 'bs4.Tag',
namespaces: Optional[dict[str, str]] = None,
namespaces: dict[str, str] | None = None,
flags: int = 0,
*,
custom: Optional[dict[str, str]] = None,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> bool:
"""Match node."""
@ -107,10 +107,10 @@ def match(
def filter( # noqa: A001
select: str,
iterable: Iterable['bs4.Tag'],
namespaces: Optional[dict[str, str]] = None,
namespaces: dict[str, str] | None = None,
flags: int = 0,
*,
custom: Optional[dict[str, str]] = None,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> list['bs4.Tag']:
"""Filter list of nodes."""
@ -121,10 +121,10 @@ def filter( # noqa: A001
def select_one(
select: str,
tag: 'bs4.Tag',
namespaces: Optional[dict[str, str]] = None,
namespaces: dict[str, str] | None = None,
flags: int = 0,
*,
custom: Optional[dict[str, str]] = None,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> 'bs4.Tag':
"""Select a single tag."""
@ -135,11 +135,11 @@ def select_one(
def select(
select: str,
tag: 'bs4.Tag',
namespaces: Optional[dict[str, str]] = None,
namespaces: dict[str, str] | None = None,
limit: int = 0,
flags: int = 0,
*,
custom: Optional[dict[str, str]] = None,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> list['bs4.Tag']:
"""Select the specified tags."""
@ -150,11 +150,11 @@ def select(
def iselect(
select: str,
tag: 'bs4.Tag',
namespaces: Optional[dict[str, str]] = None,
namespaces: dict[str, str] | None = None,
limit: int = 0,
flags: int = 0,
*,
custom: Optional[dict[str, str]] = None,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> Iterator['bs4.Tag']:
"""Iterate the specified tags."""

View file

@ -193,5 +193,5 @@ def parse_version(ver: str) -> Version:
return Version(major, minor, micro, release, pre, post, dev)
__version_info__ = Version(2, 4, 0, "final")
__version_info__ = Version(2, 4, 1, "final")
__version__ = __version_info__._get_canonical()

View file

@ -6,7 +6,7 @@ import re
from . import css_types as ct
import unicodedata
import bs4 # type: ignore[import]
from typing import Iterator, Iterable, Any, Optional, Callable, Sequence, cast # noqa: F401
from typing import Iterator, Iterable, Any, Callable, Sequence, cast # noqa: F401
# Empty tag pattern (whitespace okay)
RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]')
@ -171,7 +171,7 @@ class _DocumentNav:
def get_children(
self,
el: bs4.Tag,
start: Optional[int] = None,
start: int | None = None,
reverse: bool = False,
tags: bool = True,
no_iframe: bool = False
@ -239,22 +239,22 @@ class _DocumentNav:
return parent
@staticmethod
def get_tag_name(el: bs4.Tag) -> Optional[str]:
def get_tag_name(el: bs4.Tag) -> str | None:
"""Get tag."""
return cast(Optional[str], el.name)
return cast('str | None', el.name)
@staticmethod
def get_prefix_name(el: bs4.Tag) -> Optional[str]:
def get_prefix_name(el: bs4.Tag) -> str | None:
"""Get prefix."""
return cast(Optional[str], el.prefix)
return cast('str | None', el.prefix)
@staticmethod
def get_uri(el: bs4.Tag) -> Optional[str]:
def get_uri(el: bs4.Tag) -> str | None:
"""Get namespace `URI`."""
return cast(Optional[str], el.namespace)
return cast('str | None', el.namespace)
@classmethod
def get_next(cls, el: bs4.Tag, tags: bool = True) -> bs4.PageElement:
@ -287,7 +287,7 @@ class _DocumentNav:
return bool(ns and ns == NS_XHTML)
@staticmethod
def split_namespace(el: bs4.Tag, attr_name: str) -> tuple[Optional[str], Optional[str]]:
def split_namespace(el: bs4.Tag, attr_name: str) -> tuple[str | None, str | None]:
"""Return namespace and attribute name without the prefix."""
return getattr(attr_name, 'namespace', None), getattr(attr_name, 'name', None)
@ -330,8 +330,8 @@ class _DocumentNav:
cls,
el: bs4.Tag,
name: str,
default: Optional[str | Sequence[str]] = None
) -> Optional[str | Sequence[str]]:
default: str | Sequence[str] | None = None
) -> str | Sequence[str] | None:
"""Get attribute by name."""
value = default
@ -348,7 +348,7 @@ class _DocumentNav:
return value
@classmethod
def iter_attributes(cls, el: bs4.Tag) -> Iterator[tuple[str, Optional[str | Sequence[str]]]]:
def iter_attributes(cls, el: bs4.Tag) -> Iterator[tuple[str, str | Sequence[str] | None]]:
"""Iterate attributes."""
for k, v in el.attrs.items():
@ -424,10 +424,10 @@ class Inputs:
return 0 <= minutes <= 59
@classmethod
def parse_value(cls, itype: str, value: Optional[str]) -> Optional[tuple[float, ...]]:
def parse_value(cls, itype: str, value: str | None) -> tuple[float, ...] | None:
"""Parse the input value."""
parsed = None # type: Optional[tuple[float, ...]]
parsed = None # type: tuple[float, ...] | None
if value is None:
return value
if itype == "date":
@ -486,7 +486,7 @@ class CSSMatch(_DocumentNav):
self,
selectors: ct.SelectorList,
scope: bs4.Tag,
namespaces: Optional[ct.Namespaces],
namespaces: ct.Namespaces | None,
flags: int
) -> None:
"""Initialize."""
@ -545,19 +545,19 @@ class CSSMatch(_DocumentNav):
return self.get_tag_ns(el) == NS_XHTML
def get_tag(self, el: bs4.Tag) -> Optional[str]:
def get_tag(self, el: bs4.Tag) -> str | None:
"""Get tag."""
name = self.get_tag_name(el)
return util.lower(name) if name is not None and not self.is_xml else name
def get_prefix(self, el: bs4.Tag) -> Optional[str]:
def get_prefix(self, el: bs4.Tag) -> str | None:
"""Get prefix."""
prefix = self.get_prefix_name(el)
return util.lower(prefix) if prefix is not None and not self.is_xml else prefix
def find_bidi(self, el: bs4.Tag) -> Optional[int]:
def find_bidi(self, el: bs4.Tag) -> int | None:
"""Get directionality from element text."""
for node in self.get_children(el, tags=False):
@ -653,8 +653,8 @@ class CSSMatch(_DocumentNav):
self,
el: bs4.Tag,
attr: str,
prefix: Optional[str]
) -> Optional[str | Sequence[str]]:
prefix: str | None
) -> str | Sequence[str] | None:
"""Match attribute name and return value if it exists."""
value = None
@ -751,7 +751,7 @@ class CSSMatch(_DocumentNav):
name not in (self.get_tag(el), '*')
)
def match_tag(self, el: bs4.Tag, tag: Optional[ct.SelectorTag]) -> bool:
def match_tag(self, el: bs4.Tag, tag: ct.SelectorTag | None) -> bool:
"""Match the tag."""
match = True
@ -1030,7 +1030,7 @@ class CSSMatch(_DocumentNav):
"""Match element if it contains text."""
match = True
content = None # type: Optional[str | Sequence[str]]
content = None # type: str | Sequence[str] | None
for contain_list in contains:
if content is None:
if contain_list.own:
@ -1099,7 +1099,7 @@ class CSSMatch(_DocumentNav):
match = False
name = cast(str, self.get_attribute_by_name(el, 'name'))
def get_parent_form(el: bs4.Tag) -> Optional[bs4.Tag]:
def get_parent_form(el: bs4.Tag) -> bs4.Tag | None:
"""Find this input's form."""
form = None
parent = self.get_parent(el, no_iframe=True)
@ -1478,7 +1478,7 @@ class CSSMatch(_DocumentNav):
if lim < 1:
break
def closest(self) -> Optional[bs4.Tag]:
def closest(self) -> bs4.Tag | None:
"""Match closest ancestor."""
current = self.tag
@ -1506,7 +1506,7 @@ class SoupSieve(ct.Immutable):
pattern: str
selectors: ct.SelectorList
namespaces: Optional[ct.Namespaces]
namespaces: ct.Namespaces | None
custom: dict[str, str]
flags: int
@ -1516,8 +1516,8 @@ class SoupSieve(ct.Immutable):
self,
pattern: str,
selectors: ct.SelectorList,
namespaces: Optional[ct.Namespaces],
custom: Optional[ct.CustomSelectors],
namespaces: ct.Namespaces | None,
custom: ct.CustomSelectors | None,
flags: int
):
"""Initialize."""

View file

@ -7,7 +7,7 @@ from . import css_match as cm
from . import css_types as ct
from .util import SelectorSyntaxError
import warnings
from typing import Optional, Match, Any, Iterator, cast
from typing import Match, Any, Iterator, cast
UNICODE_REPLACEMENT_CHAR = 0xFFFD
@ -113,7 +113,7 @@ VALUE = r'''
'''.format(nl=NEWLINE, ident=IDENTIFIER)
# Attribute value comparison. `!=` is handled special as it is non-standard.
ATTR = r'''
(?:{ws}*(?P<cmp>[!~^|*$]?=){ws}*(?P<value>{value})(?:{ws}+(?P<case>[is]))?)?{ws}*\]
(?:{ws}*(?P<cmp>[!~^|*$]?=){ws}*(?P<value>{value})(?:{ws}*(?P<case>[is]))?)?{ws}*\]
'''.format(ws=WSC, value=VALUE)
# Selector patterns
@ -207,8 +207,8 @@ _MAXCACHE = 500
@lru_cache(maxsize=_MAXCACHE)
def _cached_css_compile(
pattern: str,
namespaces: Optional[ct.Namespaces],
custom: Optional[ct.CustomSelectors],
namespaces: ct.Namespaces | None,
custom: ct.CustomSelectors | None,
flags: int
) -> cm.SoupSieve:
"""Cached CSS compile."""
@ -233,7 +233,7 @@ def _purge_cache() -> None:
_cached_css_compile.cache_clear()
def process_custom(custom: Optional[ct.CustomSelectors]) -> dict[str, str | ct.SelectorList]:
def process_custom(custom: ct.CustomSelectors | None) -> dict[str, str | ct.SelectorList]:
"""Process custom."""
custom_selectors = {}
@ -317,7 +317,7 @@ class SelectorPattern:
return self.name
def match(self, selector: str, index: int, flags: int) -> Optional[Match[str]]:
def match(self, selector: str, index: int, flags: int) -> Match[str] | None:
"""Match the selector."""
return self.re_pattern.match(selector, index)
@ -336,7 +336,7 @@ class SpecialPseudoPattern(SelectorPattern):
for pseudo in p[1]:
self.patterns[pseudo] = pattern
self.matched_name = None # type: Optional[SelectorPattern]
self.matched_name = None # type: SelectorPattern | None
self.re_pseudo_name = re.compile(PAT_PSEUDO_CLASS_SPECIAL, re.I | re.X | re.U)
def get_name(self) -> str:
@ -344,7 +344,7 @@ class SpecialPseudoPattern(SelectorPattern):
return '' if self.matched_name is None else self.matched_name.get_name()
def match(self, selector: str, index: int, flags: int) -> Optional[Match[str]]:
def match(self, selector: str, index: int, flags: int) -> Match[str] | None:
"""Match the selector."""
pseudo = None
@ -372,14 +372,14 @@ class _Selector:
def __init__(self, **kwargs: Any) -> None:
"""Initialize."""
self.tag = kwargs.get('tag', None) # type: Optional[ct.SelectorTag]
self.tag = kwargs.get('tag', None) # type: ct.SelectorTag | None
self.ids = kwargs.get('ids', []) # type: list[str]
self.classes = kwargs.get('classes', []) # type: list[str]
self.attributes = kwargs.get('attributes', []) # type: list[ct.SelectorAttribute]
self.nth = kwargs.get('nth', []) # type: list[ct.SelectorNth]
self.selectors = kwargs.get('selectors', []) # type: list[ct.SelectorList]
self.relations = kwargs.get('relations', []) # type: list[_Selector]
self.rel_type = kwargs.get('rel_type', None) # type: Optional[str]
self.rel_type = kwargs.get('rel_type', None) # type: str | None
self.contains = kwargs.get('contains', []) # type: list[ct.SelectorContains]
self.lang = kwargs.get('lang', []) # type: list[ct.SelectorLang]
self.flags = kwargs.get('flags', 0) # type: int
@ -462,7 +462,7 @@ class CSSParser:
def __init__(
self,
selector: str,
custom: Optional[dict[str, str | ct.SelectorList]] = None,
custom: dict[str, str | ct.SelectorList] | None = None,
flags: int = 0
) -> None:
"""Initialize."""

View file

@ -2,7 +2,7 @@
from __future__ import annotations
import copyreg
from .pretty import pretty
from typing import Any, Iterator, Hashable, Optional, Pattern, Iterable, Mapping
from typing import Any, Iterator, Hashable, Pattern, Iterable, Mapping
__all__ = (
'Selector',
@ -189,28 +189,28 @@ class Selector(Immutable):
'relation', 'rel_type', 'contains', 'lang', 'flags', '_hash'
)
tag: Optional[SelectorTag]
tag: SelectorTag | None
ids: tuple[str, ...]
classes: tuple[str, ...]
attributes: tuple[SelectorAttribute, ...]
nth: tuple[SelectorNth, ...]
selectors: tuple[SelectorList, ...]
relation: SelectorList
rel_type: Optional[str]
rel_type: str | None
contains: tuple[SelectorContains, ...]
lang: tuple[SelectorLang, ...]
flags: int
def __init__(
self,
tag: Optional[SelectorTag],
tag: SelectorTag | None,
ids: tuple[str, ...],
classes: tuple[str, ...],
attributes: tuple[SelectorAttribute, ...],
nth: tuple[SelectorNth, ...],
selectors: tuple[SelectorList, ...],
relation: SelectorList,
rel_type: Optional[str],
rel_type: str | None,
contains: tuple[SelectorContains, ...],
lang: tuple[SelectorLang, ...],
flags: int
@ -247,9 +247,9 @@ class SelectorTag(Immutable):
__slots__ = ("name", "prefix", "_hash")
name: str
prefix: Optional[str]
prefix: str | None
def __init__(self, name: str, prefix: Optional[str]) -> None:
def __init__(self, name: str, prefix: str | None) -> None:
"""Initialize."""
super().__init__(name=name, prefix=prefix)
@ -262,15 +262,15 @@ class SelectorAttribute(Immutable):
attribute: str
prefix: str
pattern: Optional[Pattern[str]]
xml_type_pattern: Optional[Pattern[str]]
pattern: Pattern[str] | None
xml_type_pattern: Pattern[str] | None
def __init__(
self,
attribute: str,
prefix: str,
pattern: Optional[Pattern[str]],
xml_type_pattern: Optional[Pattern[str]]
pattern: Pattern[str] | None,
xml_type_pattern: Pattern[str] | None
) -> None:
"""Initialize."""
@ -360,7 +360,7 @@ class SelectorList(Immutable):
def __init__(
self,
selectors: Optional[Iterable[Selector | SelectorNull]] = None,
selectors: Iterable[Selector | SelectorNull] | None = None,
is_not: bool = False,
is_html: bool = False
) -> None:

View file

@ -3,7 +3,7 @@ from __future__ import annotations
from functools import wraps, lru_cache
import warnings
import re
from typing import Callable, Any, Optional
from typing import Callable, Any
DEBUG = 0x00001
@ -27,7 +27,7 @@ def lower(string: str) -> str:
class SelectorSyntaxError(Exception):
"""Syntax error in a CSS selector."""
def __init__(self, msg: str, pattern: Optional[str] = None, index: Optional[int] = None) -> None:
def __init__(self, msg: str, pattern: str | None = None, index: int | None = None) -> None:
"""Initialize."""
self.line = None
@ -84,7 +84,7 @@ def get_pattern_context(pattern: str, index: int) -> tuple[str, int, int]:
col = 1
text = [] # type: list[str]
line = 1
offset = None # type: Optional[int]
offset = None # type: int | None
# Split pattern by newline and handle the text before the newline
for m in RE_PATTERN_LINE_SPLIT.finditer(pattern):

View file

@ -1,6 +1,6 @@
# IANA versions like 2020a are not valid PEP 440 identifiers; the recommended
# way to translate the version is to use YYYY.n where `n` is a 0-based index.
__version__ = "2022.7"
__version__ = "2023.3"
# This exposes the original IANA version number.
IANA_VERSION = "2022g"
IANA_VERSION = "2023c"

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -238,7 +238,7 @@ SY Syria
SZ Eswatini (Swaziland)
TC Turks & Caicos Is
TD Chad
TF French Southern Territories
TF French S. Terr.
TG Togo
TH Thailand
TJ Tajikistan

View file

@ -72,11 +72,11 @@ Leap 2016 Dec 31 23:59:60 + S
# Any additional leap seconds will come after this.
# This Expires line is commented out for now,
# so that pre-2020a zic implementations do not reject this file.
#Expires 2023 Jun 28 00:00:00
#Expires 2023 Dec 28 00:00:00
# POSIX timestamps for the data in this file:
#updated 1467936000 (2016-07-08 00:00:00 UTC)
#expires 1687910400 (2023-06-28 00:00:00 UTC)
#expires 1703721600 (2023-12-28 00:00:00 UTC)
# Updated through IERS Bulletin C64
# File expires on: 28 June 2023
# Updated through IERS Bulletin C65
# File expires on: 28 December 2023

View file

@ -1,4 +1,4 @@
# version 2022g
# version 2023c
# This zic input file is in the public domain.
R d 1916 o - Jun 14 23s 1 S
R d 1916 1919 - O Su>=1 23s 0 -
@ -75,6 +75,8 @@ R K 2014 o - May 15 24 1 S
R K 2014 o - Jun 26 24 0 -
R K 2014 o - Jul 31 24 1 S
R K 2014 o - S lastTh 24 0 -
R K 2023 ma - Ap lastF 0 1 S
R K 2023 ma - O lastTh 24 0 -
Z Africa/Cairo 2:5:9 - LMT 1900 O
2 K EE%sT
Z Africa/Bissau -1:2:20 - LMT 1912 Ja 1 1u
@ -172,7 +174,7 @@ R M 2021 o - May 16 2 0 -
R M 2022 o - Mar 27 3 -1 -
R M 2022 o - May 8 2 0 -
R M 2023 o - Mar 19 3 -1 -
R M 2023 o - Ap 30 2 0 -
R M 2023 o - Ap 23 2 0 -
R M 2024 o - Mar 10 3 -1 -
R M 2024 o - Ap 14 2 0 -
R M 2025 o - F 23 3 -1 -
@ -188,7 +190,7 @@ R M 2029 o - F 18 2 0 -
R M 2029 o - D 30 3 -1 -
R M 2030 o - F 10 2 0 -
R M 2030 o - D 22 3 -1 -
R M 2031 o - F 2 2 0 -
R M 2031 o - Ja 26 2 0 -
R M 2031 o - D 14 3 -1 -
R M 2032 o - Ja 18 2 0 -
R M 2032 o - N 28 3 -1 -
@ -204,7 +206,7 @@ R M 2036 o - N 23 2 0 -
R M 2037 o - O 4 3 -1 -
R M 2037 o - N 15 2 0 -
R M 2038 o - S 26 3 -1 -
R M 2038 o - N 7 2 0 -
R M 2038 o - O 31 2 0 -
R M 2039 o - S 18 3 -1 -
R M 2039 o - O 23 2 0 -
R M 2040 o - S 2 3 -1 -
@ -220,7 +222,7 @@ R M 2044 o - Au 28 2 0 -
R M 2045 o - Jul 9 3 -1 -
R M 2045 o - Au 20 2 0 -
R M 2046 o - Jul 1 3 -1 -
R M 2046 o - Au 12 2 0 -
R M 2046 o - Au 5 2 0 -
R M 2047 o - Jun 23 3 -1 -
R M 2047 o - Jul 28 2 0 -
R M 2048 o - Jun 7 3 -1 -
@ -236,7 +238,7 @@ R M 2052 o - Jun 2 2 0 -
R M 2053 o - Ap 13 3 -1 -
R M 2053 o - May 25 2 0 -
R M 2054 o - Ap 5 3 -1 -
R M 2054 o - May 17 2 0 -
R M 2054 o - May 10 2 0 -
R M 2055 o - Mar 28 3 -1 -
R M 2055 o - May 2 2 0 -
R M 2056 o - Mar 12 3 -1 -
@ -252,7 +254,7 @@ R M 2060 o - Mar 7 2 0 -
R M 2061 o - Ja 16 3 -1 -
R M 2061 o - F 27 2 0 -
R M 2062 o - Ja 8 3 -1 -
R M 2062 o - F 19 2 0 -
R M 2062 o - F 12 2 0 -
R M 2062 o - D 31 3 -1 -
R M 2063 o - F 4 2 0 -
R M 2063 o - D 16 3 -1 -
@ -268,7 +270,7 @@ R M 2067 o - D 11 2 0 -
R M 2068 o - O 21 3 -1 -
R M 2068 o - D 2 2 0 -
R M 2069 o - O 13 3 -1 -
R M 2069 o - N 24 2 0 -
R M 2069 o - N 17 2 0 -
R M 2070 o - O 5 3 -1 -
R M 2070 o - N 9 2 0 -
R M 2071 o - S 20 3 -1 -
@ -284,7 +286,7 @@ R M 2075 o - S 15 2 0 -
R M 2076 o - Jul 26 3 -1 -
R M 2076 o - S 6 2 0 -
R M 2077 o - Jul 18 3 -1 -
R M 2077 o - Au 29 2 0 -
R M 2077 o - Au 22 2 0 -
R M 2078 o - Jul 10 3 -1 -
R M 2078 o - Au 14 2 0 -
R M 2079 o - Jun 25 3 -1 -
@ -294,13 +296,13 @@ R M 2080 o - Jul 21 2 0 -
R M 2081 o - Jun 1 3 -1 -
R M 2081 o - Jul 13 2 0 -
R M 2082 o - May 24 3 -1 -
R M 2082 o - Jul 5 2 0 -
R M 2082 o - Jun 28 2 0 -
R M 2083 o - May 16 3 -1 -
R M 2083 o - Jun 20 2 0 -
R M 2084 o - Ap 30 3 -1 -
R M 2084 o - Jun 11 2 0 -
R M 2085 o - Ap 22 3 -1 -
R M 2085 o - Jun 3 2 0 -
R M 2085 o - May 27 2 0 -
R M 2086 o - Ap 14 3 -1 -
R M 2086 o - May 19 2 0 -
R M 2087 o - Mar 30 3 -1 -
@ -997,8 +999,86 @@ R P 2020 2021 - Mar Sa<=30 0 1 S
R P 2020 o - O 24 1 0 -
R P 2021 o - O 29 1 0 -
R P 2022 o - Mar 27 0 1 S
R P 2022 ma - O Sa<=30 2 0 -
R P 2023 ma - Mar Sa<=30 2 1 S
R P 2022 2035 - O Sa<=30 2 0 -
R P 2023 o - Ap 29 2 1 S
R P 2024 o - Ap 13 2 1 S
R P 2025 o - Ap 5 2 1 S
R P 2026 2054 - Mar Sa<=30 2 1 S
R P 2036 o - O 18 2 0 -
R P 2037 o - O 10 2 0 -
R P 2038 o - S 25 2 0 -
R P 2039 o - S 17 2 0 -
R P 2039 o - O 22 2 1 S
R P 2039 2067 - O Sa<=30 2 0 -
R P 2040 o - S 1 2 0 -
R P 2040 o - O 13 2 1 S
R P 2041 o - Au 24 2 0 -
R P 2041 o - S 28 2 1 S
R P 2042 o - Au 16 2 0 -
R P 2042 o - S 20 2 1 S
R P 2043 o - Au 1 2 0 -
R P 2043 o - S 12 2 1 S
R P 2044 o - Jul 23 2 0 -
R P 2044 o - Au 27 2 1 S
R P 2045 o - Jul 15 2 0 -
R P 2045 o - Au 19 2 1 S
R P 2046 o - Jun 30 2 0 -
R P 2046 o - Au 11 2 1 S
R P 2047 o - Jun 22 2 0 -
R P 2047 o - Jul 27 2 1 S
R P 2048 o - Jun 6 2 0 -
R P 2048 o - Jul 18 2 1 S
R P 2049 o - May 29 2 0 -
R P 2049 o - Jul 3 2 1 S
R P 2050 o - May 21 2 0 -
R P 2050 o - Jun 25 2 1 S
R P 2051 o - May 6 2 0 -
R P 2051 o - Jun 17 2 1 S
R P 2052 o - Ap 27 2 0 -
R P 2052 o - Jun 1 2 1 S
R P 2053 o - Ap 12 2 0 -
R P 2053 o - May 24 2 1 S
R P 2054 o - Ap 4 2 0 -
R P 2054 o - May 16 2 1 S
R P 2055 o - May 1 2 1 S
R P 2056 o - Ap 22 2 1 S
R P 2057 o - Ap 7 2 1 S
R P 2058 ma - Mar Sa<=30 2 1 S
R P 2068 o - O 20 2 0 -
R P 2069 o - O 12 2 0 -
R P 2070 o - O 4 2 0 -
R P 2071 o - S 19 2 0 -
R P 2072 o - S 10 2 0 -
R P 2072 o - O 15 2 1 S
R P 2073 o - S 2 2 0 -
R P 2073 o - O 7 2 1 S
R P 2074 o - Au 18 2 0 -
R P 2074 o - S 29 2 1 S
R P 2075 o - Au 10 2 0 -
R P 2075 o - S 14 2 1 S
R P 2075 ma - O Sa<=30 2 0 -
R P 2076 o - Jul 25 2 0 -
R P 2076 o - S 5 2 1 S
R P 2077 o - Jul 17 2 0 -
R P 2077 o - Au 28 2 1 S
R P 2078 o - Jul 9 2 0 -
R P 2078 o - Au 13 2 1 S
R P 2079 o - Jun 24 2 0 -
R P 2079 o - Au 5 2 1 S
R P 2080 o - Jun 15 2 0 -
R P 2080 o - Jul 20 2 1 S
R P 2081 o - Jun 7 2 0 -
R P 2081 o - Jul 12 2 1 S
R P 2082 o - May 23 2 0 -
R P 2082 o - Jul 4 2 1 S
R P 2083 o - May 15 2 0 -
R P 2083 o - Jun 19 2 1 S
R P 2084 o - Ap 29 2 0 -
R P 2084 o - Jun 10 2 1 S
R P 2085 o - Ap 21 2 0 -
R P 2085 o - Jun 2 2 1 S
R P 2086 o - Ap 13 2 0 -
R P 2086 o - May 18 2 1 S
Z Asia/Gaza 2:17:52 - LMT 1900 O
2 Z EET/EEST 1948 May 15
2 K EE%sT 1967 Jun 5
@ -1754,8 +1834,8 @@ Z America/Scoresbysund -1:27:52 - LMT 1916 Jul 28
-1 E -01/+00
Z America/Nuuk -3:26:56 - LMT 1916 Jul 28
-3 - -03 1980 Ap 6 2
-3 E -03/-02 2023 Mar 25 22
-2 - -02
-3 E -03/-02 2023 O 29 1u
-2 E -02/-01
Z America/Thule -4:35:8 - LMT 1916 Jul 28
-4 Th A%sT
Z Europe/Tallinn 1:39 - LMT 1880
@ -2175,13 +2255,13 @@ Z Europe/Volgograd 2:57:40 - LMT 1920 Ja 3
3 - +03 1930 Jun 21
4 - +04 1961 N 11
4 R +04/+05 1988 Mar 27 2s
3 R +03/+04 1991 Mar 31 2s
3 R MSK/MSD 1991 Mar 31 2s
4 - +04 1992 Mar 29 2s
3 R +03/+04 2011 Mar 27 2s
4 - +04 2014 O 26 2s
3 - +03 2018 O 28 2s
3 R MSK/MSD 2011 Mar 27 2s
4 - MSK 2014 O 26 2s
3 - MSK 2018 O 28 2s
4 - +04 2020 D 27 2s
3 - +03
3 - MSK
Z Europe/Saratov 3:4:18 - LMT 1919 Jul 1 0u
3 - +03 1930 Jun 21
4 R +04/+05 1988 Mar 27 2s
@ -2194,11 +2274,11 @@ Z Europe/Saratov 3:4:18 - LMT 1919 Jul 1 0u
Z Europe/Kirov 3:18:48 - LMT 1919 Jul 1 0u
3 - +03 1930 Jun 21
4 R +04/+05 1989 Mar 26 2s
3 R +03/+04 1991 Mar 31 2s
3 R MSK/MSD 1991 Mar 31 2s
4 - +04 1992 Mar 29 2s
3 R +03/+04 2011 Mar 27 2s
4 - +04 2014 O 26 2s
3 - +03
3 R MSK/MSD 2011 Mar 27 2s
4 - MSK 2014 O 26 2s
3 - MSK
Z Europe/Samara 3:20:20 - LMT 1919 Jul 1 0u
3 - +03 1930 Jun 21
4 - +04 1935 Ja 27
@ -3070,9 +3150,6 @@ Z America/Cambridge_Bay 0 - -00 1920
-5 - EST 2000 N 5
-6 - CST 2001 Ap 1 3
-7 C M%sT
Z America/Yellowknife 0 - -00 1935
-7 Y M%sT 1980
-7 C M%sT
Z America/Inuvik 0 - -00 1953
-8 Y P%sT 1979 Ap lastSu 2
-7 Y M%sT 1980
@ -4171,6 +4248,7 @@ L America/Argentina/Cordoba America/Rosario
L America/Tijuana America/Santa_Isabel
L America/Denver America/Shiprock
L America/Toronto America/Thunder_Bay
L America/Edmonton America/Yellowknife
L Pacific/Auckland Antarctica/South_Pole
L Asia/Shanghai Asia/Chongqing
L Asia/Shanghai Asia/Harbin

View file

@ -121,9 +121,8 @@ CA +744144-0944945 America/Resolute Central - NU (Resolute)
CA +624900-0920459 America/Rankin_Inlet Central - NU (central)
CA +5024-10439 America/Regina CST - SK (most areas)
CA +5017-10750 America/Swift_Current CST - SK (midwest)
CA +5333-11328 America/Edmonton Mountain - AB; BC (E); SK (W)
CA +5333-11328 America/Edmonton Mountain - AB; BC (E); NT (E); SK (W)
CA +690650-1050310 America/Cambridge_Bay Mountain - NU (west)
CA +6227-11421 America/Yellowknife Mountain - NT (central)
CA +682059-1334300 America/Inuvik Mountain - NT (west)
CA +4906-11631 America/Creston MST - BC (Creston)
CA +5546-12014 America/Dawson_Creek MST - BC (Dawson Cr, Ft St John)
@ -139,7 +138,7 @@ CG -0416+01517 Africa/Brazzaville
CH +4723+00832 Europe/Zurich
CI +0519-00402 Africa/Abidjan
CK -2114-15946 Pacific/Rarotonga
CL -3327-07040 America/Santiago Chile (most areas)
CL -3327-07040 America/Santiago most of Chile
CL -5309-07055 America/Punta_Arenas Region of Magallanes
CL -2709-10926 Pacific/Easter Easter Island
CM +0403+00942 Africa/Douala
@ -151,10 +150,10 @@ CU +2308-08222 America/Havana
CV +1455-02331 Atlantic/Cape_Verde
CW +1211-06900 America/Curacao
CX -1025+10543 Indian/Christmas
CY +3510+03322 Asia/Nicosia Cyprus (most areas)
CY +3510+03322 Asia/Nicosia most of Cyprus
CY +3507+03357 Asia/Famagusta Northern Cyprus
CZ +5005+01426 Europe/Prague
DE +5230+01322 Europe/Berlin Germany (most areas)
DE +5230+01322 Europe/Berlin most of Germany
DE +4742+00841 Europe/Busingen Busingen
DJ +1136+04309 Africa/Djibouti
DK +5540+01235 Europe/Copenhagen
@ -187,7 +186,7 @@ GF +0456-05220 America/Cayenne
GG +492717-0023210 Europe/Guernsey
GH +0533-00013 Africa/Accra
GI +3608-00521 Europe/Gibraltar
GL +6411-05144 America/Nuuk Greenland (most areas)
GL +6411-05144 America/Nuuk most of Greenland
GL +7646-01840 America/Danmarkshavn National Park (east coast)
GL +7029-02158 America/Scoresbysund Scoresbysund/Ittoqqortoormiit
GL +7634-06847 America/Thule Thule/Pituffik
@ -235,7 +234,7 @@ KP +3901+12545 Asia/Pyongyang
KR +3733+12658 Asia/Seoul
KW +2920+04759 Asia/Kuwait
KY +1918-08123 America/Cayman
KZ +4315+07657 Asia/Almaty Kazakhstan (most areas)
KZ +4315+07657 Asia/Almaty most of Kazakhstan
KZ +4448+06528 Asia/Qyzylorda Qyzylorda/Kyzylorda/Kzyl-Orda
KZ +5312+06337 Asia/Qostanay Qostanay/Kostanay/Kustanay
KZ +5017+05710 Asia/Aqtobe Aqtobe/Aktobe
@ -259,12 +258,12 @@ MD +4700+02850 Europe/Chisinau
ME +4226+01916 Europe/Podgorica
MF +1804-06305 America/Marigot
MG -1855+04731 Indian/Antananarivo
MH +0709+17112 Pacific/Majuro Marshall Islands (most areas)
MH +0709+17112 Pacific/Majuro most of Marshall Islands
MH +0905+16720 Pacific/Kwajalein Kwajalein
MK +4159+02126 Europe/Skopje
ML +1239-00800 Africa/Bamako
MM +1647+09610 Asia/Yangon
MN +4755+10653 Asia/Ulaanbaatar Mongolia (most areas)
MN +4755+10653 Asia/Ulaanbaatar most of Mongolia
MN +4801+09139 Asia/Hovd Bayan-Olgiy, Govi-Altai, Hovd, Uvs, Zavkhan
MN +4804+11430 Asia/Choibalsan Dornod, Sukhbaatar
MO +221150+1133230 Asia/Macau
@ -302,7 +301,7 @@ NO +5955+01045 Europe/Oslo
NP +2743+08519 Asia/Kathmandu
NR -0031+16655 Pacific/Nauru
NU -1901-16955 Pacific/Niue
NZ -3652+17446 Pacific/Auckland New Zealand (most areas)
NZ -3652+17446 Pacific/Auckland most of New Zealand
NZ -4357-17633 Pacific/Chatham Chatham Islands
OM +2336+05835 Asia/Muscat
PA +0858-07932 America/Panama
@ -310,7 +309,7 @@ PE -1203-07703 America/Lima
PF -1732-14934 Pacific/Tahiti Society Islands
PF -0900-13930 Pacific/Marquesas Marquesas Islands
PF -2308-13457 Pacific/Gambier Gambier Islands
PG -0930+14710 Pacific/Port_Moresby Papua New Guinea (most areas)
PG -0930+14710 Pacific/Port_Moresby most of Papua New Guinea
PG -0613+15534 Pacific/Bougainville Bougainville
PH +1435+12100 Asia/Manila
PK +2452+06703 Asia/Karachi
@ -356,7 +355,7 @@ RU +4310+13156 Asia/Vladivostok MSK+07 - Amur River
RU +643337+1431336 Asia/Ust-Nera MSK+07 - Oymyakonsky
RU +5934+15048 Asia/Magadan MSK+08 - Magadan
RU +4658+14242 Asia/Sakhalin MSK+08 - Sakhalin Island
RU +6728+15343 Asia/Srednekolymsk MSK+08 - Sakha (E); North Kuril Is
RU +6728+15343 Asia/Srednekolymsk MSK+08 - Sakha (E); N Kuril Is
RU +5301+15839 Asia/Kamchatka MSK+09 - Kamchatka
RU +6445+17729 Asia/Anadyr MSK+09 - Bering Sea
RW -0157+03004 Africa/Kigali
@ -397,7 +396,7 @@ TT +1039-06131 America/Port_of_Spain
TV -0831+17913 Pacific/Funafuti
TW +2503+12130 Asia/Taipei
TZ -0648+03917 Africa/Dar_es_Salaam
UA +5026+03031 Europe/Kyiv Ukraine (most areas)
UA +5026+03031 Europe/Kyiv most of Ukraine
UG +0019+03225 Africa/Kampala
UM +2813-17722 Pacific/Midway Midway Islands
UM +1917+16637 Pacific/Wake Wake Island
@ -420,7 +419,7 @@ US +465042-1012439 America/North_Dakota/New_Salem Central - ND (Morton rural)
US +471551-1014640 America/North_Dakota/Beulah Central - ND (Mercer)
US +394421-1045903 America/Denver Mountain (most areas)
US +433649-1161209 America/Boise Mountain - ID (south); OR (east)
US +332654-1120424 America/Phoenix MST - Arizona (except Navajo)
US +332654-1120424 America/Phoenix MST - AZ (except Navajo)
US +340308-1181434 America/Los_Angeles Pacific
US +611305-1495401 America/Anchorage Alaska (most areas)
US +581807-1342511 America/Juneau Alaska - Juneau area
@ -428,7 +427,7 @@ US +571035-1351807 America/Sitka Alaska - Sitka area
US +550737-1313435 America/Metlakatla Alaska - Annette Island
US +593249-1394338 America/Yakutat Alaska - Yakutat
US +643004-1652423 America/Nome Alaska (west)
US +515248-1763929 America/Adak Aleutian Islands
US +515248-1763929 America/Adak Alaska - western Aleutians
US +211825-1575130 Pacific/Honolulu Hawaii
UY -345433-0561245 America/Montevideo
UZ +3940+06648 Asia/Samarkand Uzbekistan (west)

View file

@ -18,7 +18,10 @@
# Please see the theory.html file for how these names are chosen.
# If multiple timezones overlap a country, each has a row in the
# table, with each column 1 containing the country code.
# 4. Comments; present if and only if a country has multiple timezones.
# 4. Comments; present if and only if countries have multiple timezones,
# and useful only for those countries. For example, the comments
# for the row with countries CH,DE,LI and name Europe/Zurich
# are useful only for DE, since CH and LI have no other timezones.
#
# If a timezone covers multiple countries, the most-populous city is used,
# and that country is listed first in column 1; any other countries
@ -34,7 +37,7 @@
#country-
#codes coordinates TZ comments
AD +4230+00131 Europe/Andorra
AE,OM,RE,SC,TF +2518+05518 Asia/Dubai UAE, Oman, Réunion, Seychelles, Crozet, Scattered Is
AE,OM,RE,SC,TF +2518+05518 Asia/Dubai Crozet, Scattered Is
AF +3431+06912 Asia/Kabul
AL +4120+01950 Europe/Tirane
AM +4011+04430 Asia/Yerevan
@ -45,7 +48,7 @@ AQ -6448-06406 Antarctica/Palmer Palmer
AQ -6734-06808 Antarctica/Rothera Rothera
AQ -720041+0023206 Antarctica/Troll Troll
AR -3436-05827 America/Argentina/Buenos_Aires Buenos Aires (BA, CF)
AR -3124-06411 America/Argentina/Cordoba Argentina (most areas: CB, CC, CN, ER, FM, MN, SE, SF)
AR -3124-06411 America/Argentina/Cordoba most areas: CB, CC, CN, ER, FM, MN, SE, SF
AR -2447-06525 America/Argentina/Salta Salta (SA, LP, NQ, RN)
AR -2411-06518 America/Argentina/Jujuy Jujuy (JY)
AR -2649-06513 America/Argentina/Tucuman Tucumán (TM)
@ -56,7 +59,7 @@ AR -3253-06849 America/Argentina/Mendoza Mendoza (MZ)
AR -3319-06621 America/Argentina/San_Luis San Luis (SL)
AR -5138-06913 America/Argentina/Rio_Gallegos Santa Cruz (SC)
AR -5448-06818 America/Argentina/Ushuaia Tierra del Fuego (TF)
AS,UM -1416-17042 Pacific/Pago_Pago Samoa, Midway
AS,UM -1416-17042 Pacific/Pago_Pago Midway
AT +4813+01620 Europe/Vienna
AU -3133+15905 Australia/Lord_Howe Lord Howe Island
AU -5430+15857 Antarctica/Macquarie Macquarie Island
@ -101,26 +104,25 @@ CA +4439-06336 America/Halifax Atlantic - NS (most areas); PE
CA +4612-05957 America/Glace_Bay Atlantic - NS (Cape Breton)
CA +4606-06447 America/Moncton Atlantic - New Brunswick
CA +5320-06025 America/Goose_Bay Atlantic - Labrador (most areas)
CA,BS +4339-07923 America/Toronto Eastern - ON, QC (most areas), Bahamas
CA,BS +4339-07923 America/Toronto Eastern - ON, QC (most areas)
CA +6344-06828 America/Iqaluit Eastern - NU (most areas)
CA +4953-09709 America/Winnipeg Central - ON (west); Manitoba
CA +744144-0944945 America/Resolute Central - NU (Resolute)
CA +624900-0920459 America/Rankin_Inlet Central - NU (central)
CA +5024-10439 America/Regina CST - SK (most areas)
CA +5017-10750 America/Swift_Current CST - SK (midwest)
CA +5333-11328 America/Edmonton Mountain - AB; BC (E); SK (W)
CA +5333-11328 America/Edmonton Mountain - AB; BC (E); NT (E); SK (W)
CA +690650-1050310 America/Cambridge_Bay Mountain - NU (west)
CA +6227-11421 America/Yellowknife Mountain - NT (central)
CA +682059-1334300 America/Inuvik Mountain - NT (west)
CA +5546-12014 America/Dawson_Creek MST - BC (Dawson Cr, Ft St John)
CA +5848-12242 America/Fort_Nelson MST - BC (Ft Nelson)
CA +6043-13503 America/Whitehorse MST - Yukon (east)
CA +6404-13925 America/Dawson MST - Yukon (west)
CA +4916-12307 America/Vancouver Pacific - BC (most areas)
CH,DE,LI +4723+00832 Europe/Zurich Swiss time
CH,DE,LI +4723+00832 Europe/Zurich Büsingen
CI,BF,GH,GM,GN,IS,ML,MR,SH,SL,SN,TG +0519-00402 Africa/Abidjan
CK -2114-15946 Pacific/Rarotonga
CL -3327-07040 America/Santiago Chile (most areas)
CL -3327-07040 America/Santiago most of Chile
CL -5309-07055 America/Punta_Arenas Region of Magallanes
CL -2709-10926 Pacific/Easter Easter Island
CN +3114+12128 Asia/Shanghai Beijing Time
@ -129,10 +131,10 @@ CO +0436-07405 America/Bogota
CR +0956-08405 America/Costa_Rica
CU +2308-08222 America/Havana
CV +1455-02331 Atlantic/Cape_Verde
CY +3510+03322 Asia/Nicosia Cyprus (most areas)
CY +3510+03322 Asia/Nicosia most of Cyprus
CY +3507+03357 Asia/Famagusta Northern Cyprus
CZ,SK +5005+01426 Europe/Prague
DE,DK,NO,SE,SJ +5230+01322 Europe/Berlin Germany (most areas), Scandinavia
DE,DK,NO,SE,SJ +5230+01322 Europe/Berlin most of Germany
DO +1828-06954 America/Santo_Domingo
DZ +3647+00303 Africa/Algiers
EC -0210-07950 America/Guayaquil Ecuador (mainland)
@ -153,7 +155,7 @@ GB,GG,IM,JE +513030-0000731 Europe/London
GE +4143+04449 Asia/Tbilisi
GF +0456-05220 America/Cayenne
GI +3608-00521 Europe/Gibraltar
GL +6411-05144 America/Nuuk Greenland (most areas)
GL +6411-05144 America/Nuuk most of Greenland
GL +7646-01840 America/Danmarkshavn National Park (east coast)
GL +7029-02158 America/Scoresbysund Scoresbysund/Ittoqqortoormiit
GL +7634-06847 America/Thule Thule/Pituffik
@ -183,12 +185,12 @@ JO +3157+03556 Asia/Amman
JP +353916+1394441 Asia/Tokyo
KE,DJ,ER,ET,KM,MG,SO,TZ,UG,YT -0117+03649 Africa/Nairobi
KG +4254+07436 Asia/Bishkek
KI,MH,TV,UM,WF +0125+17300 Pacific/Tarawa Gilberts, Marshalls, Tuvalu, Wallis & Futuna, Wake
KI,MH,TV,UM,WF +0125+17300 Pacific/Tarawa Gilberts, Marshalls, Wake
KI -0247-17143 Pacific/Kanton Phoenix Islands
KI +0152-15720 Pacific/Kiritimati Line Islands
KP +3901+12545 Asia/Pyongyang
KR +3733+12658 Asia/Seoul
KZ +4315+07657 Asia/Almaty Kazakhstan (most areas)
KZ +4315+07657 Asia/Almaty most of Kazakhstan
KZ +4448+06528 Asia/Qyzylorda Qyzylorda/Kyzylorda/Kzyl-Orda
KZ +5312+06337 Asia/Qostanay Qostanay/Kostanay/Kustanay
KZ +5017+05710 Asia/Aqtobe Aqtöbe/Aktobe
@ -205,14 +207,14 @@ MA +3339-00735 Africa/Casablanca
MD +4700+02850 Europe/Chisinau
MH +0905+16720 Pacific/Kwajalein Kwajalein
MM,CC +1647+09610 Asia/Yangon
MN +4755+10653 Asia/Ulaanbaatar Mongolia (most areas)
MN +4755+10653 Asia/Ulaanbaatar most of Mongolia
MN +4801+09139 Asia/Hovd Bayan-Ölgii, Govi-Altai, Hovd, Uvs, Zavkhan
MN +4804+11430 Asia/Choibalsan Dornod, Sükhbaatar
MO +221150+1133230 Asia/Macau
MQ +1436-06105 America/Martinique
MT +3554+01431 Europe/Malta
MU -2010+05730 Indian/Mauritius
MV,TF +0410+07330 Indian/Maldives Maldives, Kerguelen, St Paul I, Amsterdam I
MV,TF +0410+07330 Indian/Maldives Kerguelen, St Paul I, Amsterdam I
MX +1924-09909 America/Mexico_City Central Mexico
MX +2105-08646 America/Cancun Quintana Roo
MX +2058-08937 America/Merida Campeche, Yucatán
@ -225,7 +227,7 @@ MX +2313-10625 America/Mazatlan Baja California Sur, Nayarit (most areas), Sinal
MX +2048-10515 America/Bahia_Banderas Bahía de Banderas
MX +2904-11058 America/Hermosillo Sonora
MX +3232-11701 America/Tijuana Baja California
MY,BN +0133+11020 Asia/Kuching Sabah, Sarawak, Brunei
MY,BN +0133+11020 Asia/Kuching Sabah, Sarawak
MZ,BI,BW,CD,MW,RW,ZM,ZW -2558+03235 Africa/Maputo Central Africa Time
NA -2234+01706 Africa/Windhoek
NC -2216+16627 Pacific/Noumea
@ -237,7 +239,7 @@ NR -0031+16655 Pacific/Nauru
NU -1901-16955 Pacific/Niue
NZ,AQ -3652+17446 Pacific/Auckland New Zealand time
NZ -4357-17633 Pacific/Chatham Chatham Islands
PA,CA,KY +0858-07932 America/Panama EST - Panama, Cayman, ON (Atikokan), NU (Coral H)
PA,CA,KY +0858-07932 America/Panama EST - ON (Atikokan), NU (Coral H)
PE -1203-07703 America/Lima
PF -1732-14934 Pacific/Tahiti Society Islands
PF -0900-13930 Pacific/Marquesas Marquesas Islands
@ -285,13 +287,13 @@ RU +4310+13156 Asia/Vladivostok MSK+07 - Amur River
RU +643337+1431336 Asia/Ust-Nera MSK+07 - Oymyakonsky
RU +5934+15048 Asia/Magadan MSK+08 - Magadan
RU +4658+14242 Asia/Sakhalin MSK+08 - Sakhalin Island
RU +6728+15343 Asia/Srednekolymsk MSK+08 - Sakha (E); North Kuril Is
RU +6728+15343 Asia/Srednekolymsk MSK+08 - Sakha (E); N Kuril Is
RU +5301+15839 Asia/Kamchatka MSK+09 - Kamchatka
RU +6445+17729 Asia/Anadyr MSK+09 - Bering Sea
SA,AQ,KW,YE +2438+04643 Asia/Riyadh Arabia, Syowa
SB,FM -0932+16012 Pacific/Guadalcanal Solomons, Pohnpei
SA,AQ,KW,YE +2438+04643 Asia/Riyadh Syowa
SB,FM -0932+16012 Pacific/Guadalcanal Pohnpei
SD +1536+03232 Africa/Khartoum
SG,MY +0117+10351 Asia/Singapore Singapore, peninsular Malaysia
SG,MY +0117+10351 Asia/Singapore peninsular Malaysia
SR +0550-05510 America/Paramaribo
SS +0451+03137 Africa/Juba
ST +0020+00644 Africa/Sao_Tome
@ -299,7 +301,7 @@ SV +1342-08912 America/El_Salvador
SY +3330+03618 Asia/Damascus
TC +2128-07108 America/Grand_Turk
TD +1207+01503 Africa/Ndjamena
TH,CX,KH,LA,VN +1345+10031 Asia/Bangkok Indochina (most areas)
TH,CX,KH,LA,VN +1345+10031 Asia/Bangkok north Vietnam
TJ +3835+06848 Asia/Dushanbe
TK -0922-17114 Pacific/Fakaofo
TL -0833+12535 Asia/Dili
@ -308,7 +310,7 @@ TN +3648+01011 Africa/Tunis
TO -210800-1751200 Pacific/Tongatapu
TR +4101+02858 Europe/Istanbul
TW +2503+12130 Asia/Taipei
UA +5026+03031 Europe/Kyiv Ukraine (most areas)
UA +5026+03031 Europe/Kyiv most of Ukraine
US +404251-0740023 America/New_York Eastern (most areas)
US +421953-0830245 America/Detroit Eastern - MI (most areas)
US +381515-0854534 America/Kentucky/Louisville Eastern - KY (Louisville area)
@ -328,7 +330,7 @@ US +465042-1012439 America/North_Dakota/New_Salem Central - ND (Morton rural)
US +471551-1014640 America/North_Dakota/Beulah Central - ND (Mercer)
US +394421-1045903 America/Denver Mountain (most areas)
US +433649-1161209 America/Boise Mountain - ID (south); OR (east)
US,CA +332654-1120424 America/Phoenix MST - Arizona (except Navajo), Creston BC
US,CA +332654-1120424 America/Phoenix MST - AZ (most areas), Creston BC
US +340308-1181434 America/Los_Angeles Pacific
US +611305-1495401 America/Anchorage Alaska (most areas)
US +581807-1342511 America/Juneau Alaska - Juneau area
@ -336,13 +338,13 @@ US +571035-1351807 America/Sitka Alaska - Sitka area
US +550737-1313435 America/Metlakatla Alaska - Annette Island
US +593249-1394338 America/Yakutat Alaska - Yakutat
US +643004-1652423 America/Nome Alaska (west)
US +515248-1763929 America/Adak Aleutian Islands
US,UM +211825-1575130 Pacific/Honolulu Hawaii
US +515248-1763929 America/Adak Alaska - western Aleutians
US +211825-1575130 Pacific/Honolulu Hawaii
UY -345433-0561245 America/Montevideo
UZ +3940+06648 Asia/Samarkand Uzbekistan (west)
UZ +4120+06918 Asia/Tashkent Uzbekistan (east)
VE +1030-06656 America/Caracas
VN +1045+10640 Asia/Ho_Chi_Minh Vietnam (south)
VN +1045+10640 Asia/Ho_Chi_Minh south Vietnam
VU -1740+16825 Pacific/Efate
WS -1350-17144 Pacific/Apia
ZA,LS,SZ -2615+02800 Africa/Johannesburg

View file

@ -243,7 +243,6 @@ America/Iqaluit
America/Resolute
America/Rankin_Inlet
America/Cambridge_Bay
America/Yellowknife
America/Inuvik
America/Whitehorse
America/Dawson
@ -561,6 +560,7 @@ America/Rosario
America/Santa_Isabel
America/Shiprock
America/Thunder_Bay
America/Yellowknife
Antarctica/South_Pole
Asia/Chongqing
Asia/Harbin

View file

@ -1,23 +1,48 @@
"""
Python HTTP library with thread-safe connection pooling, file post support, user friendly, and more
"""
from __future__ import absolute_import
from __future__ import annotations
# Set default logging handler to avoid "No handler found" warnings.
import logging
import typing
import warnings
from logging import NullHandler
from . import exceptions
from ._base_connection import _TYPE_BODY
from ._collections import HTTPHeaderDict
from ._version import __version__
from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool, connection_from_url
from .filepost import encode_multipart_formdata
from .filepost import _TYPE_FIELDS, encode_multipart_formdata
from .poolmanager import PoolManager, ProxyManager, proxy_from_url
from .response import HTTPResponse
from .response import BaseHTTPResponse, HTTPResponse
from .util.request import make_headers
from .util.retry import Retry
from .util.timeout import Timeout
from .util.url import get_host
# Ensure that Python is compiled with OpenSSL 1.1.1+
# If the 'ssl' module isn't available at all that's
# fine, we only care if the module is available.
try:
import ssl
except ImportError:
pass
else:
if not ssl.OPENSSL_VERSION.startswith("OpenSSL "): # Defensive:
warnings.warn(
"urllib3 v2.0 only supports OpenSSL 1.1.1+, currently "
f"the 'ssl' module is compiled with {ssl.OPENSSL_VERSION!r}. "
"See: https://github.com/urllib3/urllib3/issues/3020",
exceptions.NotOpenSSLWarning,
)
elif ssl.OPENSSL_VERSION_INFO < (1, 1, 1): # Defensive:
raise ImportError(
"urllib3 v2.0 only supports OpenSSL 1.1.1+, currently "
f"the 'ssl' module is compiled with {ssl.OPENSSL_VERSION!r}. "
"See: https://github.com/urllib3/urllib3/issues/2168"
)
# === NOTE TO REPACKAGERS AND VENDORS ===
# Please delete this block, this logic is only
@ -25,12 +50,12 @@ from .util.url import get_host
# See: https://github.com/urllib3/urllib3/issues/2680
try:
import urllib3_secure_extra # type: ignore # noqa: F401
except ImportError:
except ModuleNotFoundError:
pass
else:
warnings.warn(
"'urllib3[secure]' extra is deprecated and will be removed "
"in a future release of urllib3 2.x. Read more in this issue: "
"in urllib3 v2.1.0. Read more in this issue: "
"https://github.com/urllib3/urllib3/issues/2680",
category=DeprecationWarning,
stacklevel=2,
@ -42,6 +67,7 @@ __version__ = __version__
__all__ = (
"HTTPConnectionPool",
"HTTPHeaderDict",
"HTTPSConnectionPool",
"PoolManager",
"ProxyManager",
@ -52,15 +78,18 @@ __all__ = (
"connection_from_url",
"disable_warnings",
"encode_multipart_formdata",
"get_host",
"make_headers",
"proxy_from_url",
"request",
"BaseHTTPResponse",
)
logging.getLogger(__name__).addHandler(NullHandler())
def add_stderr_logger(level=logging.DEBUG):
def add_stderr_logger(
level: int = logging.DEBUG,
) -> logging.StreamHandler[typing.TextIO]:
"""
Helper for quickly adding a StreamHandler to the logger. Useful for
debugging.
@ -87,16 +116,51 @@ del NullHandler
# mechanisms to silence them.
# SecurityWarning's always go off by default.
warnings.simplefilter("always", exceptions.SecurityWarning, append=True)
# SubjectAltNameWarning's should go off once per host
warnings.simplefilter("default", exceptions.SubjectAltNameWarning, append=True)
# InsecurePlatformWarning's don't vary between requests, so we keep it default.
warnings.simplefilter("default", exceptions.InsecurePlatformWarning, append=True)
# SNIMissingWarnings should go off only once.
warnings.simplefilter("default", exceptions.SNIMissingWarning, append=True)
def disable_warnings(category=exceptions.HTTPWarning):
def disable_warnings(category: type[Warning] = exceptions.HTTPWarning) -> None:
"""
Helper for quickly disabling all urllib3 warnings.
"""
warnings.simplefilter("ignore", category)
_DEFAULT_POOL = PoolManager()
def request(
    method: str,
    url: str,
    *,
    body: _TYPE_BODY | None = None,
    fields: _TYPE_FIELDS | None = None,
    headers: typing.Mapping[str, str] | None = None,
    preload_content: bool | None = True,
    decode_content: bool | None = True,
    redirect: bool | None = True,
    retries: Retry | bool | int | None = None,
    timeout: Timeout | float | int | None = 3,
    json: typing.Any | None = None,
) -> BaseHTTPResponse:
    """
    Send a request through the module-global ``PoolManager`` instance.

    This is a top-level convenience wrapper. Because every caller goes
    through the same module-level pool, side effects may be shared with any
    other dependency that also uses this function; create and use your own
    ``PoolManager`` when that matters.

    Only the keyword-only parameters listed in the signature are accepted;
    low-level ``**urlopen_kw`` keyword arguments are not supported here.
    """
    # Everything after ``method`` and ``url`` is forwarded unchanged, by name.
    forwarded: dict[str, typing.Any] = {
        "body": body,
        "fields": fields,
        "headers": headers,
        "preload_content": preload_content,
        "decode_content": decode_content,
        "redirect": redirect,
        "retries": retries,
        "timeout": timeout,
        "json": json,
    }
    return _DEFAULT_POOL.request(method, url, **forwarded)

View file

@ -0,0 +1,173 @@
from __future__ import annotations
import typing
from .util.connection import _TYPE_SOCKET_OPTIONS
from .util.timeout import _DEFAULT_TIMEOUT, _TYPE_TIMEOUT
from .util.url import Url
_TYPE_BODY = typing.Union[bytes, typing.IO[typing.Any], typing.Iterable[bytes], str]
class ProxyConfig(typing.NamedTuple):
    # Immutable bundle of proxy-related settings; it is the type of the
    # ``proxy_config`` attribute/parameter on the connection protocols in
    # this module.
    #
    # ``ssl`` and ``Literal`` are only imported under ``typing.TYPE_CHECKING``
    # further down; that works here because this module uses
    # ``from __future__ import annotations``.
    ssl_context: ssl.SSLContext | None
    # NOTE(review): presumably selects forwarding instead of tunnelling for
    # HTTPS requests through the proxy -- confirm against the users of this field.
    use_forwarding_for_https: bool
    assert_hostname: None | str | Literal[False]
    assert_fingerprint: str | None
class _ResponseOptions(typing.NamedTuple):
    # Immutable record grouping the request method and URL with three
    # response-handling flags.
    # TODO: Remove this in favor of a better
    # HTTP request/response lifecycle tracking.
    request_method: str
    request_url: str
    # The three flags below share their names with the keyword-only
    # parameters of ``BaseHTTPConnection.request`` in this module.
    preload_content: bool
    decode_content: bool
    enforce_content_length: bool
# Everything below exists only for static type checkers: the imports and the
# two Protocol classes are never defined when the module runs normally.
if typing.TYPE_CHECKING:
    import ssl

    from typing_extensions import Literal, Protocol

    from .response import BaseHTTPResponse

    class BaseHTTPConnection(Protocol):
        """Structural type listing the attributes and methods of an HTTP connection.

        All method bodies are ``...`` placeholders; this class only declares
        signatures.
        """

        default_port: typing.ClassVar[int]
        default_socket_options: typing.ClassVar[_TYPE_SOCKET_OPTIONS]

        host: str
        port: int
        timeout: None | (
            float
        )  # Instance doesn't store _DEFAULT_TIMEOUT, must be resolved.
        blocksize: int
        source_address: tuple[str, int] | None
        socket_options: _TYPE_SOCKET_OPTIONS | None

        proxy: Url | None
        proxy_config: ProxyConfig | None

        is_verified: bool
        proxy_is_verified: bool | None

        def __init__(
            self,
            host: str,
            port: int | None = None,
            *,
            timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT,
            source_address: tuple[str, int] | None = None,
            blocksize: int = 8192,
            # ``...`` stands in for the default here; the concrete default
            # value is not visible in this protocol.
            socket_options: _TYPE_SOCKET_OPTIONS | None = ...,
            proxy: Url | None = None,
            proxy_config: ProxyConfig | None = None,
        ) -> None:
            ...

        def set_tunnel(
            self,
            host: str,
            port: int | None = None,
            headers: typing.Mapping[str, str] | None = None,
            scheme: str = "http",
        ) -> None:
            ...

        def connect(self) -> None:
            ...

        def request(
            self,
            method: str,
            url: str,
            body: _TYPE_BODY | None = None,
            headers: typing.Mapping[str, str] | None = None,
            # We know *at least* botocore is depending on the order of the
            # first 3 parameters so to be safe we only mark the later ones
            # as keyword-only to ensure we have space to extend.
            *,
            chunked: bool = False,
            preload_content: bool = True,
            decode_content: bool = True,
            enforce_content_length: bool = True,
        ) -> None:
            ...

        def getresponse(self) -> BaseHTTPResponse:
            ...

        def close(self) -> None:
            ...

        @property
        def is_closed(self) -> bool:
            """Whether the connection either is brand new or has been previously closed.
            If this property is True then both ``is_connected`` and ``has_connected_to_proxy``
            properties must be False.
            """

        @property
        def is_connected(self) -> bool:
            """Whether the connection is actively connected to any origin (proxy or target)"""

        @property
        def has_connected_to_proxy(self) -> bool:
            """Whether the connection has successfully connected to its proxy.
            This returns False if no proxy is in use. Used to determine whether
            errors are coming from the proxy layer or from tunnelling to the target origin.
            """

    class BaseHTTPSConnection(BaseHTTPConnection, Protocol):
        """Extension of ``BaseHTTPConnection`` that adds TLS-related attributes.

        The constructor accepts every ``BaseHTTPConnection`` parameter plus
        the certificate, CA, TLS-version and client-certificate options
        declared below, all keyword-only.
        """

        default_port: typing.ClassVar[int]
        default_socket_options: typing.ClassVar[_TYPE_SOCKET_OPTIONS]

        # Certificate verification methods
        cert_reqs: int | str | None
        assert_hostname: None | str | Literal[False]
        assert_fingerprint: str | None
        ssl_context: ssl.SSLContext | None

        # Trusted CAs
        ca_certs: str | None
        ca_cert_dir: str | None
        ca_cert_data: None | str | bytes

        # TLS version
        ssl_minimum_version: int | None
        ssl_maximum_version: int | None
        ssl_version: int | str | None  # Deprecated

        # Client certificates
        cert_file: str | None
        key_file: str | None
        key_password: str | None

        def __init__(
            self,
            host: str,
            port: int | None = None,
            *,
            timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT,
            source_address: tuple[str, int] | None = None,
            blocksize: int = 8192,
            # ``...`` stands in for the default here; the concrete default
            # value is not visible in this protocol.
            socket_options: _TYPE_SOCKET_OPTIONS | None = ...,
            proxy: Url | None = None,
            proxy_config: ProxyConfig | None = None,
            cert_reqs: int | str | None = None,
            assert_hostname: None | str | Literal[False] = None,
            assert_fingerprint: str | None = None,
            # Unlike the other TLS options, ``server_hostname`` has no
            # matching attribute declared on this protocol.
            server_hostname: str | None = None,
            ssl_context: ssl.SSLContext | None = None,
            ca_certs: str | None = None,
            ca_cert_dir: str | None = None,
            ca_cert_data: None | str | bytes = None,
            ssl_minimum_version: int | None = None,
            ssl_maximum_version: int | None = None,
            ssl_version: int | str | None = None,  # Deprecated
            cert_file: str | None = None,
            key_file: str | None = None,
            key_password: str | None = None,
        ) -> None:
            ...

View file

@ -1,34 +1,66 @@
from __future__ import absolute_import
try:
from collections.abc import Mapping, MutableMapping
except ImportError:
from collections import Mapping, MutableMapping
try:
from threading import RLock
except ImportError: # Platform-specific: No threads available
class RLock:
def __enter__(self):
pass
def __exit__(self, exc_type, exc_value, traceback):
pass
from __future__ import annotations
import typing
from collections import OrderedDict
from enum import Enum, auto
from threading import RLock
if typing.TYPE_CHECKING:
# We can only import Protocol if TYPE_CHECKING because it's a development
# dependency, and is not available at runtime.
from typing_extensions import Protocol
class HasGettableStringKeys(Protocol):
def keys(self) -> typing.Iterator[str]:
...
def __getitem__(self, key: str) -> str:
...
from .exceptions import InvalidHeader
from .packages import six
from .packages.six import iterkeys, itervalues
__all__ = ["RecentlyUsedContainer", "HTTPHeaderDict"]
_Null = object()
# Key type
_KT = typing.TypeVar("_KT")
# Value type
_VT = typing.TypeVar("_VT")
# Default type
_DT = typing.TypeVar("_DT")
ValidHTTPHeaderSource = typing.Union[
"HTTPHeaderDict",
typing.Mapping[str, str],
typing.Iterable[typing.Tuple[str, str]],
"HasGettableStringKeys",
]
class RecentlyUsedContainer(MutableMapping):
class _Sentinel(Enum):
not_passed = auto()
def ensure_can_construct_http_header_dict(
    potential: object,
) -> ValidHTTPHeaderSource | None:
    """Return ``potential`` typed as a header source, or ``None`` if it is not one.

    The checks run in order: an ``HTTPHeaderDict``, any ``Mapping``, any
    ``Iterable``, and finally any object exposing both ``keys`` and
    ``__getitem__``. The object itself is always returned unchanged; only
    its static type is narrowed.
    """
    if isinstance(potential, HTTPHeaderDict):
        return potential

    # Fully validating the contents of a Mapping at runtime would be
    # expensive, so any Mapping is assumed to have the right shape.
    if isinstance(potential, typing.Mapping):
        return typing.cast(typing.Mapping[str, str], potential)

    # The same trade-off applies to Iterables: the element shape is assumed
    # rather than checked.
    if isinstance(potential, typing.Iterable):
        return typing.cast(typing.Iterable[typing.Tuple[str, str]], potential)

    quacks_like_mapping = hasattr(potential, "keys") and hasattr(
        potential, "__getitem__"
    )
    if quacks_like_mapping:
        return typing.cast("HasGettableStringKeys", potential)

    return None
class RecentlyUsedContainer(typing.Generic[_KT, _VT], typing.MutableMapping[_KT, _VT]):
"""
Provides a thread-safe dict-like container which maintains up to
``maxsize`` keys while throwing away the least-recently-used keys beyond
@ -42,69 +74,134 @@ class RecentlyUsedContainer(MutableMapping):
``dispose_func(value)`` is called. Callback which will get called
"""
ContainerCls = OrderedDict
_container: typing.OrderedDict[_KT, _VT]
_maxsize: int
dispose_func: typing.Callable[[_VT], None] | None
lock: RLock
def __init__(self, maxsize=10, dispose_func=None):
def __init__(
self,
maxsize: int = 10,
dispose_func: typing.Callable[[_VT], None] | None = None,
) -> None:
super().__init__()
self._maxsize = maxsize
self.dispose_func = dispose_func
self._container = self.ContainerCls()
self._container = OrderedDict()
self.lock = RLock()
def __getitem__(self, key):
def __getitem__(self, key: _KT) -> _VT:
# Re-insert the item, moving it to the end of the eviction line.
with self.lock:
item = self._container.pop(key)
self._container[key] = item
return item
def __setitem__(self, key, value):
evicted_value = _Null
def __setitem__(self, key: _KT, value: _VT) -> None:
evicted_item = None
with self.lock:
# Possibly evict the existing value of 'key'
evicted_value = self._container.get(key, _Null)
try:
# If the key exists, we'll overwrite it, which won't change the
# size of the pool. Because accessing a key should move it to
# the end of the eviction line, we pop it out first.
evicted_item = key, self._container.pop(key)
self._container[key] = value
except KeyError:
# When the key does not exist, we insert the value first so that
# evicting works in all cases, including when self._maxsize is 0
self._container[key] = value
# If we didn't evict an existing value, we might have to evict the
# least recently used item from the beginning of the container.
if len(self._container) > self._maxsize:
_key, evicted_value = self._container.popitem(last=False)
# If we didn't evict an existing value, and we've hit our maximum
# size, then we have to evict the least recently used item from
# the beginning of the container.
evicted_item = self._container.popitem(last=False)
if self.dispose_func and evicted_value is not _Null:
# After releasing the lock on the pool, dispose of any evicted value.
if evicted_item is not None and self.dispose_func:
_, evicted_value = evicted_item
self.dispose_func(evicted_value)
def __delitem__(self, key):
def __delitem__(self, key: _KT) -> None:
with self.lock:
value = self._container.pop(key)
if self.dispose_func:
self.dispose_func(value)
def __len__(self):
def __len__(self) -> int:
with self.lock:
return len(self._container)
def __iter__(self):
def __iter__(self) -> typing.NoReturn:
raise NotImplementedError(
"Iteration over this class is unlikely to be threadsafe."
)
def clear(self):
def clear(self) -> None:
with self.lock:
# Copy pointers to all values, then wipe the mapping
values = list(itervalues(self._container))
values = list(self._container.values())
self._container.clear()
if self.dispose_func:
for value in values:
self.dispose_func(value)
def keys(self):
def keys(self) -> set[_KT]: # type: ignore[override]
with self.lock:
return list(iterkeys(self._container))
return set(self._container.keys())
class HTTPHeaderDict(MutableMapping):
class HTTPHeaderDictItemView(typing.Set[typing.Tuple[str, str]]):
    """
    Items view returned for an HTTPHeaderDict.

    An HTTPHeaderDict can be read in two ways. Looking a name up directly
    yields one string with every value for that name joined together:

    >>> d['X-Header-Name']
    'Value1, Value2, Value3'

    Iterating over the items instead yields one pair per stored value, so a
    name may appear several times. Values are only merged into one pair when
    they were added with ``combine=True``:

    >>> d = HTTPHeaderDict({"A": "1", "B": "foo"})
    >>> d.add("A", "2", combine=True)
    >>> d.add("B", "bar")
    >>> list(d.items())
    [
        ('A', '1, 2'),
        ('B', 'foo'),
        ('B', 'bar'),
    ]

    This view supplies that non-standard iteration (duplicate keys, ordered
    by first insertion) while still offering the length, iteration and
    membership operations expected of an items view.
    """

    _headers: HTTPHeaderDict

    def __init__(self, headers: HTTPHeaderDict) -> None:
        self._headers = headers

    def __len__(self) -> int:
        # Count every (name, value) pair, duplicates included.
        return sum(1 for _ in self._headers.iteritems())

    def __iter__(self) -> typing.Iterator[tuple[str, str]]:
        return self._headers.iteritems()

    def __contains__(self, item: object) -> bool:
        # Only a 2-tuple of strings can possibly be a header item.
        if not (isinstance(item, tuple) and len(item) == 2):
            return False
        name, value = item
        if not (isinstance(name, str) and isinstance(value, str)):
            return False
        return self._headers._has_value_for_header(name, value)
class HTTPHeaderDict(typing.MutableMapping[str, str]):
"""
:param headers:
An iterable of field-value pairs. Must not contain multiple field names
@ -138,9 +235,11 @@ class HTTPHeaderDict(MutableMapping):
'7'
"""
def __init__(self, headers=None, **kwargs):
super(HTTPHeaderDict, self).__init__()
self._container = OrderedDict()
_container: typing.MutableMapping[str, list[str]]
def __init__(self, headers: ValidHTTPHeaderSource | None = None, **kwargs: str):
super().__init__()
self._container = {} # 'dict' is insert-ordered in Python 3.7+
if headers is not None:
if isinstance(headers, HTTPHeaderDict):
self._copy_from(headers)
@ -149,123 +248,147 @@ class HTTPHeaderDict(MutableMapping):
if kwargs:
self.extend(kwargs)
def __setitem__(self, key, val):
def __setitem__(self, key: str, val: str) -> None:
# avoid a bytes/str comparison by decoding before httplib
if isinstance(key, bytes):
key = key.decode("latin-1")
self._container[key.lower()] = [key, val]
return self._container[key.lower()]
def __getitem__(self, key):
def __getitem__(self, key: str) -> str:
val = self._container[key.lower()]
return ", ".join(val[1:])
def __delitem__(self, key):
def __delitem__(self, key: str) -> None:
del self._container[key.lower()]
def __contains__(self, key):
def __contains__(self, key: object) -> bool:
if isinstance(key, str):
return key.lower() in self._container
def __eq__(self, other):
if not isinstance(other, Mapping) and not hasattr(other, "keys"):
return False
if not isinstance(other, type(self)):
other = type(self)(other)
return dict((k.lower(), v) for k, v in self.itermerged()) == dict(
(k.lower(), v) for k, v in other.itermerged()
)
def __ne__(self, other):
def setdefault(self, key: str, default: str = "") -> str:
return super().setdefault(key, default)
def __eq__(self, other: object) -> bool:
maybe_constructable = ensure_can_construct_http_header_dict(other)
if maybe_constructable is None:
return False
else:
other_as_http_header_dict = type(self)(maybe_constructable)
return {k.lower(): v for k, v in self.itermerged()} == {
k.lower(): v for k, v in other_as_http_header_dict.itermerged()
}
def __ne__(self, other: object) -> bool:
return not self.__eq__(other)
if six.PY2: # Python 2
iterkeys = MutableMapping.iterkeys
itervalues = MutableMapping.itervalues
__marker = object()
def __len__(self):
def __len__(self) -> int:
return len(self._container)
def __iter__(self):
def __iter__(self) -> typing.Iterator[str]:
# Only provide the originally cased names
for vals in self._container.values():
yield vals[0]
def pop(self, key, default=__marker):
"""D.pop(k[,d]) -> v, remove specified key and return the corresponding value.
If key is not found, d is returned if given, otherwise KeyError is raised.
"""
# Using the MutableMapping function directly fails due to the private marker.
# Using ordinary dict.pop would expose the internal structures.
# So let's reinvent the wheel.
try:
value = self[key]
except KeyError:
if default is self.__marker:
raise
return default
else:
del self[key]
return value
def discard(self, key):
def discard(self, key: str) -> None:
try:
del self[key]
except KeyError:
pass
def add(self, key, val):
def add(self, key: str, val: str, *, combine: bool = False) -> None:
"""Adds a (name, value) pair, doesn't overwrite the value if it already
exists.
If this is called with combine=True, instead of adding a new header value
as a distinct item during iteration, this will instead append the value to
any existing header value with a comma. If no existing header value exists
for the key, then the value will simply be added, ignoring the combine parameter.
>>> headers = HTTPHeaderDict(foo='bar')
>>> headers.add('Foo', 'baz')
>>> headers['foo']
'bar, baz'
>>> list(headers.items())
[('foo', 'bar'), ('foo', 'baz')]
>>> headers.add('foo', 'quz', combine=True)
>>> list(headers.items())
[('foo', 'bar, baz, quz')]
"""
# avoid a bytes/str comparison by decoding before httplib
if isinstance(key, bytes):
key = key.decode("latin-1")
key_lower = key.lower()
new_vals = [key, val]
# Keep the common case aka no item present as fast as possible
vals = self._container.setdefault(key_lower, new_vals)
if new_vals is not vals:
# if there are values here, then there is at least the initial
# key/value pair
assert len(vals) >= 2
if combine:
vals[-1] = vals[-1] + ", " + val
else:
vals.append(val)
def extend(self, *args, **kwargs):
def extend(self, *args: ValidHTTPHeaderSource, **kwargs: str) -> None:
"""Generic import function for any type of header-like object.
Adapted version of MutableMapping.update in order to insert items
with self.add instead of self.__setitem__
"""
if len(args) > 1:
raise TypeError(
"extend() takes at most 1 positional "
"arguments ({0} given)".format(len(args))
f"extend() takes at most 1 positional arguments ({len(args)} given)"
)
other = args[0] if len(args) >= 1 else ()
if isinstance(other, HTTPHeaderDict):
for key, val in other.iteritems():
self.add(key, val)
elif isinstance(other, Mapping):
for key in other:
self.add(key, other[key])
elif hasattr(other, "keys"):
for key in other.keys():
self.add(key, other[key])
else:
elif isinstance(other, typing.Mapping):
for key, val in other.items():
self.add(key, val)
elif isinstance(other, typing.Iterable):
other = typing.cast(typing.Iterable[typing.Tuple[str, str]], other)
for key, value in other:
self.add(key, value)
elif hasattr(other, "keys") and hasattr(other, "__getitem__"):
# THIS IS NOT A TYPESAFE BRANCH
# In this branch, the object has a `keys` attr but is not a Mapping or any of
# the other types indicated in the method signature. We do some stuff with
# it as though it partially implements the Mapping interface, but we're not
# doing that stuff safely AT ALL.
for key in other.keys():
self.add(key, other[key])
for key, value in kwargs.items():
self.add(key, value)
def getlist(self, key, default=__marker):
@typing.overload
def getlist(self, key: str) -> list[str]:
...
@typing.overload
def getlist(self, key: str, default: _DT) -> list[str] | _DT:
...
def getlist(
self, key: str, default: _Sentinel | _DT = _Sentinel.not_passed
) -> list[str] | _DT:
"""Returns a list of all the values for the named field. Returns an
empty list if the key doesn't exist."""
try:
vals = self._container[key.lower()]
except KeyError:
if default is self.__marker:
if default is _Sentinel.not_passed:
# _DT is unbound; empty list is instance of List[str]
return []
# _DT is bound; default is instance of _DT
return default
else:
# _DT may or may not be bound; vals[1:] is instance of List[str], which
# meets our external interface requirement of `Union[List[str], _DT]`.
return vals[1:]
# Backwards compatibility for httplib
@ -276,62 +399,65 @@ class HTTPHeaderDict(MutableMapping):
# Backwards compatibility for http.cookiejar
get_all = getlist
def __repr__(self):
return "%s(%s)" % (type(self).__name__, dict(self.itermerged()))
def __repr__(self) -> str:
return f"{type(self).__name__}({dict(self.itermerged())})"
def _copy_from(self, other):
def _copy_from(self, other: HTTPHeaderDict) -> None:
for key in other:
val = other.getlist(key)
if isinstance(val, list):
# Don't need to convert tuples
val = list(val)
self._container[key.lower()] = [key] + val
self._container[key.lower()] = [key, *val]
def copy(self):
def copy(self) -> HTTPHeaderDict:
clone = type(self)()
clone._copy_from(self)
return clone
def iteritems(self):
def iteritems(self) -> typing.Iterator[tuple[str, str]]:
"""Iterate over all header lines, including duplicate ones."""
for key in self:
vals = self._container[key.lower()]
for val in vals[1:]:
yield vals[0], val
def itermerged(self):
def itermerged(self) -> typing.Iterator[tuple[str, str]]:
"""Iterate over all headers, merging duplicate ones together."""
for key in self:
val = self._container[key.lower()]
yield val[0], ", ".join(val[1:])
def items(self):
return list(self.iteritems())
def items(self) -> HTTPHeaderDictItemView: # type: ignore[override]
return HTTPHeaderDictItemView(self)
@classmethod
def from_httplib(cls, message): # Python 2
"""Read headers from a Python 2 httplib message object."""
# python2.7 does not expose a proper API for exporting multiheaders
# efficiently. This function re-reads raw lines from the message
# object and extracts the multiheaders properly.
obs_fold_continued_leaders = (" ", "\t")
headers = []
def _has_value_for_header(self, header_name: str, potential_value: str) -> bool:
if header_name in self:
return potential_value in self._container[header_name.lower()][1:]
return False
for line in message.headers:
if line.startswith(obs_fold_continued_leaders):
if not headers:
# We received a header line that starts with OWS as described
# in RFC-7230 S3.2.4. This indicates a multiline header, but
# there exists no previous header to which we can attach it.
raise InvalidHeader(
"Header continuation with no previous header: %s" % line
)
else:
key, value = headers[-1]
headers[-1] = (key, value + " " + line.strip())
continue
def __ior__(self, other: object) -> HTTPHeaderDict:
    """Extend this header dict in place for ``self |= other``.

    Items are merged with ``add`` semantics (via ``extend``) rather than
    ``__setitem__``. Returns ``NotImplemented`` when ``other`` is not a
    usable header source.
    """
    source = ensure_can_construct_http_header_dict(other)
    if source is not None:
        self.extend(source)
        return self
    return NotImplemented
key, value = line.split(":", 1)
headers.append((key, value.strip()))
def __or__(self, other: object) -> HTTPHeaderDict:
    """Return a new header dict for ``self | other``.

    A copy of ``self`` is extended with ``other`` using ``add`` semantics
    rather than ``__setitem__``; ``self`` is left untouched. Returns
    ``NotImplemented`` when ``other`` is not a usable header source.
    """
    source = ensure_can_construct_http_header_dict(other)
    if source is not None:
        merged = self.copy()
        merged.extend(source)
        return merged
    return NotImplemented
return cls(headers)
def __ror__(self, other: object) -> HTTPHeaderDict:
    """Return a new header dict for ``other | self`` (``self`` on the right).

    The result is built from ``other`` first and then extended with
    ``self`` using ``add`` semantics rather than ``__setitem__``. Returns
    ``NotImplemented`` when ``other`` is not a usable header source.
    """
    source = ensure_can_construct_http_header_dict(other)
    if source is not None:
        merged = type(self)(source)
        merged.extend(self)
        return merged
    return NotImplemented

View file

@ -1,12 +1,23 @@
from __future__ import absolute_import
from __future__ import annotations
from .filepost import encode_multipart_formdata
from .packages.six.moves.urllib.parse import urlencode
import json as _json
import typing
from urllib.parse import urlencode
from ._base_connection import _TYPE_BODY
from ._collections import HTTPHeaderDict
from .filepost import _TYPE_FIELDS, encode_multipart_formdata
from .response import BaseHTTPResponse
__all__ = ["RequestMethods"]
_TYPE_ENCODE_URL_FIELDS = typing.Union[
typing.Sequence[typing.Tuple[str, typing.Union[str, bytes]]],
typing.Mapping[str, typing.Union[str, bytes]],
]
class RequestMethods(object):
class RequestMethods:
"""
Convenience mixin for classes who implement a :meth:`urlopen` method, such
as :class:`urllib3.HTTPConnectionPool` and
@ -37,25 +48,34 @@ class RequestMethods(object):
_encode_url_methods = {"DELETE", "GET", "HEAD", "OPTIONS"}
def __init__(self, headers=None):
def __init__(self, headers: typing.Mapping[str, str] | None = None) -> None:
self.headers = headers or {}
def urlopen(
self,
method,
url,
body=None,
headers=None,
encode_multipart=True,
multipart_boundary=None,
**kw
): # Abstract
method: str,
url: str,
body: _TYPE_BODY | None = None,
headers: typing.Mapping[str, str] | None = None,
encode_multipart: bool = True,
multipart_boundary: str | None = None,
**kw: typing.Any,
) -> BaseHTTPResponse: # Abstract
raise NotImplementedError(
"Classes extending RequestMethods must implement "
"their own ``urlopen`` method."
)
def request(self, method, url, fields=None, headers=None, **urlopen_kw):
def request(
self,
method: str,
url: str,
body: _TYPE_BODY | None = None,
fields: _TYPE_FIELDS | None = None,
headers: typing.Mapping[str, str] | None = None,
json: typing.Any | None = None,
**urlopen_kw: typing.Any,
) -> BaseHTTPResponse:
"""
Make a request using :meth:`urlopen` with the appropriate encoding of
``fields`` based on the ``method`` used.
@ -68,18 +88,45 @@ class RequestMethods(object):
"""
method = method.upper()
urlopen_kw["request_url"] = url
if json is not None and body is not None:
raise TypeError(
"request got values for both 'body' and 'json' parameters which are mutually exclusive"
)
if json is not None:
if headers is None:
headers = self.headers.copy() # type: ignore
if not ("content-type" in map(str.lower, headers.keys())):
headers["Content-Type"] = "application/json" # type: ignore
body = _json.dumps(json, separators=(",", ":"), ensure_ascii=False).encode(
"utf-8"
)
if body is not None:
urlopen_kw["body"] = body
if method in self._encode_url_methods:
return self.request_encode_url(
method, url, fields=fields, headers=headers, **urlopen_kw
method,
url,
fields=fields, # type: ignore[arg-type]
headers=headers,
**urlopen_kw,
)
else:
return self.request_encode_body(
method, url, fields=fields, headers=headers, **urlopen_kw
)
def request_encode_url(self, method, url, fields=None, headers=None, **urlopen_kw):
def request_encode_url(
self,
method: str,
url: str,
fields: _TYPE_ENCODE_URL_FIELDS | None = None,
headers: typing.Mapping[str, str] | None = None,
**urlopen_kw: str,
) -> BaseHTTPResponse:
"""
Make a request using :meth:`urlopen` with the ``fields`` encoded in
the url. This is useful for request methods like GET, HEAD, DELETE, etc.
@ -87,7 +134,7 @@ class RequestMethods(object):
if headers is None:
headers = self.headers
extra_kw = {"headers": headers}
extra_kw: dict[str, typing.Any] = {"headers": headers}
extra_kw.update(urlopen_kw)
if fields:
@ -97,14 +144,14 @@ class RequestMethods(object):
def request_encode_body(
self,
method,
url,
fields=None,
headers=None,
encode_multipart=True,
multipart_boundary=None,
**urlopen_kw
):
method: str,
url: str,
fields: _TYPE_FIELDS | None = None,
headers: typing.Mapping[str, str] | None = None,
encode_multipart: bool = True,
multipart_boundary: str | None = None,
**urlopen_kw: str,
) -> BaseHTTPResponse:
"""
Make a request using :meth:`urlopen` with the ``fields`` encoded in
the body. This is useful for request methods like POST, PUT, PATCH, etc.
@ -143,7 +190,8 @@ class RequestMethods(object):
if headers is None:
headers = self.headers
extra_kw = {"headers": {}}
extra_kw: dict[str, typing.Any] = {"headers": HTTPHeaderDict(headers)}
body: bytes | str
if fields:
if "body" in urlopen_kw:
@ -157,14 +205,13 @@ class RequestMethods(object):
)
else:
body, content_type = (
urlencode(fields),
urlencode(fields), # type: ignore[arg-type]
"application/x-www-form-urlencoded",
)
extra_kw["body"] = body
extra_kw["headers"] = {"Content-Type": content_type}
extra_kw["headers"].setdefault("Content-Type", content_type)
extra_kw["headers"].update(headers)
extra_kw.update(urlopen_kw)
return self.urlopen(method, url, **extra_kw)

View file

@ -1,2 +1,4 @@
# This file is protected via CODEOWNERS
__version__ = "1.26.15"
from __future__ import annotations
__version__ = "2.0.4"

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,36 +0,0 @@
"""
This module provides means to detect the App Engine environment.
"""
import os
def is_appengine():
    """Report whether either the local or the production App Engine check passes."""
    local = is_local_appengine()
    # Same short-circuit as ``or``: the production check only runs when the
    # local one is falsy.
    return local if local else is_prod_appengine()
def is_appengine_sandbox():
    """Reports if the app is running in the first generation sandbox.

    Second generation runtimes are technically still sandboxed, but far
    less restrictively, so there is generally no need to check for them.
    see https://cloud.google.com/appengine/docs/standard/runtimes
    """
    on_appengine = is_appengine()
    if not on_appengine:
        # Hand back the falsy result itself, exactly as ``and`` would.
        return on_appengine
    return os.environ["APPENGINE_RUNTIME"] == "python27"
def is_local_appengine():
    """Report whether APPENGINE_RUNTIME is set and SERVER_SOFTWARE starts with "Development/"."""
    if "APPENGINE_RUNTIME" not in os.environ:
        return False
    server_software = os.environ.get("SERVER_SOFTWARE", "")
    return server_software.startswith("Development/")
def is_prod_appengine():
    """Report whether APPENGINE_RUNTIME is set and SERVER_SOFTWARE starts with "Google App Engine/"."""
    if "APPENGINE_RUNTIME" not in os.environ:
        return False
    server_software = os.environ.get("SERVER_SOFTWARE", "")
    return server_software.startswith("Google App Engine/")
def is_prod_appengine_mvms():
    """Deprecated.

    Always returns ``False``.
    """
    return False

View file

@ -1,3 +1,5 @@
# type: ignore
"""
This module uses ctypes to bind a whole bunch of functions and constants from
SecureTransport. The goal here is to provide the low-level API to
@ -29,7 +31,8 @@ license and by oscrypto's:
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
"""
from __future__ import absolute_import
from __future__ import annotations
import platform
from ctypes import (
@ -48,8 +51,6 @@ from ctypes import (
)
from ctypes.util import find_library
from ...packages.six import raise_from
if platform.system() != "Darwin":
raise ImportError("Only macOS is supported")
@ -57,16 +58,16 @@ version = platform.mac_ver()[0]
version_info = tuple(map(int, version.split(".")))
if version_info < (10, 8):
raise OSError(
"Only OS X 10.8 and newer are supported, not %s.%s"
% (version_info[0], version_info[1])
f"Only OS X 10.8 and newer are supported, not {version_info[0]}.{version_info[1]}"
)
def load_cdll(name, macos10_16_path):
def load_cdll(name: str, macos10_16_path: str) -> CDLL:
"""Loads a CDLL by name, falling back to known path on 10.16+"""
try:
# Big Sur is technically 11 but we use 10.16 due to the Big Sur
# beta being labeled as 10.16.
path: str | None
if version_info >= (10, 16):
path = macos10_16_path
else:
@ -75,7 +76,7 @@ def load_cdll(name, macos10_16_path):
raise OSError # Caught and reraised as 'ImportError'
return CDLL(path, use_errno=True)
except OSError:
raise_from(ImportError("The library %s failed to load" % name), None)
raise ImportError(f"The library {name} failed to load") from None
Security = load_cdll(
@ -416,104 +417,14 @@ try:
CoreFoundation.CFStringRef = CFStringRef
CoreFoundation.CFDictionaryRef = CFDictionaryRef
except (AttributeError):
raise ImportError("Error initializing ctypes")
except AttributeError:
raise ImportError("Error initializing ctypes") from None
class CFConst(object):
class CFConst:
"""
A class object that acts as essentially a namespace for CoreFoundation
constants.
"""
kCFStringEncodingUTF8 = CFStringEncoding(0x08000100)
class SecurityConst(object):
    """
    Namespace class holding the integer constants used with the Security
    framework bindings. It is never instantiated; values are read as class
    attributes (e.g. ``SecurityConst.errSSLWouldBlock``).
    """

    # --- Session options, protocol identifiers and connection setup ---
    kSSLSessionOptionBreakOnServerAuth = 0

    kSSLProtocol2 = 1
    kSSLProtocol3 = 2
    kTLSProtocol1 = 4
    kTLSProtocol11 = 7
    kTLSProtocol12 = 8
    # SecureTransport does not support TLS 1.3 even if there's a constant for it
    kTLSProtocol13 = 10
    kTLSProtocolMaxSupported = 999

    kSSLClientSide = 1
    kSSLStreamType = 0

    kSecFormatPEMSequence = 10

    # --- Trust evaluation results ---
    kSecTrustResultInvalid = 0
    kSecTrustResultProceed = 1
    # This gap is present on purpose: this was kSecTrustResultConfirm, which
    # is deprecated.
    kSecTrustResultDeny = 3
    kSecTrustResultUnspecified = 4
    kSecTrustResultRecoverableTrustFailure = 5
    kSecTrustResultFatalTrustFailure = 6
    kSecTrustResultOtherError = 7

    # --- errSSL* status codes ---
    errSSLProtocol = -9800
    errSSLWouldBlock = -9803
    errSSLClosedGraceful = -9805
    errSSLClosedAbort = -9806
    errSSLXCertChainInvalid = -9807
    errSSLCrypto = -9809
    errSSLInternal = -9810
    errSSLUnknownRootCert = -9812
    errSSLNoRootCert = -9813
    errSSLCertExpired = -9814
    errSSLCertNotYetValid = -9815
    errSSLClosedNoNotify = -9816
    errSSLPeerHandshakeFail = -9824
    errSSLPeerUserCancelled = -9839
    errSSLServerAuthCompleted = -9841
    errSSLHostNameMismatch = -9843
    errSSLRecordOverflow = -9847
    errSSLWeakPeerEphemeralDHKey = -9850

    # --- errSec* status codes ---
    errSecVerifyFailed = -67808
    errSecNoTrustSettings = -25263
    errSecItemNotFound = -25300
    errSecInvalidTrustSettings = -25262

    # --- Cipher suites ---
    # We only pick the ones our default cipher string allows.
    # Source: https://developer.apple.com/documentation/security/1550981-ssl_cipher_suite_values
    TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 = 0xC02C
    TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 = 0xC030
    TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 = 0xC02B
    TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 = 0xC02F
    TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256 = 0xCCA9
    TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256 = 0xCCA8
    TLS_DHE_RSA_WITH_AES_256_GCM_SHA384 = 0x009F
    TLS_DHE_RSA_WITH_AES_128_GCM_SHA256 = 0x009E
    TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384 = 0xC024
    TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384 = 0xC028
    TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA = 0xC00A
    TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA = 0xC014
    TLS_DHE_RSA_WITH_AES_256_CBC_SHA256 = 0x006B
    TLS_DHE_RSA_WITH_AES_256_CBC_SHA = 0x0039
    TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256 = 0xC023
    TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 = 0xC027
    TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA = 0xC009
    TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA = 0xC013
    TLS_DHE_RSA_WITH_AES_128_CBC_SHA256 = 0x0067
    TLS_DHE_RSA_WITH_AES_128_CBC_SHA = 0x0033
    TLS_RSA_WITH_AES_256_GCM_SHA384 = 0x009D
    TLS_RSA_WITH_AES_128_GCM_SHA256 = 0x009C
    TLS_RSA_WITH_AES_256_CBC_SHA256 = 0x003D
    TLS_RSA_WITH_AES_128_CBC_SHA256 = 0x003C
    TLS_RSA_WITH_AES_256_CBC_SHA = 0x0035
    TLS_RSA_WITH_AES_128_CBC_SHA = 0x002F
    TLS_AES_128_GCM_SHA256 = 0x1301
    TLS_AES_256_GCM_SHA384 = 0x1302
    TLS_AES_128_CCM_8_SHA256 = 0x1305
    TLS_AES_128_CCM_SHA256 = 0x1304

View file

@ -7,6 +7,8 @@ CoreFoundation messing about and memory management. The concerns in this module
are almost entirely about trying to avoid memory leaks and providing
appropriate and useful assistance to the higher-level code.
"""
from __future__ import annotations
import base64
import ctypes
import itertools
@ -15,8 +17,20 @@ import re
import ssl
import struct
import tempfile
import typing
from .bindings import CFConst, CoreFoundation, Security
from .bindings import ( # type: ignore[attr-defined]
CFArray,
CFConst,
CFData,
CFDictionary,
CFMutableArray,
CFString,
CFTypeRef,
CoreFoundation,
SecKeychainRef,
Security,
)
# This regular expression is used to grab PEM data out of a PEM bundle.
_PEM_CERTS_RE = re.compile(
@ -24,7 +38,7 @@ _PEM_CERTS_RE = re.compile(
)
def _cf_data_from_bytes(bytestring):
def _cf_data_from_bytes(bytestring: bytes) -> CFData:
"""
Given a bytestring, create a CFData object from it. This CFData object must
be CFReleased by the caller.
@ -34,7 +48,9 @@ def _cf_data_from_bytes(bytestring):
)
def _cf_dictionary_from_tuples(tuples):
def _cf_dictionary_from_tuples(
tuples: list[tuple[typing.Any, typing.Any]]
) -> CFDictionary:
"""
Given a list of Python tuples, create an associated CFDictionary.
"""
@ -56,7 +72,7 @@ def _cf_dictionary_from_tuples(tuples):
)
def _cfstr(py_bstr):
def _cfstr(py_bstr: bytes) -> CFString:
"""
Given a Python binary data, create a CFString.
The string must be CFReleased by the caller.
@ -70,7 +86,7 @@ def _cfstr(py_bstr):
return cf_str
def _create_cfstring_array(lst):
def _create_cfstring_array(lst: list[bytes]) -> CFMutableArray:
"""
Given a list of Python binary data, create an associated CFMutableArray.
The array must be CFReleased by the caller.
@ -97,11 +113,11 @@ def _create_cfstring_array(lst):
except BaseException as e:
if cf_arr:
CoreFoundation.CFRelease(cf_arr)
raise ssl.SSLError("Unable to allocate array: %s" % (e,))
raise ssl.SSLError(f"Unable to allocate array: {e}") from None
return cf_arr
def _cf_string_to_unicode(value):
def _cf_string_to_unicode(value: CFString) -> str | None:
"""
Creates a Unicode string from a CFString object. Used entirely for error
reporting.
@ -123,10 +139,12 @@ def _cf_string_to_unicode(value):
string = buffer.value
if string is not None:
string = string.decode("utf-8")
return string
return string # type: ignore[no-any-return]
def _assert_no_error(error, exception_class=None):
def _assert_no_error(
error: int, exception_class: type[BaseException] | None = None
) -> None:
"""
Checks the return code and throws an exception if there is an error to
report
@ -138,8 +156,8 @@ def _assert_no_error(error, exception_class=None):
output = _cf_string_to_unicode(cf_error_string)
CoreFoundation.CFRelease(cf_error_string)
if output is None or output == u"":
output = u"OSStatus %s" % error
if output is None or output == "":
output = f"OSStatus {error}"
if exception_class is None:
exception_class = ssl.SSLError
@ -147,7 +165,7 @@ def _assert_no_error(error, exception_class=None):
raise exception_class(output)
def _cert_array_from_pem(pem_bundle):
def _cert_array_from_pem(pem_bundle: bytes) -> CFArray:
"""
Given a bundle of certs in PEM format, turns them into a CFArray of certs
that can be used to validate a cert chain.
@ -193,23 +211,23 @@ def _cert_array_from_pem(pem_bundle):
return cert_array
def _is_cert(item):
def _is_cert(item: CFTypeRef) -> bool:
"""
Returns True if a given CFTypeRef is a certificate.
"""
expected = Security.SecCertificateGetTypeID()
return CoreFoundation.CFGetTypeID(item) == expected
return CoreFoundation.CFGetTypeID(item) == expected # type: ignore[no-any-return]
def _is_identity(item):
def _is_identity(item: CFTypeRef) -> bool:
"""
Returns True if a given CFTypeRef is an identity.
"""
expected = Security.SecIdentityGetTypeID()
return CoreFoundation.CFGetTypeID(item) == expected
return CoreFoundation.CFGetTypeID(item) == expected # type: ignore[no-any-return]
def _temporary_keychain():
def _temporary_keychain() -> tuple[SecKeychainRef, str]:
"""
This function creates a temporary Mac keychain that we can use to work with
credentials. This keychain uses a one-time password and a temporary file to
@ -244,7 +262,9 @@ def _temporary_keychain():
return keychain, tempdirectory
def _load_items_from_file(keychain, path):
def _load_items_from_file(
keychain: SecKeychainRef, path: str
) -> tuple[list[CFTypeRef], list[CFTypeRef]]:
"""
Given a single file, loads all the trust objects from it into arrays and
the keychain.
@ -299,7 +319,7 @@ def _load_items_from_file(keychain, path):
return (identities, certificates)
def _load_client_cert_chain(keychain, *paths):
def _load_client_cert_chain(keychain: SecKeychainRef, *paths: str | None) -> CFArray:
"""
Load certificates and maybe keys from a number of files. Has the end goal
of returning a CFArray containing one SecIdentityRef, and then zero or more
@ -335,10 +355,10 @@ def _load_client_cert_chain(keychain, *paths):
identities = []
# Filter out bad paths.
paths = (path for path in paths if path)
filtered_paths = (path for path in paths if path)
try:
for file_path in paths:
for file_path in filtered_paths:
new_identities, new_certs = _load_items_from_file(keychain, file_path)
identities.extend(new_identities)
certificates.extend(new_certs)
@ -383,7 +403,7 @@ TLS_PROTOCOL_VERSIONS = {
}
def _build_tls_unknown_ca_alert(version):
def _build_tls_unknown_ca_alert(version: str) -> bytes:
"""
Builds a TLS alert record for an unknown CA.
"""
@ -395,3 +415,60 @@ def _build_tls_unknown_ca_alert(version):
record_type_alert = 0x15
record = struct.pack(">BBBH", record_type_alert, ver_maj, ver_min, msg_len) + msg
return record
class SecurityConst:
    """
    Namespace class holding the integer constants used with the Security
    framework bindings. It is never instantiated; values are read as class
    attributes (e.g. ``SecurityConst.errSSLWouldBlock``).
    """

    # --- Session options, protocol identifiers and connection setup ---
    kSSLSessionOptionBreakOnServerAuth = 0

    kSSLProtocol2 = 1
    kSSLProtocol3 = 2
    kTLSProtocol1 = 4
    kTLSProtocol11 = 7
    kTLSProtocol12 = 8
    # SecureTransport does not support TLS 1.3 even if there's a constant for it
    kTLSProtocol13 = 10
    kTLSProtocolMaxSupported = 999

    kSSLClientSide = 1
    kSSLStreamType = 0

    kSecFormatPEMSequence = 10

    # --- Trust evaluation results ---
    kSecTrustResultInvalid = 0
    kSecTrustResultProceed = 1
    # This gap is present on purpose: this was kSecTrustResultConfirm, which
    # is deprecated.
    kSecTrustResultDeny = 3
    kSecTrustResultUnspecified = 4
    kSecTrustResultRecoverableTrustFailure = 5
    kSecTrustResultFatalTrustFailure = 6
    kSecTrustResultOtherError = 7

    # --- errSSL* status codes ---
    errSSLProtocol = -9800
    errSSLWouldBlock = -9803
    errSSLClosedGraceful = -9805
    errSSLClosedAbort = -9806
    errSSLXCertChainInvalid = -9807
    errSSLCrypto = -9809
    errSSLInternal = -9810
    errSSLUnknownRootCert = -9812
    errSSLNoRootCert = -9813
    errSSLCertExpired = -9814
    errSSLCertNotYetValid = -9815
    errSSLClosedNoNotify = -9816
    errSSLPeerHandshakeFail = -9824
    errSSLPeerUserCancelled = -9839
    errSSLServerAuthCompleted = -9841
    errSSLHostNameMismatch = -9843
    errSSLRecordOverflow = -9847
    errSSLWeakPeerEphemeralDHKey = -9850

    # --- errSec* status codes ---
    errSecVerifyFailed = -67808
    errSecNoTrustSettings = -25263
    errSecItemNotFound = -25300
    errSecInvalidTrustSettings = -25262

View file

@ -1,314 +0,0 @@
"""
This module provides a pool manager that uses Google App Engine's
`URLFetch Service <https://cloud.google.com/appengine/docs/python/urlfetch>`_.
Example usage::
from urllib3 import PoolManager
from urllib3.contrib.appengine import AppEngineManager, is_appengine_sandbox
if is_appengine_sandbox():
# AppEngineManager uses AppEngine's URLFetch API behind the scenes
http = AppEngineManager()
else:
# PoolManager uses a socket-level API behind the scenes
http = PoolManager()
r = http.request('GET', 'https://google.com/')
There are `limitations <https://cloud.google.com/appengine/docs/python/\
urlfetch/#Python_Quotas_and_limits>`_ to the URLFetch service and it may not be
the best choice for your application. There are three options for using
urllib3 on Google App Engine:
1. You can use :class:`AppEngineManager` with URLFetch. URLFetch is
cost-effective in many circumstances as long as your usage is within the
limitations.
2. You can use a normal :class:`~urllib3.PoolManager` by enabling sockets.
Sockets also have `limitations and restrictions
<https://cloud.google.com/appengine/docs/python/sockets/\
#limitations-and-restrictions>`_ and have a lower free quota than URLFetch.
To use sockets, be sure to specify the following in your ``app.yaml``::
env_variables:
GAE_USE_SOCKETS_HTTPLIB : 'true'
3. If you are using `App Engine Flexible
<https://cloud.google.com/appengine/docs/flexible/>`_, you can use the standard
:class:`PoolManager` without any configuration or special environment variables.
"""
from __future__ import absolute_import
import io
import logging
import warnings
from ..exceptions import (
HTTPError,
HTTPWarning,
MaxRetryError,
ProtocolError,
SSLError,
TimeoutError,
)
from ..packages.six.moves.urllib.parse import urljoin
from ..request import RequestMethods
from ..response import HTTPResponse
from ..util.retry import Retry
from ..util.timeout import Timeout
from . import _appengine_environ
try:
from google.appengine.api import urlfetch
except ImportError:
urlfetch = None
log = logging.getLogger(__name__)
class AppEnginePlatformWarning(HTTPWarning):
    """Warning category passed to ``warnings.warn`` by the URLFetch-backed manager in this module."""
class AppEnginePlatformError(HTTPError):
    """Error raised by the URLFetch-backed manager in this module for URLFetch-specific failures."""
class AppEngineManager(RequestMethods):
    """
    Connection manager for Google App Engine sandbox applications.

    This manager uses the URLFetch service directly instead of using the
    emulated httplib, and is subject to URLFetch limitations as described in
    the App Engine documentation `here
    <https://cloud.google.com/appengine/docs/python/urlfetch>`_.

    Notably it will raise an :class:`AppEnginePlatformError` if:
        * URLFetch is not available.
        * If you attempt to use this on App Engine Flexible, as full socket
          support is available.
        * If a request size is more than 10 megabytes.
        * If a response size is more than 32 megabytes.
        * If you use an unsupported request method such as OPTIONS.

    Beyond those cases, it will raise normal urllib3 errors.
    """

    def __init__(
        self,
        headers=None,
        retries=None,
        validate_certificate=True,
        urlfetch_retries=True,
    ):
        """
        :param headers: Default headers, forwarded to ``RequestMethods``.
        :param retries: Default retry configuration; falls back to
            ``Retry.DEFAULT`` when falsy.
        :param validate_certificate: Passed through to ``urlfetch.fetch``.
        :param urlfetch_retries: When true, URLFetch itself is allowed to
            follow redirects (see :meth:`urlopen`).
        :raises AppEnginePlatformError: If the ``urlfetch`` module could not
            be imported.
        """
        # ``urlfetch`` is set to None at import time when the App Engine SDK
        # is not importable.
        if not urlfetch:
            raise AppEnginePlatformError(
                "URLFetch is not available in this environment."
            )

        warnings.warn(
            "urllib3 is using URLFetch on Google App Engine sandbox instead "
            "of sockets. To use sockets directly instead of URLFetch see "
            "https://urllib3.readthedocs.io/en/1.26.x/reference/urllib3.contrib.html.",
            AppEnginePlatformWarning,
        )

        RequestMethods.__init__(self, headers)
        self.validate_certificate = validate_certificate
        self.urlfetch_retries = urlfetch_retries

        self.retries = retries or Retry.DEFAULT

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Return False to re-raise any potential exceptions
        return False

    def urlopen(
        self,
        method,
        url,
        body=None,
        headers=None,
        retries=None,
        redirect=True,
        timeout=Timeout.DEFAULT_TIMEOUT,
        **response_kw
    ):
        """
        Perform a request through ``urlfetch.fetch`` and return an
        ``HTTPResponse``.

        URLFetch exceptions are translated into urllib3 exceptions
        (``TimeoutError``, ``ProtocolError``, ``MaxRetryError``, ``SSLError``)
        or :class:`AppEnginePlatformError`. Redirects and retryable statuses
        are handled by calling this method again with an incremented
        ``Retry`` object.
        """
        retries = self._get_retries(retries, redirect)

        try:
            follow_redirects = redirect and retries.redirect != 0 and retries.total
            response = urlfetch.fetch(
                url,
                payload=body,
                method=method,
                headers=headers or {},
                allow_truncated=False,
                follow_redirects=self.urlfetch_retries and follow_redirects,
                deadline=self._get_absolute_timeout(timeout),
                validate_certificate=self.validate_certificate,
            )
        except urlfetch.DeadlineExceededError as e:
            raise TimeoutError(self, e)

        except urlfetch.InvalidURLError as e:
            if "too large" in str(e):
                raise AppEnginePlatformError(
                    "URLFetch request too large, URLFetch only "
                    "supports requests up to 10mb in size.",
                    e,
                )
            raise ProtocolError(e)

        except urlfetch.DownloadError as e:
            if "Too many redirects" in str(e):
                raise MaxRetryError(self, url, reason=e)
            raise ProtocolError(e)

        except urlfetch.ResponseTooLargeError as e:
            # The trailing space keeps the two adjacent literals from being
            # joined as "supportsresponses".
            raise AppEnginePlatformError(
                "URLFetch response too large, URLFetch only supports "
                "responses up to 32mb in size.",
                e,
            )

        except urlfetch.SSLCertificateError as e:
            raise SSLError(e)

        except urlfetch.InvalidMethodError as e:
            raise AppEnginePlatformError(
                "URLFetch does not support method: %s" % method, e
            )

        http_response = self._urlfetch_response_to_http_response(
            response, retries=retries, **response_kw
        )

        # Handle redirect?
        redirect_location = redirect and http_response.get_redirect_location()
        if redirect_location:
            # Check for redirect response
            if self.urlfetch_retries and retries.raise_on_redirect:
                raise MaxRetryError(self, url, "too many redirects")
            else:
                if http_response.status == 303:
                    method = "GET"

                try:
                    retries = retries.increment(
                        method, url, response=http_response, _pool=self
                    )
                except MaxRetryError:
                    if retries.raise_on_redirect:
                        raise MaxRetryError(self, url, "too many redirects")
                    return http_response

                retries.sleep_for_retry(http_response)
                log.debug("Redirecting %s -> %s", url, redirect_location)
                redirect_url = urljoin(url, redirect_location)
                return self.urlopen(
                    method,
                    redirect_url,
                    body,
                    headers,
                    retries=retries,
                    redirect=redirect,
                    timeout=timeout,
                    **response_kw
                )

        # Check if we should retry the HTTP response.
        has_retry_after = bool(http_response.headers.get("Retry-After"))
        if retries.is_retry(method, http_response.status, has_retry_after):
            retries = retries.increment(method, url, response=http_response, _pool=self)
            log.debug("Retry: %s", url)
            retries.sleep(http_response)
            return self.urlopen(
                method,
                url,
                body=body,
                headers=headers,
                retries=retries,
                redirect=redirect,
                timeout=timeout,
                **response_kw
            )

        return http_response

    def _urlfetch_response_to_http_response(self, urlfetch_resp, **response_kw):
        """
        Wrap a URLFetch response in an ``HTTPResponse``.

        The ``headers`` mapping of ``urlfetch_resp`` is modified in place
        before being handed to ``HTTPResponse``.
        """
        if is_prod_appengine():
            # Production GAE handles deflate encoding automatically, but does
            # not remove the encoding header.
            content_encoding = urlfetch_resp.headers.get("content-encoding")

            if content_encoding == "deflate":
                del urlfetch_resp.headers["content-encoding"]

        transfer_encoding = urlfetch_resp.headers.get("transfer-encoding")
        # We have a full response's content,
        # so let's make sure we don't report ourselves as chunked data.
        if transfer_encoding == "chunked":
            encodings = transfer_encoding.split(",")
            encodings.remove("chunked")
            urlfetch_resp.headers["transfer-encoding"] = ",".join(encodings)

        original_response = HTTPResponse(
            # In order for decoding to work, we must present the content as
            # a file-like object.
            body=io.BytesIO(urlfetch_resp.content),
            msg=urlfetch_resp.header_msg,
            headers=urlfetch_resp.headers,
            status=urlfetch_resp.status_code,
            **response_kw
        )

        return HTTPResponse(
            body=io.BytesIO(urlfetch_resp.content),
            headers=urlfetch_resp.headers,
            status=urlfetch_resp.status_code,
            original_response=original_response,
            **response_kw
        )

    def _get_absolute_timeout(self, timeout):
        """
        Reduce a urllib3 timeout to the single ``deadline`` value URLFetch
        accepts.

        Returns ``None`` for the default-timeout sentinel, ``timeout.total``
        for a ``Timeout`` instance (warning if read/connect values were set),
        and any other value unchanged.
        """
        if timeout is Timeout.DEFAULT_TIMEOUT:
            return None  # Defer to URLFetch's default.
        if isinstance(timeout, Timeout):
            if timeout._read is not None or timeout._connect is not None:
                warnings.warn(
                    "URLFetch does not support granular timeout settings, "
                    "reverting to total or default URLFetch timeout.",
                    AppEnginePlatformWarning,
                )
            return timeout.total
        return timeout

    def _get_retries(self, retries, redirect):
        """
        Coerce ``retries`` to a ``Retry`` instance, warning when it carries
        connect/read/redirect counts that URLFetch does not recognize.
        """
        if not isinstance(retries, Retry):
            retries = Retry.from_int(retries, redirect=redirect, default=self.retries)

        if retries.connect or retries.read or retries.redirect:
            warnings.warn(
                "URLFetch only supports total retries and does not "
                "recognize connect, read, or redirect retry parameters.",
                AppEnginePlatformWarning,
            )

        return retries
# Alias methods from _appengine_environ to maintain public API interface.
# These are plain re-exports, so imports such as
# ``from urllib3.contrib.appengine import is_appengine_sandbox`` (shown in the
# module docstring) keep working. ``is_prod_appengine`` is also the name that
# ``AppEngineManager._urlfetch_response_to_http_response`` looks up at call time.
is_appengine = _appengine_environ.is_appengine
is_appengine_sandbox = _appengine_environ.is_appengine_sandbox
is_local_appengine = _appengine_environ.is_local_appengine
is_prod_appengine = _appengine_environ.is_prod_appengine
is_prod_appengine_mvms = _appengine_environ.is_prod_appengine_mvms

View file

@ -1,130 +0,0 @@
"""
NTLM authenticating pool, contributed by erikcederstran
Issue #10, see: http://code.google.com/p/urllib3/issues/detail?id=10
"""
from __future__ import absolute_import
import warnings
from logging import getLogger
from ntlm import ntlm
from .. import HTTPSConnectionPool
from ..packages.six.moves.http_client import HTTPSConnection
warnings.warn(
"The 'urllib3.contrib.ntlmpool' module is deprecated and will be removed "
"in urllib3 v2.0 release, urllib3 is not able to support it properly due "
"to reasons listed in issue: https://github.com/urllib3/urllib3/issues/2282. "
"If you are a user of this module please comment in the mentioned issue.",
DeprecationWarning,
)
log = getLogger(__name__)
class NTLMConnectionPool(HTTPSConnectionPool):
    """
    Implements an NTLM authentication version of an urllib3 connection pool
    """

    scheme = "https"

    def __init__(self, user, pw, authurl, *args, **kwargs):
        """
        authurl is a random URL on the server that is protected by NTLM.
        user is the Windows user, probably in the DOMAIN\\username format.
        pw is the password for the user.

        Remaining positional and keyword arguments are forwarded to
        ``HTTPSConnectionPool``. ``user`` is split on the first backslash;
        a value without one raises ``IndexError``.
        """
        super(NTLMConnectionPool, self).__init__(*args, **kwargs)
        self.authurl = authurl
        self.rawuser = user
        user_parts = user.split("\\", 1)
        self.domain = user_parts[0].upper()
        self.user = user_parts[1]
        self.pw = pw

    def _new_conn(self):
        """
        Open a new ``HTTPSConnection`` and run the NTLM negotiate /
        authenticate exchange against ``self.authurl`` on it.

        Returns the connection on a 200 response to the authenticate
        message; raises ``Exception`` when no NTLM challenge is offered or
        the final status is not 200.
        """
        # Performs the NTLM handshake that secures the connection. The socket
        # must be kept open while requests are performed.
        self.num_connections += 1
        log.debug(
            "Starting NTLM HTTPS connection no. %d: https://%s%s",
            self.num_connections,
            self.host,
            self.authurl,
        )

        headers = {"Connection": "Keep-Alive"}
        req_header = "Authorization"
        resp_header = "www-authenticate"

        conn = HTTPSConnection(host=self.host, port=self.port)

        # Send negotiation message
        headers[req_header] = "NTLM %s" % ntlm.create_NTLM_NEGOTIATE_MESSAGE(
            self.rawuser
        )
        log.debug("Request headers: %s", headers)
        conn.request("GET", self.authurl, None, headers)
        res = conn.getresponse()
        reshdr = dict(res.headers)
        log.debug("Response status: %s %s", res.status, res.reason)
        log.debug("Response headers: %s", reshdr)
        log.debug("Response data: %s [...]", res.read(100))

        # Remove the reference to the socket, so that it can not be closed by
        # the response object (we want to keep the socket open)
        res.fp = None

        # Server should respond with a challenge message.
        # getheader() matches the header name case-insensitively and joins
        # repeated headers with ", ", so a challenge is found regardless of
        # the header's capitalization or of other auth schemes being offered
        # in separate header lines.
        challenge_header = res.getheader(resp_header)
        auth_header_value = None
        if challenge_header is not None:
            for s in challenge_header.split(", "):
                if s[:5] == "NTLM ":
                    auth_header_value = s[5:]
        if auth_header_value is None:
            raise Exception(
                "Unexpected %s response header: %s" % (resp_header, challenge_header)
            )

        # Send authentication message
        ServerChallenge, NegotiateFlags = ntlm.parse_NTLM_CHALLENGE_MESSAGE(
            auth_header_value
        )
        auth_msg = ntlm.create_NTLM_AUTHENTICATE_MESSAGE(
            ServerChallenge, self.user, self.domain, self.pw, NegotiateFlags
        )
        headers[req_header] = "NTLM %s" % auth_msg
        log.debug("Request headers: %s", headers)
        conn.request("GET", self.authurl, None, headers)
        res = conn.getresponse()
        log.debug("Response status: %s %s", res.status, res.reason)
        log.debug("Response headers: %s", dict(res.headers))
        log.debug("Response data: %s [...]", res.read()[:100])
        if res.status != 200:
            if res.status == 401:
                raise Exception("Server rejected request: wrong username or password")
            raise Exception("Wrong server response: %s %s" % (res.status, res.reason))

        res.fp = None
        log.debug("Connection established")
        return conn

    def urlopen(
        self,
        method,
        url,
        body=None,
        headers=None,
        retries=3,
        redirect=True,
        assert_same_host=True,
    ):
        """
        Same as ``HTTPSConnectionPool.urlopen`` but always sends
        ``Connection: Keep-Alive``. The header is set on the ``headers``
        mapping that was passed in.
        """
        if headers is None:
            headers = {}
        headers["Connection"] = "Keep-Alive"
        return super(NTLMConnectionPool, self).urlopen(
            method, url, body, headers, retries, redirect, assert_same_host
        )

View file

@ -1,8 +1,8 @@
"""
TLS with SNI_-support for Python 2. Follow these instructions if you would
like to verify TLS certificates in Python 2. Note, the default libraries do
*not* do certificate checking; you need to do additional work to validate
certificates yourself.
Module for using pyOpenSSL as a TLS backend. This module was relevant before
the standard library ``ssl`` module supported SNI, but now that we've dropped
support for Python 2.7 all relevant Python versions support SNI so
**this module is no longer recommended**.
This needs the following packages installed:
@ -10,7 +10,7 @@ This needs the following packages installed:
* `cryptography`_ (minimum 1.3.4, from pyopenssl)
* `idna`_ (minimum 2.0, from cryptography)
However, pyopenssl depends on cryptography, which depends on idna, so while we
However, pyOpenSSL depends on cryptography, which depends on idna, so while we
use all three directly here we end up having relatively few packages required.
You can install them with the following command:
@ -33,75 +33,55 @@ like this:
except ImportError:
pass
Now you can use :mod:`urllib3` as you normally would, and it will support SNI
when the required modules are installed.
Activating this module also has the positive side effect of disabling SSL/TLS
compression in Python 2 (see `CRIME attack`_).
.. _sni: https://en.wikipedia.org/wiki/Server_Name_Indication
.. _crime attack: https://en.wikipedia.org/wiki/CRIME_(security_exploit)
.. _pyopenssl: https://www.pyopenssl.org
.. _cryptography: https://cryptography.io
.. _idna: https://github.com/kjd/idna
"""
from __future__ import absolute_import
import OpenSSL.crypto
import OpenSSL.SSL
from __future__ import annotations
import OpenSSL.SSL # type: ignore[import]
from cryptography import x509
from cryptography.hazmat.backends.openssl import backend as openssl_backend
try:
from cryptography.x509 import UnsupportedExtension
from cryptography.x509 import UnsupportedExtension # type: ignore[attr-defined]
except ImportError:
# UnsupportedExtension is gone in cryptography >= 2.1.0
class UnsupportedExtension(Exception):
class UnsupportedExtension(Exception): # type: ignore[no-redef]
pass
from io import BytesIO
from socket import error as SocketError
from socket import timeout
try: # Platform-specific: Python 2
from socket import _fileobject
except ImportError: # Platform-specific: Python 3
_fileobject = None
from ..packages.backports.makefile import backport_makefile
import logging
import ssl
import sys
import typing
import warnings
from io import BytesIO
from socket import socket as socket_cls
from socket import timeout
from .. import util
from ..packages import six
from ..util.ssl_ import PROTOCOL_TLS_CLIENT
warnings.warn(
"'urllib3.contrib.pyopenssl' module is deprecated and will be removed "
"in a future release of urllib3 2.x. Read more in this issue: "
"in urllib3 v2.1.0. Read more in this issue: "
"https://github.com/urllib3/urllib3/issues/2680",
category=DeprecationWarning,
stacklevel=2,
)
__all__ = ["inject_into_urllib3", "extract_from_urllib3"]
if typing.TYPE_CHECKING:
from OpenSSL.crypto import X509 # type: ignore[import]
# SNI always works.
HAS_SNI = True
__all__ = ["inject_into_urllib3", "extract_from_urllib3"]
# Map from urllib3 to PyOpenSSL compatible parameter-values.
_openssl_versions = {
util.PROTOCOL_TLS: OpenSSL.SSL.SSLv23_METHOD,
PROTOCOL_TLS_CLIENT: OpenSSL.SSL.SSLv23_METHOD,
util.ssl_.PROTOCOL_TLS: OpenSSL.SSL.SSLv23_METHOD, # type: ignore[attr-defined]
util.ssl_.PROTOCOL_TLS_CLIENT: OpenSSL.SSL.SSLv23_METHOD, # type: ignore[attr-defined]
ssl.PROTOCOL_TLSv1: OpenSSL.SSL.TLSv1_METHOD,
}
if hasattr(ssl, "PROTOCOL_SSLv3") and hasattr(OpenSSL.SSL, "SSLv3_METHOD"):
_openssl_versions[ssl.PROTOCOL_SSLv3] = OpenSSL.SSL.SSLv3_METHOD
if hasattr(ssl, "PROTOCOL_TLSv1_1") and hasattr(OpenSSL.SSL, "TLSv1_1_METHOD"):
_openssl_versions[ssl.PROTOCOL_TLSv1_1] = OpenSSL.SSL.TLSv1_1_METHOD
@ -115,43 +95,77 @@ _stdlib_to_openssl_verify = {
ssl.CERT_REQUIRED: OpenSSL.SSL.VERIFY_PEER
+ OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT,
}
_openssl_to_stdlib_verify = dict((v, k) for k, v in _stdlib_to_openssl_verify.items())
_openssl_to_stdlib_verify = {v: k for k, v in _stdlib_to_openssl_verify.items()}
# The SSLvX values are the most likely to be missing in the future
# but we check them all just to be sure.
_OP_NO_SSLv2_OR_SSLv3: int = getattr(OpenSSL.SSL, "OP_NO_SSLv2", 0) | getattr(
OpenSSL.SSL, "OP_NO_SSLv3", 0
)
_OP_NO_TLSv1: int = getattr(OpenSSL.SSL, "OP_NO_TLSv1", 0)
_OP_NO_TLSv1_1: int = getattr(OpenSSL.SSL, "OP_NO_TLSv1_1", 0)
_OP_NO_TLSv1_2: int = getattr(OpenSSL.SSL, "OP_NO_TLSv1_2", 0)
_OP_NO_TLSv1_3: int = getattr(OpenSSL.SSL, "OP_NO_TLSv1_3", 0)
_openssl_to_ssl_minimum_version: dict[int, int] = {
ssl.TLSVersion.MINIMUM_SUPPORTED: _OP_NO_SSLv2_OR_SSLv3,
ssl.TLSVersion.TLSv1: _OP_NO_SSLv2_OR_SSLv3,
ssl.TLSVersion.TLSv1_1: _OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1,
ssl.TLSVersion.TLSv1_2: _OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1 | _OP_NO_TLSv1_1,
ssl.TLSVersion.TLSv1_3: (
_OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1 | _OP_NO_TLSv1_1 | _OP_NO_TLSv1_2
),
ssl.TLSVersion.MAXIMUM_SUPPORTED: (
_OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1 | _OP_NO_TLSv1_1 | _OP_NO_TLSv1_2
),
}
_openssl_to_ssl_maximum_version: dict[int, int] = {
ssl.TLSVersion.MINIMUM_SUPPORTED: (
_OP_NO_SSLv2_OR_SSLv3
| _OP_NO_TLSv1
| _OP_NO_TLSv1_1
| _OP_NO_TLSv1_2
| _OP_NO_TLSv1_3
),
ssl.TLSVersion.TLSv1: (
_OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1_1 | _OP_NO_TLSv1_2 | _OP_NO_TLSv1_3
),
ssl.TLSVersion.TLSv1_1: _OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1_2 | _OP_NO_TLSv1_3,
ssl.TLSVersion.TLSv1_2: _OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1_3,
ssl.TLSVersion.TLSv1_3: _OP_NO_SSLv2_OR_SSLv3,
ssl.TLSVersion.MAXIMUM_SUPPORTED: _OP_NO_SSLv2_OR_SSLv3,
}
# OpenSSL will only write 16K at a time
SSL_WRITE_BLOCKSIZE = 16384
orig_util_HAS_SNI = util.HAS_SNI
orig_util_SSLContext = util.ssl_.SSLContext
log = logging.getLogger(__name__)
def inject_into_urllib3():
def inject_into_urllib3() -> None:
"Monkey-patch urllib3 with PyOpenSSL-backed SSL-support."
_validate_dependencies_met()
util.SSLContext = PyOpenSSLContext
util.ssl_.SSLContext = PyOpenSSLContext
util.HAS_SNI = HAS_SNI
util.ssl_.HAS_SNI = HAS_SNI
util.SSLContext = PyOpenSSLContext # type: ignore[assignment]
util.ssl_.SSLContext = PyOpenSSLContext # type: ignore[assignment]
util.IS_PYOPENSSL = True
util.ssl_.IS_PYOPENSSL = True
def extract_from_urllib3():
def extract_from_urllib3() -> None:
"Undo monkey-patching by :func:`inject_into_urllib3`."
util.SSLContext = orig_util_SSLContext
util.ssl_.SSLContext = orig_util_SSLContext
util.HAS_SNI = orig_util_HAS_SNI
util.ssl_.HAS_SNI = orig_util_HAS_SNI
util.IS_PYOPENSSL = False
util.ssl_.IS_PYOPENSSL = False
def _validate_dependencies_met():
def _validate_dependencies_met() -> None:
"""
Verifies that PyOpenSSL's package-level dependencies have been met.
Throws `ImportError` if they are not met.
@ -177,7 +191,7 @@ def _validate_dependencies_met():
)
def _dnsname_to_stdlib(name):
def _dnsname_to_stdlib(name: str) -> str | None:
"""
Converts a dNSName SubjectAlternativeName field to the form used by the
standard library on the given Python version.
@ -191,7 +205,7 @@ def _dnsname_to_stdlib(name):
the name given should be skipped.
"""
def idna_encode(name):
def idna_encode(name: str) -> bytes | None:
"""
Borrowed wholesale from the Python Cryptography Project. It turns out
that we can't just safely call `idna.encode`: it can explode for
@ -200,7 +214,7 @@ def _dnsname_to_stdlib(name):
import idna
try:
for prefix in [u"*.", u"."]:
for prefix in ["*.", "."]:
if name.startswith(prefix):
name = name[len(prefix) :]
return prefix.encode("ascii") + idna.encode(name)
@ -212,24 +226,17 @@ def _dnsname_to_stdlib(name):
if ":" in name:
return name
name = idna_encode(name)
if name is None:
encoded_name = idna_encode(name)
if encoded_name is None:
return None
elif sys.version_info >= (3, 0):
name = name.decode("utf-8")
return name
return encoded_name.decode("utf-8")
def get_subj_alt_name(peer_cert):
def get_subj_alt_name(peer_cert: X509) -> list[tuple[str, str]]:
"""
Given a PyOpenSSL certificate, provides all the subject alternative names.
"""
# Pass the cert to cryptography, which has much better APIs for this.
if hasattr(peer_cert, "to_cryptography"):
cert = peer_cert.to_cryptography()
else:
der = OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_ASN1, peer_cert)
cert = x509.load_der_x509_certificate(der, openssl_backend)
# We want to find the SAN extension. Ask Cryptography to locate it (it's
# faster than looping in Python)
@ -273,93 +280,94 @@ def get_subj_alt_name(peer_cert):
return names
class WrappedSocket(object):
"""API-compatibility wrapper for Python OpenSSL's Connection-class.
class WrappedSocket:
"""API-compatibility wrapper for Python OpenSSL's Connection-class."""
Note: _makefile_refs, _drop() and _reuse() are needed for the garbage
collector of pypy.
"""
def __init__(self, connection, socket, suppress_ragged_eofs=True):
def __init__(
self,
connection: OpenSSL.SSL.Connection,
socket: socket_cls,
suppress_ragged_eofs: bool = True,
) -> None:
self.connection = connection
self.socket = socket
self.suppress_ragged_eofs = suppress_ragged_eofs
self._makefile_refs = 0
self._io_refs = 0
self._closed = False
def fileno(self):
def fileno(self) -> int:
return self.socket.fileno()
# Copy-pasted from Python 3.5 source code
def _decref_socketios(self):
if self._makefile_refs > 0:
self._makefile_refs -= 1
def _decref_socketios(self) -> None:
if self._io_refs > 0:
self._io_refs -= 1
if self._closed:
self.close()
def recv(self, *args, **kwargs):
def recv(self, *args: typing.Any, **kwargs: typing.Any) -> bytes:
try:
data = self.connection.recv(*args, **kwargs)
except OpenSSL.SSL.SysCallError as e:
if self.suppress_ragged_eofs and e.args == (-1, "Unexpected EOF"):
return b""
else:
raise SocketError(str(e))
raise OSError(e.args[0], str(e)) from e
except OpenSSL.SSL.ZeroReturnError:
if self.connection.get_shutdown() == OpenSSL.SSL.RECEIVED_SHUTDOWN:
return b""
else:
raise
except OpenSSL.SSL.WantReadError:
except OpenSSL.SSL.WantReadError as e:
if not util.wait_for_read(self.socket, self.socket.gettimeout()):
raise timeout("The read operation timed out")
raise timeout("The read operation timed out") from e
else:
return self.recv(*args, **kwargs)
# TLS 1.3 post-handshake authentication
except OpenSSL.SSL.Error as e:
raise ssl.SSLError("read error: %r" % e)
raise ssl.SSLError(f"read error: {e!r}") from e
else:
return data
return data # type: ignore[no-any-return]
def recv_into(self, *args, **kwargs):
def recv_into(self, *args: typing.Any, **kwargs: typing.Any) -> int:
try:
return self.connection.recv_into(*args, **kwargs)
return self.connection.recv_into(*args, **kwargs) # type: ignore[no-any-return]
except OpenSSL.SSL.SysCallError as e:
if self.suppress_ragged_eofs and e.args == (-1, "Unexpected EOF"):
return 0
else:
raise SocketError(str(e))
raise OSError(e.args[0], str(e)) from e
except OpenSSL.SSL.ZeroReturnError:
if self.connection.get_shutdown() == OpenSSL.SSL.RECEIVED_SHUTDOWN:
return 0
else:
raise
except OpenSSL.SSL.WantReadError:
except OpenSSL.SSL.WantReadError as e:
if not util.wait_for_read(self.socket, self.socket.gettimeout()):
raise timeout("The read operation timed out")
raise timeout("The read operation timed out") from e
else:
return self.recv_into(*args, **kwargs)
# TLS 1.3 post-handshake authentication
except OpenSSL.SSL.Error as e:
raise ssl.SSLError("read error: %r" % e)
raise ssl.SSLError(f"read error: {e!r}") from e
def settimeout(self, timeout):
def settimeout(self, timeout: float) -> None:
return self.socket.settimeout(timeout)
def _send_until_done(self, data):
def _send_until_done(self, data: bytes) -> int:
while True:
try:
return self.connection.send(data)
except OpenSSL.SSL.WantWriteError:
return self.connection.send(data) # type: ignore[no-any-return]
except OpenSSL.SSL.WantWriteError as e:
if not util.wait_for_write(self.socket, self.socket.gettimeout()):
raise timeout()
raise timeout() from e
continue
except OpenSSL.SSL.SysCallError as e:
raise SocketError(str(e))
raise OSError(e.args[0], str(e)) from e
def sendall(self, data):
def sendall(self, data: bytes) -> None:
total_sent = 0
while total_sent < len(data):
sent = self._send_until_done(
@ -367,135 +375,135 @@ class WrappedSocket(object):
)
total_sent += sent
def shutdown(self):
def shutdown(self) -> None:
# FIXME rethrow compatible exceptions should we ever use this
self.connection.shutdown()
def close(self):
if self._makefile_refs < 1:
try:
def close(self) -> None:
self._closed = True
return self.connection.close()
if self._io_refs <= 0:
self._real_close()
def _real_close(self) -> None:
try:
return self.connection.close() # type: ignore[no-any-return]
except OpenSSL.SSL.Error:
return
else:
self._makefile_refs -= 1
def getpeercert(self, binary_form=False):
def getpeercert(
self, binary_form: bool = False
) -> dict[str, list[typing.Any]] | None:
x509 = self.connection.get_peer_certificate()
if not x509:
return x509
return x509 # type: ignore[no-any-return]
if binary_form:
return OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_ASN1, x509)
return OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_ASN1, x509) # type: ignore[no-any-return]
return {
"subject": ((("commonName", x509.get_subject().CN),),),
"subject": ((("commonName", x509.get_subject().CN),),), # type: ignore[dict-item]
"subjectAltName": get_subj_alt_name(x509),
}
def version(self):
return self.connection.get_protocol_version_name()
def _reuse(self):
self._makefile_refs += 1
def _drop(self):
if self._makefile_refs < 1:
self.close()
else:
self._makefile_refs -= 1
def version(self) -> str:
return self.connection.get_protocol_version_name() # type: ignore[no-any-return]
if _fileobject: # Platform-specific: Python 2
def makefile(self, mode, bufsize=-1):
self._makefile_refs += 1
return _fileobject(self, mode, bufsize, close=True)
else: # Platform-specific: Python 3
makefile = backport_makefile
WrappedSocket.makefile = makefile
WrappedSocket.makefile = socket_cls.makefile # type: ignore[attr-defined]
class PyOpenSSLContext(object):
class PyOpenSSLContext:
"""
I am a wrapper class for the PyOpenSSL ``Context`` object. I am responsible
for translating the interface of the standard library ``SSLContext`` object
to calls into PyOpenSSL.
"""
def __init__(self, protocol):
def __init__(self, protocol: int) -> None:
self.protocol = _openssl_versions[protocol]
self._ctx = OpenSSL.SSL.Context(self.protocol)
self._options = 0
self.check_hostname = False
self._minimum_version: int = ssl.TLSVersion.MINIMUM_SUPPORTED
self._maximum_version: int = ssl.TLSVersion.MAXIMUM_SUPPORTED
@property
def options(self):
def options(self) -> int:
return self._options
@options.setter
def options(self, value):
def options(self, value: int) -> None:
self._options = value
self._ctx.set_options(value)
self._set_ctx_options()
@property
def verify_mode(self):
def verify_mode(self) -> int:
return _openssl_to_stdlib_verify[self._ctx.get_verify_mode()]
@verify_mode.setter
def verify_mode(self, value):
def verify_mode(self, value: ssl.VerifyMode) -> None:
self._ctx.set_verify(_stdlib_to_openssl_verify[value], _verify_callback)
def set_default_verify_paths(self):
def set_default_verify_paths(self) -> None:
self._ctx.set_default_verify_paths()
def set_ciphers(self, ciphers):
if isinstance(ciphers, six.text_type):
def set_ciphers(self, ciphers: bytes | str) -> None:
if isinstance(ciphers, str):
ciphers = ciphers.encode("utf-8")
self._ctx.set_cipher_list(ciphers)
def load_verify_locations(self, cafile=None, capath=None, cadata=None):
def load_verify_locations(
self,
cafile: str | None = None,
capath: str | None = None,
cadata: bytes | None = None,
) -> None:
if cafile is not None:
cafile = cafile.encode("utf-8")
cafile = cafile.encode("utf-8") # type: ignore[assignment]
if capath is not None:
capath = capath.encode("utf-8")
capath = capath.encode("utf-8") # type: ignore[assignment]
try:
self._ctx.load_verify_locations(cafile, capath)
if cadata is not None:
self._ctx.load_verify_locations(BytesIO(cadata))
except OpenSSL.SSL.Error as e:
raise ssl.SSLError("unable to load trusted certificates: %r" % e)
raise ssl.SSLError(f"unable to load trusted certificates: {e!r}") from e
def load_cert_chain(self, certfile, keyfile=None, password=None):
def load_cert_chain(
self,
certfile: str,
keyfile: str | None = None,
password: str | None = None,
) -> None:
try:
self._ctx.use_certificate_chain_file(certfile)
if password is not None:
if not isinstance(password, six.binary_type):
password = password.encode("utf-8")
if not isinstance(password, bytes):
password = password.encode("utf-8") # type: ignore[assignment]
self._ctx.set_passwd_cb(lambda *_: password)
self._ctx.use_privatekey_file(keyfile or certfile)
except OpenSSL.SSL.Error as e:
raise ssl.SSLError(f"Unable to load certificate chain: {e!r}") from e
def set_alpn_protocols(self, protocols):
protocols = [six.ensure_binary(p) for p in protocols]
return self._ctx.set_alpn_protos(protocols)
def set_alpn_protocols(self, protocols: list[bytes | str]) -> None:
protocols = [util.util.to_bytes(p, "ascii") for p in protocols]
return self._ctx.set_alpn_protos(protocols) # type: ignore[no-any-return]
def wrap_socket(
self,
sock,
server_side=False,
do_handshake_on_connect=True,
suppress_ragged_eofs=True,
server_hostname=None,
):
sock: socket_cls,
server_side: bool = False,
do_handshake_on_connect: bool = True,
suppress_ragged_eofs: bool = True,
server_hostname: bytes | str | None = None,
) -> WrappedSocket:
cnx = OpenSSL.SSL.Connection(self._ctx, sock)
if isinstance(server_hostname, six.text_type): # Platform-specific: Python 3
# If server_hostname is an IP, don't use it for SNI, per RFC6066 Section 3
if server_hostname and not util.ssl_.is_ipaddress(server_hostname):
if isinstance(server_hostname, str):
server_hostname = server_hostname.encode("utf-8")
if server_hostname is not None:
cnx.set_tlsext_host_name(server_hostname)
cnx.set_connect_state()
@ -503,16 +511,47 @@ class PyOpenSSLContext(object):
while True:
try:
cnx.do_handshake()
except OpenSSL.SSL.WantReadError:
except OpenSSL.SSL.WantReadError as e:
if not util.wait_for_read(sock, sock.gettimeout()):
raise timeout("select timed out")
raise timeout("select timed out") from e
continue
except OpenSSL.SSL.Error as e:
raise ssl.SSLError("bad handshake: %r" % e)
raise ssl.SSLError(f"bad handshake: {e!r}") from e
break
return WrappedSocket(cnx, sock)
# Re-apply the option bitmask to the wrapped PyOpenSSL context: the stored
# ``_options`` value OR'd with the entries looked up for the currently
# configured minimum and maximum TLS versions. The ``options``,
# ``minimum_version`` and ``maximum_version`` setters call this after updating
# their respective attribute.
def _set_ctx_options(self) -> None:
self._ctx.set_options(
self._options
# Both lookup tables are defined outside this view; presumably they map
# ssl.TLSVersion values to OpenSSL option flags -- TODO confirm.
| _openssl_to_ssl_minimum_version[self._minimum_version]
| _openssl_to_ssl_maximum_version[self._maximum_version]
)
def _verify_callback(cnx, x509, err_no, err_depth, return_code):
# Lowest TLS version this context is configured for. Reading returns the
# stored value; assigning stores the new value and then calls
# ``_set_ctx_options()`` so the underlying context is reconfigured.
@property
def minimum_version(self) -> int:
return self._minimum_version
@minimum_version.setter
def minimum_version(self, minimum_version: int) -> None:
self._minimum_version = minimum_version
self._set_ctx_options()
# Highest TLS version this context is configured for. Reading returns the
# stored value; assigning stores the new value and then calls
# ``_set_ctx_options()`` so the underlying context is reconfigured.
@property
def maximum_version(self) -> int:
return self._maximum_version
@maximum_version.setter
def maximum_version(self, maximum_version: int) -> None:
self._maximum_version = maximum_version
self._set_ctx_options()
# Verification callback handed to ``self._ctx.set_verify`` by the
# ``verify_mode`` setter. Only ``err_no`` is consulted: the callback returns
# True when it is 0 and False otherwise. The remaining arguments are accepted
# but not used.
def _verify_callback(
cnx: OpenSSL.SSL.Connection,
x509: X509,
err_no: int,
err_depth: int,
return_code: int,
) -> bool:
return err_no == 0

View file

@ -51,7 +51,8 @@ license and by oscrypto's:
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
"""
from __future__ import absolute_import
from __future__ import annotations
import contextlib
import ctypes
@ -62,14 +63,18 @@ import socket
import ssl
import struct
import threading
import typing
import warnings
import weakref
import six
from socket import socket as socket_cls
from .. import util
from ..util.ssl_ import PROTOCOL_TLS_CLIENT
from ._securetransport.bindings import CoreFoundation, Security, SecurityConst
from ._securetransport.bindings import ( # type: ignore[attr-defined]
CoreFoundation,
Security,
)
from ._securetransport.low_level import (
SecurityConst,
_assert_no_error,
_build_tls_unknown_ca_alert,
_cert_array_from_pem,
@ -78,18 +83,19 @@ from ._securetransport.low_level import (
_temporary_keychain,
)
try: # Platform-specific: Python 2
from socket import _fileobject
except ImportError: # Platform-specific: Python 3
_fileobject = None
from ..packages.backports.makefile import backport_makefile
warnings.warn(
"'urllib3.contrib.securetransport' module is deprecated and will be removed "
"in urllib3 v2.1.0. Read more in this issue: "
"https://github.com/urllib3/urllib3/issues/2681",
category=DeprecationWarning,
stacklevel=2,
)
if typing.TYPE_CHECKING:
from typing_extensions import Literal
__all__ = ["inject_into_urllib3", "extract_from_urllib3"]
# SNI always works
HAS_SNI = True
orig_util_HAS_SNI = util.HAS_SNI
orig_util_SSLContext = util.ssl_.SSLContext
# This dictionary is used by the read callback to obtain a handle to the
@ -108,55 +114,24 @@ orig_util_SSLContext = util.ssl_.SSLContext
#
# This is good: if we had to lock in the callbacks we'd drastically slow down
# the performance of this code.
_connection_refs = weakref.WeakValueDictionary()
_connection_refs: weakref.WeakValueDictionary[
int, WrappedSocket
] = weakref.WeakValueDictionary()
_connection_ref_lock = threading.Lock()
# Limit writes to 16kB. This is OpenSSL's limit, but we'll cargo-cult it over
# for no better reason than we need *a* limit, and this one is right there.
SSL_WRITE_BLOCKSIZE = 16384
# This is our equivalent of util.ssl_.DEFAULT_CIPHERS, but expanded out to
# individual cipher suites. We need to do this because this is how
# SecureTransport wants them.
CIPHER_SUITES = [
SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,
SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
SecurityConst.TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,
SecurityConst.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
SecurityConst.TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256,
SecurityConst.TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256,
SecurityConst.TLS_DHE_RSA_WITH_AES_256_GCM_SHA384,
SecurityConst.TLS_DHE_RSA_WITH_AES_128_GCM_SHA256,
SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384,
SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA,
SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,
SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA,
SecurityConst.TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384,
SecurityConst.TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA,
SecurityConst.TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256,
SecurityConst.TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,
SecurityConst.TLS_DHE_RSA_WITH_AES_256_CBC_SHA256,
SecurityConst.TLS_DHE_RSA_WITH_AES_256_CBC_SHA,
SecurityConst.TLS_DHE_RSA_WITH_AES_128_CBC_SHA256,
SecurityConst.TLS_DHE_RSA_WITH_AES_128_CBC_SHA,
SecurityConst.TLS_AES_256_GCM_SHA384,
SecurityConst.TLS_AES_128_GCM_SHA256,
SecurityConst.TLS_RSA_WITH_AES_256_GCM_SHA384,
SecurityConst.TLS_RSA_WITH_AES_128_GCM_SHA256,
SecurityConst.TLS_AES_128_CCM_8_SHA256,
SecurityConst.TLS_AES_128_CCM_SHA256,
SecurityConst.TLS_RSA_WITH_AES_256_CBC_SHA256,
SecurityConst.TLS_RSA_WITH_AES_128_CBC_SHA256,
SecurityConst.TLS_RSA_WITH_AES_256_CBC_SHA,
SecurityConst.TLS_RSA_WITH_AES_128_CBC_SHA,
]
# Basically this is simple: for PROTOCOL_SSLv23 we turn it into a low of
# TLSv1 and a high of TLSv1.2. For everything else, we pin to that version.
# TLSv1 to 1.2 are supported on macOS 10.8+
_protocol_to_min_max = {
util.PROTOCOL_TLS: (SecurityConst.kTLSProtocol1, SecurityConst.kTLSProtocol12),
PROTOCOL_TLS_CLIENT: (SecurityConst.kTLSProtocol1, SecurityConst.kTLSProtocol12),
util.ssl_.PROTOCOL_TLS: (SecurityConst.kTLSProtocol1, SecurityConst.kTLSProtocol12), # type: ignore[attr-defined]
util.ssl_.PROTOCOL_TLS_CLIENT: ( # type: ignore[attr-defined]
SecurityConst.kTLSProtocol1,
SecurityConst.kTLSProtocol12,
),
}
if hasattr(ssl, "PROTOCOL_SSLv2"):
@ -186,31 +161,38 @@ if hasattr(ssl, "PROTOCOL_TLSv1_2"):
)
def inject_into_urllib3():
# Translation table from ``ssl.TLSVersion`` members to SecureTransport
# protocol constants. ``SecureTransportContext.wrap_socket`` indexes it with
# the context's ``_minimum_version`` / ``_maximum_version`` values.
# MINIMUM_SUPPORTED maps to the same constant as TLSv1 and MAXIMUM_SUPPORTED to
# the same constant as TLSv1_2; no other members are present, so any other
# version raises KeyError at lookup time.
_tls_version_to_st: dict[int, int] = {
ssl.TLSVersion.MINIMUM_SUPPORTED: SecurityConst.kTLSProtocol1,
ssl.TLSVersion.TLSv1: SecurityConst.kTLSProtocol1,
ssl.TLSVersion.TLSv1_1: SecurityConst.kTLSProtocol11,
ssl.TLSVersion.TLSv1_2: SecurityConst.kTLSProtocol12,
ssl.TLSVersion.MAXIMUM_SUPPORTED: SecurityConst.kTLSProtocol12,
}
def inject_into_urllib3() -> None:
"""
Monkey-patch urllib3 with SecureTransport-backed SSL-support.
"""
util.SSLContext = SecureTransportContext
util.ssl_.SSLContext = SecureTransportContext
util.HAS_SNI = HAS_SNI
util.ssl_.HAS_SNI = HAS_SNI
util.SSLContext = SecureTransportContext # type: ignore[assignment]
util.ssl_.SSLContext = SecureTransportContext # type: ignore[assignment]
util.IS_SECURETRANSPORT = True
util.ssl_.IS_SECURETRANSPORT = True
def extract_from_urllib3():
def extract_from_urllib3() -> None:
"""
Undo monkey-patching by :func:`inject_into_urllib3`.
"""
util.SSLContext = orig_util_SSLContext
util.ssl_.SSLContext = orig_util_SSLContext
util.HAS_SNI = orig_util_HAS_SNI
util.ssl_.HAS_SNI = orig_util_HAS_SNI
util.IS_SECURETRANSPORT = False
util.ssl_.IS_SECURETRANSPORT = False
def _read_callback(connection_id, data_buffer, data_length_pointer):
def _read_callback(
connection_id: int, data_buffer: int, data_length_pointer: bytearray
) -> int:
"""
SecureTransport read callback. This is called by ST to request that data
be returned from the socket.
@ -232,7 +214,7 @@ def _read_callback(connection_id, data_buffer, data_length_pointer):
while read_count < requested_length:
if timeout is None or timeout >= 0:
if not util.wait_for_read(base_socket, timeout):
raise socket.error(errno.EAGAIN, "timed out")
raise OSError(errno.EAGAIN, "timed out")
remaining = requested_length - read_count
buffer = (ctypes.c_char * remaining).from_address(
@ -244,7 +226,7 @@ def _read_callback(connection_id, data_buffer, data_length_pointer):
if not read_count:
return SecurityConst.errSSLClosedGraceful
break
except (socket.error) as e:
except OSError as e:
error = e.errno
if error is not None and error != errno.EAGAIN:
@ -265,7 +247,9 @@ def _read_callback(connection_id, data_buffer, data_length_pointer):
return SecurityConst.errSSLInternal
def _write_callback(connection_id, data_buffer, data_length_pointer):
def _write_callback(
connection_id: int, data_buffer: int, data_length_pointer: bytearray
) -> int:
"""
SecureTransport write callback. This is called by ST to request that data
actually be sent on the network.
@ -288,14 +272,14 @@ def _write_callback(connection_id, data_buffer, data_length_pointer):
while sent < bytes_to_write:
if timeout is None or timeout >= 0:
if not util.wait_for_write(base_socket, timeout):
raise socket.error(errno.EAGAIN, "timed out")
raise OSError(errno.EAGAIN, "timed out")
chunk_sent = base_socket.send(data)
sent += chunk_sent
# This has some needless copying here, but I'm not sure there's
# much value in optimising this data path.
data = data[chunk_sent:]
except (socket.error) as e:
except OSError as e:
error = e.errno
if error is not None and error != errno.EAGAIN:
@ -323,22 +307,20 @@ _read_callback_pointer = Security.SSLReadFunc(_read_callback)
_write_callback_pointer = Security.SSLWriteFunc(_write_callback)
class WrappedSocket(object):
class WrappedSocket:
"""
API-compatibility wrapper for Python's OpenSSL wrapped socket object.
Note: _makefile_refs, _drop(), and _reuse() are needed for the garbage
collector of PyPy.
"""
def __init__(self, socket):
def __init__(self, socket: socket_cls) -> None:
self.socket = socket
self.context = None
self._makefile_refs = 0
self._io_refs = 0
self._closed = False
self._exception = None
self._real_closed = False
self._exception: Exception | None = None
self._keychain = None
self._keychain_dir = None
self._keychain_dir: str | None = None
self._client_cert_chain = None
# We save off the previously-configured timeout and then set it to
@ -350,7 +332,7 @@ class WrappedSocket(object):
self.socket.settimeout(0)
@contextlib.contextmanager
def _raise_on_error(self):
def _raise_on_error(self) -> typing.Generator[None, None, None]:
"""
A context manager that can be used to wrap calls that do I/O from
SecureTransport. If any of the I/O callbacks hit an exception, this
@ -367,23 +349,10 @@ class WrappedSocket(object):
yield
if self._exception is not None:
exception, self._exception = self._exception, None
self.close()
self._real_close()
raise exception
def _set_ciphers(self):
"""
Sets up the allowed ciphers. By default this matches the set in
util.ssl_.DEFAULT_CIPHERS, at least as supported by macOS. This is done
custom and doesn't allow changing at this time, mostly because parsing
OpenSSL cipher strings is going to be a freaking nightmare.
"""
ciphers = (Security.SSLCipherSuite * len(CIPHER_SUITES))(*CIPHER_SUITES)
result = Security.SSLSetEnabledCiphers(
self.context, ciphers, len(CIPHER_SUITES)
)
_assert_no_error(result)
def _set_alpn_protocols(self, protocols):
def _set_alpn_protocols(self, protocols: list[bytes] | None) -> None:
"""
Sets up the ALPN protocols on the context.
"""
@ -396,7 +365,7 @@ class WrappedSocket(object):
finally:
CoreFoundation.CFRelease(protocols_arr)
def _custom_validate(self, verify, trust_bundle):
def _custom_validate(self, verify: bool, trust_bundle: bytes | None) -> None:
"""
Called when we have set custom validation. We do this in two cases:
first, when cert validation is entirely disabled; and second, when
@ -404,7 +373,7 @@ class WrappedSocket(object):
Raises an SSLError if the connection is not trusted.
"""
# If we disabled cert validation, just say: cool.
if not verify:
if not verify or trust_bundle is None:
return
successes = (
@ -415,10 +384,12 @@ class WrappedSocket(object):
trust_result = self._evaluate_trust(trust_bundle)
if trust_result in successes:
return
reason = "error code: %d" % (trust_result,)
reason = f"error code: {int(trust_result)}"
exc = None
except Exception as e:
# Do not trust on error
reason = "exception: %r" % (e,)
reason = f"exception: {e!r}"
exc = e
# SecureTransport neither sends an alert nor shuts down the connection.
rec = _build_tls_unknown_ca_alert(self.version())
@ -428,10 +399,10 @@ class WrappedSocket(object):
# l_linger = 0, linger for 0 seconds
opts = struct.pack("ii", 1, 0)
self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_LINGER, opts)
self.close()
raise ssl.SSLError("certificate verify failed, %s" % reason)
self._real_close()
raise ssl.SSLError(f"certificate verify failed, {reason}") from exc
def _evaluate_trust(self, trust_bundle):
def _evaluate_trust(self, trust_bundle: bytes) -> int:
# We want data in memory, so load it up.
if os.path.isfile(trust_bundle):
with open(trust_bundle, "rb") as f:
@ -469,20 +440,20 @@ class WrappedSocket(object):
if cert_array is not None:
CoreFoundation.CFRelease(cert_array)
return trust_result.value
return trust_result.value # type: ignore[no-any-return]
def handshake(
self,
server_hostname,
verify,
trust_bundle,
min_version,
max_version,
client_cert,
client_key,
client_key_passphrase,
alpn_protocols,
):
server_hostname: bytes | str | None,
verify: bool,
trust_bundle: bytes | None,
min_version: int,
max_version: int,
client_cert: str | None,
client_key: str | None,
client_key_passphrase: typing.Any,
alpn_protocols: list[bytes] | None,
) -> None:
"""
Actually performs the TLS handshake. This is run automatically by
wrapped socket, and shouldn't be needed in user code.
@ -510,6 +481,8 @@ class WrappedSocket(object):
_assert_no_error(result)
# If we have a server hostname, we should set that too.
# RFC6066 Section 3 tells us not to use SNI when the host is an IP, but we have
# to do it anyway to match server_hostname against the server certificate
if server_hostname:
if not isinstance(server_hostname, bytes):
server_hostname = server_hostname.encode("utf-8")
@ -519,9 +492,6 @@ class WrappedSocket(object):
)
_assert_no_error(result)
# Setup the ciphers.
self._set_ciphers()
# Setup the ALPN protocols.
self._set_alpn_protocols(alpn_protocols)
@ -564,25 +534,27 @@ class WrappedSocket(object):
_assert_no_error(result)
break
def fileno(self):
def fileno(self) -> int:
return self.socket.fileno()
# Copy-pasted from Python 3.5 source code
def _decref_socketios(self):
if self._makefile_refs > 0:
self._makefile_refs -= 1
def _decref_socketios(self) -> None:
if self._io_refs > 0:
self._io_refs -= 1
if self._closed:
self.close()
def recv(self, bufsiz):
def recv(self, bufsiz: int) -> bytes:
buffer = ctypes.create_string_buffer(bufsiz)
bytes_read = self.recv_into(buffer, bufsiz)
data = buffer[:bytes_read]
return data
return typing.cast(bytes, data)
def recv_into(self, buffer, nbytes=None):
def recv_into(
self, buffer: ctypes.Array[ctypes.c_char], nbytes: int | None = None
) -> int:
# Read short on EOF.
if self._closed:
if self._real_closed:
return 0
if nbytes is None:
@ -615,7 +587,7 @@ class WrappedSocket(object):
# well. Note that we don't actually return here because in
# principle this could actually be fired along with return data.
# It's unlikely though.
self.close()
self._real_close()
else:
_assert_no_error(result)
@ -623,13 +595,13 @@ class WrappedSocket(object):
# was actually read.
return processed_bytes.value
def settimeout(self, timeout):
def settimeout(self, timeout: float) -> None:
self._timeout = timeout
def gettimeout(self):
def gettimeout(self) -> float | None:
return self._timeout
def send(self, data):
def send(self, data: bytes) -> int:
processed_bytes = ctypes.c_size_t(0)
with self._raise_on_error():
@ -646,20 +618,24 @@ class WrappedSocket(object):
# We sent, and probably succeeded. Tell them how much we sent.
return processed_bytes.value
def sendall(self, data):
def sendall(self, data: bytes) -> None:
total_sent = 0
while total_sent < len(data):
sent = self.send(data[total_sent : total_sent + SSL_WRITE_BLOCKSIZE])
total_sent += sent
def shutdown(self):
def shutdown(self) -> None:
with self._raise_on_error():
Security.SSLClose(self.context)
def close(self):
# TODO: should I do clean shutdown here? Do I have to?
if self._makefile_refs < 1:
def close(self) -> None:
self._closed = True
# TODO: should I do clean shutdown here? Do I have to?
if self._io_refs <= 0:
self._real_close()
def _real_close(self) -> None:
self._real_closed = True
if self.context:
CoreFoundation.CFRelease(self.context)
self.context = None
@ -672,10 +648,8 @@ class WrappedSocket(object):
shutil.rmtree(self._keychain_dir)
self._keychain = self._keychain_dir = None
return self.socket.close()
else:
self._makefile_refs -= 1
def getpeercert(self, binary_form=False):
def getpeercert(self, binary_form: bool = False) -> bytes | None:
# Urgh, annoying.
#
# Here's how we do this:
@ -733,7 +707,7 @@ class WrappedSocket(object):
return der_bytes
def version(self):
def version(self) -> str:
protocol = Security.SSLProtocol()
result = Security.SSLGetNegotiatedProtocolVersion(
self.context, ctypes.byref(protocol)
@ -752,55 +726,50 @@ class WrappedSocket(object):
elif protocol.value == SecurityConst.kSSLProtocol2:
return "SSLv2"
else:
raise ssl.SSLError("Unknown TLS version: %r" % protocol)
def _reuse(self):
self._makefile_refs += 1
def _drop(self):
if self._makefile_refs < 1:
self.close()
else:
self._makefile_refs -= 1
raise ssl.SSLError(f"Unknown TLS version: {protocol!r}")
if _fileobject: # Platform-specific: Python 2
def makefile(self, mode, bufsize=-1):
self._makefile_refs += 1
return _fileobject(self, mode, bufsize, close=True)
else: # Platform-specific: Python 3
def makefile(self, mode="r", buffering=None, *args, **kwargs):
def makefile(
self: socket_cls,
mode: (
Literal["r"] | Literal["w"] | Literal["rw"] | Literal["wr"] | Literal[""]
) = "r",
buffering: int | None = None,
*args: typing.Any,
**kwargs: typing.Any,
) -> typing.BinaryIO | typing.TextIO:
# We disable buffering with SecureTransport because it conflicts with
# the buffering that ST does internally (see issue #1153 for more).
buffering = 0
return backport_makefile(self, mode, buffering, *args, **kwargs)
return socket_cls.makefile(self, mode, buffering, *args, **kwargs)
WrappedSocket.makefile = makefile
WrappedSocket.makefile = makefile # type: ignore[attr-defined]
class SecureTransportContext(object):
class SecureTransportContext:
"""
I am a wrapper class for the SecureTransport library, to translate the
interface of the standard library ``SSLContext`` object to calls into
SecureTransport.
"""
def __init__(self, protocol):
def __init__(self, protocol: int) -> None:
self._minimum_version: int = ssl.TLSVersion.MINIMUM_SUPPORTED
self._maximum_version: int = ssl.TLSVersion.MAXIMUM_SUPPORTED
if protocol not in (None, ssl.PROTOCOL_TLS, ssl.PROTOCOL_TLS_CLIENT):
self._min_version, self._max_version = _protocol_to_min_max[protocol]
self._options = 0
self._verify = False
self._trust_bundle = None
self._client_cert = None
self._client_key = None
self._trust_bundle: bytes | None = None
self._client_cert: str | None = None
self._client_key: str | None = None
self._client_key_passphrase = None
self._alpn_protocols = None
self._alpn_protocols: list[bytes] | None = None
@property
def check_hostname(self):
def check_hostname(self) -> Literal[True]:
"""
SecureTransport cannot have its hostname checking disabled. For more,
see the comment on getpeercert() in this file.
@ -808,15 +777,14 @@ class SecureTransportContext(object):
return True
@check_hostname.setter
def check_hostname(self, value):
def check_hostname(self, value: typing.Any) -> None:
"""
SecureTransport cannot have its hostname checking disabled. For more,
see the comment on getpeercert() in this file.
"""
pass
@property
def options(self):
def options(self) -> int:
# TODO: Well, crap.
#
# So this is the bit of the code that is the most likely to cause us
@ -826,19 +794,19 @@ class SecureTransportContext(object):
return self._options
@options.setter
def options(self, value):
def options(self, value: int) -> None:
# TODO: Update in line with above.
self._options = value
@property
def verify_mode(self):
def verify_mode(self) -> int:
return ssl.CERT_REQUIRED if self._verify else ssl.CERT_NONE
@verify_mode.setter
def verify_mode(self, value):
self._verify = True if value == ssl.CERT_REQUIRED else False
def verify_mode(self, value: int) -> None:
self._verify = value == ssl.CERT_REQUIRED
def set_default_verify_paths(self):
def set_default_verify_paths(self) -> None:
# So, this has to do something a bit weird. Specifically, what it does
# is nothing.
#
@ -850,15 +818,18 @@ class SecureTransportContext(object):
# ignoring it.
pass
def load_default_certs(self):
def load_default_certs(self) -> None:
return self.set_default_verify_paths()
def set_ciphers(self, ciphers):
# For now, we just require the default cipher string.
if ciphers != util.ssl_.DEFAULT_CIPHERS:
def set_ciphers(self, ciphers: typing.Any) -> None:
raise ValueError("SecureTransport doesn't support custom cipher strings")
def load_verify_locations(self, cafile=None, capath=None, cadata=None):
def load_verify_locations(
self,
cafile: str | None = None,
capath: str | None = None,
cadata: bytes | None = None,
) -> None:
# OK, we only really support cadata and cafile.
if capath is not None:
raise ValueError("SecureTransport does not support cert directories")
@ -868,14 +839,19 @@ class SecureTransportContext(object):
with open(cafile):
pass
self._trust_bundle = cafile or cadata
self._trust_bundle = cafile or cadata # type: ignore[assignment]
def load_cert_chain(
    self,
    certfile: str,
    keyfile: str | None = None,
    password: str | None = None,
) -> None:
    """
    Remember the client certificate settings for later use.

    Nothing is opened or validated here; the three values are only stored
    on the context so that ``wrap_socket`` can pass them on.

    :param certfile: Client certificate file.
    :param keyfile: Private key file, or ``None``.
    :param password: Passphrase for the private key, or ``None``.
    """
    self._client_cert = certfile
    self._client_key = keyfile
    # ``__init__`` declares ``_client_key_passphrase`` and ``wrap_socket``
    # reads that same attribute, so the passphrase must be stored under this
    # name; otherwise a caller-supplied password is silently dropped.
    self._client_key_passphrase = password
    # Legacy attribute name this method used to write; kept as an alias so
    # any code that inspects it keeps working.
    self._client_cert_passphrase = password
def set_alpn_protocols(self, protocols):
def set_alpn_protocols(self, protocols: list[str | bytes]) -> None:
"""
Sets the ALPN protocols that will later be set on the context.
@ -885,16 +861,16 @@ class SecureTransportContext(object):
raise NotImplementedError(
"SecureTransport supports ALPN only in macOS 10.12+"
)
self._alpn_protocols = [six.ensure_binary(p) for p in protocols]
self._alpn_protocols = [util.util.to_bytes(p, "ascii") for p in protocols]
def wrap_socket(
self,
sock,
server_side=False,
do_handshake_on_connect=True,
suppress_ragged_eofs=True,
server_hostname=None,
):
sock: socket_cls,
server_side: bool = False,
do_handshake_on_connect: bool = True,
suppress_ragged_eofs: bool = True,
server_hostname: bytes | str | None = None,
) -> WrappedSocket:
# So, what do we do here? Firstly, we assert some properties. This is a
# stripped down shim, so there is some functionality we don't support.
# See PEP 543 for the real deal.
@ -911,11 +887,27 @@ class SecureTransportContext(object):
server_hostname,
self._verify,
self._trust_bundle,
self._min_version,
self._max_version,
_tls_version_to_st[self._minimum_version],
_tls_version_to_st[self._maximum_version],
self._client_cert,
self._client_key,
self._client_key_passphrase,
self._alpn_protocols,
)
return wrapped_socket
# Lowest TLS version requested for this context. The setter only stores the
# value; it is translated through ``_tls_version_to_st`` when ``wrap_socket``
# builds the handshake arguments.
@property
def minimum_version(self) -> int:
return self._minimum_version
@minimum_version.setter
def minimum_version(self, minimum_version: int) -> None:
self._minimum_version = minimum_version
# Highest TLS version requested for this context. The setter only stores the
# value; it is translated through ``_tls_version_to_st`` when ``wrap_socket``
# builds the handshake arguments.
@property
def maximum_version(self) -> int:
return self._maximum_version
@maximum_version.setter
def maximum_version(self, maximum_version: int) -> None:
self._maximum_version = maximum_version

View file

@ -1,4 +1,3 @@
# -*- coding: utf-8 -*-
"""
This module contains provisional support for SOCKS proxies from within
urllib3. This module supports SOCKS4, SOCKS4A (an extension of SOCKS4), and
@ -38,10 +37,11 @@ with the proxy:
proxy_url="socks5h://<username>:<password>@proxy-host"
"""
from __future__ import absolute_import
from __future__ import annotations
try:
import socks
import socks # type: ignore[import]
except ImportError:
import warnings
@ -51,13 +51,13 @@ except ImportError:
(
"SOCKS support in urllib3 requires the installation of optional "
"dependencies: specifically, PySocks. For more information, see "
"https://urllib3.readthedocs.io/en/1.26.x/contrib.html#socks-proxies"
"https://urllib3.readthedocs.io/en/latest/contrib.html#socks-proxies"
),
DependencyWarning,
)
raise
from socket import error as SocketError
import typing
from socket import timeout as SocketTimeout
from ..connection import HTTPConnection, HTTPSConnection
@ -69,7 +69,21 @@ from ..util.url import parse_url
try:
import ssl
except ImportError:
ssl = None
ssl = None # type: ignore[assignment]
# Static type for the ``_socks_options`` mapping that SOCKSConnection
# receives. ``TypedDict`` is imported lazily because it is missing from
# ``typing`` on older interpreters; when the import fails, the name falls
# back to a plain ``Dict[str, Any]`` alias so annotations still resolve.
try:
from typing import TypedDict
class _TYPE_SOCKS_OPTIONS(TypedDict):
socks_version: int
proxy_host: str | None
proxy_port: str | None
username: str | None
password: str | None
rdns: bool
except ImportError: # Python 3.7
_TYPE_SOCKS_OPTIONS = typing.Dict[str, typing.Any] # type: ignore[misc, assignment]
class SOCKSConnection(HTTPConnection):
@ -77,15 +91,20 @@ class SOCKSConnection(HTTPConnection):
A plain-text HTTP connection that connects via a SOCKS proxy.
"""
def __init__(self, *args, **kwargs):
self._socks_options = kwargs.pop("_socks_options")
super(SOCKSConnection, self).__init__(*args, **kwargs)
def __init__(
self,
_socks_options: _TYPE_SOCKS_OPTIONS,
*args: typing.Any,
**kwargs: typing.Any,
) -> None:
self._socks_options = _socks_options
super().__init__(*args, **kwargs)
def _new_conn(self):
def _new_conn(self) -> socks.socksocket:
"""
Establish a new connection via the SOCKS proxy.
"""
extra_kw = {}
extra_kw: dict[str, typing.Any] = {}
if self.source_address:
extra_kw["source_address"] = self.source_address
@ -102,15 +121,14 @@ class SOCKSConnection(HTTPConnection):
proxy_password=self._socks_options["password"],
proxy_rdns=self._socks_options["rdns"],
timeout=self.timeout,
**extra_kw
**extra_kw,
)
except SocketTimeout:
except SocketTimeout as e:
raise ConnectTimeoutError(
self,
"Connection to %s timed out. (connect timeout=%s)"
% (self.host, self.timeout),
)
f"Connection to {self.host} timed out. (connect timeout={self.timeout})",
) from e
except socks.ProxyError as e:
# This is fragile as hell, but it seems to be the only way to raise
@ -120,22 +138,23 @@ class SOCKSConnection(HTTPConnection):
if isinstance(error, SocketTimeout):
raise ConnectTimeoutError(
self,
"Connection to %s timed out. (connect timeout=%s)"
% (self.host, self.timeout),
f"Connection to {self.host} timed out. (connect timeout={self.timeout})",
) from e
else:
# Adding `from e` messes with coverage somehow, so it's omitted.
# See #2386.
raise NewConnectionError(
self, f"Failed to establish a new connection: {error}"
)
else:
raise NewConnectionError(
self, "Failed to establish a new connection: %s" % error
)
else:
raise NewConnectionError(
self, "Failed to establish a new connection: %s" % e
)
self, f"Failed to establish a new connection: {e}"
) from e
except SocketError as e: # Defensive: PySocks should catch all these.
except OSError as e: # Defensive: PySocks should catch all these.
raise NewConnectionError(
self, "Failed to establish a new connection: %s" % e
)
self, f"Failed to establish a new connection: {e}"
) from e
return conn
@ -169,12 +188,12 @@ class SOCKSProxyManager(PoolManager):
def __init__(
self,
proxy_url,
username=None,
password=None,
num_pools=10,
headers=None,
**connection_pool_kw
proxy_url: str,
username: str | None = None,
password: str | None = None,
num_pools: int = 10,
headers: typing.Mapping[str, str] | None = None,
**connection_pool_kw: typing.Any,
):
parsed = parse_url(proxy_url)
@ -195,7 +214,7 @@ class SOCKSProxyManager(PoolManager):
socks_version = socks.PROXY_TYPE_SOCKS4
rdns = True
else:
raise ValueError("Unable to determine SOCKS version from %s" % proxy_url)
raise ValueError(f"Unable to determine SOCKS version from {proxy_url}")
self.proxy_url = proxy_url
@ -209,8 +228,6 @@ class SOCKSProxyManager(PoolManager):
}
connection_pool_kw["_socks_options"] = socks_options
super(SOCKSProxyManager, self).__init__(
num_pools, headers, **connection_pool_kw
)
super().__init__(num_pools, headers, **connection_pool_kw)
self.pool_classes_by_scheme = SOCKSProxyManager.pool_classes_by_scheme

View file

@ -1,6 +1,16 @@
from __future__ import absolute_import
from __future__ import annotations
from .packages.six.moves.http_client import IncompleteRead as httplib_IncompleteRead
import socket
import typing
import warnings
from email.errors import MessageDefect
from http.client import IncompleteRead as httplib_IncompleteRead
if typing.TYPE_CHECKING:
from .connection import HTTPConnection
from .connectionpool import ConnectionPool
from .response import HTTPResponse
from .util.retry import Retry
# Base Exceptions
@ -8,23 +18,24 @@ from .packages.six.moves.http_client import IncompleteRead as httplib_Incomplete
class HTTPError(Exception):
"""Base exception used by this module."""
pass
class HTTPWarning(Warning):
"""Base warning used by this module."""
pass
# Return type of the ``__reduce__`` methods in this module: a callable
# together with the tuple of positional arguments to call it with.
_TYPE_REDUCE_RESULT = typing.Tuple[
typing.Callable[..., object], typing.Tuple[object, ...]
]
class PoolError(HTTPError):
"""Base exception for errors caused within a pool."""
def __init__(self, pool, message):
def __init__(self, pool: ConnectionPool, message: str) -> None:
self.pool = pool
HTTPError.__init__(self, "%s: %s" % (pool, message))
super().__init__(f"{pool}: {message}")
def __reduce__(self):
def __reduce__(self) -> _TYPE_REDUCE_RESULT:
# For pickling purposes.
return self.__class__, (None, None)
@ -32,11 +43,11 @@ class PoolError(HTTPError):
class RequestError(PoolError):
"""Base exception for PoolErrors that have associated URLs."""
def __init__(self, pool, url, message):
def __init__(self, pool: ConnectionPool, url: str, message: str) -> None:
self.url = url
PoolError.__init__(self, pool, message)
super().__init__(pool, message)
def __reduce__(self):
def __reduce__(self) -> _TYPE_REDUCE_RESULT:
# For pickling purposes.
return self.__class__, (None, self.url, None)
@ -44,28 +55,25 @@ class RequestError(PoolError):
class SSLError(HTTPError):
"""Raised when SSL certificate fails in an HTTPS connection."""
pass
class ProxyError(HTTPError):
"""Raised when the connection to a proxy fails."""
def __init__(self, message, error, *args):
super(ProxyError, self).__init__(message, error, *args)
# The original error is also available as __cause__.
original_error: Exception
def __init__(self, message: str, error: Exception) -> None:
super().__init__(message, error)
self.original_error = error
class DecodeError(HTTPError):
"""Raised when automatic decoding based on Content-Type fails."""
pass
class ProtocolError(HTTPError):
"""Raised when something unexpected happens mid-request/response."""
pass
#: Renamed to ProtocolError but aliased for backwards compatibility.
ConnectionError = ProtocolError
@ -79,33 +87,36 @@ class MaxRetryError(RequestError):
:param pool: The connection pool
:type pool: :class:`~urllib3.connectionpool.HTTPConnectionPool`
:param string url: The requested Url
:param exceptions.Exception reason: The underlying error
:param str url: The requested Url
:param reason: The underlying error
:type reason: :class:`Exception`
"""
def __init__(self, pool, url, reason=None):
def __init__(
self, pool: ConnectionPool, url: str, reason: Exception | None = None
) -> None:
self.reason = reason
message = "Max retries exceeded with url: %s (Caused by %r)" % (url, reason)
message = f"Max retries exceeded with url: {url} (Caused by {reason!r})"
RequestError.__init__(self, pool, url, message)
super().__init__(pool, url, message)
class HostChangedError(RequestError):
"""Raised when an existing pool gets a request for a foreign host."""
def __init__(self, pool, url, retries=3):
message = "Tried to open a foreign host with url: %s" % url
RequestError.__init__(self, pool, url, message)
def __init__(
self, pool: ConnectionPool, url: str, retries: Retry | int = 3
) -> None:
message = f"Tried to open a foreign host with url: {url}"
super().__init__(pool, url, message)
self.retries = retries
class TimeoutStateError(HTTPError):
"""Raised when passing an invalid state to a timeout"""
pass
class TimeoutError(HTTPError):
"""Raised when a socket timeout error occurs.
@ -114,53 +125,66 @@ class TimeoutError(HTTPError):
<ReadTimeoutError>` and :exc:`ConnectTimeoutErrors <ConnectTimeoutError>`.
"""
pass
class ReadTimeoutError(TimeoutError, RequestError):
"""Raised when a socket timeout occurs while receiving data from a server"""
pass
# This timeout error does not have a URL attached and needs to inherit from the
# base HTTPError
class ConnectTimeoutError(TimeoutError):
"""Raised when a socket timeout occurs while connecting to a server"""
pass
class NewConnectionError(ConnectTimeoutError, PoolError):
class NewConnectionError(ConnectTimeoutError, HTTPError):
"""Raised when we fail to establish a new connection. Usually ECONNREFUSED."""
pass
def __init__(self, conn: HTTPConnection, message: str) -> None:
    """Remember the failing connection and build the error text.

    :param conn: The connection that could not be established.
    :param message: Human-readable description of the failure.
    """
    self.conn = conn
    # The exception text is the connection followed by the message.
    full_message = f"{conn}: {message}"
    super().__init__(full_message)
@property
def pool(self) -> HTTPConnection:
    """Deprecated alias for ``conn``.

    Issues a :class:`DeprecationWarning` and returns ``self.conn``.
    """
    deprecation_notice = (
        "The 'pool' property is deprecated and will be removed "
        "in urllib3 v2.1.0. Use 'conn' instead."
    )
    # stacklevel=2 attributes the warning to the code that read ``.pool``.
    warnings.warn(deprecation_notice, DeprecationWarning, stacklevel=2)
    return self.conn
class NameResolutionError(NewConnectionError):
    """Raised when host name resolution fails."""

    def __init__(self, host: str, conn: HTTPConnection, reason: socket.gaierror):
        """Build the error from the unresolved host and the resolver failure.

        :param host: The host name that could not be resolved.
        :param conn: The connection that attempted the lookup.
        :param reason: The resolver error, embedded in the message text.
        """
        super().__init__(conn, f"Failed to resolve '{host}' ({reason})")
class EmptyPoolError(PoolError):
"""Raised when a pool runs out of connections and no more are allowed."""
pass
class FullPoolError(PoolError):
"""Raised when we try to add a connection to a full pool in blocking mode."""
class ClosedPoolError(PoolError):
"""Raised when a request enters a pool after the pool has been closed."""
pass
class LocationValueError(ValueError, HTTPError):
"""Raised when there is something wrong with a given URL input."""
pass
class LocationParseError(LocationValueError):
"""Raised when get_host or similar fails to parse the URL input."""
def __init__(self, location):
message = "Failed to parse: %s" % location
HTTPError.__init__(self, message)
def __init__(self, location: str) -> None:
message = f"Failed to parse: {location}"
super().__init__(message)
self.location = location
@ -168,9 +192,9 @@ class LocationParseError(LocationValueError):
class URLSchemeUnknown(LocationValueError):
"""Raised when a URL input has an unsupported scheme."""
def __init__(self, scheme):
message = "Not supported URL scheme %s" % scheme
super(URLSchemeUnknown, self).__init__(message)
def __init__(self, scheme: str):
message = f"Not supported URL scheme {scheme}"
super().__init__(message)
self.scheme = scheme
@ -185,38 +209,22 @@ class ResponseError(HTTPError):
class SecurityWarning(HTTPWarning):
"""Warned when performing security reducing actions"""
pass
class SubjectAltNameWarning(SecurityWarning):
"""Warned when connecting to a host with a certificate missing a SAN."""
pass
class InsecureRequestWarning(SecurityWarning):
"""Warned when making an unverified HTTPS request."""
pass
class NotOpenSSLWarning(SecurityWarning):
"""Warned when using unsupported SSL library"""
class SystemTimeWarning(SecurityWarning):
"""Warned when system time is suspected to be wrong"""
pass
class InsecurePlatformWarning(SecurityWarning):
"""Warned when certain TLS/SSL configuration is not available on a platform."""
pass
class SNIMissingWarning(HTTPWarning):
"""Warned when making a HTTPS request without SNI available."""
pass
class DependencyWarning(HTTPWarning):
"""
@ -224,14 +232,10 @@ class DependencyWarning(HTTPWarning):
dependencies.
"""
pass
class ResponseNotChunked(ProtocolError, ValueError):
"""Response needs to be chunked in order to read it as chunks."""
pass
class BodyNotHttplibCompatible(HTTPError):
"""
@ -239,8 +243,6 @@ class BodyNotHttplibCompatible(HTTPError):
(have an fp attribute which returns raw chunks) for read_chunked().
"""
pass
class IncompleteRead(HTTPError, httplib_IncompleteRead):
"""
@ -250,12 +252,13 @@ class IncompleteRead(HTTPError, httplib_IncompleteRead):
for ``partial`` to avoid creating large objects on streamed reads.
"""
def __init__(self, partial, expected):
super(IncompleteRead, self).__init__(partial, expected)
def __init__(self, partial: int, expected: int) -> None:
self.partial = partial # type: ignore[assignment]
self.expected = expected
def __repr__(self):
def __repr__(self) -> str:
return "IncompleteRead(%i bytes read, %i more expected)" % (
self.partial,
self.partial, # type: ignore[str-format]
self.expected,
)
@ -263,14 +266,13 @@ class IncompleteRead(HTTPError, httplib_IncompleteRead):
class InvalidChunkLength(HTTPError, httplib_IncompleteRead):
"""Invalid chunk length in a chunked response."""
def __init__(self, response, length):
super(InvalidChunkLength, self).__init__(
response.tell(), response.length_remaining
)
def __init__(self, response: HTTPResponse, length: bytes) -> None:
self.partial: int = response.tell() # type: ignore[assignment]
self.expected: int | None = response.length_remaining
self.response = response
self.length = length
def __repr__(self):
def __repr__(self) -> str:
return "InvalidChunkLength(got length %r, %i bytes read)" % (
self.length,
self.partial,
@ -280,15 +282,13 @@ class InvalidChunkLength(HTTPError, httplib_IncompleteRead):
class InvalidHeader(HTTPError):
"""The header provided was somehow invalid."""
pass
class ProxySchemeUnknown(AssertionError, URLSchemeUnknown):
"""ProxyManager does not support the supplied scheme"""
# TODO(t-8ch): Stop inheriting from AssertionError in v2.0.
def __init__(self, scheme):
def __init__(self, scheme: str | None) -> None:
# 'localhost' is here because our URL parser parses
# localhost:8080 -> scheme=localhost, remove if we fix this.
if scheme == "localhost":
@ -296,28 +296,23 @@ class ProxySchemeUnknown(AssertionError, URLSchemeUnknown):
if scheme is None:
message = "Proxy URL had no scheme, should start with http:// or https://"
else:
message = (
"Proxy URL had unsupported scheme %s, should use http:// or https://"
% scheme
)
super(ProxySchemeUnknown, self).__init__(message)
message = f"Proxy URL had unsupported scheme {scheme}, should use http:// or https://"
super().__init__(message)
class ProxySchemeUnsupported(ValueError):
"""Fetching HTTPS resources through HTTPS proxies is unsupported"""
pass
class HeaderParsingError(HTTPError):
"""Raised by assert_header_parsing, but we convert it to a log.warning statement."""
def __init__(self, defects, unparsed_data):
message = "%s, unparsed data: %r" % (defects or "Unknown", unparsed_data)
super(HeaderParsingError, self).__init__(message)
def __init__(
self, defects: list[MessageDefect], unparsed_data: bytes | str | None
) -> None:
message = f"{defects or 'Unknown'}, unparsed data: {unparsed_data!r}"
super().__init__(message)
class UnrewindableBodyError(HTTPError):
"""urllib3 encountered an error when trying to rewind a body"""
pass

View file

@ -1,13 +1,20 @@
from __future__ import absolute_import
from __future__ import annotations
import email.utils
import mimetypes
import re
import typing
from .packages import six
# A single field value: text or raw bytes.
_TYPE_FIELD_VALUE = typing.Union[str, bytes]
# The "old-style" field specifications understood by
# ``RequestField.from_tuples``: a bare value, a ``(filename, data)`` pair,
# or a ``(filename, data, content_type)`` triple.
_TYPE_FIELD_VALUE_TUPLE = typing.Union[
_TYPE_FIELD_VALUE,
typing.Tuple[str, _TYPE_FIELD_VALUE],
typing.Tuple[str, _TYPE_FIELD_VALUE, str],
]
def guess_content_type(filename, default="application/octet-stream"):
def guess_content_type(
filename: str | None, default: str = "application/octet-stream"
) -> str:
"""
Guess the "Content-Type" of a file.
@ -21,7 +28,7 @@ def guess_content_type(filename, default="application/octet-stream"):
return default
def format_header_param_rfc2231(name, value):
def format_header_param_rfc2231(name: str, value: _TYPE_FIELD_VALUE) -> str:
"""
Helper function to format and quote a single header parameter using the
strategy defined in RFC 2231.
@ -34,14 +41,28 @@ def format_header_param_rfc2231(name, value):
The name of the parameter, a string expected to be ASCII only.
:param value:
The value of the parameter, provided as ``bytes`` or ``str``.
:ret:
:returns:
An RFC-2231-formatted unicode string.
.. deprecated:: 2.0.0
Will be removed in urllib3 v2.1.0. This is not valid for
``multipart/form-data`` header parameters.
"""
if isinstance(value, six.binary_type):
import warnings
warnings.warn(
"'format_header_param_rfc2231' is deprecated and will be "
"removed in urllib3 v2.1.0. This is not valid for "
"multipart/form-data header parameters.",
DeprecationWarning,
stacklevel=2,
)
if isinstance(value, bytes):
value = value.decode("utf-8")
if not any(ch in value for ch in '"\\\r\n'):
result = u'%s="%s"' % (name, value)
result = f'{name}="{value}"'
try:
result.encode("ascii")
except (UnicodeEncodeError, UnicodeDecodeError):
@ -49,81 +70,87 @@ def format_header_param_rfc2231(name, value):
else:
return result
if six.PY2: # Python 2:
value = value.encode("utf-8")
# encode_rfc2231 accepts an encoded string and returns an ascii-encoded
# string in Python 2 but accepts and returns unicode strings in Python 3
value = email.utils.encode_rfc2231(value, "utf-8")
value = "%s*=%s" % (name, value)
if six.PY2: # Python 2:
value = value.decode("utf-8")
value = f"{name}*={value}"
return value
_HTML5_REPLACEMENTS = {
u"\u0022": u"%22",
# Replace "\" with "\\".
u"\u005C": u"\u005C\u005C",
}
# All control characters from 0x00 to 0x1F *except* 0x1B.
_HTML5_REPLACEMENTS.update(
{
six.unichr(cc): u"%{:02X}".format(cc)
for cc in range(0x00, 0x1F + 1)
if cc not in (0x1B,)
}
)
def _replace_multiple(value, needles_and_replacements):
def replacer(match):
return needles_and_replacements[match.group(0)]
pattern = re.compile(
r"|".join([re.escape(needle) for needle in needles_and_replacements.keys()])
)
result = pattern.sub(replacer, value)
return result
def format_header_param_html5(name, value):
def format_multipart_header_param(name: str, value: _TYPE_FIELD_VALUE) -> str:
"""
Helper function to format and quote a single header parameter using the
HTML5 strategy.
Format and quote a single multipart header parameter.
Particularly useful for header parameters which might contain
non-ASCII values, like file names. This follows the `HTML5 Working Draft
Section 4.10.22.7`_ and matches the behavior of curl and modern browsers.
This follows the `WHATWG HTML Standard`_ as of 2021/06/10, matching
the behavior of current browser and curl versions. Values are
assumed to be UTF-8. The ``\\n``, ``\\r``, and ``"`` characters are
percent encoded.
.. _HTML5 Working Draft Section 4.10.22.7:
https://w3c.github.io/html/sec-forms.html#multipart-form-data
.. _WHATWG HTML Standard:
https://html.spec.whatwg.org/multipage/
form-control-infrastructure.html#multipart-form-data
:param name:
The name of the parameter, a string expected to be ASCII only.
The name of the parameter, an ASCII-only ``str``.
:param value:
The value of the parameter, provided as ``bytes`` or `str``.
:ret:
A unicode string, stripped of troublesome characters.
The value of the parameter, a ``str`` or UTF-8 encoded
``bytes``.
:returns:
A string ``name="value"`` with the escaped value.
.. versionchanged:: 2.0.0
Matches the WHATWG HTML Standard as of 2021/06/10. Control
characters are no longer percent encoded.
.. versionchanged:: 2.0.0
Renamed from ``format_header_param_html5`` and
``format_header_param``. The old names will be removed in
urllib3 v2.1.0.
"""
if isinstance(value, six.binary_type):
if isinstance(value, bytes):
value = value.decode("utf-8")
value = _replace_multiple(value, _HTML5_REPLACEMENTS)
return u'%s="%s"' % (name, value)
# percent encode \n \r "
value = value.translate({10: "%0A", 13: "%0D", 34: "%22"})
return f'{name}="{value}"'
# For backwards-compatibility.
format_header_param = format_header_param_html5
def format_header_param_html5(name: str, value: _TYPE_FIELD_VALUE) -> str:
    """
    Deprecated alias that forwards to :func:`format_multipart_header_param`.

    .. deprecated:: 2.0.0
        Renamed to :func:`format_multipart_header_param`. Will be
        removed in urllib3 v2.1.0.
    """
    import warnings

    rename_notice = (
        "'format_header_param_html5' has been renamed to "
        "'format_multipart_header_param'. The old name will be "
        "removed in urllib3 v2.1.0."
    )
    # stacklevel=2 points the warning at the caller of the old name.
    warnings.warn(rename_notice, DeprecationWarning, stacklevel=2)
    return format_multipart_header_param(name, value)
class RequestField(object):
def format_header_param(name: str, value: _TYPE_FIELD_VALUE) -> str:
    """
    Deprecated alias that forwards to :func:`format_multipart_header_param`.

    .. deprecated:: 2.0.0
        Renamed to :func:`format_multipart_header_param`. Will be
        removed in urllib3 v2.1.0.
    """
    import warnings

    rename_notice = (
        "'format_header_param' has been renamed to "
        "'format_multipart_header_param'. The old name will be "
        "removed in urllib3 v2.1.0."
    )
    # stacklevel=2 points the warning at the caller of the old name.
    warnings.warn(rename_notice, DeprecationWarning, stacklevel=2)
    return format_multipart_header_param(name, value)
class RequestField:
"""
A data container for request body parameters.
@ -135,29 +162,47 @@ class RequestField(object):
An optional filename of the request field. Must be unicode.
:param headers:
An optional dict-like object of headers to initially use for the field.
:param header_formatter:
An optional callable that is used to encode and format the headers. By
default, this is :func:`format_header_param_html5`.
.. versionchanged:: 2.0.0
The ``header_formatter`` parameter is deprecated and will
be removed in urllib3 v2.1.0.
"""
def __init__(
self,
name,
data,
filename=None,
headers=None,
header_formatter=format_header_param_html5,
name: str,
data: _TYPE_FIELD_VALUE,
filename: str | None = None,
headers: typing.Mapping[str, str] | None = None,
header_formatter: typing.Callable[[str, _TYPE_FIELD_VALUE], str] | None = None,
):
self._name = name
self._filename = filename
self.data = data
self.headers = {}
self.headers: dict[str, str | None] = {}
if headers:
self.headers = dict(headers)
if header_formatter is not None:
import warnings
warnings.warn(
"The 'header_formatter' parameter is deprecated and "
"will be removed in urllib3 v2.1.0.",
DeprecationWarning,
stacklevel=2,
)
self.header_formatter = header_formatter
else:
self.header_formatter = format_multipart_header_param
@classmethod
def from_tuples(cls, fieldname, value, header_formatter=format_header_param_html5):
def from_tuples(
cls,
fieldname: str,
value: _TYPE_FIELD_VALUE_TUPLE,
header_formatter: typing.Callable[[str, _TYPE_FIELD_VALUE], str] | None = None,
) -> RequestField:
"""
A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters.
@ -174,11 +219,19 @@ class RequestField(object):
Field names and filenames must be unicode.
"""
filename: str | None
content_type: str | None
data: _TYPE_FIELD_VALUE
if isinstance(value, tuple):
if len(value) == 3:
filename, data, content_type = value
filename, data, content_type = typing.cast(
typing.Tuple[str, _TYPE_FIELD_VALUE, str], value
)
else:
filename, data = value
filename, data = typing.cast(
typing.Tuple[str, _TYPE_FIELD_VALUE], value
)
content_type = guess_content_type(filename)
else:
filename = None
@ -192,20 +245,29 @@ class RequestField(object):
return request_param
def _render_part(self, name, value):
def _render_part(self, name: str, value: _TYPE_FIELD_VALUE) -> str:
"""
Overridable helper function to format a single header parameter. By
default, this calls ``self.header_formatter``.
Override this method to change how each multipart header
parameter is formatted. By default, this calls
:func:`format_multipart_header_param`.
:param name:
The name of the parameter, a string expected to be ASCII only.
The name of the parameter, an ASCII-only ``str``.
:param value:
The value of the parameter, provided as a unicode string.
"""
The value of the parameter, a ``str`` or UTF-8 encoded
``bytes``.
:meta public:
"""
return self.header_formatter(name, value)
def _render_parts(self, header_parts):
def _render_parts(
self,
header_parts: (
dict[str, _TYPE_FIELD_VALUE | None]
| typing.Sequence[tuple[str, _TYPE_FIELD_VALUE | None]]
),
) -> str:
"""
Helper function to format and quote a single header.
@ -216,18 +278,21 @@ class RequestField(object):
A sequence of (k, v) tuples or a :class:`dict` of (k, v) to format
as `k1="v1"; k2="v2"; ...`.
"""
iterable: typing.Iterable[tuple[str, _TYPE_FIELD_VALUE | None]]
parts = []
iterable = header_parts
if isinstance(header_parts, dict):
iterable = header_parts.items()
else:
iterable = header_parts
for name, value in iterable:
if value is not None:
parts.append(self._render_part(name, value))
return u"; ".join(parts)
return "; ".join(parts)
def render_headers(self):
def render_headers(self) -> str:
"""
Renders the headers for this request field.
"""
@ -236,39 +301,45 @@ class RequestField(object):
sort_keys = ["Content-Disposition", "Content-Type", "Content-Location"]
for sort_key in sort_keys:
if self.headers.get(sort_key, False):
lines.append(u"%s: %s" % (sort_key, self.headers[sort_key]))
lines.append(f"{sort_key}: {self.headers[sort_key]}")
for header_name, header_value in self.headers.items():
if header_name not in sort_keys:
if header_value:
lines.append(u"%s: %s" % (header_name, header_value))
lines.append(f"{header_name}: {header_value}")
lines.append(u"\r\n")
return u"\r\n".join(lines)
lines.append("\r\n")
return "\r\n".join(lines)
def make_multipart(
self, content_disposition=None, content_type=None, content_location=None
):
self,
content_disposition: str | None = None,
content_type: str | None = None,
content_location: str | None = None,
) -> None:
"""
Makes this request field into a multipart request field.
This method overrides "Content-Disposition", "Content-Type" and
"Content-Location" headers to the request parameter.
:param content_disposition:
The 'Content-Disposition' of the request body. Defaults to 'form-data'
:param content_type:
The 'Content-Type' of the request body.
:param content_location:
The 'Content-Location' of the request body.
"""
self.headers["Content-Disposition"] = content_disposition or u"form-data"
self.headers["Content-Disposition"] += u"; ".join(
content_disposition = (content_disposition or "form-data") + "; ".join(
[
u"",
"",
self._render_parts(
((u"name", self._name), (u"filename", self._filename))
(("name", self._name), ("filename", self._filename))
),
]
)
self.headers["Content-Disposition"] = content_disposition
self.headers["Content-Type"] = content_type
self.headers["Content-Location"] = content_location

View file

@ -1,28 +1,32 @@
from __future__ import absolute_import
from __future__ import annotations
import binascii
import codecs
import os
import typing
from io import BytesIO
from .fields import RequestField
from .packages import six
from .packages.six import b
from .fields import _TYPE_FIELD_VALUE_TUPLE, RequestField
writer = codecs.lookup("utf-8")[3]
# Fields given as a sequence: each item is either a ready-made
# ``RequestField`` or a ``(name, value)`` pair.
_TYPE_FIELDS_SEQUENCE = typing.Sequence[
typing.Union[typing.Tuple[str, _TYPE_FIELD_VALUE_TUPLE], RequestField]
]
# Everything ``iter_field_objects`` accepts: such a sequence, or a mapping
# from field name to value.
_TYPE_FIELDS = typing.Union[
_TYPE_FIELDS_SEQUENCE,
typing.Mapping[str, _TYPE_FIELD_VALUE_TUPLE],
]
def choose_boundary():
def choose_boundary() -> str:
"""
Our embarrassingly-simple replacement for mimetools.choose_boundary.
"""
boundary = binascii.hexlify(os.urandom(16))
if not six.PY2:
boundary = boundary.decode("ascii")
return boundary
return binascii.hexlify(os.urandom(16)).decode()
def iter_field_objects(fields):
def iter_field_objects(fields: _TYPE_FIELDS) -> typing.Iterable[RequestField]:
"""
Iterate over fields.
@ -30,42 +34,29 @@ def iter_field_objects(fields):
:class:`~urllib3.fields.RequestField`.
"""
if isinstance(fields, dict):
i = six.iteritems(fields)
else:
i = iter(fields)
iterable: typing.Iterable[RequestField | tuple[str, _TYPE_FIELD_VALUE_TUPLE]]
for field in i:
if isinstance(fields, typing.Mapping):
iterable = fields.items()
else:
iterable = fields
for field in iterable:
if isinstance(field, RequestField):
yield field
else:
yield RequestField.from_tuples(*field)
def iter_fields(fields):
"""
.. deprecated:: 1.6
Iterate over fields.
The addition of :class:`~urllib3.fields.RequestField` makes this function
obsolete. Instead, use :func:`iter_field_objects`, which returns
:class:`~urllib3.fields.RequestField` objects.
Supports list of (k, v) tuples and dicts.
"""
if isinstance(fields, dict):
return ((k, v) for k, v in six.iteritems(fields))
return ((k, v) for k, v in fields)
def encode_multipart_formdata(fields, boundary=None):
def encode_multipart_formdata(
fields: _TYPE_FIELDS, boundary: str | None = None
) -> tuple[bytes, str]:
"""
Encode a dictionary of ``fields`` using the multipart/form-data MIME format.
:param fields:
Dictionary of fields or list of (key, :class:`~urllib3.fields.RequestField`).
Values are processed by :func:`urllib3.fields.RequestField.from_tuples`.
:param boundary:
If not specified, then a random boundary will be generated using
@ -76,7 +67,7 @@ def encode_multipart_formdata(fields, boundary=None):
boundary = choose_boundary()
for field in iter_field_objects(fields):
body.write(b("--%s\r\n" % (boundary)))
body.write(f"--{boundary}\r\n".encode("latin-1"))
writer(body).write(field.render_headers())
data = field.data
@ -84,15 +75,15 @@ def encode_multipart_formdata(fields, boundary=None):
if isinstance(data, int):
data = str(data) # Backwards compatibility
if isinstance(data, six.text_type):
if isinstance(data, str):
writer(body).write(data)
else:
body.write(data)
body.write(b"\r\n")
body.write(b("--%s--\r\n" % (boundary)))
body.write(f"--{boundary}--\r\n".encode("latin-1"))
content_type = str("multipart/form-data; boundary=%s" % boundary)
content_type = f"multipart/form-data; boundary={boundary}"
return body.getvalue(), content_type

Some files were not shown because too many files have changed in this diff Show more