Merge branch 'nightly' into dependabot/pip/nightly/tempora-5.5.0

This commit is contained in:
JonnyWong16 2023-08-23 21:43:14 -07:00 committed by GitHub
commit af75759186
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
122 changed files with 7064 additions and 5942 deletions

View file

@ -15,7 +15,7 @@ documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
""" """
__author__ = "Leonard Richardson (leonardr@segfault.org)" __author__ = "Leonard Richardson (leonardr@segfault.org)"
__version__ = "4.11.2" __version__ = "4.12.2"
__copyright__ = "Copyright (c) 2004-2023 Leonard Richardson" __copyright__ = "Copyright (c) 2004-2023 Leonard Richardson"
# Use of this source code is governed by the MIT license. # Use of this source code is governed by the MIT license.
__license__ = "MIT" __license__ = "MIT"
@ -38,11 +38,13 @@ from .builder import (
builder_registry, builder_registry,
ParserRejectedMarkup, ParserRejectedMarkup,
XMLParsedAsHTMLWarning, XMLParsedAsHTMLWarning,
HTMLParserTreeBuilder
) )
from .dammit import UnicodeDammit from .dammit import UnicodeDammit
from .element import ( from .element import (
CData, CData,
Comment, Comment,
CSS,
DEFAULT_OUTPUT_ENCODING, DEFAULT_OUTPUT_ENCODING,
Declaration, Declaration,
Doctype, Doctype,
@ -348,26 +350,50 @@ class BeautifulSoup(Tag):
self.markup = None self.markup = None
self.builder.soup = None self.builder.soup = None
def __copy__(self): def _clone(self):
"""Copy a BeautifulSoup object by converting the document to a string and parsing it again.""" """Create a new BeautifulSoup object with the same TreeBuilder,
copy = type(self)( but not associated with any markup.
self.encode('utf-8'), builder=self.builder, from_encoding='utf-8'
)
# Although we encoded the tree to UTF-8, that may not have This is the first step of the deepcopy process.
# been the encoding of the original markup. Set the copy's """
# .original_encoding to reflect the original object's clone = type(self)("", None, self.builder)
# .original_encoding.
copy.original_encoding = self.original_encoding # Keep track of the encoding of the original document,
return copy # since we won't be parsing it again.
clone.original_encoding = self.original_encoding
return clone
def __getstate__(self): def __getstate__(self):
# Frequently a tree builder can't be pickled. # Frequently a tree builder can't be pickled.
d = dict(self.__dict__) d = dict(self.__dict__)
if 'builder' in d and d['builder'] is not None and not self.builder.picklable: if 'builder' in d and d['builder'] is not None and not self.builder.picklable:
d['builder'] = None d['builder'] = type(self.builder)
# Store the contents as a Unicode string.
d['contents'] = []
d['markup'] = self.decode()
# If _most_recent_element is present, it's a Tag object left
# over from initial parse. It might not be picklable and we
# don't need it.
if '_most_recent_element' in d:
del d['_most_recent_element']
return d return d
def __setstate__(self, state):
# If necessary, restore the TreeBuilder by looking it up.
self.__dict__ = state
if isinstance(self.builder, type):
self.builder = self.builder()
elif not self.builder:
# We don't know which builder was used to build this
# parse tree, so use a default we know is always available.
self.builder = HTMLParserTreeBuilder()
self.builder.soup = self
self.reset()
self._feed()
return state
@classmethod @classmethod
def _decode_markup(cls, markup): def _decode_markup(cls, markup):
"""Ensure `markup` is bytes so it's safe to send into warnings.warn. """Ensure `markup` is bytes so it's safe to send into warnings.warn.
@ -468,6 +494,7 @@ class BeautifulSoup(Tag):
self.open_tag_counter = Counter() self.open_tag_counter = Counter()
self.preserve_whitespace_tag_stack = [] self.preserve_whitespace_tag_stack = []
self.string_container_stack = [] self.string_container_stack = []
self._most_recent_element = None
self.pushTag(self) self.pushTag(self)
def new_tag(self, name, namespace=None, nsprefix=None, attrs={}, def new_tag(self, name, namespace=None, nsprefix=None, attrs={},
@ -749,7 +776,7 @@ class BeautifulSoup(Tag):
def decode(self, pretty_print=False, def decode(self, pretty_print=False,
eventual_encoding=DEFAULT_OUTPUT_ENCODING, eventual_encoding=DEFAULT_OUTPUT_ENCODING,
formatter="minimal"): formatter="minimal", iterator=None):
"""Returns a string or Unicode representation of the parse tree """Returns a string or Unicode representation of the parse tree
as an HTML or XML document. as an HTML or XML document.
@ -776,7 +803,7 @@ class BeautifulSoup(Tag):
else: else:
indent_level = 0 indent_level = 0
return prefix + super(BeautifulSoup, self).decode( return prefix + super(BeautifulSoup, self).decode(
indent_level, eventual_encoding, formatter) indent_level, eventual_encoding, formatter, iterator)
# Aliases to make it easier to get started quickly, e.g. 'from bs4 import _soup' # Aliases to make it easier to get started quickly, e.g. 'from bs4 import _soup'
_s = BeautifulSoup _s = BeautifulSoup

View file

@ -24,6 +24,7 @@ from bs4.dammit import EntitySubstitution, UnicodeDammit
from bs4.builder import ( from bs4.builder import (
DetectsXMLParsedAsHTML, DetectsXMLParsedAsHTML,
ParserRejectedMarkup,
HTML, HTML,
HTMLTreeBuilder, HTMLTreeBuilder,
STRICT, STRICT,
@ -70,6 +71,22 @@ class BeautifulSoupHTMLParser(HTMLParser, DetectsXMLParsedAsHTML):
self._initialize_xml_detector() self._initialize_xml_detector()
def error(self, message):
    """Abort parsing by raising ParserRejectedMarkup.

    :param message: The error message produced by html.parser.
    :raises ParserRejectedMarkup: Always.
    """
    # NOTE: This method is required so long as Python 3.9 is
    # supported. The corresponding code is removed from HTMLParser
    # in 3.5, but not removed from ParserBase until 3.10.
    # https://github.com/python/cpython/issues/76025
    #
    # The original implementation turned the error into a warning,
    # but in every case I discovered, this made HTMLParser
    # immediately crash with an error message that was less
    # helpful than the warning. The new implementation makes it
    # more clear that html.parser just can't parse this
    # markup. The 3.10 implementation does the same, though it
    # raises AssertionError rather than calling a method. (We
    # catch this error and wrap it in a ParserRejectedMarkup.)
    raise ParserRejectedMarkup(message)
def handle_startendtag(self, name, attrs): def handle_startendtag(self, name, attrs):
"""Handle an incoming empty-element tag. """Handle an incoming empty-element tag.
@ -359,6 +376,12 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
args, kwargs = self.parser_args args, kwargs = self.parser_args
parser = BeautifulSoupHTMLParser(*args, **kwargs) parser = BeautifulSoupHTMLParser(*args, **kwargs)
parser.soup = self.soup parser.soup = self.soup
try:
parser.feed(markup) parser.feed(markup)
except AssertionError as e:
# html.parser raises AssertionError in rare cases to
# indicate a fatal problem with the markup, especially
# when there's an error in the doctype declaration.
raise ParserRejectedMarkup(e)
parser.close() parser.close()
parser.already_closed_empty_element = [] parser.already_closed_empty_element = []

280
lib/bs4/css.py Normal file
View file

@ -0,0 +1,280 @@
"""Integration code for CSS selectors using Soup Sieve (pypi: soupsieve)."""
import warnings
try:
import soupsieve
except ImportError as e:
soupsieve = None
warnings.warn(
'The soupsieve package is not installed. CSS selectors cannot be used.'
)
class CSS(object):
"""A proxy object against the soupsieve library, to simplify its
CSS selector API.
Acquire this object through the .css attribute on the
BeautifulSoup object, or on the Tag you want to use as the
starting point for a CSS selector.
The main advantage of doing this is that the tag to be selected
against doesn't need to be explicitly specified in the function
calls, since it's already scoped to a tag.
"""
def __init__(self, tag, api=soupsieve):
"""Constructor.
You don't need to instantiate this class yourself; instead,
access the .css attribute on the BeautifulSoup object, or on
the Tag you want to use as the starting point for your CSS
selector.
:param tag: All CSS selectors will use this as their starting
point.
:param api: A plug-in replacement for the soupsieve module,
designed mainly for use in tests.
"""
if api is None:
raise NotImplementedError(
"Cannot execute CSS selectors because the soupsieve package is not installed."
)
self.api = api
self.tag = tag
def escape(self, ident):
"""Escape a CSS identifier.
This is a simple wrapper around soupselect.escape(). See the
documentation for that function for more information.
"""
if soupsieve is None:
raise NotImplementedError(
"Cannot escape CSS identifiers because the soupsieve package is not installed."
)
return self.api.escape(ident)
def _ns(self, ns, select):
"""Normalize a dictionary of namespaces."""
if not isinstance(select, self.api.SoupSieve) and ns is None:
# If the selector is a precompiled pattern, it already has
# a namespace context compiled in, which cannot be
# replaced.
ns = self.tag._namespaces
return ns
def _rs(self, results):
"""Normalize a list of results to a Resultset.
A ResultSet is more consistent with the rest of Beautiful
Soup's API, and ResultSet.__getattr__ has a helpful error
message if you try to treat a list of results as a single
result (a common mistake).
"""
# Import here to avoid circular import
from bs4.element import ResultSet
return ResultSet(None, results)
def compile(self, select, namespaces=None, flags=0, **kwargs):
"""Pre-compile a selector and return the compiled object.
:param selector: A CSS selector.
:param namespaces: A dictionary mapping namespace prefixes
used in the CSS selector to namespace URIs. By default,
Beautiful Soup will use the prefixes it encountered while
parsing the document.
:param flags: Flags to be passed into Soup Sieve's
soupsieve.compile() method.
:param kwargs: Keyword arguments to be passed into SoupSieve's
soupsieve.compile() method.
:return: A precompiled selector object.
:rtype: soupsieve.SoupSieve
"""
return self.api.compile(
select, self._ns(namespaces, select), flags, **kwargs
)
def select_one(self, select, namespaces=None, flags=0, **kwargs):
"""Perform a CSS selection operation on the current Tag and return the
first result.
This uses the Soup Sieve library. For more information, see
that library's documentation for the soupsieve.select_one()
method.
:param selector: A CSS selector.
:param namespaces: A dictionary mapping namespace prefixes
used in the CSS selector to namespace URIs. By default,
Beautiful Soup will use the prefixes it encountered while
parsing the document.
:param flags: Flags to be passed into Soup Sieve's
soupsieve.select_one() method.
:param kwargs: Keyword arguments to be passed into SoupSieve's
soupsieve.select_one() method.
:return: A Tag, or None if the selector has no match.
:rtype: bs4.element.Tag
"""
return self.api.select_one(
select, self.tag, self._ns(namespaces, select), flags, **kwargs
)
def select(self, select, namespaces=None, limit=0, flags=0, **kwargs):
"""Perform a CSS selection operation on the current Tag.
This uses the Soup Sieve library. For more information, see
that library's documentation for the soupsieve.select()
method.
:param selector: A string containing a CSS selector.
:param namespaces: A dictionary mapping namespace prefixes
used in the CSS selector to namespace URIs. By default,
Beautiful Soup will pass in the prefixes it encountered while
parsing the document.
:param limit: After finding this number of results, stop looking.
:param flags: Flags to be passed into Soup Sieve's
soupsieve.select() method.
:param kwargs: Keyword arguments to be passed into SoupSieve's
soupsieve.select() method.
:return: A ResultSet of Tag objects.
:rtype: bs4.element.ResultSet
"""
if limit is None:
limit = 0
return self._rs(
self.api.select(
select, self.tag, self._ns(namespaces, select), limit, flags,
**kwargs
)
)
def iselect(self, select, namespaces=None, limit=0, flags=0, **kwargs):
"""Perform a CSS selection operation on the current Tag.
This uses the Soup Sieve library. For more information, see
that library's documentation for the soupsieve.iselect()
method. It is the same as select(), but it returns a generator
instead of a list.
:param selector: A string containing a CSS selector.
:param namespaces: A dictionary mapping namespace prefixes
used in the CSS selector to namespace URIs. By default,
Beautiful Soup will pass in the prefixes it encountered while
parsing the document.
:param limit: After finding this number of results, stop looking.
:param flags: Flags to be passed into Soup Sieve's
soupsieve.iselect() method.
:param kwargs: Keyword arguments to be passed into SoupSieve's
soupsieve.iselect() method.
:return: A generator
:rtype: types.GeneratorType
"""
return self.api.iselect(
select, self.tag, self._ns(namespaces, select), limit, flags, **kwargs
)
def closest(self, select, namespaces=None, flags=0, **kwargs):
"""Find the Tag closest to this one that matches the given selector.
This uses the Soup Sieve library. For more information, see
that library's documentation for the soupsieve.closest()
method.
:param selector: A string containing a CSS selector.
:param namespaces: A dictionary mapping namespace prefixes
used in the CSS selector to namespace URIs. By default,
Beautiful Soup will pass in the prefixes it encountered while
parsing the document.
:param flags: Flags to be passed into Soup Sieve's
soupsieve.closest() method.
:param kwargs: Keyword arguments to be passed into SoupSieve's
soupsieve.closest() method.
:return: A Tag, or None if there is no match.
:rtype: bs4.Tag
"""
return self.api.closest(
select, self.tag, self._ns(namespaces, select), flags, **kwargs
)
def match(self, select, namespaces=None, flags=0, **kwargs):
"""Check whether this Tag matches the given CSS selector.
This uses the Soup Sieve library. For more information, see
that library's documentation for the soupsieve.match()
method.
:param: a CSS selector.
:param namespaces: A dictionary mapping namespace prefixes
used in the CSS selector to namespace URIs. By default,
Beautiful Soup will pass in the prefixes it encountered while
parsing the document.
:param flags: Flags to be passed into Soup Sieve's
soupsieve.match() method.
:param kwargs: Keyword arguments to be passed into SoupSieve's
soupsieve.match() method.
:return: True if this Tag matches the selector; False otherwise.
:rtype: bool
"""
return self.api.match(
select, self.tag, self._ns(namespaces, select), flags, **kwargs
)
def filter(self, select, namespaces=None, flags=0, **kwargs):
"""Filter this Tag's direct children based on the given CSS selector.
This uses the Soup Sieve library. It works the same way as
passing this Tag into that library's soupsieve.filter()
method. More information, for more information see the
documentation for soupsieve.filter().
:param namespaces: A dictionary mapping namespace prefixes
used in the CSS selector to namespace URIs. By default,
Beautiful Soup will pass in the prefixes it encountered while
parsing the document.
:param flags: Flags to be passed into Soup Sieve's
soupsieve.filter() method.
:param kwargs: Keyword arguments to be passed into SoupSieve's
soupsieve.filter() method.
:return: A ResultSet of Tag objects.
:rtype: bs4.element.ResultSet
"""
return self._rs(
self.api.filter(
select, self.tag, self._ns(namespaces, select), flags, **kwargs
)
)

View file

@ -59,21 +59,6 @@ def diagnose(data):
if hasattr(data, 'read'): if hasattr(data, 'read'):
data = data.read() data = data.read()
elif data.startswith("http:") or data.startswith("https:"):
print(('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data))
print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.")
return
else:
try:
if os.path.exists(data):
print(('"%s" looks like a filename. Reading data from the file.' % data))
with open(data) as fp:
data = fp.read()
except ValueError:
# This can happen on some platforms when the 'filename' is
# too long. Assume it's data and not a filename.
pass
print("")
for parser in basic_parsers: for parser in basic_parsers:
print(("Trying to parse your markup with %s" % parser)) print(("Trying to parse your markup with %s" % parser))

View file

@ -8,14 +8,8 @@ except ImportError as e:
import re import re
import sys import sys
import warnings import warnings
try:
import soupsieve
except ImportError as e:
soupsieve = None
warnings.warn(
'The soupsieve package is not installed. CSS selectors cannot be used.'
)
from bs4.css import CSS
from bs4.formatter import ( from bs4.formatter import (
Formatter, Formatter,
HTMLFormatter, HTMLFormatter,
@ -154,6 +148,11 @@ class PageElement(object):
NavigableString, Tag, etc. are all subclasses of PageElement. NavigableString, Tag, etc. are all subclasses of PageElement.
""" """
# In general, we can't tell just by looking at an element whether
# it's contained in an XML document or an HTML document. But for
# Tags (q.v.) we can store this information at parse time.
known_xml = None
def setup(self, parent=None, previous_element=None, next_element=None, def setup(self, parent=None, previous_element=None, next_element=None,
previous_sibling=None, next_sibling=None): previous_sibling=None, next_sibling=None):
"""Sets up the initial relations between this element and """Sets up the initial relations between this element and
@ -941,11 +940,6 @@ class NavigableString(str, PageElement):
PREFIX = '' PREFIX = ''
SUFFIX = '' SUFFIX = ''
# We can't tell just by looking at a string whether it's contained
# in an XML document or an HTML document.
known_xml = None
def __new__(cls, value): def __new__(cls, value):
"""Create a new NavigableString. """Create a new NavigableString.
@ -961,12 +955,22 @@ class NavigableString(str, PageElement):
u.setup() u.setup()
return u return u
def __copy__(self): def __deepcopy__(self, memo, recursive=False):
"""A copy of a NavigableString has the same contents and class """A copy of a NavigableString has the same contents and class
as the original, but it is not connected to the parse tree. as the original, but it is not connected to the parse tree.
:param recursive: This parameter is ignored; it's only defined
so that NavigableString.__deepcopy__ implements the same
signature as Tag.__deepcopy__.
""" """
return type(self)(self) return type(self)(self)
def __copy__(self):
"""A copy of a NavigableString can only be a deep copy, because
only one PageElement can occupy a given place in a parse tree.
"""
return self.__deepcopy__({})
def __getnewargs__(self): def __getnewargs__(self):
return (str(self),) return (str(self),)
@ -1311,10 +1315,46 @@ class Tag(PageElement):
parserClass = _alias("parser_class") # BS3 parserClass = _alias("parser_class") # BS3
def __copy__(self): def __deepcopy__(self, memo, recursive=True):
"""A copy of a Tag is a new Tag, unconnected to the parse tree. """A deepcopy of a Tag is a new Tag, unconnected to the parse tree.
Its contents are a copy of the old Tag's contents. Its contents are a copy of the old Tag's contents.
""" """
clone = self._clone()
if recursive:
# Clone this tag's descendants recursively, but without
# making any recursive function calls.
tag_stack = [clone]
for event, element in self._event_stream(self.descendants):
if event is Tag.END_ELEMENT_EVENT:
# Stop appending incoming Tags to the Tag that was
# just closed.
tag_stack.pop()
else:
descendant_clone = element.__deepcopy__(
memo, recursive=False
)
# Add to its parent's .contents
tag_stack[-1].append(descendant_clone)
if event is Tag.START_ELEMENT_EVENT:
# Add the Tag itself to the stack so that its
# children will be .appended to it.
tag_stack.append(descendant_clone)
return clone
def __copy__(self):
"""A copy of a Tag must always be a deep copy, because a Tag's
children can only have one parent at a time.
"""
return self.__deepcopy__({})
def _clone(self):
"""Create a new Tag just like this one, but with no
contents and unattached to any parse tree.
This is the first step in the deepcopy process.
"""
clone = type(self)( clone = type(self)(
None, self.builder, self.name, self.namespace, None, self.builder, self.name, self.namespace,
self.prefix, self.attrs, is_xml=self._is_xml, self.prefix, self.attrs, is_xml=self._is_xml,
@ -1326,8 +1366,6 @@ class Tag(PageElement):
) )
for attr in ('can_be_empty_element', 'hidden'): for attr in ('can_be_empty_element', 'hidden'):
setattr(clone, attr, getattr(self, attr)) setattr(clone, attr, getattr(self, attr))
for child in self.contents:
clone.append(child.__copy__())
return clone return clone
@property @property
@ -1650,28 +1688,178 @@ class Tag(PageElement):
def decode(self, indent_level=None, def decode(self, indent_level=None,
eventual_encoding=DEFAULT_OUTPUT_ENCODING, eventual_encoding=DEFAULT_OUTPUT_ENCODING,
formatter="minimal"): formatter="minimal",
"""Render a Unicode representation of this PageElement and its iterator=None):
contents. pieces = []
:param indent_level: Each line of the rendering will be
indented this many spaces. Used internally in
recursive calls while pretty-printing.
:param eventual_encoding: The tag is destined to be
encoded into this encoding. This method is _not_
responsible for performing that encoding. This information
is passed in so that it can be substituted in if the
document contains a <META> tag that mentions the document's
encoding.
:param formatter: A Formatter object, or a string naming one of
the standard formatters.
"""
# First off, turn a non-Formatter `formatter` into a Formatter # First off, turn a non-Formatter `formatter` into a Formatter
# object. This will stop the lookup from happening over and # object. This will stop the lookup from happening over and
# over again. # over again.
if not isinstance(formatter, Formatter): if not isinstance(formatter, Formatter):
formatter = self.formatter_for_name(formatter) formatter = self.formatter_for_name(formatter)
if indent_level is True:
indent_level = 0
# The currently active tag that put us into string literal
# mode. Until this element is closed, children will be treated
# as string literals and not pretty-printed. String literal
# mode is turned on immediately after this tag begins, and
# turned off immediately before it's closed. This means there
# will be whitespace before and after the tag itself.
string_literal_tag = None
for event, element in self._event_stream(iterator):
if event in (Tag.START_ELEMENT_EVENT, Tag.EMPTY_ELEMENT_EVENT):
piece = element._format_tag(
eventual_encoding, formatter, opening=True
)
elif event is Tag.END_ELEMENT_EVENT:
piece = element._format_tag(
eventual_encoding, formatter, opening=False
)
if indent_level is not None:
indent_level -= 1
else:
piece = element.output_ready(formatter)
# Now we need to apply the 'prettiness' -- extra
# whitespace before and/or after this tag. This can get
# complicated because certain tags, like <pre> and
# <script>, can't be prettified, since adding whitespace would
# change the meaning of the content.
# The default behavior is to add whitespace before and
# after an element when string literal mode is off, and to
# leave things as they are when string literal mode is on.
if string_literal_tag:
indent_before = indent_after = False
else:
indent_before = indent_after = True
# The only time the behavior is more complex than that is
# when we encounter an opening or closing tag that might
# put us into or out of string literal mode.
if (event is Tag.START_ELEMENT_EVENT
and not string_literal_tag
and not element._should_pretty_print()):
# We are about to enter string literal mode. Add
# whitespace before this tag, but not after. We
# will stay in string literal mode until this tag
# is closed.
indent_before = True
indent_after = False
string_literal_tag = element
elif (event is Tag.END_ELEMENT_EVENT
and element is string_literal_tag):
# We are about to exit string literal mode by closing
# the tag that sent us into that mode. Add whitespace
# after this tag, but not before.
indent_before = False
indent_after = True
string_literal_tag = None
# Now we know whether to add whitespace before and/or
# after this element.
if indent_level is not None:
if (indent_before or indent_after):
if isinstance(element, NavigableString):
piece = piece.strip()
if piece:
piece = self._indent_string(
piece, indent_level, formatter,
indent_before, indent_after
)
if event == Tag.START_ELEMENT_EVENT:
indent_level += 1
pieces.append(piece)
return "".join(pieces)
# Names for the different events yielded by _event_stream
START_ELEMENT_EVENT = object()
END_ELEMENT_EVENT = object()
EMPTY_ELEMENT_EVENT = object()
STRING_ELEMENT_EVENT = object()
def _event_stream(self, iterator=None):
"""Yield a sequence of events that can be used to reconstruct the DOM
for this element.
This lets us recreate the nested structure of this element
(e.g. when formatting it as a string) without using recursive
method calls.
This is similar in concept to the SAX API, but it's a simpler
interface designed for internal use. The events are different
from SAX and the arguments associated with the events are Tags
and other Beautiful Soup objects.
:param iterator: An alternate iterator to use when traversing
the tree.
"""
tag_stack = []
iterator = iterator or self.self_and_descendants
for c in iterator:
# If the parent of the element we're about to yield is not
# the tag currently on the stack, it means that the tag on
# the stack closed before this element appeared.
while tag_stack and c.parent != tag_stack[-1]:
now_closed_tag = tag_stack.pop()
yield Tag.END_ELEMENT_EVENT, now_closed_tag
if isinstance(c, Tag):
if c.is_empty_element:
yield Tag.EMPTY_ELEMENT_EVENT, c
else:
yield Tag.START_ELEMENT_EVENT, c
tag_stack.append(c)
continue
else:
yield Tag.STRING_ELEMENT_EVENT, c
while tag_stack:
now_closed_tag = tag_stack.pop()
yield Tag.END_ELEMENT_EVENT, now_closed_tag
def _indent_string(self, s, indent_level, formatter,
indent_before, indent_after):
"""Add indentation whitespace before and/or after a string.
:param s: The string to amend with whitespace.
:param indent_level: The indentation level; affects how much
whitespace goes before the string.
:param indent_before: Whether or not to add whitespace
before the string.
:param indent_after: Whether or not to add whitespace
(a newline) after the string.
"""
space_before = ''
if indent_before and indent_level:
space_before = (formatter.indent * indent_level)
space_after = ''
if indent_after:
space_after = "\n"
return space_before + s + space_after
def _format_tag(self, eventual_encoding, formatter, opening):
# A tag starts with the < character (see below).
# Then the / character, if this is a closing tag.
closing_slash = ''
if not opening:
closing_slash = '/'
# Then an optional namespace prefix.
prefix = ''
if self.prefix:
prefix = self.prefix + ":"
# Then a list of attribute values, if this is an opening tag.
attribute_string = ''
if opening:
attributes = formatter.attributes(self) attributes = formatter.attributes(self)
attrs = [] attrs = []
for key, val in attributes: for key, val in attributes:
@ -1693,63 +1881,19 @@ class Tag(PageElement):
str(key) + '=' str(key) + '='
+ formatter.quoted_attribute_value(text)) + formatter.quoted_attribute_value(text))
attrs.append(decoded) attrs.append(decoded)
close = ''
closeTag = ''
prefix = ''
if self.prefix:
prefix = self.prefix + ":"
if self.is_empty_element:
close = formatter.void_element_close_prefix or ''
else:
closeTag = '</%s%s>' % (prefix, self.name)
pretty_print = self._should_pretty_print(indent_level)
space = ''
indent_space = ''
if indent_level is not None:
indent_space = (formatter.indent * (indent_level - 1))
if pretty_print:
space = indent_space
indent_contents = indent_level + 1
else:
indent_contents = None
contents = self.decode_contents(
indent_contents, eventual_encoding, formatter
)
if self.hidden:
# This is the 'document root' object.
s = contents
else:
s = []
attribute_string = ''
if attrs: if attrs:
attribute_string = ' ' + ' '.join(attrs) attribute_string = ' ' + ' '.join(attrs)
if indent_level is not None:
# Even if this particular tag is not pretty-printed,
# we should indent up to the start of the tag.
s.append(indent_space)
s.append('<%s%s%s%s>' % (
prefix, self.name, attribute_string, close))
if pretty_print:
s.append("\n")
s.append(contents)
if pretty_print and contents and contents[-1] != "\n":
s.append("\n")
if pretty_print and closeTag:
s.append(space)
s.append(closeTag)
if indent_level is not None and closeTag and self.next_sibling:
# Even if this particular tag is not pretty-printed,
# we're now done with the tag, and we should add a
# newline if appropriate.
s.append("\n")
s = ''.join(s)
return s
def _should_pretty_print(self, indent_level): # Then an optional closing slash (for a void element in an
# XML document).
void_element_closing_slash = ''
if self.is_empty_element:
void_element_closing_slash = formatter.void_element_close_prefix or ''
# Put it all together.
return '<' + closing_slash + prefix + self.name + attribute_string + void_element_closing_slash + '>'
def _should_pretty_print(self, indent_level=1):
"""Should this tag be pretty-printed? """Should this tag be pretty-printed?
Most of them should, but some (such as <pre> in HTML Most of them should, but some (such as <pre> in HTML
@ -1800,32 +1944,8 @@ class Tag(PageElement):
the standard Formatters. the standard Formatters.
""" """
# First off, turn a string formatter into a Formatter object. This return self.decode(indent_level, eventual_encoding, formatter,
# will stop the lookup from happening over and over again. iterator=self.descendants)
if not isinstance(formatter, Formatter):
formatter = self.formatter_for_name(formatter)
pretty_print = (indent_level is not None)
s = []
for c in self:
text = None
if isinstance(c, NavigableString):
text = c.output_ready(formatter)
elif isinstance(c, Tag):
s.append(c.decode(indent_level, eventual_encoding,
formatter))
preserve_whitespace = (
self.preserve_whitespace_tags and self.name in self.preserve_whitespace_tags
)
if text and indent_level and not preserve_whitespace:
text = text.strip()
if text:
if pretty_print and not preserve_whitespace:
s.append(formatter.indent * (indent_level - 1))
s.append(text)
if pretty_print and not preserve_whitespace:
s.append("\n")
return ''.join(s)
def encode_contents( def encode_contents(
self, indent_level=None, encoding=DEFAULT_OUTPUT_ENCODING, self, indent_level=None, encoding=DEFAULT_OUTPUT_ENCODING,
@ -1922,6 +2042,18 @@ class Tag(PageElement):
# return iter() to make the purpose of the method clear # return iter() to make the purpose of the method clear
return iter(self.contents) # XXX This seems to be untested. return iter(self.contents) # XXX This seems to be untested.
@property
def self_and_descendants(self):
"""Iterate over this PageElement and its children in a
breadth-first sequence.
:yield: A sequence of PageElements.
"""
if not self.hidden:
yield self
for i in self.descendants:
yield i
@property @property
def descendants(self): def descendants(self):
"""Iterate over all children of this PageElement in a """Iterate over all children of this PageElement in a
@ -1954,10 +2086,7 @@ class Tag(PageElement):
:return: A Tag. :return: A Tag.
:rtype: bs4.element.Tag :rtype: bs4.element.Tag
""" """
value = self.select(selector, namespaces, 1, **kwargs) return self.css.select_one(selector, namespaces, **kwargs)
if value:
return value[0]
return None
def select(self, selector, namespaces=None, limit=None, **kwargs): def select(self, selector, namespaces=None, limit=None, **kwargs):
"""Perform a CSS selection operation on the current element. """Perform a CSS selection operation on the current element.
@ -1979,21 +2108,12 @@ class Tag(PageElement):
:return: A ResultSet of Tags. :return: A ResultSet of Tags.
:rtype: bs4.element.ResultSet :rtype: bs4.element.ResultSet
""" """
if namespaces is None: return self.css.select(selector, namespaces, limit, **kwargs)
namespaces = self._namespaces
if limit is None: @property
limit = 0 def css(self):
if soupsieve is None: """Return an interface to the CSS selector API."""
raise NotImplementedError( return CSS(self)
"Cannot execute CSS selectors because the soupsieve package is not installed."
)
results = soupsieve.select(selector, self, namespaces, limit, **kwargs)
# We do this because it's more consistent and because
# ResultSet.__getattr__ has a helpful error message.
return ResultSet(None, results)
# Old names for backwards compatibility # Old names for backwards compatibility
def childGenerator(self): def childGenerator(self):

View file

@ -298,37 +298,11 @@ class TreeBuilderSmokeTest(object):
) )
assert soup.a['class'] == ['a', 'b', 'c'] assert soup.a['class'] == ['a', 'b', 'c']
def test_fuzzed_input(self): def test_invalid_doctype(self):
# This test centralizes in one place the various fuzz tests markup = '<![if word]>content<![endif]>'
# for Beautiful Soup created by the oss-fuzz project. markup = '<!DOCTYPE html]ff>'
# These strings superficially resemble markup, but they
# generally can't be parsed into anything. The best we can
# hope for is that parsing these strings won't crash the
# parser.
#
# n.b. This markup is commented out because these fuzz tests
# _do_ crash the parser. However the crashes are due to bugs
# in html.parser, not Beautiful Soup -- otherwise I'd fix the
# bugs!
bad_markup = [
# https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=28873
# https://github.com/guidovranken/python-library-fuzzers/blob/master/corp-html/519e5b4269a01185a0d5e76295251921da2f0700
# https://bugs.python.org/issue37747
#
#b'\n<![\xff\xfe\xfe\xcd\x00',
#https://github.com/guidovranken/python-library-fuzzers/blob/master/corp-html/de32aa55785be29bbc72a1a8e06b00611fb3d9f8
# https://bugs.python.org/issue34480
#
#b'<![n\x00'
]
for markup in bad_markup:
with warnings.catch_warnings(record=False):
soup = self.soup(markup) soup = self.soup(markup)
class HTMLTreeBuilderSmokeTest(TreeBuilderSmokeTest): class HTMLTreeBuilderSmokeTest(TreeBuilderSmokeTest):
"""A basic test of a treebuilder's competence. """A basic test of a treebuilder's competence.
@ -577,8 +551,8 @@ Hello, world!
"""Whitespace must be preserved in <pre> and <textarea> tags, """Whitespace must be preserved in <pre> and <textarea> tags,
even if that would mean not prettifying the markup. even if that would mean not prettifying the markup.
""" """
pre_markup = "<pre> </pre>" pre_markup = "<pre>a z</pre>\n"
textarea_markup = "<textarea> woo\nwoo </textarea>" textarea_markup = "<textarea> woo\nwoo </textarea>\n"
self.assert_soup(pre_markup) self.assert_soup(pre_markup)
self.assert_soup(textarea_markup) self.assert_soup(textarea_markup)
@ -589,7 +563,7 @@ Hello, world!
assert soup.textarea.prettify() == textarea_markup assert soup.textarea.prettify() == textarea_markup
soup = self.soup("<textarea></textarea>") soup = self.soup("<textarea></textarea>")
assert soup.textarea.prettify() == "<textarea></textarea>" assert soup.textarea.prettify() == "<textarea></textarea>\n"
def test_nested_inline_elements(self): def test_nested_inline_elements(self):
"""Inline elements can be nested indefinitely.""" """Inline elements can be nested indefinitely."""

View file

@ -0,0 +1 @@
˙<!DOCTyPEV PUBLIC'''Đ'

View file

@ -0,0 +1 @@
)<a><math><TR><a><mI><a><p><a>

View file

@ -0,0 +1 @@
-<math><sElect><mi><sElect><sElect>

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1 @@
ñ<table><svg><html>

487
lib/bs4/tests/test_css.py Normal file
View file

@ -0,0 +1,487 @@
import pytest
import types
from unittest.mock import MagicMock
from bs4 import (
CSS,
BeautifulSoup,
ResultSet,
)
from . import (
SoupTest,
SOUP_SIEVE_PRESENT,
)
if SOUP_SIEVE_PRESENT:
from soupsieve import SelectorSyntaxError
@pytest.mark.skipif(not SOUP_SIEVE_PRESENT, reason="Soup Sieve not installed")
class TestCSSSelectors(SoupTest):
    """Test basic CSS selector functionality.

    This functionality is implemented in soupsieve, which has a much
    more comprehensive test suite, so this is basically an extra check
    that soupsieve works as expected.
    """

    # Fixture document shared by every test: a variety of tags,
    # classes, IDs, attributes and sibling arrangements to select
    # against.  Parsed fresh in setup_method().
    HTML = """
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<title>The title</title>
<link rel="stylesheet" href="blah.css" type="text/css" id="l1">
</head>
<body>
<custom-dashed-tag class="dashed" id="dash1">Hello there.</custom-dashed-tag>
<div id="main" class="fancy">
<div id="inner">
<h1 id="header1">An H1</h1>
<p>Some text</p>
<p class="onep" id="p1">Some more text</p>
<h2 id="header2">An H2</h2>
<p class="class1 class2 class3" id="pmulti">Another</p>
<a href="http://bob.example.org/" rel="friend met" id="bob">Bob</a>
<h2 id="header3">Another H2</h2>
<a id="me" href="http://simonwillison.net/" rel="me">me</a>
<span class="s1">
<a href="#" id="s1a1">span1a1</a>
<a href="#" id="s1a2">span1a2 <span id="s1a2s1">test</span></a>
<span class="span2">
<a href="#" id="s2a1">span2a1</a>
</span>
<span class="span3"></span>
<custom-dashed-tag class="dashed" id="dash2"/>
<div data-tag="dashedvalue" id="data1"/>
</span>
</div>
<x id="xid">
<z id="zida"/>
<z id="zidab"/>
<z id="zidac"/>
</x>
<y id="yid">
<z id="zidb"/>
</y>
<p lang="en" id="lang-en">English</p>
<p lang="en-gb" id="lang-en-gb">English UK</p>
<p lang="en-us" id="lang-en-us">English US</p>
<p lang="fr" id="lang-fr">French</p>
</div>
<div id="footer">
</div>
"""

    def setup_method(self):
        # Re-parse the fixture for each test so mutations can't leak
        # between tests.
        self.soup = BeautifulSoup(self.HTML, 'html.parser')

    def assert_selects(self, selector, expected_ids, **kwargs):
        """Assert that `selector` matches exactly the tags whose `id`
        attributes are listed in `expected_ids`, in any order.

        Extra keyword arguments (e.g. `limit`) are passed through to
        Tag.select().
        """
        results = self.soup.select(selector, **kwargs)
        assert isinstance(results, ResultSet)
        # Compare order-insensitively without mutating the caller's
        # expected_ids list (the original sorted it in place).
        el_ids = sorted(el['id'] for el in results)
        expected_ids = sorted(expected_ids)
        assert expected_ids == el_ids, "Selector %s, expected [%s], got [%s]" % (
            selector, ', '.join(expected_ids), ', '.join(el_ids)
        )
    # Old camelCase alias kept for backwards compatibility.
    assertSelect = assert_selects

    def assert_select_multiple(self, *tests):
        """Run assert_selects over a sequence of (selector, ids) pairs."""
        for selector, expected_ids in tests:
            self.assert_selects(selector, expected_ids)

    def test_precompiled(self):
        # A precompiled soupsieve selector object can be passed to
        # select() and select_one() in place of a selector string.
        sel = self.soup.css.compile('div')

        els = self.soup.select(sel)
        assert len(els) == 4
        for div in els:
            assert div.name == 'div'

        el = self.soup.select_one(sel)
        assert 'main' == el['id']

    def test_one_tag_one(self):
        els = self.soup.select('title')
        assert len(els) == 1
        assert els[0].name == 'title'
        assert els[0].contents == ['The title']

    def test_one_tag_many(self):
        els = self.soup.select('div')
        assert len(els) == 4
        for div in els:
            assert div.name == 'div'

        el = self.soup.select_one('div')
        assert 'main' == el['id']

    def test_select_one_returns_none_if_no_match(self):
        match = self.soup.select_one('nonexistenttag')
        assert None == match

    def test_tag_in_tag_one(self):
        # (The original also ran self.soup.select('div div') into an
        # unused local before asserting; the redundant call is gone.)
        self.assert_selects('div div', ['inner', 'data1'])

    def test_tag_in_tag_many(self):
        for selector in ('html div', 'html body div', 'body div'):
            self.assert_selects(selector, ['data1', 'main', 'inner', 'footer'])

    def test_limit(self):
        self.assert_selects('html div', ['main'], limit=1)
        self.assert_selects('html body div', ['inner', 'main'], limit=2)
        self.assert_selects('body div', ['data1', 'main', 'inner', 'footer'],
                            limit=10)

    def test_tag_no_match(self):
        assert len(self.soup.select('del')) == 0

    def test_invalid_tag(self):
        with pytest.raises(SelectorSyntaxError):
            self.soup.select('tag%t')

    def test_select_dashed_tag_ids(self):
        self.assert_selects('custom-dashed-tag', ['dash1', 'dash2'])

    def test_select_dashed_by_id(self):
        dashed = self.soup.select('custom-dashed-tag[id=\"dash2\"]')
        assert dashed[0].name == 'custom-dashed-tag'
        assert dashed[0]['id'] == 'dash2'

    def test_dashed_tag_text(self):
        assert self.soup.select('body > custom-dashed-tag')[0].text == 'Hello there.'

    def test_select_dashed_matches_find_all(self):
        assert self.soup.select('custom-dashed-tag') == self.soup.find_all('custom-dashed-tag')

    def test_header_tags(self):
        self.assert_select_multiple(
            ('h1', ['header1']),
            ('h2', ['header2', 'header3']),
        )

    def test_class_one(self):
        for selector in ('.onep', 'p.onep', 'html p.onep'):
            els = self.soup.select(selector)
            assert len(els) == 1
            assert els[0].name == 'p'
            assert els[0]['class'] == ['onep']

    def test_class_mismatched_tag(self):
        els = self.soup.select('div.onep')
        assert len(els) == 0

    def test_one_id(self):
        for selector in ('div#inner', '#inner', 'div div#inner'):
            self.assert_selects(selector, ['inner'])

    def test_bad_id(self):
        els = self.soup.select('#doesnotexist')
        assert len(els) == 0

    def test_items_in_id(self):
        els = self.soup.select('div#inner p')
        assert len(els) == 3
        for el in els:
            assert el.name == 'p'
        assert els[1]['class'] == ['onep']
        assert not els[0].has_attr('class')

    def test_a_bunch_of_emptys(self):
        for selector in ('div#main del', 'div#main div.oops', 'div div#main'):
            assert len(self.soup.select(selector)) == 0

    def test_multi_class_support(self):
        for selector in ('.class1', 'p.class1', '.class2', 'p.class2',
                         '.class3', 'p.class3', 'html p.class2', 'div#inner .class2'):
            self.assert_selects(selector, ['pmulti'])

    def test_multi_class_selection(self):
        for selector in ('.class1.class3', '.class3.class2',
                         '.class1.class2.class3'):
            self.assert_selects(selector, ['pmulti'])

    def test_child_selector(self):
        self.assert_selects('.s1 > a', ['s1a1', 's1a2'])
        self.assert_selects('.s1 > a span', ['s1a2s1'])

    def test_child_selector_id(self):
        self.assert_selects('.s1 > a#s1a2 span', ['s1a2s1'])

    def test_attribute_equals(self):
        self.assert_select_multiple(
            ('p[class="onep"]', ['p1']),
            ('p[id="p1"]', ['p1']),
            ('[class="onep"]', ['p1']),
            ('[id="p1"]', ['p1']),
            ('link[rel="stylesheet"]', ['l1']),
            ('link[type="text/css"]', ['l1']),
            ('link[href="blah.css"]', ['l1']),
            ('link[href="no-blah.css"]', []),
            ('[rel="stylesheet"]', ['l1']),
            ('[type="text/css"]', ['l1']),
            ('[href="blah.css"]', ['l1']),
            ('[href="no-blah.css"]', []),
            ('p[href="no-blah.css"]', []),
            ('[href="no-blah.css"]', []),
        )

    def test_attribute_tilde(self):
        self.assert_select_multiple(
            ('p[class~="class1"]', ['pmulti']),
            ('p[class~="class2"]', ['pmulti']),
            ('p[class~="class3"]', ['pmulti']),
            ('[class~="class1"]', ['pmulti']),
            ('[class~="class2"]', ['pmulti']),
            ('[class~="class3"]', ['pmulti']),
            ('a[rel~="friend"]', ['bob']),
            ('a[rel~="met"]', ['bob']),
            ('[rel~="friend"]', ['bob']),
            ('[rel~="met"]', ['bob']),
        )

    def test_attribute_startswith(self):
        self.assert_select_multiple(
            ('[rel^="style"]', ['l1']),
            ('link[rel^="style"]', ['l1']),
            ('notlink[rel^="notstyle"]', []),
            ('[rel^="notstyle"]', []),
            ('link[rel^="notstyle"]', []),
            ('link[href^="bla"]', ['l1']),
            ('a[href^="http://"]', ['bob', 'me']),
            ('[href^="http://"]', ['bob', 'me']),
            ('[id^="p"]', ['pmulti', 'p1']),
            ('[id^="m"]', ['me', 'main']),
            ('div[id^="m"]', ['main']),
            ('a[id^="m"]', ['me']),
            ('div[data-tag^="dashed"]', ['data1'])
        )

    def test_attribute_endswith(self):
        self.assert_select_multiple(
            ('[href$=".css"]', ['l1']),
            ('link[href$=".css"]', ['l1']),
            ('link[id$="1"]', ['l1']),
            ('[id$="1"]', ['data1', 'l1', 'p1', 'header1', 's1a1', 's2a1', 's1a2s1', 'dash1']),
            ('div[id$="1"]', ['data1']),
            ('[id$="noending"]', []),
        )

    def test_attribute_contains(self):
        self.assert_select_multiple(
            # From test_attribute_startswith
            ('[rel*="style"]', ['l1']),
            ('link[rel*="style"]', ['l1']),
            ('notlink[rel*="notstyle"]', []),
            ('[rel*="notstyle"]', []),
            ('link[rel*="notstyle"]', []),
            ('link[href*="bla"]', ['l1']),
            ('[href*="http://"]', ['bob', 'me']),
            ('[id*="p"]', ['pmulti', 'p1']),
            ('div[id*="m"]', ['main']),
            ('a[id*="m"]', ['me']),
            # From test_attribute_endswith
            ('[href*=".css"]', ['l1']),
            ('link[href*=".css"]', ['l1']),
            ('link[id*="1"]', ['l1']),
            ('[id*="1"]', ['data1', 'l1', 'p1', 'header1', 's1a1', 's1a2', 's2a1', 's1a2s1', 'dash1']),
            ('div[id*="1"]', ['data1']),
            ('[id*="noending"]', []),
            # New for this test
            ('[href*="."]', ['bob', 'me', 'l1']),
            ('a[href*="."]', ['bob', 'me']),
            ('link[href*="."]', ['l1']),
            ('div[id*="n"]', ['main', 'inner']),
            ('div[id*="nn"]', ['inner']),
            ('div[data-tag*="edval"]', ['data1'])
        )

    # NOTE: "hypen" is a long-standing typo for "hyphen"; the name is
    # kept so test IDs stay stable across releases.
    def test_attribute_exact_or_hypen(self):
        self.assert_select_multiple(
            ('p[lang|="en"]', ['lang-en', 'lang-en-gb', 'lang-en-us']),
            ('[lang|="en"]', ['lang-en', 'lang-en-gb', 'lang-en-us']),
            ('p[lang|="fr"]', ['lang-fr']),
            ('p[lang|="gb"]', []),
        )

    def test_attribute_exists(self):
        self.assert_select_multiple(
            ('[rel]', ['l1', 'bob', 'me']),
            ('link[rel]', ['l1']),
            ('a[rel]', ['bob', 'me']),
            ('[lang]', ['lang-en', 'lang-en-gb', 'lang-en-us', 'lang-fr']),
            ('p[class]', ['p1', 'pmulti']),
            ('[blah]', []),
            ('p[blah]', []),
            ('div[data-tag]', ['data1'])
        )

    def test_quoted_space_in_selector_name(self):
        html = """<div style="display: wrong">nope</div>
<div style="display: right">yes</div>
"""
        soup = BeautifulSoup(html, 'html.parser')
        [chosen] = soup.select('div[style="display: right"]')
        assert "yes" == chosen.string

    def test_unsupported_pseudoclass(self):
        with pytest.raises(NotImplementedError):
            self.soup.select("a:no-such-pseudoclass")

        with pytest.raises(SelectorSyntaxError):
            self.soup.select("a:nth-of-type(a)")

    def test_nth_of_type(self):
        # Try to select first paragraph
        els = self.soup.select('div#inner p:nth-of-type(1)')
        assert len(els) == 1
        assert els[0].string == 'Some text'

        # Try to select third paragraph
        els = self.soup.select('div#inner p:nth-of-type(3)')
        assert len(els) == 1
        assert els[0].string == 'Another'

        # Try to select (non-existent!) fourth paragraph
        els = self.soup.select('div#inner p:nth-of-type(4)')
        assert len(els) == 0

        # Zero will select no tags.
        els = self.soup.select('div p:nth-of-type(0)')
        assert len(els) == 0

    def test_nth_of_type_direct_descendant(self):
        els = self.soup.select('div#inner > p:nth-of-type(1)')
        assert len(els) == 1
        assert els[0].string == 'Some text'

    def test_id_child_selector_nth_of_type(self):
        self.assert_selects('#inner > p:nth-of-type(2)', ['p1'])

    def test_select_on_element(self):
        # Other tests operate on the tree; this operates on an element
        # within the tree.
        inner = self.soup.find("div", id="main")
        selected = inner.select("div")
        # The <div id="inner"> tag was selected. The <div id="footer">
        # tag was not.
        # (assert_selects_ids is inherited from the SoupTest base class.)
        self.assert_selects_ids(selected, ['inner', 'data1'])

    def test_overspecified_child_id(self):
        self.assert_selects(".fancy #inner", ['inner'])
        self.assert_selects(".normal #inner", [])

    def test_adjacent_sibling_selector(self):
        self.assert_selects('#p1 + h2', ['header2'])
        self.assert_selects('#p1 + h2 + p', ['pmulti'])
        self.assert_selects('#p1 + #header2 + .class1', ['pmulti'])
        assert [] == self.soup.select('#p1 + p')

    def test_general_sibling_selector(self):
        self.assert_selects('#p1 ~ h2', ['header2', 'header3'])
        self.assert_selects('#p1 ~ #header2', ['header2'])
        self.assert_selects('#p1 ~ h2 + a', ['me'])
        self.assert_selects('#p1 ~ h2 + [rel="me"]', ['me'])
        assert [] == self.soup.select('#inner ~ h2')

    def test_dangling_combinator(self):
        with pytest.raises(SelectorSyntaxError):
            self.soup.select('h1 >')

    def test_sibling_combinator_wont_select_same_tag_twice(self):
        self.assert_selects('p[lang] ~ p', ['lang-en-gb', 'lang-en-us', 'lang-fr'])

    # Test the selector grouping operator (the comma)
    def test_multiple_select(self):
        self.assert_selects('x, y', ['xid', 'yid'])

    def test_multiple_select_with_no_space(self):
        self.assert_selects('x,y', ['xid', 'yid'])

    def test_multiple_select_with_more_space(self):
        self.assert_selects('x, y', ['xid', 'yid'])

    def test_multiple_select_duplicated(self):
        self.assert_selects('x, x', ['xid'])

    def test_multiple_select_sibling(self):
        self.assert_selects('x, y ~ p[lang=fr]', ['xid', 'lang-fr'])

    def test_multiple_select_tag_and_direct_descendant(self):
        self.assert_selects('x, y > z', ['xid', 'zidb'])

    def test_multiple_select_direct_descendant_and_tags(self):
        self.assert_selects('div > x, y, z', ['xid', 'yid', 'zida', 'zidb', 'zidab', 'zidac'])

    def test_multiple_select_indirect_descendant(self):
        self.assert_selects('div x,y, z', ['xid', 'yid', 'zida', 'zidb', 'zidab', 'zidac'])

    def test_invalid_multiple_select(self):
        with pytest.raises(SelectorSyntaxError):
            self.soup.select(',x, y')
        with pytest.raises(SelectorSyntaxError):
            self.soup.select('x,,y')

    def test_multiple_select_attrs(self):
        self.assert_selects('p[lang=en], p[lang=en-gb]', ['lang-en', 'lang-en-gb'])

    def test_multiple_select_ids(self):
        self.assert_selects('x, y > z[id=zida], z[id=zidab], z[id=zidb]', ['xid', 'zidb', 'zidab'])

    def test_multiple_select_nested(self):
        self.assert_selects('body > div > x, y > z', ['xid', 'zidb'])

    def test_select_duplicate_elements(self):
        # When markup contains duplicate elements, a multiple select
        # will find all of them.
        markup = '<div class="c1"/><div class="c2"/><div class="c1"/>'
        soup = BeautifulSoup(markup, 'html.parser')
        selected = soup.select(".c1, .c2")
        assert 3 == len(selected)

        # Verify that find_all finds the same elements, though because
        # of an implementation detail it finds them in a different
        # order.
        for element in soup.find_all(class_=['c1', 'c2']):
            assert element in selected

    # The remaining tests exercise the newer CSS class API
    # (Tag.css.closest/match/iselect/filter/escape).
    def test_closest(self):
        inner = self.soup.find("div", id="inner")
        closest = inner.css.closest("div[id=main]")
        assert closest == self.soup.find("div", id="main")

    def test_match(self):
        inner = self.soup.find("div", id="inner")
        main = self.soup.find("div", id="main")
        assert inner.css.match("div[id=main]") == False
        assert main.css.match("div[id=main]") == True

    def test_iselect(self):
        # iselect() returns a lazy generator rather than a ResultSet.
        gen = self.soup.css.iselect("h2")
        assert isinstance(gen, types.GeneratorType)
        [header2, header3] = gen
        assert header2['id'] == 'header2'
        assert header3['id'] == 'header3'

    def test_filter(self):
        inner = self.soup.find("div", id="inner")
        # (The original bound this result and then re-ran the same
        # filter inside the assert; now the bound result is asserted.)
        results = inner.css.filter("h2")
        assert len(results) == 2

        results = inner.css.filter("h2[id=header3]")
        assert isinstance(results, ResultSet)
        [result] = results
        assert result['id'] == 'header3'

    def test_escape(self):
        m = self.soup.css.escape
        assert m(".foo#bar") == '\\.foo\\#bar'
        assert m("()[]{}") == '\\(\\)\\[\\]\\{\\}'
        assert m(".foo") == self.soup.css.escape(".foo")

View file

@ -80,20 +80,20 @@ class TestFormatter(SoupTest):
@pytest.mark.parametrize( @pytest.mark.parametrize(
"indent,expect", "indent,expect",
[ [
(None, '<a>\n<b>\ntext\n</b>\n</a>'), (None, '<a>\n<b>\ntext\n</b>\n</a>\n'),
(-1, '<a>\n<b>\ntext\n</b>\n</a>'), (-1, '<a>\n<b>\ntext\n</b>\n</a>\n'),
(0, '<a>\n<b>\ntext\n</b>\n</a>'), (0, '<a>\n<b>\ntext\n</b>\n</a>\n'),
("", '<a>\n<b>\ntext\n</b>\n</a>'), ("", '<a>\n<b>\ntext\n</b>\n</a>\n'),
(1, '<a>\n <b>\n text\n </b>\n</a>'), (1, '<a>\n <b>\n text\n </b>\n</a>\n'),
(2, '<a>\n <b>\n text\n </b>\n</a>'), (2, '<a>\n <b>\n text\n </b>\n</a>\n'),
("\t", '<a>\n\t<b>\n\t\ttext\n\t</b>\n</a>'), ("\t", '<a>\n\t<b>\n\t\ttext\n\t</b>\n</a>\n'),
('abc', '<a>\nabc<b>\nabcabctext\nabc</b>\n</a>'), ('abc', '<a>\nabc<b>\nabcabctext\nabc</b>\n</a>\n'),
# Some invalid inputs -- the default behavior is used. # Some invalid inputs -- the default behavior is used.
(object(), '<a>\n <b>\n text\n </b>\n</a>'), (object(), '<a>\n <b>\n text\n </b>\n</a>\n'),
(b'bytes', '<a>\n <b>\n text\n </b>\n</a>'), (b'bytes', '<a>\n <b>\n text\n </b>\n</a>\n'),
] ]
) )
def test_indent(self, indent, expect): def test_indent(self, indent, expect):

View file

@ -0,0 +1,91 @@
"""This file contains test cases reported by third parties using
fuzzing tools, primarily from Google's oss-fuzz project. Some of these
represent real problems with Beautiful Soup, but many are problems in
libraries that Beautiful Soup depends on, and many of the test cases
represent different ways of triggering the same problem.
Grouping these test cases together makes it easy to see which test
cases represent the same problem, and puts the test cases in close
proximity to code that can trigger the problems.
"""
import os
import pytest
from bs4 import (
BeautifulSoup,
ParserRejectedMarkup,
)
class TestFuzz(object):
    """Test cases reported by fuzzing tools, primarily oss-fuzz.

    Each test loads a captured fuzzer input from the tests/fuzz
    directory and verifies that Beautiful Soup handles it in the
    documented, controlled way (or, for known upstream bugs, records
    the failure mode with a skip).
    """

    # Test case markup files from fuzzers are given this extension so
    # they can be included in builds.
    TESTCASE_SUFFIX = ".testcase"

    # This class of error has been fixed by catching a less helpful
    # exception from html.parser and raising ParserRejectedMarkup
    # instead.
    @pytest.mark.parametrize(
        "filename", [
            "clusterfuzz-testcase-minimized-bs4_fuzzer-5703933063462912",
        ]
    )
    def test_rejected_markup(self, filename):
        markup = self.__markup(filename)
        with pytest.raises(ParserRejectedMarkup):
            BeautifulSoup(markup, 'html.parser')

    # This class of error has to do with very deeply nested documents
    # which overflow the Python call stack when the tree is converted
    # to a string. This is an issue with Beautiful Soup which was fixed
    # as part of [bug=1471755].
    @pytest.mark.parametrize(
        "filename", [
            "clusterfuzz-testcase-minimized-bs4_fuzzer-5984173902397440",
            "clusterfuzz-testcase-minimized-bs4_fuzzer-5167584867909632",
            "clusterfuzz-testcase-minimized-bs4_fuzzer-6124268085182464",
            "clusterfuzz-testcase-minimized-bs4_fuzzer-6450958476902400",
        ]
    )
    def test_deeply_nested_document(self, filename):
        # Parsing the document and encoding it back to a string is
        # sufficient to demonstrate that the overflow problem has
        # been fixed.
        markup = self.__markup(filename)
        BeautifulSoup(markup, 'html.parser').encode()

    # This class of error represents problems with html5lib's parser,
    # not Beautiful Soup. I use
    # https://github.com/html5lib/html5lib-python/issues/568 to notify
    # the html5lib developers of these issues.
    @pytest.mark.skip("html5lib problems")
    @pytest.mark.parametrize(
        "filename", [
            # b"""ÿ<!DOCTyPEV PUBLIC'''Ð'"""
            "clusterfuzz-testcase-minimized-bs4_fuzzer-4818336571064320",
            # b')<a><math><TR><a><mI><a><p><a>'
            "clusterfuzz-testcase-minimized-bs4_fuzzer-4999465949331456",
            # b'-<math><sElect><mi><sElect><sElect>'
            "clusterfuzz-testcase-minimized-bs4_fuzzer-5843991618256896",
            # b'ñ<table><svg><html>'
            "clusterfuzz-testcase-minimized-bs4_fuzzer-6241471367348224",
            # <TABLE>, some ^@ characters, some <math> tags.
            "clusterfuzz-testcase-minimized-bs4_fuzzer-6600557255327744",
            # Nested table
            "crash-0d306a50c8ed8bcd0785b67000fcd5dea1d33f08"
        ]
    )
    def test_html5lib_parse_errors(self, filename):
        markup = self.__markup(filename)
        print(BeautifulSoup(markup, 'html5lib').encode())

    def __markup(self, filename):
        """Return the raw bytes of the named fuzz test case file.

        :param filename: Base name of a file in the tests/fuzz
            directory; TESTCASE_SUFFIX is appended if missing.
        """
        if not filename.endswith(self.TESTCASE_SUFFIX):
            filename += self.TESTCASE_SUFFIX
        this_dir = os.path.split(__file__)[0]
        path = os.path.join(this_dir, 'fuzz', filename)
        # Use a context manager so the file handle is closed promptly;
        # the original `open(path, 'rb').read()` leaked the handle to
        # the garbage collector.
        with open(path, 'rb') as f:
            return f.read()

View file

@ -3,9 +3,11 @@ trees."""
from pdb import set_trace from pdb import set_trace
import pickle import pickle
import pytest
import warnings import warnings
from bs4.builder import ( from bs4.builder import (
HTMLParserTreeBuilder, HTMLParserTreeBuilder,
ParserRejectedMarkup,
XMLParsedAsHTMLWarning, XMLParsedAsHTMLWarning,
) )
from bs4.builder._htmlparser import BeautifulSoupHTMLParser from bs4.builder._htmlparser import BeautifulSoupHTMLParser
@ -15,6 +17,28 @@ class TestHTMLParserTreeBuilder(SoupTest, HTMLTreeBuilderSmokeTest):
default_builder = HTMLParserTreeBuilder default_builder = HTMLParserTreeBuilder
def test_rejected_input(self):
# Python's html.parser will occasionally reject markup,
# especially when there is a problem with the initial DOCTYPE
# declaration. Different versions of Python sound the alarm in
# different ways, but Beautiful Soup consistently raises
# errors as ParserRejectedMarkup exceptions.
bad_markup = [
# https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=28873
# https://github.com/guidovranken/python-library-fuzzers/blob/master/corp-html/519e5b4269a01185a0d5e76295251921da2f0700
# https://github.com/python/cpython/issues/81928
b'\n<![\xff\xfe\xfe\xcd\x00',
#https://github.com/guidovranken/python-library-fuzzers/blob/master/corp-html/de32aa55785be29bbc72a1a8e06b00611fb3d9f8
# https://github.com/python/cpython/issues/78661
#
b'<![n\x00',
b"<![UNKNOWN[]]>",
]
for markup in bad_markup:
with pytest.raises(ParserRejectedMarkup):
soup = self.soup(markup)
def test_namespaced_system_doctype(self): def test_namespaced_system_doctype(self):
# html.parser can't handle namespaced doctypes, so skip this one. # html.parser can't handle namespaced doctypes, so skip this one.
pass pass

View file

@ -189,13 +189,15 @@ class TestLXMLXMLTreeBuilder(SoupTest, XMLTreeBuilderSmokeTest):
assert soup.find('prefix:tag3').name == 'tag3' assert soup.find('prefix:tag3').name == 'tag3'
assert soup.subtag.find('prefix:tag3').name == 'tag3' assert soup.subtag.find('prefix:tag3').name == 'tag3'
def test_pickle_removes_builder(self): def test_pickle_restores_builder(self):
# The lxml TreeBuilder is not picklable, so it won't be # The lxml TreeBuilder is not picklable, so when unpickling
# preserved in a pickle/unpickle operation. # a document created with it, a new TreeBuilder of the
# appropriate class is created.
soup = self.soup("<a>some markup</a>") soup = self.soup("<a>some markup</a>")
assert isinstance(soup.builder, self.default_builder) assert isinstance(soup.builder, self.default_builder)
pickled = pickle.dumps(soup) pickled = pickle.dumps(soup)
unpickled = pickle.loads(pickled) unpickled = pickle.loads(pickled)
assert "some markup" == unpickled.a.string assert "some markup" == unpickled.a.string
assert unpickled.builder is None assert unpickled.builder != soup.builder
assert isinstance(unpickled.builder, self.default_builder)

View file

@ -2,20 +2,18 @@
import copy import copy
import pickle import pickle
import pytest import pytest
import sys
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from bs4.element import ( from bs4.element import (
Comment, Comment,
ResultSet,
SoupStrainer, SoupStrainer,
) )
from . import ( from . import (
SoupTest, SoupTest,
SOUP_SIEVE_PRESENT,
) )
if SOUP_SIEVE_PRESENT:
from soupsieve import SelectorSyntaxError
class TestEncoding(SoupTest): class TestEncoding(SoupTest):
"""Test the ability to encode objects into strings.""" """Test the ability to encode objects into strings."""
@ -52,9 +50,20 @@ class TestEncoding(SoupTest):
encoding="utf8" encoding="utf8"
) )
def test_encode_deeply_nested_document(self):
# This test verifies that encoding a string doesn't involve
# any recursive function calls. If it did, this test would
# overflow the Python interpreter stack.
limit = sys.getrecursionlimit() + 1
markup = "<span>" * limit
soup = self.soup(markup)
encoded = soup.encode()
assert limit == encoded.count(b"<span>")
def test_deprecated_renderContents(self): def test_deprecated_renderContents(self):
html = "<b>\N{SNOWMAN}</b>" html = "<b>\N{SNOWMAN}</b>"
soup = self.soup(html) soup = self.soup(html)
soup.renderContents()
assert "\N{SNOWMAN}".encode("utf8") == soup.b.renderContents() assert "\N{SNOWMAN}".encode("utf8") == soup.b.renderContents()
def test_repr(self): def test_repr(self):
@ -159,7 +168,31 @@ class TestFormatters(SoupTest):
soup = self.soup("<div> foo <pre> \tbar\n \n </pre> baz <textarea> eee\nfff\t</textarea></div>") soup = self.soup("<div> foo <pre> \tbar\n \n </pre> baz <textarea> eee\nfff\t</textarea></div>")
# Everything outside the <pre> tag is reformatted, but everything # Everything outside the <pre> tag is reformatted, but everything
# inside is left alone. # inside is left alone.
assert '<div>\n foo\n <pre> \tbar\n \n </pre>\n baz\n <textarea> eee\nfff\t</textarea>\n</div>' == soup.div.prettify() assert '<div>\n foo\n <pre> \tbar\n \n </pre>\n baz\n <textarea> eee\nfff\t</textarea>\n</div>\n' == soup.div.prettify()
def test_prettify_handles_nested_string_literal_tags(self):
# Most of this markup is inside a <pre> tag, so prettify()
# only does three things to it:
# 1. Add a newline and a space between the <div> and the <pre>
# 2. Add a newline after the </pre>
# 3. Add a newline at the end.
#
# The contents of the <pre> tag are left completely alone. In
# particular, we don't start adding whitespace again once we
# encounter the first </pre> tag, because we know it's not
# the one that put us into string literal mode.
markup = """<div><pre><code>some
<script><pre>code</pre></script> for you
</code></pre></div>"""
expect = """<div>
<pre><code>some
<script><pre>code</pre></script> for you
</code></pre>
</div>
"""
soup = self.soup(markup)
assert expect == soup.div.prettify()
def test_prettify_accepts_formatter_function(self): def test_prettify_accepts_formatter_function(self):
soup = BeautifulSoup("<html><body>foo</body></html>", 'html.parser') soup = BeautifulSoup("<html><body>foo</body></html>", 'html.parser')
@ -216,429 +249,6 @@ class TestFormatters(SoupTest):
assert soup.contents[0].name == 'pre' assert soup.contents[0].name == 'pre'
@pytest.mark.skipif(not SOUP_SIEVE_PRESENT, reason="Soup Sieve not installed")
class TestCSSSelectors(SoupTest):
"""Test basic CSS selector functionality.
This functionality is implemented in soupsieve, which has a much
more comprehensive test suite, so this is basically an extra check
that soupsieve works as expected.
"""
HTML = """
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<title>The title</title>
<link rel="stylesheet" href="blah.css" type="text/css" id="l1">
</head>
<body>
<custom-dashed-tag class="dashed" id="dash1">Hello there.</custom-dashed-tag>
<div id="main" class="fancy">
<div id="inner">
<h1 id="header1">An H1</h1>
<p>Some text</p>
<p class="onep" id="p1">Some more text</p>
<h2 id="header2">An H2</h2>
<p class="class1 class2 class3" id="pmulti">Another</p>
<a href="http://bob.example.org/" rel="friend met" id="bob">Bob</a>
<h2 id="header3">Another H2</h2>
<a id="me" href="http://simonwillison.net/" rel="me">me</a>
<span class="s1">
<a href="#" id="s1a1">span1a1</a>
<a href="#" id="s1a2">span1a2 <span id="s1a2s1">test</span></a>
<span class="span2">
<a href="#" id="s2a1">span2a1</a>
</span>
<span class="span3"></span>
<custom-dashed-tag class="dashed" id="dash2"/>
<div data-tag="dashedvalue" id="data1"/>
</span>
</div>
<x id="xid">
<z id="zida"/>
<z id="zidab"/>
<z id="zidac"/>
</x>
<y id="yid">
<z id="zidb"/>
</y>
<p lang="en" id="lang-en">English</p>
<p lang="en-gb" id="lang-en-gb">English UK</p>
<p lang="en-us" id="lang-en-us">English US</p>
<p lang="fr" id="lang-fr">French</p>
</div>
<div id="footer">
</div>
"""
def setup_method(self):
self.soup = BeautifulSoup(self.HTML, 'html.parser')
def assert_selects(self, selector, expected_ids, **kwargs):
el_ids = [el['id'] for el in self.soup.select(selector, **kwargs)]
el_ids.sort()
expected_ids.sort()
assert expected_ids == el_ids, "Selector %s, expected [%s], got [%s]" % (
selector, ', '.join(expected_ids), ', '.join(el_ids)
)
assertSelect = assert_selects
def assert_select_multiple(self, *tests):
for selector, expected_ids in tests:
self.assert_selects(selector, expected_ids)
def test_one_tag_one(self):
els = self.soup.select('title')
assert len(els) == 1
assert els[0].name == 'title'
assert els[0].contents == ['The title']
def test_one_tag_many(self):
els = self.soup.select('div')
assert len(els) == 4
for div in els:
assert div.name == 'div'
el = self.soup.select_one('div')
assert 'main' == el['id']
def test_select_one_returns_none_if_no_match(self):
match = self.soup.select_one('nonexistenttag')
assert None == match
def test_tag_in_tag_one(self):
els = self.soup.select('div div')
self.assert_selects('div div', ['inner', 'data1'])
def test_tag_in_tag_many(self):
for selector in ('html div', 'html body div', 'body div'):
self.assert_selects(selector, ['data1', 'main', 'inner', 'footer'])
def test_limit(self):
self.assert_selects('html div', ['main'], limit=1)
self.assert_selects('html body div', ['inner', 'main'], limit=2)
self.assert_selects('body div', ['data1', 'main', 'inner', 'footer'],
limit=10)
def test_tag_no_match(self):
assert len(self.soup.select('del')) == 0
def test_invalid_tag(self):
with pytest.raises(SelectorSyntaxError):
self.soup.select('tag%t')
def test_select_dashed_tag_ids(self):
self.assert_selects('custom-dashed-tag', ['dash1', 'dash2'])
def test_select_dashed_by_id(self):
dashed = self.soup.select('custom-dashed-tag[id=\"dash2\"]')
assert dashed[0].name == 'custom-dashed-tag'
assert dashed[0]['id'] == 'dash2'
def test_dashed_tag_text(self):
assert self.soup.select('body > custom-dashed-tag')[0].text == 'Hello there.'
def test_select_dashed_matches_find_all(self):
assert self.soup.select('custom-dashed-tag') == self.soup.find_all('custom-dashed-tag')
def test_header_tags(self):
self.assert_select_multiple(
('h1', ['header1']),
('h2', ['header2', 'header3']),
)
def test_class_one(self):
for selector in ('.onep', 'p.onep', 'html p.onep'):
els = self.soup.select(selector)
assert len(els) == 1
assert els[0].name == 'p'
assert els[0]['class'] == ['onep']
def test_class_mismatched_tag(self):
els = self.soup.select('div.onep')
assert len(els) == 0
def test_one_id(self):
for selector in ('div#inner', '#inner', 'div div#inner'):
self.assert_selects(selector, ['inner'])
def test_bad_id(self):
    """A nonexistent id selects nothing."""
    assert not self.soup.select('#doesnotexist')
def test_items_in_id(self):
    """Descendant selection inside an id-selected tag."""
    paragraphs = self.soup.select('div#inner p')
    assert len(paragraphs) == 3
    assert all(p.name == 'p' for p in paragraphs)
    # Only the middle paragraph carries a class attribute.
    assert paragraphs[1]['class'] == ['onep']
    assert not paragraphs[0].has_attr('class')
def test_a_bunch_of_emptys(self):
    """Selectors that cannot possibly match return empty results."""
    for css in ('div#main del', 'div#main div.oops', 'div div#main'):
        assert not self.soup.select(css)
def test_multi_class_support(self):
    """A tag with several classes is found by any one of them."""
    selectors = ('.class1', 'p.class1', '.class2', 'p.class2',
                 '.class3', 'p.class3', 'html p.class2', 'div#inner .class2')
    for css in selectors:
        self.assert_selects(css, ['pmulti'])
def test_multi_class_selection(self):
    """Chained class selectors require all listed classes at once."""
    for css in ('.class1.class3', '.class3.class2',
                '.class1.class2.class3'):
        self.assert_selects(css, ['pmulti'])
def test_child_selector(self):
    """The > combinator restricts matches to direct children."""
    cases = (
        ('.s1 > a', ['s1a1', 's1a2']),
        ('.s1 > a span', ['s1a2s1']),
    )
    for css, expected in cases:
        self.assert_selects(css, expected)
def test_child_selector_id(self):
    """A child combinator composes with id and descendant selectors."""
    expected = ['s1a2s1']
    self.assert_selects('.s1 > a#s1a2 span', expected)
def test_attribute_equals(self):
    """[attr="value"] matches only exact attribute values."""
    cases = (
        ('p[class="onep"]', ['p1']),
        ('p[id="p1"]', ['p1']),
        ('[class="onep"]', ['p1']),
        ('[id="p1"]', ['p1']),
        ('link[rel="stylesheet"]', ['l1']),
        ('link[type="text/css"]', ['l1']),
        ('link[href="blah.css"]', ['l1']),
        ('link[href="no-blah.css"]', []),
        ('[rel="stylesheet"]', ['l1']),
        ('[type="text/css"]', ['l1']),
        ('[href="blah.css"]', ['l1']),
        ('[href="no-blah.css"]', []),
        ('p[href="no-blah.css"]', []),
        ('[href="no-blah.css"]', []),
    )
    self.assert_select_multiple(*cases)
def test_attribute_tilde(self):
    """[attr~="word"] matches a whitespace-separated word in the value."""
    cases = (
        ('p[class~="class1"]', ['pmulti']),
        ('p[class~="class2"]', ['pmulti']),
        ('p[class~="class3"]', ['pmulti']),
        ('[class~="class1"]', ['pmulti']),
        ('[class~="class2"]', ['pmulti']),
        ('[class~="class3"]', ['pmulti']),
        ('a[rel~="friend"]', ['bob']),
        ('a[rel~="met"]', ['bob']),
        ('[rel~="friend"]', ['bob']),
        ('[rel~="met"]', ['bob']),
    )
    self.assert_select_multiple(*cases)
def test_attribute_startswith(self):
    """[attr^="prefix"] matches attribute values by prefix."""
    cases = (
        ('[rel^="style"]', ['l1']),
        ('link[rel^="style"]', ['l1']),
        ('notlink[rel^="notstyle"]', []),
        ('[rel^="notstyle"]', []),
        ('link[rel^="notstyle"]', []),
        ('link[href^="bla"]', ['l1']),
        ('a[href^="http://"]', ['bob', 'me']),
        ('[href^="http://"]', ['bob', 'me']),
        ('[id^="p"]', ['pmulti', 'p1']),
        ('[id^="m"]', ['me', 'main']),
        ('div[id^="m"]', ['main']),
        ('a[id^="m"]', ['me']),
        ('div[data-tag^="dashed"]', ['data1']),
    )
    self.assert_select_multiple(*cases)
def test_attribute_endswith(self):
    """[attr$="suffix"] matches attribute values by suffix."""
    cases = (
        ('[href$=".css"]', ['l1']),
        ('link[href$=".css"]', ['l1']),
        ('link[id$="1"]', ['l1']),
        ('[id$="1"]', ['data1', 'l1', 'p1', 'header1', 's1a1', 's2a1', 's1a2s1', 'dash1']),
        ('div[id$="1"]', ['data1']),
        ('[id$="noending"]', []),
    )
    self.assert_select_multiple(*cases)
def test_attribute_contains(self):
    """[attr*="sub"] matches any attribute value containing the substring."""
    cases = (
        # Prefix cases (mirroring test_attribute_startswith).
        ('[rel*="style"]', ['l1']),
        ('link[rel*="style"]', ['l1']),
        ('notlink[rel*="notstyle"]', []),
        ('[rel*="notstyle"]', []),
        ('link[rel*="notstyle"]', []),
        ('link[href*="bla"]', ['l1']),
        ('[href*="http://"]', ['bob', 'me']),
        ('[id*="p"]', ['pmulti', 'p1']),
        ('div[id*="m"]', ['main']),
        ('a[id*="m"]', ['me']),
        # Suffix cases (mirroring test_attribute_endswith).
        ('[href*=".css"]', ['l1']),
        ('link[href*=".css"]', ['l1']),
        ('link[id*="1"]', ['l1']),
        ('[id*="1"]', ['data1', 'l1', 'p1', 'header1', 's1a1', 's1a2', 's2a1', 's1a2s1', 'dash1']),
        ('div[id*="1"]', ['data1']),
        ('[id*="noending"]', []),
        # Substring-only cases.
        ('[href*="."]', ['bob', 'me', 'l1']),
        ('a[href*="."]', ['bob', 'me']),
        ('link[href*="."]', ['l1']),
        ('div[id*="n"]', ['main', 'inner']),
        ('div[id*="nn"]', ['inner']),
        ('div[data-tag*="edval"]', ['data1']),
    )
    self.assert_select_multiple(*cases)
def test_attribute_exact_or_hypen(self):
    """[attr|="val"] matches the exact value or a "val-" hyphen prefix.

    NOTE(review): the method name misspells "hyphen"; kept as-is so the
    test's identity is unchanged.
    """
    cases = (
        ('p[lang|="en"]', ['lang-en', 'lang-en-gb', 'lang-en-us']),
        ('[lang|="en"]', ['lang-en', 'lang-en-gb', 'lang-en-us']),
        ('p[lang|="fr"]', ['lang-fr']),
        ('p[lang|="gb"]', []),
    )
    self.assert_select_multiple(*cases)
def test_attribute_exists(self):
    """[attr] matches any tag carrying that attribute, whatever its value."""
    cases = (
        ('[rel]', ['l1', 'bob', 'me']),
        ('link[rel]', ['l1']),
        ('a[rel]', ['bob', 'me']),
        ('[lang]', ['lang-en', 'lang-en-gb', 'lang-en-us', 'lang-fr']),
        ('p[class]', ['p1', 'pmulti']),
        ('[blah]', []),
        ('p[blah]', []),
        ('div[data-tag]', ['data1']),
    )
    self.assert_select_multiple(*cases)
def test_quoted_space_in_selector_name(self):
    """A quoted attribute value containing a space is matched literally.

    The space inside "display: right" must be treated as part of the
    value, not as a descendant combinator.
    """
    html = """<div style="display: wrong">nope</div>
    <div style="display: right">yes</div>
    """
    soup = BeautifulSoup(html, 'html.parser')
    # Exactly one div carries the quoted style value.
    [chosen] = soup.select('div[style="display: right"]')
    assert "yes" == chosen.string
def test_unsupported_pseudoclass(self):
    """Unknown pseudoclasses raise NotImplementedError; a malformed
    nth-of-type argument is a selector syntax error."""
    doc = self.soup
    with pytest.raises(NotImplementedError):
        doc.select("a:no-such-pseudoclass")
    with pytest.raises(SelectorSyntaxError):
        doc.select("a:nth-of-type(a)")
def test_nth_of_type(self):
    """:nth-of-type counts from 1; zero or out-of-range matches nothing."""
    # First paragraph inside div#inner.
    first = self.soup.select('div#inner p:nth-of-type(1)')
    assert [tag.string for tag in first] == ['Some text']
    # Third paragraph.
    third = self.soup.select('div#inner p:nth-of-type(3)')
    assert [tag.string for tag in third] == ['Another']
    # There is no fourth paragraph.
    assert self.soup.select('div#inner p:nth-of-type(4)') == []
    # Zero is not a valid position, so it selects nothing.
    assert self.soup.select('div p:nth-of-type(0)') == []
def test_nth_of_type_direct_descendant(self):
    """:nth-of-type composes with the child combinator."""
    matches = self.soup.select('div#inner > p:nth-of-type(1)')
    assert [tag.string for tag in matches] == ['Some text']
def test_id_child_selector_nth_of_type(self):
    """An id selector, child combinator and :nth-of-type work together."""
    expected = ['p1']
    self.assert_selects('#inner > p:nth-of-type(2)', expected)
def test_select_on_element(self):
    """select() on a tag searches only that tag's subtree."""
    main = self.soup.find("div", id="main")
    # <div id="inner"> and <div id="data1"> live under #main;
    # <div id="footer"> does not, so it must be excluded.
    self.assert_selects_ids(main.select("div"), ['inner', 'data1'])
def test_overspecified_child_id(self):
    """An id only matches when its ancestor clause also matches."""
    cases = (
        ('.fancy #inner', ['inner']),
        ('.normal #inner', []),
    )
    for css, expected in cases:
        self.assert_selects(css, expected)
def test_adjacent_sibling_selector(self):
    """The + combinator matches only the immediately following sibling."""
    cases = (
        ('#p1 + h2', ['header2']),
        ('#p1 + h2 + p', ['pmulti']),
        ('#p1 + #header2 + .class1', ['pmulti']),
    )
    for css, expected in cases:
        self.assert_selects(css, expected)
    # #p1's next sibling is not a <p>, so this matches nothing.
    assert self.soup.select('#p1 + p') == []
def test_general_sibling_selector(self):
    """The ~ combinator matches any later sibling, not just the next one."""
    cases = (
        ('#p1 ~ h2', ['header2', 'header3']),
        ('#p1 ~ #header2', ['header2']),
        ('#p1 ~ h2 + a', ['me']),
        ('#p1 ~ h2 + [rel="me"]', ['me']),
    )
    for css, expected in cases:
        self.assert_selects(css, expected)
    # #inner has no later h2 sibling.
    assert self.soup.select('#inner ~ h2') == []
def test_dangling_combinator(self):
    """A combinator with no right-hand side is a syntax error."""
    doc = self.soup
    with pytest.raises(SelectorSyntaxError):
        doc.select('h1 >')
def test_sibling_combinator_wont_select_same_tag_twice(self):
    """Each tag appears at most once even when ~ could reach it repeatedly."""
    expected = ['lang-en-gb', 'lang-en-us', 'lang-fr']
    self.assert_selects('p[lang] ~ p', expected)
# Test the selector grouping operator (the comma)
def test_multiple_select(self):
    """A comma groups selectors; matches of either side are returned."""
    expected = ['xid', 'yid']
    self.assert_selects('x, y', expected)
def test_multiple_select_with_no_space(self):
    """Whitespace after the grouping comma is optional."""
    expected = ['xid', 'yid']
    self.assert_selects('x,y', expected)
def test_multiple_select_with_more_space(self):
    """Extra whitespace around the grouping comma is ignored."""
    expected = ['xid', 'yid']
    self.assert_selects('x, y', expected)
def test_multiple_select_duplicated(self):
    """Repeating the same selector in a group does not duplicate matches."""
    expected = ['xid']
    self.assert_selects('x, x', expected)
def test_multiple_select_sibling(self):
    """A group may mix a plain tag selector with a sibling selector."""
    expected = ['xid', 'lang-fr']
    self.assert_selects('x, y ~ p[lang=fr]', expected)
def test_multiple_select_tag_and_direct_descendant(self):
    """A group may mix a plain tag selector with a child selector."""
    expected = ['xid', 'zidb']
    self.assert_selects('x, y > z', expected)
def test_multiple_select_direct_descendant_and_tags(self):
    """A child selector may lead a group of plain tag selectors."""
    expected = ['xid', 'yid', 'zida', 'zidb', 'zidab', 'zidac']
    self.assert_selects('div > x, y, z', expected)
def test_multiple_select_indirect_descendant(self):
    """A descendant selector may lead a group of plain tag selectors."""
    expected = ['xid', 'yid', 'zida', 'zidb', 'zidab', 'zidac']
    self.assert_selects('div x,y, z', expected)
def test_invalid_multiple_select(self):
    """Empty components around the grouping comma are syntax errors."""
    for bad in (',x, y', 'x,,y'):
        with pytest.raises(SelectorSyntaxError):
            self.soup.select(bad)
def test_multiple_select_attrs(self):
    """Attribute selectors may be grouped with a comma."""
    expected = ['lang-en', 'lang-en-gb']
    self.assert_selects('p[lang=en], p[lang=en-gb]', expected)
def test_multiple_select_ids(self):
    """Grouped id-qualified selectors only keep the clauses that match."""
    expected = ['xid', 'zidb', 'zidab']
    self.assert_selects('x, y > z[id=zida], z[id=zidab], z[id=zidb]', expected)
def test_multiple_select_nested(self):
    """Each member of a group carries its own combinator chain."""
    expected = ['xid', 'zidb']
    self.assert_selects('body > div > x, y > z', expected)
def test_select_duplicate_elements(self):
    """A grouped selector finds every element, including duplicates."""
    markup = '<div class="c1"/><div class="c2"/><div class="c1"/>'
    soup = BeautifulSoup(markup, 'html.parser')
    selected = soup.select(".c1, .c2")
    assert len(selected) == 3
    # find_all locates the same elements, though an implementation
    # detail makes it yield them in a different order.
    assert all(
        element in selected
        for element in soup.find_all(class_=['c1', 'c2'])
    )
class TestPersistence(SoupTest): class TestPersistence(SoupTest):
"Testing features like pickle and deepcopy." "Testing features like pickle and deepcopy."
@ -674,6 +284,18 @@ class TestPersistence(SoupTest):
copied = copy.deepcopy(self.tree) copied = copy.deepcopy(self.tree)
assert copied.decode() == self.tree.decode() assert copied.decode() == self.tree.decode()
def test_copy_deeply_nested_document(self):
    """copy()/deepcopy() must not recurse once per element.

    If they did, a document nested deeper than the interpreter's
    recursion limit would overflow the stack, so we build exactly
    such a document and copy it both ways.
    """
    depth = sys.getrecursionlimit() + 1
    soup = self.soup("<span>" * depth)
    copy.copy(soup)
    copy.deepcopy(soup)
def test_copy_preserves_encoding(self): def test_copy_preserves_encoding(self):
soup = BeautifulSoup(b'<p>&nbsp;</p>', 'html.parser') soup = BeautifulSoup(b'<p>&nbsp;</p>', 'html.parser')
encoding = soup.original_encoding encoding = soup.original_encoding

View file

@ -24,6 +24,7 @@ from bs4.builder import (
from bs4.element import ( from bs4.element import (
Comment, Comment,
SoupStrainer, SoupStrainer,
PYTHON_SPECIFIC_ENCODINGS,
Tag, Tag,
NavigableString, NavigableString,
) )
@ -210,6 +211,47 @@ class TestConstructor(SoupTest):
assert [] == soup.string_container_stack assert [] == soup.string_container_stack
class TestOutput(SoupTest):
    """Tests of how a parsed document is rendered back out by
    decode()/encode() and their wrappers."""

    @pytest.mark.parametrize(
        "eventual_encoding,actual_encoding", [
            ("utf-8", "utf-8"),
            ("utf-16", "utf-16"),
        ]
    )
    def test_decode_xml_declaration(self, eventual_encoding, actual_encoding):
        # Most of the time, calling decode() on an XML document will
        # give you a document declaration that mentions the encoding
        # you intend to use when encoding the document as a
        # bytestring.
        soup = self.soup("<tag></tag>")
        soup.is_xml = True
        assert (f'<?xml version="1.0" encoding="{actual_encoding}"?>\n<tag></tag>'
                == soup.decode(eventual_encoding=eventual_encoding))

    @pytest.mark.parametrize(
        "eventual_encoding", [x for x in PYTHON_SPECIFIC_ENCODINGS] + [None]
    )
    def test_decode_xml_declaration_with_missing_or_python_internal_eventual_encoding(self, eventual_encoding):
        # But if you pass a Python internal encoding into decode(), or
        # omit the eventual_encoding altogether, the document
        # declaration won't mention any particular encoding, since a
        # Python-internal encoding is useless outside the interpreter.
        soup = BeautifulSoup("<tag></tag>", "html.parser")
        soup.is_xml = True
        assert (f'<?xml version="1.0"?>\n<tag></tag>'
                == soup.decode(eventual_encoding=eventual_encoding))

    def test(self):
        # BeautifulSoup subclasses Tag and extends the decode() method.
        # Make sure the other Tag methods which call decode() call
        # it correctly.
        soup = self.soup("<tag></tag>")
        assert b"<tag></tag>" == soup.encode(encoding="utf-8")
        assert b"<tag></tag>" == soup.encode_contents(encoding="utf-8")
        assert "<tag></tag>" == soup.decode_contents()
        assert "<tag>\n</tag>\n" == soup.prettify()
class TestWarnings(SoupTest): class TestWarnings(SoupTest):
# Note that some of the tests in this class create BeautifulSoup # Note that some of the tests in this class create BeautifulSoup
# objects directly rather than using self.soup(). That's # objects directly rather than using self.soup(). That's

View file

@ -1,4 +1,4 @@
from .core import contents, where from .core import contents, where
__all__ = ["contents", "where"] __all__ = ["contents", "where"]
__version__ = "2022.12.07" __version__ = "2023.07.22"

View file

@ -791,34 +791,6 @@ uLjbvrW5KfnaNwUASZQDhETnv0Mxz3WLJdH0pmT1kvarBes96aULNmLazAZfNou2
XjG4Kvte9nHfRCaexOYNkbQudZWAUWpLMKawYqGT8ZvYzsRjdT9ZR7E= XjG4Kvte9nHfRCaexOYNkbQudZWAUWpLMKawYqGT8ZvYzsRjdT9ZR7E=
-----END CERTIFICATE----- -----END CERTIFICATE-----
# Issuer: CN=Hongkong Post Root CA 1 O=Hongkong Post
# Subject: CN=Hongkong Post Root CA 1 O=Hongkong Post
# Label: "Hongkong Post Root CA 1"
# Serial: 1000
# MD5 Fingerprint: a8:0d:6f:39:78:b9:43:6d:77:42:6d:98:5a:cc:23:ca
# SHA1 Fingerprint: d6:da:a8:20:8d:09:d2:15:4d:24:b5:2f:cb:34:6e:b2:58:b2:8a:58
# SHA256 Fingerprint: f9:e6:7d:33:6c:51:00:2a:c0:54:c6:32:02:2d:66:dd:a2:e7:e3:ff:f1:0a:d0:61:ed:31:d8:bb:b4:10:cf:b2
-----BEGIN CERTIFICATE-----
MIIDMDCCAhigAwIBAgICA+gwDQYJKoZIhvcNAQEFBQAwRzELMAkGA1UEBhMCSEsx
FjAUBgNVBAoTDUhvbmdrb25nIFBvc3QxIDAeBgNVBAMTF0hvbmdrb25nIFBvc3Qg
Um9vdCBDQSAxMB4XDTAzMDUxNTA1MTMxNFoXDTIzMDUxNTA0NTIyOVowRzELMAkG
A1UEBhMCSEsxFjAUBgNVBAoTDUhvbmdrb25nIFBvc3QxIDAeBgNVBAMTF0hvbmdr
b25nIFBvc3QgUm9vdCBDQSAxMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKC
AQEArP84tulmAknjorThkPlAj3n54r15/gK97iSSHSL22oVyaf7XPwnU3ZG1ApzQ
jVrhVcNQhrkpJsLj2aDxaQMoIIBFIi1WpztUlVYiWR8o3x8gPW2iNr4joLFutbEn
PzlTCeqrauh0ssJlXI6/fMN4hM2eFvz1Lk8gKgifd/PFHsSaUmYeSF7jEAaPIpjh
ZY4bXSNmO7ilMlHIhqqhqZ5/dpTCpmy3QfDVyAY45tQM4vM7TG1QjMSDJ8EThFk9
nnV0ttgCXjqQesBCNnLsak3c78QA3xMYV18meMjWCnl3v/evt3a5pQuEF10Q6m/h
q5URX208o1xNg1vysxmKgIsLhwIDAQABoyYwJDASBgNVHRMBAf8ECDAGAQH/AgED
MA4GA1UdDwEB/wQEAwIBxjANBgkqhkiG9w0BAQUFAAOCAQEADkbVPK7ih9legYsC
mEEIjEy82tvuJxuC52pF7BaLT4Wg87JwvVqWuspube5Gi27nKi6Wsxkz67SfqLI3
7piol7Yutmcn1KZJ/RyTZXaeQi/cImyaT/JaFTmxcdcrUehtHJjA2Sr0oYJ71clB
oiMBdDhViw+5LmeiIAQ32pwL0xch4I+XeTRvhEgCIDMb5jREn5Fw9IBehEPCKdJs
EhTkYY2sEJCehFC78JZvRZ+K88psT/oROhUVRsPNH4NbLUES7VBnQRM9IauUiqpO
fMGx+6fWtScvl6tu4B3i0RwsH0Ti/L6RoZz71ilTc4afU9hDDl3WY4JxHYB0yvbi
AmvZWg==
-----END CERTIFICATE-----
# Issuer: CN=SecureSign RootCA11 O=Japan Certification Services, Inc. # Issuer: CN=SecureSign RootCA11 O=Japan Certification Services, Inc.
# Subject: CN=SecureSign RootCA11 O=Japan Certification Services, Inc. # Subject: CN=SecureSign RootCA11 O=Japan Certification Services, Inc.
# Label: "SecureSign RootCA11" # Label: "SecureSign RootCA11"
@ -1676,50 +1648,6 @@ HL/EVlP6Y2XQ8xwOFvVrhlhNGNTkDY6lnVuR3HYkUD/GKvvZt5y11ubQ2egZixVx
SK236thZiNSQvxaz2emsWWFUyBy6ysHK4bkgTI86k4mloMy/0/Z1pHWWbVY= SK236thZiNSQvxaz2emsWWFUyBy6ysHK4bkgTI86k4mloMy/0/Z1pHWWbVY=
-----END CERTIFICATE----- -----END CERTIFICATE-----
# Issuer: CN=E-Tugra Certification Authority O=E-Tu\u011fra EBG Bili\u015fim Teknolojileri ve Hizmetleri A.\u015e. OU=E-Tugra Sertifikasyon Merkezi
# Subject: CN=E-Tugra Certification Authority O=E-Tu\u011fra EBG Bili\u015fim Teknolojileri ve Hizmetleri A.\u015e. OU=E-Tugra Sertifikasyon Merkezi
# Label: "E-Tugra Certification Authority"
# Serial: 7667447206703254355
# MD5 Fingerprint: b8:a1:03:63:b0:bd:21:71:70:8a:6f:13:3a:bb:79:49
# SHA1 Fingerprint: 51:c6:e7:08:49:06:6e:f3:92:d4:5c:a0:0d:6d:a3:62:8f:c3:52:39
# SHA256 Fingerprint: b0:bf:d5:2b:b0:d7:d9:bd:92:bf:5d:4d:c1:3d:a2:55:c0:2c:54:2f:37:83:65:ea:89:39:11:f5:5e:55:f2:3c
-----BEGIN CERTIFICATE-----
MIIGSzCCBDOgAwIBAgIIamg+nFGby1MwDQYJKoZIhvcNAQELBQAwgbIxCzAJBgNV
BAYTAlRSMQ8wDQYDVQQHDAZBbmthcmExQDA+BgNVBAoMN0UtVHXEn3JhIEVCRyBC
aWxpxZ9pbSBUZWtub2xvamlsZXJpIHZlIEhpem1ldGxlcmkgQS7Fni4xJjAkBgNV
BAsMHUUtVHVncmEgU2VydGlmaWthc3lvbiBNZXJrZXppMSgwJgYDVQQDDB9FLVR1
Z3JhIENlcnRpZmljYXRpb24gQXV0aG9yaXR5MB4XDTEzMDMwNTEyMDk0OFoXDTIz
MDMwMzEyMDk0OFowgbIxCzAJBgNVBAYTAlRSMQ8wDQYDVQQHDAZBbmthcmExQDA+
BgNVBAoMN0UtVHXEn3JhIEVCRyBCaWxpxZ9pbSBUZWtub2xvamlsZXJpIHZlIEhp
em1ldGxlcmkgQS7Fni4xJjAkBgNVBAsMHUUtVHVncmEgU2VydGlmaWthc3lvbiBN
ZXJrZXppMSgwJgYDVQQDDB9FLVR1Z3JhIENlcnRpZmljYXRpb24gQXV0aG9yaXR5
MIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEA4vU/kwVRHoViVF56C/UY
B4Oufq9899SKa6VjQzm5S/fDxmSJPZQuVIBSOTkHS0vdhQd2h8y/L5VMzH2nPbxH
D5hw+IyFHnSOkm0bQNGZDbt1bsipa5rAhDGvykPL6ys06I+XawGb1Q5KCKpbknSF
Q9OArqGIW66z6l7LFpp3RMih9lRozt6Plyu6W0ACDGQXwLWTzeHxE2bODHnv0ZEo
q1+gElIwcxmOj+GMB6LDu0rw6h8VqO4lzKRG+Bsi77MOQ7osJLjFLFzUHPhdZL3D
k14opz8n8Y4e0ypQBaNV2cvnOVPAmJ6MVGKLJrD3fY185MaeZkJVgkfnsliNZvcH
fC425lAcP9tDJMW/hkd5s3kc91r0E+xs+D/iWR+V7kI+ua2oMoVJl0b+SzGPWsut
dEcf6ZG33ygEIqDUD13ieU/qbIWGvaimzuT6w+Gzrt48Ue7LE3wBf4QOXVGUnhMM
ti6lTPk5cDZvlsouDERVxcr6XQKj39ZkjFqzAQqptQpHF//vkUAqjqFGOjGY5RH8
zLtJVor8udBhmm9lbObDyz51Sf6Pp+KJxWfXnUYTTjF2OySznhFlhqt/7x3U+Lzn
rFpct1pHXFXOVbQicVtbC/DP3KBhZOqp12gKY6fgDT+gr9Oq0n7vUaDmUStVkhUX
U8u3Zg5mTPj5dUyQ5xJwx0UCAwEAAaNjMGEwHQYDVR0OBBYEFC7j27JJ0JxUeVz6
Jyr+zE7S6E5UMA8GA1UdEwEB/wQFMAMBAf8wHwYDVR0jBBgwFoAULuPbsknQnFR5
XPonKv7MTtLoTlQwDgYDVR0PAQH/BAQDAgEGMA0GCSqGSIb3DQEBCwUAA4ICAQAF
Nzr0TbdF4kV1JI+2d1LoHNgQk2Xz8lkGpD4eKexd0dCrfOAKkEh47U6YA5n+KGCR
HTAduGN8qOY1tfrTYXbm1gdLymmasoR6d5NFFxWfJNCYExL/u6Au/U5Mh/jOXKqY
GwXgAEZKgoClM4so3O0409/lPun++1ndYYRP0lSWE2ETPo+Aab6TR7U1Q9Jauz1c
77NCR807VRMGsAnb/WP2OogKmW9+4c4bU2pEZiNRCHu8W1Ki/QY3OEBhj0qWuJA3
+GbHeJAAFS6LrVE1Uweoa2iu+U48BybNCAVwzDk/dr2l02cmAYamU9JgO3xDf1WK
vJUawSg5TB9D0pH0clmKuVb8P7Sd2nCcdlqMQ1DujjByTd//SffGqWfZbawCEeI6
FiWnWAjLb1NBnEg4R2gz0dfHj9R0IdTDBZB6/86WiLEVKV0jq9BgoRJP3vQXzTLl
yb/IQ639Lo7xr+L0mPoSHyDYwKcMhcWQ9DstliaxLL5Mq+ux0orJ23gTDx4JnW2P
AJ8C2sH6H3p6CcRK5ogql5+Ji/03X186zjhZhkuvcQu02PJwT58yE+Owp1fl2tpD
y4Q08ijE6m30Ku/Ba3ba+367hTzSU8JNvnHhRdH9I2cNE3X7z2VnIp2usAnRCf8d
NL/+I5c30jn6PQ0GC7TbO6Orb1wdtn7os4I07QZcJA==
-----END CERTIFICATE-----
# Issuer: CN=T-TeleSec GlobalRoot Class 2 O=T-Systems Enterprise Services GmbH OU=T-Systems Trust Center # Issuer: CN=T-TeleSec GlobalRoot Class 2 O=T-Systems Enterprise Services GmbH OU=T-Systems Trust Center
# Subject: CN=T-TeleSec GlobalRoot Class 2 O=T-Systems Enterprise Services GmbH OU=T-Systems Trust Center # Subject: CN=T-TeleSec GlobalRoot Class 2 O=T-Systems Enterprise Services GmbH OU=T-Systems Trust Center
# Label: "T-TeleSec GlobalRoot Class 2" # Label: "T-TeleSec GlobalRoot Class 2"
@ -4397,73 +4325,6 @@ ut6Dacpps6kFtZaSF4fC0urQe87YQVt8rgIwRt7qy12a7DLCZRawTDBcMPPaTnOG
BtjOiQRINzf43TNRnXCve1XYAS59BWQOhriR BtjOiQRINzf43TNRnXCve1XYAS59BWQOhriR
-----END CERTIFICATE----- -----END CERTIFICATE-----
# Issuer: CN=E-Tugra Global Root CA RSA v3 O=E-Tugra EBG A.S. OU=E-Tugra Trust Center
# Subject: CN=E-Tugra Global Root CA RSA v3 O=E-Tugra EBG A.S. OU=E-Tugra Trust Center
# Label: "E-Tugra Global Root CA RSA v3"
# Serial: 75951268308633135324246244059508261641472512052
# MD5 Fingerprint: 22:be:10:f6:c2:f8:03:88:73:5f:33:29:47:28:47:a4
# SHA1 Fingerprint: e9:a8:5d:22:14:52:1c:5b:aa:0a:b4:be:24:6a:23:8a:c9:ba:e2:a9
# SHA256 Fingerprint: ef:66:b0:b1:0a:3c:db:9f:2e:36:48:c7:6b:d2:af:18:ea:d2:bf:e6:f1:17:65:5e:28:c4:06:0d:a1:a3:f4:c2
-----BEGIN CERTIFICATE-----
MIIF8zCCA9ugAwIBAgIUDU3FzRYilZYIfrgLfxUGNPt5EDQwDQYJKoZIhvcNAQEL
BQAwgYAxCzAJBgNVBAYTAlRSMQ8wDQYDVQQHEwZBbmthcmExGTAXBgNVBAoTEEUt
VHVncmEgRUJHIEEuUy4xHTAbBgNVBAsTFEUtVHVncmEgVHJ1c3QgQ2VudGVyMSYw
JAYDVQQDEx1FLVR1Z3JhIEdsb2JhbCBSb290IENBIFJTQSB2MzAeFw0yMDAzMTgw
OTA3MTdaFw00NTAzMTIwOTA3MTdaMIGAMQswCQYDVQQGEwJUUjEPMA0GA1UEBxMG
QW5rYXJhMRkwFwYDVQQKExBFLVR1Z3JhIEVCRyBBLlMuMR0wGwYDVQQLExRFLVR1
Z3JhIFRydXN0IENlbnRlcjEmMCQGA1UEAxMdRS1UdWdyYSBHbG9iYWwgUm9vdCBD
QSBSU0EgdjMwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQCiZvCJt3J7
7gnJY9LTQ91ew6aEOErxjYG7FL1H6EAX8z3DeEVypi6Q3po61CBxyryfHUuXCscx
uj7X/iWpKo429NEvx7epXTPcMHD4QGxLsqYxYdE0PD0xesevxKenhOGXpOhL9hd8
7jwH7eKKV9y2+/hDJVDqJ4GohryPUkqWOmAalrv9c/SF/YP9f4RtNGx/ardLAQO/
rWm31zLZ9Vdq6YaCPqVmMbMWPcLzJmAy01IesGykNz709a/r4d+ABs8qQedmCeFL
l+d3vSFtKbZnwy1+7dZ5ZdHPOrbRsV5WYVB6Ws5OUDGAA5hH5+QYfERaxqSzO8bG
wzrwbMOLyKSRBfP12baqBqG3q+Sx6iEUXIOk/P+2UNOMEiaZdnDpwA+mdPy70Bt4
znKS4iicvObpCdg604nmvi533wEKb5b25Y08TVJ2Glbhc34XrD2tbKNSEhhw5oBO
M/J+JjKsBY04pOZ2PJ8QaQ5tndLBeSBrW88zjdGUdjXnXVXHt6woq0bM5zshtQoK
5EpZ3IE1S0SVEgpnpaH/WwAH0sDM+T/8nzPyAPiMbIedBi3x7+PmBvrFZhNb/FAH
nnGGstpvdDDPk1Po3CLW3iAfYY2jLqN4MpBs3KwytQXk9TwzDdbgh3cXTJ2w2Amo
DVf3RIXwyAS+XF1a4xeOVGNpf0l0ZAWMowIDAQABo2MwYTAPBgNVHRMBAf8EBTAD
AQH/MB8GA1UdIwQYMBaAFLK0ruYt9ybVqnUtdkvAG1Mh0EjvMB0GA1UdDgQWBBSy
tK7mLfcm1ap1LXZLwBtTIdBI7zAOBgNVHQ8BAf8EBAMCAQYwDQYJKoZIhvcNAQEL
BQADggIBAImocn+M684uGMQQgC0QDP/7FM0E4BQ8Tpr7nym/Ip5XuYJzEmMmtcyQ
6dIqKe6cLcwsmb5FJ+Sxce3kOJUxQfJ9emN438o2Fi+CiJ+8EUdPdk3ILY7r3y18
Tjvarvbj2l0Upq7ohUSdBm6O++96SmotKygY/r+QLHUWnw/qln0F7psTpURs+APQ
3SPh/QMSEgj0GDSz4DcLdxEBSL9htLX4GdnLTeqjjO/98Aa1bZL0SmFQhO3sSdPk
vmjmLuMxC1QLGpLWgti2omU8ZgT5Vdps+9u1FGZNlIM7zR6mK7L+d0CGq+ffCsn9
9t2HVhjYsCxVYJb6CH5SkPVLpi6HfMsg2wY+oF0Dd32iPBMbKaITVaA9FCKvb7jQ
mhty3QUBjYZgv6Rn7rWlDdF/5horYmbDB7rnoEgcOMPpRfunf/ztAmgayncSd6YA
VSgU7NbHEqIbZULpkejLPoeJVF3Zr52XnGnnCv8PWniLYypMfUeUP95L6VPQMPHF
9p5J3zugkaOj/s1YzOrfr28oO6Bpm4/srK4rVJ2bBLFHIK+WEj5jlB0E5y67hscM
moi/dkfv97ALl2bSRM9gUgfh1SxKOidhd8rXj+eHDjD/DLsE4mHDosiXYY60MGo8
bcIHX0pzLz/5FooBZu+6kcpSV3uu1OYP3Qt6f4ueJiDPO++BcYNZ
-----END CERTIFICATE-----
# Issuer: CN=E-Tugra Global Root CA ECC v3 O=E-Tugra EBG A.S. OU=E-Tugra Trust Center
# Subject: CN=E-Tugra Global Root CA ECC v3 O=E-Tugra EBG A.S. OU=E-Tugra Trust Center
# Label: "E-Tugra Global Root CA ECC v3"
# Serial: 218504919822255052842371958738296604628416471745
# MD5 Fingerprint: 46:bc:81:bb:f1:b5:1e:f7:4b:96:bc:14:e2:e7:27:64
# SHA1 Fingerprint: 8a:2f:af:57:53:b1:b0:e6:a1:04:ec:5b:6a:69:71:6d:f6:1c:e2:84
# SHA256 Fingerprint: 87:3f:46:85:fa:7f:56:36:25:25:2e:6d:36:bc:d7:f1:6f:c2:49:51:f2:64:e4:7e:1b:95:4f:49:08:cd:ca:13
-----BEGIN CERTIFICATE-----
MIICpTCCAiqgAwIBAgIUJkYZdzHhT28oNt45UYbm1JeIIsEwCgYIKoZIzj0EAwMw
gYAxCzAJBgNVBAYTAlRSMQ8wDQYDVQQHEwZBbmthcmExGTAXBgNVBAoTEEUtVHVn
cmEgRUJHIEEuUy4xHTAbBgNVBAsTFEUtVHVncmEgVHJ1c3QgQ2VudGVyMSYwJAYD
VQQDEx1FLVR1Z3JhIEdsb2JhbCBSb290IENBIEVDQyB2MzAeFw0yMDAzMTgwOTQ2
NThaFw00NTAzMTIwOTQ2NThaMIGAMQswCQYDVQQGEwJUUjEPMA0GA1UEBxMGQW5r
YXJhMRkwFwYDVQQKExBFLVR1Z3JhIEVCRyBBLlMuMR0wGwYDVQQLExRFLVR1Z3Jh
IFRydXN0IENlbnRlcjEmMCQGA1UEAxMdRS1UdWdyYSBHbG9iYWwgUm9vdCBDQSBF
Q0MgdjMwdjAQBgcqhkjOPQIBBgUrgQQAIgNiAASOmCm/xxAeJ9urA8woLNheSBkQ
KczLWYHMjLiSF4mDKpL2w6QdTGLVn9agRtwcvHbB40fQWxPa56WzZkjnIZpKT4YK
fWzqTTKACrJ6CZtpS5iB4i7sAnCWH/31Rs7K3IKjYzBhMA8GA1UdEwEB/wQFMAMB
Af8wHwYDVR0jBBgwFoAU/4Ixcj75xGZsrTie0bBRiKWQzPUwHQYDVR0OBBYEFP+C
MXI++cRmbK04ntGwUYilkMz1MA4GA1UdDwEB/wQEAwIBBjAKBggqhkjOPQQDAwNp
ADBmAjEA5gVYaWHlLcoNy/EZCL3W/VGSGn5jVASQkZo1kTmZ+gepZpO6yGjUij/6
7W4WAie3AjEA3VoXK3YdZUKWpqxdinlW2Iob35reX8dQj7FbcQwm32pAAOwzkSFx
vmjkI6TZraE3
-----END CERTIFICATE-----
# Issuer: CN=Security Communication RootCA3 O=SECOM Trust Systems CO.,LTD. # Issuer: CN=Security Communication RootCA3 O=SECOM Trust Systems CO.,LTD.
# Subject: CN=Security Communication RootCA3 O=SECOM Trust Systems CO.,LTD. # Subject: CN=Security Communication RootCA3 O=SECOM Trust Systems CO.,LTD.
# Label: "Security Communication RootCA3" # Label: "Security Communication RootCA3"
@ -4525,3 +4386,250 @@ BAMCAQYwDwYDVR0TAQH/BAUwAwEB/zAKBggqhkjOPQQDAwNoADBlAjAVXUI9/Lbu
9zuxNuie9sRGKEkz0FhDKmMpzE2xtHqiuQ04pV1IKv3LsnNdo4gIxwwCMQDAqy0O 9zuxNuie9sRGKEkz0FhDKmMpzE2xtHqiuQ04pV1IKv3LsnNdo4gIxwwCMQDAqy0O
be0YottT6SXbVQjgUMzfRGEWgqtJsLKB7HOHeLRMsmIbEvoWTSVLY70eN9k= be0YottT6SXbVQjgUMzfRGEWgqtJsLKB7HOHeLRMsmIbEvoWTSVLY70eN9k=
-----END CERTIFICATE----- -----END CERTIFICATE-----
# Issuer: CN=BJCA Global Root CA1 O=BEIJING CERTIFICATE AUTHORITY
# Subject: CN=BJCA Global Root CA1 O=BEIJING CERTIFICATE AUTHORITY
# Label: "BJCA Global Root CA1"
# Serial: 113562791157148395269083148143378328608
# MD5 Fingerprint: 42:32:99:76:43:33:36:24:35:07:82:9b:28:f9:d0:90
# SHA1 Fingerprint: d5:ec:8d:7b:4c:ba:79:f4:e7:e8:cb:9d:6b:ae:77:83:10:03:21:6a
# SHA256 Fingerprint: f3:89:6f:88:fe:7c:0a:88:27:66:a7:fa:6a:d2:74:9f:b5:7a:7f:3e:98:fb:76:9c:1f:a7:b0:9c:2c:44:d5:ae
-----BEGIN CERTIFICATE-----
MIIFdDCCA1ygAwIBAgIQVW9l47TZkGobCdFsPsBsIDANBgkqhkiG9w0BAQsFADBU
MQswCQYDVQQGEwJDTjEmMCQGA1UECgwdQkVJSklORyBDRVJUSUZJQ0FURSBBVVRI
T1JJVFkxHTAbBgNVBAMMFEJKQ0EgR2xvYmFsIFJvb3QgQ0ExMB4XDTE5MTIxOTAz
MTYxN1oXDTQ0MTIxMjAzMTYxN1owVDELMAkGA1UEBhMCQ04xJjAkBgNVBAoMHUJF
SUpJTkcgQ0VSVElGSUNBVEUgQVVUSE9SSVRZMR0wGwYDVQQDDBRCSkNBIEdsb2Jh
bCBSb290IENBMTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAPFmCL3Z
xRVhy4QEQaVpN3cdwbB7+sN3SJATcmTRuHyQNZ0YeYjjlwE8R4HyDqKYDZ4/N+AZ
spDyRhySsTphzvq3Rp4Dhtczbu33RYx2N95ulpH3134rhxfVizXuhJFyV9xgw8O5
58dnJCNPYwpj9mZ9S1WnP3hkSWkSl+BMDdMJoDIwOvqfwPKcxRIqLhy1BDPapDgR
at7GGPZHOiJBhyL8xIkoVNiMpTAK+BcWyqw3/XmnkRd4OJmtWO2y3syJfQOcs4ll
5+M7sSKGjwZteAf9kRJ/sGsciQ35uMt0WwfCyPQ10WRjeulumijWML3mG90Vr4Tq
nMfK9Q7q8l0ph49pczm+LiRvRSGsxdRpJQaDrXpIhRMsDQa4bHlW/KNnMoH1V6XK
V0Jp6VwkYe/iMBhORJhVb3rCk9gZtt58R4oRTklH2yiUAguUSiz5EtBP6DF+bHq/
pj+bOT0CFqMYs2esWz8sgytnOYFcuX6U1WTdno9uruh8W7TXakdI136z1C2OVnZO
z2nxbkRs1CTqjSShGL+9V/6pmTW12xB3uD1IutbB5/EjPtffhZ0nPNRAvQoMvfXn
jSXWgXSHRtQpdaJCbPdzied9v3pKH9MiyRVVz99vfFXQpIsHETdfg6YmV6YBW37+
WGgHqel62bno/1Afq8K0wM7o6v0PvY1NuLxxAgMBAAGjQjBAMB0GA1UdDgQWBBTF
7+3M2I0hxkjk49cULqcWk+WYATAPBgNVHRMBAf8EBTADAQH/MA4GA1UdDwEB/wQE
AwIBBjANBgkqhkiG9w0BAQsFAAOCAgEAUoKsITQfI/Ki2Pm4rzc2IInRNwPWaZ+4
YRC6ojGYWUfo0Q0lHhVBDOAqVdVXUsv45Mdpox1NcQJeXyFFYEhcCY5JEMEE3Kli
awLwQ8hOnThJdMkycFRtwUf8jrQ2ntScvd0g1lPJGKm1Vrl2i5VnZu69mP6u775u
+2D2/VnGKhs/I0qUJDAnyIm860Qkmss9vk/Ves6OF8tiwdneHg56/0OGNFK8YT88
X7vZdrRTvJez/opMEi4r89fO4aL/3Xtw+zuhTaRjAv04l5U/BXCga99igUOLtFkN
SoxUnMW7gZ/NfaXvCyUeOiDbHPwfmGcCCtRzRBPbUYQaVQNW4AB+dAb/OMRyHdOo
P2gxXdMJxy6MW2Pg6Nwe0uxhHvLe5e/2mXZgLR6UcnHGCyoyx5JO1UbXHfmpGQrI
+pXObSOYqgs4rZpWDW+N8TEAiMEXnM0ZNjX+VVOg4DwzX5Ze4jLp3zO7Bkqp2IRz
znfSxqxx4VyjHQy7Ct9f4qNx2No3WqB4K/TUfet27fJhcKVlmtOJNBir+3I+17Q9
eVzYH6Eze9mCUAyTF6ps3MKCuwJXNq+YJyo5UOGwifUll35HaBC07HPKs5fRJNz2
YqAo07WjuGS3iGJCz51TzZm+ZGiPTx4SSPfSKcOYKMryMguTjClPPGAyzQWWYezy
r/6zcCwupvI=
-----END CERTIFICATE-----
# Issuer: CN=BJCA Global Root CA2 O=BEIJING CERTIFICATE AUTHORITY
# Subject: CN=BJCA Global Root CA2 O=BEIJING CERTIFICATE AUTHORITY
# Label: "BJCA Global Root CA2"
# Serial: 58605626836079930195615843123109055211
# MD5 Fingerprint: 5e:0a:f6:47:5f:a6:14:e8:11:01:95:3f:4d:01:eb:3c
# SHA1 Fingerprint: f4:27:86:eb:6e:b8:6d:88:31:67:02:fb:ba:66:a4:53:00:aa:7a:a6
# SHA256 Fingerprint: 57:4d:f6:93:1e:27:80:39:66:7b:72:0a:fd:c1:60:0f:c2:7e:b6:6d:d3:09:29:79:fb:73:85:64:87:21:28:82
-----BEGIN CERTIFICATE-----
MIICJTCCAaugAwIBAgIQLBcIfWQqwP6FGFkGz7RK6zAKBggqhkjOPQQDAzBUMQsw
CQYDVQQGEwJDTjEmMCQGA1UECgwdQkVJSklORyBDRVJUSUZJQ0FURSBBVVRIT1JJ
VFkxHTAbBgNVBAMMFEJKQ0EgR2xvYmFsIFJvb3QgQ0EyMB4XDTE5MTIxOTAzMTgy
MVoXDTQ0MTIxMjAzMTgyMVowVDELMAkGA1UEBhMCQ04xJjAkBgNVBAoMHUJFSUpJ
TkcgQ0VSVElGSUNBVEUgQVVUSE9SSVRZMR0wGwYDVQQDDBRCSkNBIEdsb2JhbCBS
b290IENBMjB2MBAGByqGSM49AgEGBSuBBAAiA2IABJ3LgJGNU2e1uVCxA/jlSR9B
IgmwUVJY1is0j8USRhTFiy8shP8sbqjV8QnjAyEUxEM9fMEsxEtqSs3ph+B99iK+
+kpRuDCK/eHeGBIK9ke35xe/J4rUQUyWPGCWwf0VHKNCMEAwHQYDVR0OBBYEFNJK
sVF/BvDRgh9Obl+rg/xI1LCRMA8GA1UdEwEB/wQFMAMBAf8wDgYDVR0PAQH/BAQD
AgEGMAoGCCqGSM49BAMDA2gAMGUCMBq8W9f+qdJUDkpd0m2xQNz0Q9XSSpkZElaA
94M04TVOSG0ED1cxMDAtsaqdAzjbBgIxAMvMh1PLet8gUXOQwKhbYdDFUDn9hf7B
43j4ptZLvZuHjw/l1lOWqzzIQNph91Oj9w==
-----END CERTIFICATE-----
# Issuer: CN=Sectigo Public Server Authentication Root E46 O=Sectigo Limited
# Subject: CN=Sectigo Public Server Authentication Root E46 O=Sectigo Limited
# Label: "Sectigo Public Server Authentication Root E46"
# Serial: 88989738453351742415770396670917916916
# MD5 Fingerprint: 28:23:f8:b2:98:5c:37:16:3b:3e:46:13:4e:b0:b3:01
# SHA1 Fingerprint: ec:8a:39:6c:40:f0:2e:bc:42:75:d4:9f:ab:1c:1a:5b:67:be:d2:9a
# SHA256 Fingerprint: c9:0f:26:f0:fb:1b:40:18:b2:22:27:51:9b:5c:a2:b5:3e:2c:a5:b3:be:5c:f1:8e:fe:1b:ef:47:38:0c:53:83
-----BEGIN CERTIFICATE-----
MIICOjCCAcGgAwIBAgIQQvLM2htpN0RfFf51KBC49DAKBggqhkjOPQQDAzBfMQsw
CQYDVQQGEwJHQjEYMBYGA1UEChMPU2VjdGlnbyBMaW1pdGVkMTYwNAYDVQQDEy1T
ZWN0aWdvIFB1YmxpYyBTZXJ2ZXIgQXV0aGVudGljYXRpb24gUm9vdCBFNDYwHhcN
MjEwMzIyMDAwMDAwWhcNNDYwMzIxMjM1OTU5WjBfMQswCQYDVQQGEwJHQjEYMBYG
A1UEChMPU2VjdGlnbyBMaW1pdGVkMTYwNAYDVQQDEy1TZWN0aWdvIFB1YmxpYyBT
ZXJ2ZXIgQXV0aGVudGljYXRpb24gUm9vdCBFNDYwdjAQBgcqhkjOPQIBBgUrgQQA
IgNiAAR2+pmpbiDt+dd34wc7qNs9Xzjoq1WmVk/WSOrsfy2qw7LFeeyZYX8QeccC
WvkEN/U0NSt3zn8gj1KjAIns1aeibVvjS5KToID1AZTc8GgHHs3u/iVStSBDHBv+
6xnOQ6OjQjBAMB0GA1UdDgQWBBTRItpMWfFLXyY4qp3W7usNw/upYTAOBgNVHQ8B
Af8EBAMCAYYwDwYDVR0TAQH/BAUwAwEB/zAKBggqhkjOPQQDAwNnADBkAjAn7qRa
qCG76UeXlImldCBteU/IvZNeWBj7LRoAasm4PdCkT0RHlAFWovgzJQxC36oCMB3q
4S6ILuH5px0CMk7yn2xVdOOurvulGu7t0vzCAxHrRVxgED1cf5kDW21USAGKcw==
-----END CERTIFICATE-----
# Issuer: CN=Sectigo Public Server Authentication Root R46 O=Sectigo Limited
# Subject: CN=Sectigo Public Server Authentication Root R46 O=Sectigo Limited
# Label: "Sectigo Public Server Authentication Root R46"
# Serial: 156256931880233212765902055439220583700
# MD5 Fingerprint: 32:10:09:52:00:d5:7e:6c:43:df:15:c0:b1:16:93:e5
# SHA1 Fingerprint: ad:98:f9:f3:e4:7d:75:3b:65:d4:82:b3:a4:52:17:bb:6e:f5:e4:38
# SHA256 Fingerprint: 7b:b6:47:a6:2a:ee:ac:88:bf:25:7a:a5:22:d0:1f:fe:a3:95:e0:ab:45:c7:3f:93:f6:56:54:ec:38:f2:5a:06
-----BEGIN CERTIFICATE-----
MIIFijCCA3KgAwIBAgIQdY39i658BwD6qSWn4cetFDANBgkqhkiG9w0BAQwFADBf
MQswCQYDVQQGEwJHQjEYMBYGA1UEChMPU2VjdGlnbyBMaW1pdGVkMTYwNAYDVQQD
Ey1TZWN0aWdvIFB1YmxpYyBTZXJ2ZXIgQXV0aGVudGljYXRpb24gUm9vdCBSNDYw
HhcNMjEwMzIyMDAwMDAwWhcNNDYwMzIxMjM1OTU5WjBfMQswCQYDVQQGEwJHQjEY
MBYGA1UEChMPU2VjdGlnbyBMaW1pdGVkMTYwNAYDVQQDEy1TZWN0aWdvIFB1Ymxp
YyBTZXJ2ZXIgQXV0aGVudGljYXRpb24gUm9vdCBSNDYwggIiMA0GCSqGSIb3DQEB
AQUAA4ICDwAwggIKAoICAQCTvtU2UnXYASOgHEdCSe5jtrch/cSV1UgrJnwUUxDa
ef0rty2k1Cz66jLdScK5vQ9IPXtamFSvnl0xdE8H/FAh3aTPaE8bEmNtJZlMKpnz
SDBh+oF8HqcIStw+KxwfGExxqjWMrfhu6DtK2eWUAtaJhBOqbchPM8xQljeSM9xf
iOefVNlI8JhD1mb9nxc4Q8UBUQvX4yMPFF1bFOdLvt30yNoDN9HWOaEhUTCDsG3X
ME6WW5HwcCSrv0WBZEMNvSE6Lzzpng3LILVCJ8zab5vuZDCQOc2TZYEhMbUjUDM3
IuM47fgxMMxF/mL50V0yeUKH32rMVhlATc6qu/m1dkmU8Sf4kaWD5QazYw6A3OAS
VYCmO2a0OYctyPDQ0RTp5A1NDvZdV3LFOxxHVp3i1fuBYYzMTYCQNFu31xR13NgE
SJ/AwSiItOkcyqex8Va3e0lMWeUgFaiEAin6OJRpmkkGj80feRQXEgyDet4fsZfu
+Zd4KKTIRJLpfSYFplhym3kT2BFfrsU4YjRosoYwjviQYZ4ybPUHNs2iTG7sijbt
8uaZFURww3y8nDnAtOFr94MlI1fZEoDlSfB1D++N6xybVCi0ITz8fAr/73trdf+L
HaAZBav6+CuBQug4urv7qv094PPK306Xlynt8xhW6aWWrL3DkJiy4Pmi1KZHQ3xt
zwIDAQABo0IwQDAdBgNVHQ4EFgQUVnNYZJX5khqwEioEYnmhQBWIIUkwDgYDVR0P
AQH/BAQDAgGGMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQEMBQADggIBAC9c
mTz8Bl6MlC5w6tIyMY208FHVvArzZJ8HXtXBc2hkeqK5Duj5XYUtqDdFqij0lgVQ
YKlJfp/imTYpE0RHap1VIDzYm/EDMrraQKFz6oOht0SmDpkBm+S8f74TlH7Kph52
gDY9hAaLMyZlbcp+nv4fjFg4exqDsQ+8FxG75gbMY/qB8oFM2gsQa6H61SilzwZA
Fv97fRheORKkU55+MkIQpiGRqRxOF3yEvJ+M0ejf5lG5Nkc/kLnHvALcWxxPDkjB
JYOcCj+esQMzEhonrPcibCTRAUH4WAP+JWgiH5paPHxsnnVI84HxZmduTILA7rpX
DhjvLpr3Etiga+kFpaHpaPi8TD8SHkXoUsCjvxInebnMMTzD9joiFgOgyY9mpFui
TdaBJQbpdqQACj7LzTWb4OE4y2BThihCQRxEV+ioratF4yUQvNs+ZUH7G6aXD+u5
dHn5HrwdVw1Hr8Mvn4dGp+smWg9WY7ViYG4A++MnESLn/pmPNPW56MORcr3Ywx65
LvKRRFHQV80MNNVIIb/bE/FmJUNS0nAiNs2fxBx1IK1jcmMGDw4nztJqDby1ORrp
0XZ60Vzk50lJLVU3aPAaOpg+VBeHVOmmJ1CJeyAvP/+/oYtKR5j/K3tJPsMpRmAY
QqszKbrAKbkTidOIijlBO8n9pu0f9GBj39ItVQGL
-----END CERTIFICATE-----
# Issuer: CN=SSL.com TLS RSA Root CA 2022 O=SSL Corporation
# Subject: CN=SSL.com TLS RSA Root CA 2022 O=SSL Corporation
# Label: "SSL.com TLS RSA Root CA 2022"
# Serial: 148535279242832292258835760425842727825
# MD5 Fingerprint: d8:4e:c6:59:30:d8:fe:a0:d6:7a:5a:2c:2c:69:78:da
# SHA1 Fingerprint: ec:2c:83:40:72:af:26:95:10:ff:0e:f2:03:ee:31:70:f6:78:9d:ca
# SHA256 Fingerprint: 8f:af:7d:2e:2c:b4:70:9b:b8:e0:b3:36:66:bf:75:a5:dd:45:b5:de:48:0f:8e:a8:d4:bf:e6:be:bc:17:f2:ed
-----BEGIN CERTIFICATE-----
MIIFiTCCA3GgAwIBAgIQb77arXO9CEDii02+1PdbkTANBgkqhkiG9w0BAQsFADBO
MQswCQYDVQQGEwJVUzEYMBYGA1UECgwPU1NMIENvcnBvcmF0aW9uMSUwIwYDVQQD
DBxTU0wuY29tIFRMUyBSU0EgUm9vdCBDQSAyMDIyMB4XDTIyMDgyNTE2MzQyMloX
DTQ2MDgxOTE2MzQyMVowTjELMAkGA1UEBhMCVVMxGDAWBgNVBAoMD1NTTCBDb3Jw
b3JhdGlvbjElMCMGA1UEAwwcU1NMLmNvbSBUTFMgUlNBIFJvb3QgQ0EgMjAyMjCC
AiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBANCkCXJPQIgSYT41I57u9nTP
L3tYPc48DRAokC+X94xI2KDYJbFMsBFMF3NQ0CJKY7uB0ylu1bUJPiYYf7ISf5OY
t6/wNr/y7hienDtSxUcZXXTzZGbVXcdotL8bHAajvI9AI7YexoS9UcQbOcGV0ins
S657Lb85/bRi3pZ7QcacoOAGcvvwB5cJOYF0r/c0WRFXCsJbwST0MXMwgsadugL3
PnxEX4MN8/HdIGkWCVDi1FW24IBydm5MR7d1VVm0U3TZlMZBrViKMWYPHqIbKUBO
L9975hYsLfy/7PO0+r4Y9ptJ1O4Fbtk085zx7AGL0SDGD6C1vBdOSHtRwvzpXGk3
R2azaPgVKPC506QVzFpPulJwoxJF3ca6TvvC0PeoUidtbnm1jPx7jMEWTO6Af77w
dr5BUxIzrlo4QqvXDz5BjXYHMtWrifZOZ9mxQnUjbvPNQrL8VfVThxc7wDNY8VLS
+YCk8OjwO4s4zKTGkH8PnP2L0aPP2oOnaclQNtVcBdIKQXTbYxE3waWglksejBYS
d66UNHsef8JmAOSqg+qKkK3ONkRN0VHpvB/zagX9wHQfJRlAUW7qglFA35u5CCoG
AtUjHBPW6dvbxrB6y3snm/vg1UYk7RBLY0ulBY+6uB0rpvqR4pJSvezrZ5dtmi2f
gTIFZzL7SAg/2SW4BCUvAgMBAAGjYzBhMA8GA1UdEwEB/wQFMAMBAf8wHwYDVR0j
BBgwFoAU+y437uOEeicuzRk1sTN8/9REQrkwHQYDVR0OBBYEFPsuN+7jhHonLs0Z
NbEzfP/UREK5MA4GA1UdDwEB/wQEAwIBhjANBgkqhkiG9w0BAQsFAAOCAgEAjYlt
hEUY8U+zoO9opMAdrDC8Z2awms22qyIZZtM7QbUQnRC6cm4pJCAcAZli05bg4vsM
QtfhWsSWTVTNj8pDU/0quOr4ZcoBwq1gaAafORpR2eCNJvkLTqVTJXojpBzOCBvf
R4iyrT7gJ4eLSYwfqUdYe5byiB0YrrPRpgqU+tvT5TgKa3kSM/tKWTcWQA673vWJ
DPFs0/dRa1419dvAJuoSc06pkZCmF8NsLzjUo3KUQyxi4U5cMj29TH0ZR6LDSeeW
P4+a0zvkEdiLA9z2tmBVGKaBUfPhqBVq6+AL8BQx1rmMRTqoENjwuSfr98t67wVy
lrXEj5ZzxOhWc5y8aVFjvO9nHEMaX3cZHxj4HCUp+UmZKbaSPaKDN7EgkaibMOlq
bLQjk2UEqxHzDh1TJElTHaE/nUiSEeJ9DU/1172iWD54nR4fK/4huxoTtrEoZP2w
AgDHbICivRZQIA9ygV/MlP+7mea6kMvq+cYMwq7FGc4zoWtcu358NFcXrfA/rs3q
r5nsLFR+jM4uElZI7xc7P0peYNLcdDa8pUNjyw9bowJWCZ4kLOGGgYz+qxcs+sji
Mho6/4UIyYOf8kpIEFR3N+2ivEC+5BB09+Rbu7nzifmPQdjH5FCQNYA+HLhNkNPU
98OwoX6EyneSMSy4kLGCenROmxMmtNVQZlR4rmA=
-----END CERTIFICATE-----
# Issuer: CN=SSL.com TLS ECC Root CA 2022 O=SSL Corporation
# Subject: CN=SSL.com TLS ECC Root CA 2022 O=SSL Corporation
# Label: "SSL.com TLS ECC Root CA 2022"
# Serial: 26605119622390491762507526719404364228
# MD5 Fingerprint: 99:d7:5c:f1:51:36:cc:e9:ce:d9:19:2e:77:71:56:c5
# SHA1 Fingerprint: 9f:5f:d9:1a:54:6d:f5:0c:71:f0:ee:7a:bd:17:49:98:84:73:e2:39
# SHA256 Fingerprint: c3:2f:fd:9f:46:f9:36:d1:6c:36:73:99:09:59:43:4b:9a:d6:0a:af:bb:9e:7c:f3:36:54:f1:44:cc:1b:a1:43
-----BEGIN CERTIFICATE-----
MIICOjCCAcCgAwIBAgIQFAP1q/s3ixdAW+JDsqXRxDAKBggqhkjOPQQDAzBOMQsw
CQYDVQQGEwJVUzEYMBYGA1UECgwPU1NMIENvcnBvcmF0aW9uMSUwIwYDVQQDDBxT
U0wuY29tIFRMUyBFQ0MgUm9vdCBDQSAyMDIyMB4XDTIyMDgyNTE2MzM0OFoXDTQ2
MDgxOTE2MzM0N1owTjELMAkGA1UEBhMCVVMxGDAWBgNVBAoMD1NTTCBDb3Jwb3Jh
dGlvbjElMCMGA1UEAwwcU1NMLmNvbSBUTFMgRUNDIFJvb3QgQ0EgMjAyMjB2MBAG
ByqGSM49AgEGBSuBBAAiA2IABEUpNXP6wrgjzhR9qLFNoFs27iosU8NgCTWyJGYm
acCzldZdkkAZDsalE3D07xJRKF3nzL35PIXBz5SQySvOkkJYWWf9lCcQZIxPBLFN
SeR7T5v15wj4A4j3p8OSSxlUgaNjMGEwDwYDVR0TAQH/BAUwAwEB/zAfBgNVHSME
GDAWgBSJjy+j6CugFFR781a4Jl9nOAuc0DAdBgNVHQ4EFgQUiY8vo+groBRUe/NW
uCZfZzgLnNAwDgYDVR0PAQH/BAQDAgGGMAoGCCqGSM49BAMDA2gAMGUCMFXjIlbp
15IkWE8elDIPDAI2wv2sdDJO4fscgIijzPvX6yv/N33w7deedWo1dlJF4AIxAMeN
b0Igj762TVntd00pxCAgRWSGOlDGxK0tk/UYfXLtqc/ErFc2KAhl3zx5Zn6g6g==
-----END CERTIFICATE-----
# Issuer: CN=Atos TrustedRoot Root CA ECC TLS 2021 O=Atos
# Subject: CN=Atos TrustedRoot Root CA ECC TLS 2021 O=Atos
# Label: "Atos TrustedRoot Root CA ECC TLS 2021"
# Serial: 81873346711060652204712539181482831616
# MD5 Fingerprint: 16:9f:ad:f1:70:ad:79:d6:ed:29:b4:d1:c5:79:70:a8
# SHA1 Fingerprint: 9e:bc:75:10:42:b3:02:f3:81:f4:f7:30:62:d4:8f:c3:a7:51:b2:dd
# SHA256 Fingerprint: b2:fa:e5:3e:14:cc:d7:ab:92:12:06:47:01:ae:27:9c:1d:89:88:fa:cb:77:5f:a8:a0:08:91:4e:66:39:88:a8
-----BEGIN CERTIFICATE-----
MIICFTCCAZugAwIBAgIQPZg7pmY9kGP3fiZXOATvADAKBggqhkjOPQQDAzBMMS4w
LAYDVQQDDCVBdG9zIFRydXN0ZWRSb290IFJvb3QgQ0EgRUNDIFRMUyAyMDIxMQ0w
CwYDVQQKDARBdG9zMQswCQYDVQQGEwJERTAeFw0yMTA0MjIwOTI2MjNaFw00MTA0
MTcwOTI2MjJaMEwxLjAsBgNVBAMMJUF0b3MgVHJ1c3RlZFJvb3QgUm9vdCBDQSBF
Q0MgVExTIDIwMjExDTALBgNVBAoMBEF0b3MxCzAJBgNVBAYTAkRFMHYwEAYHKoZI
zj0CAQYFK4EEACIDYgAEloZYKDcKZ9Cg3iQZGeHkBQcfl+3oZIK59sRxUM6KDP/X
tXa7oWyTbIOiaG6l2b4siJVBzV3dscqDY4PMwL502eCdpO5KTlbgmClBk1IQ1SQ4
AjJn8ZQSb+/Xxd4u/RmAo0IwQDAPBgNVHRMBAf8EBTADAQH/MB0GA1UdDgQWBBR2
KCXWfeBmmnoJsmo7jjPXNtNPojAOBgNVHQ8BAf8EBAMCAYYwCgYIKoZIzj0EAwMD
aAAwZQIwW5kp85wxtolrbNa9d+F851F+uDrNozZffPc8dz7kUK2o59JZDCaOMDtu
CCrCp1rIAjEAmeMM56PDr9NJLkaCI2ZdyQAUEv049OGYa3cpetskz2VAv9LcjBHo
9H1/IISpQuQo
-----END CERTIFICATE-----
# Issuer: CN=Atos TrustedRoot Root CA RSA TLS 2021 O=Atos
# Subject: CN=Atos TrustedRoot Root CA RSA TLS 2021 O=Atos
# Label: "Atos TrustedRoot Root CA RSA TLS 2021"
# Serial: 111436099570196163832749341232207667876
# MD5 Fingerprint: d4:d3:46:b8:9a:c0:9c:76:5d:9e:3a:c3:b9:99:31:d2
# SHA1 Fingerprint: 18:52:3b:0d:06:37:e4:d6:3a:df:23:e4:98:fb:5b:16:fb:86:74:48
# SHA256 Fingerprint: 81:a9:08:8e:a5:9f:b3:64:c5:48:a6:f8:55:59:09:9b:6f:04:05:ef:bf:18:e5:32:4e:c9:f4:57:ba:00:11:2f
-----BEGIN CERTIFICATE-----
MIIFZDCCA0ygAwIBAgIQU9XP5hmTC/srBRLYwiqipDANBgkqhkiG9w0BAQwFADBM
MS4wLAYDVQQDDCVBdG9zIFRydXN0ZWRSb290IFJvb3QgQ0EgUlNBIFRMUyAyMDIx
MQ0wCwYDVQQKDARBdG9zMQswCQYDVQQGEwJERTAeFw0yMTA0MjIwOTIxMTBaFw00
MTA0MTcwOTIxMDlaMEwxLjAsBgNVBAMMJUF0b3MgVHJ1c3RlZFJvb3QgUm9vdCBD
QSBSU0EgVExTIDIwMjExDTALBgNVBAoMBEF0b3MxCzAJBgNVBAYTAkRFMIICIjAN
BgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEAtoAOxHm9BYx9sKOdTSJNy/BBl01Z
4NH+VoyX8te9j2y3I49f1cTYQcvyAh5x5en2XssIKl4w8i1mx4QbZFc4nXUtVsYv
Ye+W/CBGvevUez8/fEc4BKkbqlLfEzfTFRVOvV98r61jx3ncCHvVoOX3W3WsgFWZ
kmGbzSoXfduP9LVq6hdKZChmFSlsAvFr1bqjM9xaZ6cF4r9lthawEO3NUDPJcFDs
GY6wx/J0W2tExn2WuZgIWWbeKQGb9Cpt0xU6kGpn8bRrZtkh68rZYnxGEFzedUln
nkL5/nWpo63/dgpnQOPF943HhZpZnmKaau1Fh5hnstVKPNe0OwANwI8f4UDErmwh
3El+fsqyjW22v5MvoVw+j8rtgI5Y4dtXz4U2OLJxpAmMkokIiEjxQGMYsluMWuPD
0xeqqxmjLBvk1cbiZnrXghmmOxYsL3GHX0WelXOTwkKBIROW1527k2gV+p2kHYzy
geBYBr3JtuP2iV2J+axEoctr+hbxx1A9JNr3w+SH1VbxT5Aw+kUJWdo0zuATHAR8
ANSbhqRAvNncTFd+rrcztl524WWLZt+NyteYr842mIycg5kDcPOvdO3GDjbnvezB
c6eUWsuSZIKmAMFwoW4sKeFYV+xafJlrJaSQOoD0IJ2azsct+bJLKZWD6TWNp0lI
pw9MGZHQ9b8Q4HECAwEAAaNCMEAwDwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4EFgQU
dEmZ0f+0emhFdcN+tNzMzjkz2ggwDgYDVR0PAQH/BAQDAgGGMA0GCSqGSIb3DQEB
DAUAA4ICAQAjQ1MkYlxt/T7Cz1UAbMVWiLkO3TriJQ2VSpfKgInuKs1l+NsW4AmS
4BjHeJi78+xCUvuppILXTdiK/ORO/auQxDh1MoSf/7OwKwIzNsAQkG8dnK/haZPs
o0UvFJ/1TCplQ3IM98P4lYsU84UgYt1UU90s3BiVaU+DR3BAM1h3Egyi61IxHkzJ
qM7F78PRreBrAwA0JrRUITWXAdxfG/F851X6LWh3e9NpzNMOa7pNdkTWwhWaJuyw
xfW70Xp0wmzNxbVe9kzmWy2B27O3Opee7c9GslA9hGCZcbUztVdF5kJHdWoOsAgM
rr3e97sPWD2PAzHoPYJQyi9eDF20l74gNAf0xBLh7tew2VktafcxBPTy+av5EzH4
AXcOPUIjJsyacmdRIXrMPIWo6iFqO9taPKU0nprALN+AnCng33eU0aKAQv9qTFsR
0PXNor6uzFFcw9VUewyu1rkGd4Di7wcaaMxZUa1+XGdrudviB0JbuAEFWDlN5LuY
o7Ey7Nmj1m+UI/87tyll5gfp77YZ6ufCOB0yiJA8EytuzO+rdwY0d4RPcuSBhPm5
dDTedk+SKlOxJTnbPP/lPqYO5Wue/9vsL3SD3460s6neFE3/MaNFcyT6lSnMEpcE
oji2jbDwN/zIIX8/syQbPYtuzE2wFg2WHYMfRsCbvUOZ58SWLs5fyQ==
-----END CERTIFICATE-----

View file

@ -21,7 +21,7 @@ at <https://github.com/Ousret/charset_normalizer>.
""" """
import logging import logging
from .api import from_bytes, from_fp, from_path from .api import from_bytes, from_fp, from_path, is_binary
from .legacy import detect from .legacy import detect
from .models import CharsetMatch, CharsetMatches from .models import CharsetMatch, CharsetMatches
from .utils import set_logging_handler from .utils import set_logging_handler
@ -31,6 +31,7 @@ __all__ = (
"from_fp", "from_fp",
"from_path", "from_path",
"from_bytes", "from_bytes",
"is_binary",
"detect", "detect",
"CharsetMatch", "CharsetMatch",
"CharsetMatches", "CharsetMatches",

View file

@ -1,6 +1,6 @@
import logging import logging
from os import PathLike from os import PathLike
from typing import Any, BinaryIO, List, Optional, Set from typing import BinaryIO, List, Optional, Set, Union
from .cd import ( from .cd import (
coherence_ratio, coherence_ratio,
@ -31,7 +31,7 @@ explain_handler.setFormatter(
def from_bytes( def from_bytes(
sequences: bytes, sequences: Union[bytes, bytearray],
steps: int = 5, steps: int = 5,
chunk_size: int = 512, chunk_size: int = 512,
threshold: float = 0.2, threshold: float = 0.2,
@ -40,6 +40,7 @@ def from_bytes(
preemptive_behaviour: bool = True, preemptive_behaviour: bool = True,
explain: bool = False, explain: bool = False,
language_threshold: float = 0.1, language_threshold: float = 0.1,
enable_fallback: bool = True,
) -> CharsetMatches: ) -> CharsetMatches:
""" """
Given a raw bytes sequence, return the best possibles charset usable to render str objects. Given a raw bytes sequence, return the best possibles charset usable to render str objects.
@ -361,7 +362,8 @@ def from_bytes(
) )
# Preparing those fallbacks in case we got nothing. # Preparing those fallbacks in case we got nothing.
if ( if (
encoding_iana in ["ascii", "utf_8", specified_encoding] enable_fallback
and encoding_iana in ["ascii", "utf_8", specified_encoding]
and not lazy_str_hard_failure and not lazy_str_hard_failure
): ):
fallback_entry = CharsetMatch( fallback_entry = CharsetMatch(
@ -507,6 +509,7 @@ def from_fp(
preemptive_behaviour: bool = True, preemptive_behaviour: bool = True,
explain: bool = False, explain: bool = False,
language_threshold: float = 0.1, language_threshold: float = 0.1,
enable_fallback: bool = True,
) -> CharsetMatches: ) -> CharsetMatches:
""" """
Same thing than the function from_bytes but using a file pointer that is already ready. Same thing than the function from_bytes but using a file pointer that is already ready.
@ -522,11 +525,12 @@ def from_fp(
preemptive_behaviour, preemptive_behaviour,
explain, explain,
language_threshold, language_threshold,
enable_fallback,
) )
def from_path( def from_path(
path: "PathLike[Any]", path: Union[str, bytes, PathLike], # type: ignore[type-arg]
steps: int = 5, steps: int = 5,
chunk_size: int = 512, chunk_size: int = 512,
threshold: float = 0.20, threshold: float = 0.20,
@ -535,6 +539,7 @@ def from_path(
preemptive_behaviour: bool = True, preemptive_behaviour: bool = True,
explain: bool = False, explain: bool = False,
language_threshold: float = 0.1, language_threshold: float = 0.1,
enable_fallback: bool = True,
) -> CharsetMatches: ) -> CharsetMatches:
""" """
Same thing than the function from_bytes but with one extra step. Opening and reading given file path in binary mode. Same thing than the function from_bytes but with one extra step. Opening and reading given file path in binary mode.
@ -551,4 +556,71 @@ def from_path(
preemptive_behaviour, preemptive_behaviour,
explain, explain,
language_threshold, language_threshold,
enable_fallback,
) )
def is_binary(
fp_or_path_or_payload: Union[PathLike, str, BinaryIO, bytes], # type: ignore[type-arg]
steps: int = 5,
chunk_size: int = 512,
threshold: float = 0.20,
cp_isolation: Optional[List[str]] = None,
cp_exclusion: Optional[List[str]] = None,
preemptive_behaviour: bool = True,
explain: bool = False,
language_threshold: float = 0.1,
enable_fallback: bool = False,
) -> bool:
"""
Detect if the given input (file, bytes, or path) points to a binary file. aka. not a string.
Based on the same main heuristic algorithms and default kwargs at the sole exception that fallbacks match
are disabled to be stricter around ASCII-compatible but unlikely to be a string.
"""
if isinstance(fp_or_path_or_payload, (str, PathLike)):
guesses = from_path(
fp_or_path_or_payload,
steps=steps,
chunk_size=chunk_size,
threshold=threshold,
cp_isolation=cp_isolation,
cp_exclusion=cp_exclusion,
preemptive_behaviour=preemptive_behaviour,
explain=explain,
language_threshold=language_threshold,
enable_fallback=enable_fallback,
)
elif isinstance(
fp_or_path_or_payload,
(
bytes,
bytearray,
),
):
guesses = from_bytes(
fp_or_path_or_payload,
steps=steps,
chunk_size=chunk_size,
threshold=threshold,
cp_isolation=cp_isolation,
cp_exclusion=cp_exclusion,
preemptive_behaviour=preemptive_behaviour,
explain=explain,
language_threshold=language_threshold,
enable_fallback=enable_fallback,
)
else:
guesses = from_fp(
fp_or_path_or_payload,
steps=steps,
chunk_size=chunk_size,
threshold=threshold,
cp_isolation=cp_isolation,
cp_exclusion=cp_exclusion,
preemptive_behaviour=preemptive_behaviour,
explain=explain,
language_threshold=language_threshold,
enable_fallback=enable_fallback,
)
return not guesses

View file

@ -294,12 +294,23 @@ class SuperWeirdWordPlugin(MessDetectorPlugin):
if buffer_length >= 4: if buffer_length >= 4:
if self._buffer_accent_count / buffer_length > 0.34: if self._buffer_accent_count / buffer_length > 0.34:
self._is_current_word_bad = True self._is_current_word_bad = True
# Word/Buffer ending with a upper case accentuated letter are so rare, # Word/Buffer ending with an upper case accentuated letter are so rare,
# that we will consider them all as suspicious. Same weight as foreign_long suspicious. # that we will consider them all as suspicious. Same weight as foreign_long suspicious.
if is_accentuated(self._buffer[-1]) and self._buffer[-1].isupper(): if is_accentuated(self._buffer[-1]) and self._buffer[-1].isupper():
self._foreign_long_count += 1 self._foreign_long_count += 1
self._is_current_word_bad = True self._is_current_word_bad = True
if buffer_length >= 24 and self._foreign_long_watch: if buffer_length >= 24 and self._foreign_long_watch:
camel_case_dst = [
i
for c, i in zip(self._buffer, range(0, buffer_length))
if c.isupper()
]
probable_camel_cased: bool = False
if camel_case_dst and (len(camel_case_dst) / buffer_length <= 0.3):
probable_camel_cased = True
if not probable_camel_cased:
self._foreign_long_count += 1 self._foreign_long_count += 1
self._is_current_word_bad = True self._is_current_word_bad = True

View file

@ -120,12 +120,12 @@ def is_emoticon(character: str) -> bool:
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) @lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_separator(character: str) -> bool: def is_separator(character: str) -> bool:
if character.isspace() or character in {"", "+", ",", ";", "<", ">"}: if character.isspace() or character in {"", "+", "<", ">"}:
return True return True
character_category: str = unicodedata.category(character) character_category: str = unicodedata.category(character)
return "Z" in character_category return "Z" in character_category or character_category in {"Po", "Pd", "Pc"}
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) @lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)

View file

@ -2,5 +2,5 @@
Expose version Expose version
""" """
__version__ = "3.1.0" __version__ = "3.2.0"
VERSION = __version__.split(".") VERSION = __version__.split(".")

View file

@ -6,7 +6,7 @@ __title__ = "packaging"
__summary__ = "Core utilities for Python packages" __summary__ = "Core utilities for Python packages"
__uri__ = "https://github.com/pypa/packaging" __uri__ = "https://github.com/pypa/packaging"
__version__ = "23.0" __version__ = "23.1"
__author__ = "Donald Stufft and individual contributors" __author__ = "Donald Stufft and individual contributors"
__email__ = "donald@stufft.io" __email__ = "donald@stufft.io"

View file

@ -14,6 +14,8 @@ EF_ARM_ABI_VER5 = 0x05000000
EF_ARM_ABI_FLOAT_HARD = 0x00000400 EF_ARM_ABI_FLOAT_HARD = 0x00000400
# `os.PathLike` not a generic type until Python 3.9, so sticking with `str`
# as the type for `path` until then.
@contextlib.contextmanager @contextlib.contextmanager
def _parse_elf(path: str) -> Generator[Optional[ELFFile], None, None]: def _parse_elf(path: str) -> Generator[Optional[ELFFile], None, None]:
try: try:

View file

@ -163,7 +163,11 @@ def _parse_extras(tokenizer: Tokenizer) -> List[str]:
if not tokenizer.check("LEFT_BRACKET", peek=True): if not tokenizer.check("LEFT_BRACKET", peek=True):
return [] return []
with tokenizer.enclosing_tokens("LEFT_BRACKET", "RIGHT_BRACKET"): with tokenizer.enclosing_tokens(
"LEFT_BRACKET",
"RIGHT_BRACKET",
around="extras",
):
tokenizer.consume("WS") tokenizer.consume("WS")
extras = _parse_extras_list(tokenizer) extras = _parse_extras_list(tokenizer)
tokenizer.consume("WS") tokenizer.consume("WS")
@ -203,7 +207,11 @@ def _parse_specifier(tokenizer: Tokenizer) -> str:
specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS
| WS? version_many WS? | WS? version_many WS?
""" """
with tokenizer.enclosing_tokens("LEFT_PARENTHESIS", "RIGHT_PARENTHESIS"): with tokenizer.enclosing_tokens(
"LEFT_PARENTHESIS",
"RIGHT_PARENTHESIS",
around="version specifier",
):
tokenizer.consume("WS") tokenizer.consume("WS")
parsed_specifiers = _parse_version_many(tokenizer) parsed_specifiers = _parse_version_many(tokenizer)
tokenizer.consume("WS") tokenizer.consume("WS")
@ -217,7 +225,20 @@ def _parse_version_many(tokenizer: Tokenizer) -> str:
""" """
parsed_specifiers = "" parsed_specifiers = ""
while tokenizer.check("SPECIFIER"): while tokenizer.check("SPECIFIER"):
span_start = tokenizer.position
parsed_specifiers += tokenizer.read().text parsed_specifiers += tokenizer.read().text
if tokenizer.check("VERSION_PREFIX_TRAIL", peek=True):
tokenizer.raise_syntax_error(
".* suffix can only be used with `==` or `!=` operators",
span_start=span_start,
span_end=tokenizer.position + 1,
)
if tokenizer.check("VERSION_LOCAL_LABEL_TRAIL", peek=True):
tokenizer.raise_syntax_error(
"Local version label can only be used with `==` or `!=` operators",
span_start=span_start,
span_end=tokenizer.position,
)
tokenizer.consume("WS") tokenizer.consume("WS")
if not tokenizer.check("COMMA"): if not tokenizer.check("COMMA"):
break break
@ -254,7 +275,11 @@ def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom:
tokenizer.consume("WS") tokenizer.consume("WS")
if tokenizer.check("LEFT_PARENTHESIS", peek=True): if tokenizer.check("LEFT_PARENTHESIS", peek=True):
with tokenizer.enclosing_tokens("LEFT_PARENTHESIS", "RIGHT_PARENTHESIS"): with tokenizer.enclosing_tokens(
"LEFT_PARENTHESIS",
"RIGHT_PARENTHESIS",
around="marker expression",
):
tokenizer.consume("WS") tokenizer.consume("WS")
marker: MarkerAtom = _parse_marker(tokenizer) marker: MarkerAtom = _parse_marker(tokenizer)
tokenizer.consume("WS") tokenizer.consume("WS")

View file

@ -78,6 +78,8 @@ DEFAULT_RULES: "Dict[str, Union[str, re.Pattern[str]]]" = {
"AT": r"\@", "AT": r"\@",
"URL": r"[^ \t]+", "URL": r"[^ \t]+",
"IDENTIFIER": r"\b[a-zA-Z0-9][a-zA-Z0-9._-]*\b", "IDENTIFIER": r"\b[a-zA-Z0-9][a-zA-Z0-9._-]*\b",
"VERSION_PREFIX_TRAIL": r"\.\*",
"VERSION_LOCAL_LABEL_TRAIL": r"\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*",
"WS": r"[ \t]+", "WS": r"[ \t]+",
"END": r"$", "END": r"$",
} }
@ -167,21 +169,23 @@ class Tokenizer:
) )
@contextlib.contextmanager @contextlib.contextmanager
def enclosing_tokens(self, open_token: str, close_token: str) -> Iterator[bool]: def enclosing_tokens(
self, open_token: str, close_token: str, *, around: str
) -> Iterator[None]:
if self.check(open_token): if self.check(open_token):
open_position = self.position open_position = self.position
self.read() self.read()
else: else:
open_position = None open_position = None
yield open_position is not None yield
if open_position is None: if open_position is None:
return return
if not self.check(close_token): if not self.check(close_token):
self.raise_syntax_error( self.raise_syntax_error(
f"Expected closing {close_token}", f"Expected matching {close_token} for {open_token}, after {around}",
span_start=open_position, span_start=open_position,
) )

View file

@ -8,7 +8,14 @@ import platform
import sys import sys
from typing import Any, Callable, Dict, List, Optional, Tuple, Union from typing import Any, Callable, Dict, List, Optional, Tuple, Union
from ._parser import MarkerAtom, MarkerList, Op, Value, Variable, parse_marker from ._parser import (
MarkerAtom,
MarkerList,
Op,
Value,
Variable,
parse_marker as _parse_marker,
)
from ._tokenizer import ParserSyntaxError from ._tokenizer import ParserSyntaxError
from .specifiers import InvalidSpecifier, Specifier from .specifiers import InvalidSpecifier, Specifier
from .utils import canonicalize_name from .utils import canonicalize_name
@ -189,7 +196,7 @@ class Marker:
# packaging.requirements.Requirement. If any additional logic is # packaging.requirements.Requirement. If any additional logic is
# added here, make sure to mirror/adapt Requirement. # added here, make sure to mirror/adapt Requirement.
try: try:
self._markers = _normalize_extra_values(parse_marker(marker)) self._markers = _normalize_extra_values(_parse_marker(marker))
# The attribute `_markers` can be described in terms of a recursive type: # The attribute `_markers` can be described in terms of a recursive type:
# MarkerList = List[Union[Tuple[Node, ...], str, MarkerList]] # MarkerList = List[Union[Tuple[Node, ...], str, MarkerList]]
# #

408
lib/packaging/metadata.py Normal file
View file

@ -0,0 +1,408 @@
import email.feedparser
import email.header
import email.message
import email.parser
import email.policy
import sys
import typing
from typing import Dict, List, Optional, Tuple, Union, cast
if sys.version_info >= (3, 8): # pragma: no cover
from typing import TypedDict
else: # pragma: no cover
if typing.TYPE_CHECKING:
from typing_extensions import TypedDict
else:
try:
from typing_extensions import TypedDict
except ImportError:
class TypedDict:
def __init_subclass__(*_args, **_kwargs):
pass
# The RawMetadata class attempts to make as few assumptions about the underlying
# serialization formats as possible. The idea is that as long as a serialization
# formats offer some very basic primitives in *some* way then we can support
# serializing to and from that format.
class RawMetadata(TypedDict, total=False):
"""A dictionary of raw core metadata.
Each field in core metadata maps to a key of this dictionary (when data is
provided). The key is lower-case and underscores are used instead of dashes
compared to the equivalent core metadata field. Any core metadata field that
can be specified multiple times or can hold multiple values in a single
field have a key with a plural name.
Core metadata fields that can be specified multiple times are stored as a
list or dict depending on which is appropriate for the field. Any fields
which hold multiple values in a single field are stored as a list.
"""
# Metadata 1.0 - PEP 241
metadata_version: str
name: str
version: str
platforms: List[str]
summary: str
description: str
keywords: List[str]
home_page: str
author: str
author_email: str
license: str
# Metadata 1.1 - PEP 314
supported_platforms: List[str]
download_url: str
classifiers: List[str]
requires: List[str]
provides: List[str]
obsoletes: List[str]
# Metadata 1.2 - PEP 345
maintainer: str
maintainer_email: str
requires_dist: List[str]
provides_dist: List[str]
obsoletes_dist: List[str]
requires_python: str
requires_external: List[str]
project_urls: Dict[str, str]
# Metadata 2.0
# PEP 426 attempted to completely revamp the metadata format
# but got stuck without ever being able to build consensus on
# it and ultimately ended up withdrawn.
#
# However, a number of tools had started emiting METADATA with
# `2.0` Metadata-Version, so for historical reasons, this version
# was skipped.
# Metadata 2.1 - PEP 566
description_content_type: str
provides_extra: List[str]
# Metadata 2.2 - PEP 643
dynamic: List[str]
# Metadata 2.3 - PEP 685
# No new fields were added in PEP 685, just some edge case were
# tightened up to provide better interoptability.
_STRING_FIELDS = {
"author",
"author_email",
"description",
"description_content_type",
"download_url",
"home_page",
"license",
"maintainer",
"maintainer_email",
"metadata_version",
"name",
"requires_python",
"summary",
"version",
}
_LIST_STRING_FIELDS = {
"classifiers",
"dynamic",
"obsoletes",
"obsoletes_dist",
"platforms",
"provides",
"provides_dist",
"provides_extra",
"requires",
"requires_dist",
"requires_external",
"supported_platforms",
}
def _parse_keywords(data: str) -> List[str]:
"""Split a string of comma-separate keyboards into a list of keywords."""
return [k.strip() for k in data.split(",")]
def _parse_project_urls(data: List[str]) -> Dict[str, str]:
"""Parse a list of label/URL string pairings separated by a comma."""
urls = {}
for pair in data:
# Our logic is slightly tricky here as we want to try and do
# *something* reasonable with malformed data.
#
# The main thing that we have to worry about, is data that does
# not have a ',' at all to split the label from the Value. There
# isn't a singular right answer here, and we will fail validation
# later on (if the caller is validating) so it doesn't *really*
# matter, but since the missing value has to be an empty str
# and our return value is dict[str, str], if we let the key
# be the missing value, then they'd have multiple '' values that
# overwrite each other in a accumulating dict.
#
# The other potentional issue is that it's possible to have the
# same label multiple times in the metadata, with no solid "right"
# answer with what to do in that case. As such, we'll do the only
# thing we can, which is treat the field as unparseable and add it
# to our list of unparsed fields.
parts = [p.strip() for p in pair.split(",", 1)]
parts.extend([""] * (max(0, 2 - len(parts)))) # Ensure 2 items
# TODO: The spec doesn't say anything about if the keys should be
# considered case sensitive or not... logically they should
# be case-preserving and case-insensitive, but doing that
# would open up more cases where we might have duplicate
# entries.
label, url = parts
if label in urls:
# The label already exists in our set of urls, so this field
# is unparseable, and we can just add the whole thing to our
# unparseable data and stop processing it.
raise KeyError("duplicate labels in project urls")
urls[label] = url
return urls
def _get_payload(msg: email.message.Message, source: Union[bytes, str]) -> str:
"""Get the body of the message."""
# If our source is a str, then our caller has managed encodings for us,
# and we don't need to deal with it.
if isinstance(source, str):
payload: str = msg.get_payload()
return payload
# If our source is a bytes, then we're managing the encoding and we need
# to deal with it.
else:
bpayload: bytes = msg.get_payload(decode=True)
try:
return bpayload.decode("utf8", "strict")
except UnicodeDecodeError:
raise ValueError("payload in an invalid encoding")
# The various parse_FORMAT functions here are intended to be as lenient as
# possible in their parsing, while still returning a correctly typed
# RawMetadata.
#
# To aid in this, we also generally want to do as little touching of the
# data as possible, except where there are possibly some historic holdovers
# that make valid data awkward to work with.
#
# While this is a lower level, intermediate format than our ``Metadata``
# class, some light touch ups can make a massive difference in usability.
# Map METADATA fields to RawMetadata.
_EMAIL_TO_RAW_MAPPING = {
"author": "author",
"author-email": "author_email",
"classifier": "classifiers",
"description": "description",
"description-content-type": "description_content_type",
"download-url": "download_url",
"dynamic": "dynamic",
"home-page": "home_page",
"keywords": "keywords",
"license": "license",
"maintainer": "maintainer",
"maintainer-email": "maintainer_email",
"metadata-version": "metadata_version",
"name": "name",
"obsoletes": "obsoletes",
"obsoletes-dist": "obsoletes_dist",
"platform": "platforms",
"project-url": "project_urls",
"provides": "provides",
"provides-dist": "provides_dist",
"provides-extra": "provides_extra",
"requires": "requires",
"requires-dist": "requires_dist",
"requires-external": "requires_external",
"requires-python": "requires_python",
"summary": "summary",
"supported-platform": "supported_platforms",
"version": "version",
}
def parse_email(data: Union[bytes, str]) -> Tuple[RawMetadata, Dict[str, List[str]]]:
"""Parse a distribution's metadata.
This function returns a two-item tuple of dicts. The first dict is of
recognized fields from the core metadata specification. Fields that can be
parsed and translated into Python's built-in types are converted
appropriately. All other fields are left as-is. Fields that are allowed to
appear multiple times are stored as lists.
The second dict contains all other fields from the metadata. This includes
any unrecognized fields. It also includes any fields which are expected to
be parsed into a built-in type but were not formatted appropriately. Finally,
any fields that are expected to appear only once but are repeated are
included in this dict.
"""
raw: Dict[str, Union[str, List[str], Dict[str, str]]] = {}
unparsed: Dict[str, List[str]] = {}
if isinstance(data, str):
parsed = email.parser.Parser(policy=email.policy.compat32).parsestr(data)
else:
parsed = email.parser.BytesParser(policy=email.policy.compat32).parsebytes(data)
# We have to wrap parsed.keys() in a set, because in the case of multiple
# values for a key (a list), the key will appear multiple times in the
# list of keys, but we're avoiding that by using get_all().
for name in frozenset(parsed.keys()):
# Header names in RFC are case insensitive, so we'll normalize to all
# lower case to make comparisons easier.
name = name.lower()
# We use get_all() here, even for fields that aren't multiple use,
# because otherwise someone could have e.g. two Name fields, and we
# would just silently ignore it rather than doing something about it.
headers = parsed.get_all(name)
# The way the email module works when parsing bytes is that it
# unconditionally decodes the bytes as ascii using the surrogateescape
# handler. When you pull that data back out (such as with get_all() ),
# it looks to see if the str has any surrogate escapes, and if it does
# it wraps it in a Header object instead of returning the string.
#
# As such, we'll look for those Header objects, and fix up the encoding.
value = []
# Flag if we have run into any issues processing the headers, thus
# signalling that the data belongs in 'unparsed'.
valid_encoding = True
for h in headers:
# It's unclear if this can return more types than just a Header or
# a str, so we'll just assert here to make sure.
assert isinstance(h, (email.header.Header, str))
# If it's a header object, we need to do our little dance to get
# the real data out of it. In cases where there is invalid data
# we're going to end up with mojibake, but there's no obvious, good
# way around that without reimplementing parts of the Header object
# ourselves.
#
# That should be fine since, if mojibacked happens, this key is
# going into the unparsed dict anyways.
if isinstance(h, email.header.Header):
# The Header object stores it's data as chunks, and each chunk
# can be independently encoded, so we'll need to check each
# of them.
chunks: List[Tuple[bytes, Optional[str]]] = []
for bin, encoding in email.header.decode_header(h):
try:
bin.decode("utf8", "strict")
except UnicodeDecodeError:
# Enable mojibake.
encoding = "latin1"
valid_encoding = False
else:
encoding = "utf8"
chunks.append((bin, encoding))
# Turn our chunks back into a Header object, then let that
# Header object do the right thing to turn them into a
# string for us.
value.append(str(email.header.make_header(chunks)))
# This is already a string, so just add it.
else:
value.append(h)
# We've processed all of our values to get them into a list of str,
# but we may have mojibake data, in which case this is an unparsed
# field.
if not valid_encoding:
unparsed[name] = value
continue
raw_name = _EMAIL_TO_RAW_MAPPING.get(name)
if raw_name is None:
# This is a bit of a weird situation, we've encountered a key that
# we don't know what it means, so we don't know whether it's meant
# to be a list or not.
#
# Since we can't really tell one way or another, we'll just leave it
# as a list, even though it may be a single item list, because that's
# what makes the most sense for email headers.
unparsed[name] = value
continue
# If this is one of our string fields, then we'll check to see if our
# value is a list of a single item. If it is then we'll assume that
# it was emitted as a single string, and unwrap the str from inside
# the list.
#
# If it's any other kind of data, then we haven't the faintest clue
# what we should parse it as, and we have to just add it to our list
# of unparsed stuff.
if raw_name in _STRING_FIELDS and len(value) == 1:
raw[raw_name] = value[0]
# If this is one of our list of string fields, then we can just assign
# the value, since email *only* has strings, and our get_all() call
# above ensures that this is a list.
elif raw_name in _LIST_STRING_FIELDS:
raw[raw_name] = value
# Special Case: Keywords
# The keywords field is implemented in the metadata spec as a str,
# but it conceptually is a list of strings, and is serialized using
# ", ".join(keywords), so we'll do some light data massaging to turn
# this into what it logically is.
elif raw_name == "keywords" and len(value) == 1:
raw[raw_name] = _parse_keywords(value[0])
# Special Case: Project-URL
# The project urls is implemented in the metadata spec as a list of
# specially-formatted strings that represent a key and a value, which
# is fundamentally a mapping, however the email format doesn't support
# mappings in a sane way, so it was crammed into a list of strings
# instead.
#
# We will do a little light data massaging to turn this into a map as
# it logically should be.
elif raw_name == "project_urls":
try:
raw[raw_name] = _parse_project_urls(value)
except KeyError:
unparsed[name] = value
# Nothing that we've done has managed to parse this, so it'll just
# throw it in our unparseable data and move on.
else:
unparsed[name] = value
# We need to support getting the Description from the message payload in
# addition to getting it from the the headers. This does mean, though, there
# is the possibility of it being set both ways, in which case we put both
# in 'unparsed' since we don't know which is right.
try:
payload = _get_payload(parsed, data)
except ValueError:
unparsed.setdefault("description", []).append(
parsed.get_payload(decode=isinstance(data, bytes))
)
else:
if payload:
# Check to see if we've already got a description, if so then both
# it, and this body move to unparseable.
if "description" in raw:
description_header = cast(str, raw.pop("description"))
unparsed.setdefault("description", []).extend(
[description_header, payload]
)
elif "description" in unparsed:
unparsed["description"].append(payload)
else:
raw["description"] = payload
# We need to cast our `raw` to a metadata, because a TypedDict only support
# literal key names, but we're computing our key names on purpose, but the
# way this function is implemented, our `TypedDict` can only have valid key
# names.
return cast(RawMetadata, raw), unparsed

View file

@ -5,7 +5,7 @@
import urllib.parse import urllib.parse
from typing import Any, List, Optional, Set from typing import Any, List, Optional, Set
from ._parser import parse_requirement from ._parser import parse_requirement as _parse_requirement
from ._tokenizer import ParserSyntaxError from ._tokenizer import ParserSyntaxError
from .markers import Marker, _normalize_extra_values from .markers import Marker, _normalize_extra_values
from .specifiers import SpecifierSet from .specifiers import SpecifierSet
@ -32,7 +32,7 @@ class Requirement:
def __init__(self, requirement_string: str) -> None: def __init__(self, requirement_string: str) -> None:
try: try:
parsed = parse_requirement(requirement_string) parsed = _parse_requirement(requirement_string)
except ParserSyntaxError as e: except ParserSyntaxError as e:
raise InvalidRequirement(str(e)) from e raise InvalidRequirement(str(e)) from e

View file

@ -252,7 +252,8 @@ class Specifier(BaseSpecifier):
# Store whether or not this Specifier should accept prereleases # Store whether or not this Specifier should accept prereleases
self._prereleases = prereleases self._prereleases = prereleases
@property # https://github.com/python/mypy/pull/13475#pullrequestreview-1079784515
@property # type: ignore[override]
def prereleases(self) -> bool: def prereleases(self) -> bool:
# If there is an explicit prereleases set for this, then we'll just # If there is an explicit prereleases set for this, then we'll just
# blindly use that. # blindly use that.
@ -398,7 +399,9 @@ class Specifier(BaseSpecifier):
# We need special logic to handle prefix matching # We need special logic to handle prefix matching
if spec.endswith(".*"): if spec.endswith(".*"):
# In the case of prefix matching we want to ignore local segment. # In the case of prefix matching we want to ignore local segment.
normalized_prospective = canonicalize_version(prospective.public) normalized_prospective = canonicalize_version(
prospective.public, strip_trailing_zero=False
)
# Get the normalized version string ignoring the trailing .* # Get the normalized version string ignoring the trailing .*
normalized_spec = canonicalize_version(spec[:-2], strip_trailing_zero=False) normalized_spec = canonicalize_version(spec[:-2], strip_trailing_zero=False)
# Split the spec out by dots, and pretend that there is an implicit # Split the spec out by dots, and pretend that there is an implicit

View file

@ -111,7 +111,7 @@ def parse_tag(tag: str) -> FrozenSet[Tag]:
def _get_config_var(name: str, warn: bool = False) -> Union[int, str, None]: def _get_config_var(name: str, warn: bool = False) -> Union[int, str, None]:
value = sysconfig.get_config_var(name) value: Union[int, str, None] = sysconfig.get_config_var(name)
if value is None and warn: if value is None and warn:
logger.debug( logger.debug(
"Config variable '%s' is unset, Python ABI tag may be incorrect", name "Config variable '%s' is unset, Python ABI tag may be incorrect", name
@ -120,7 +120,7 @@ def _get_config_var(name: str, warn: bool = False) -> Union[int, str, None]:
def _normalize_string(string: str) -> str: def _normalize_string(string: str) -> str:
return string.replace(".", "_").replace("-", "_") return string.replace(".", "_").replace("-", "_").replace(" ", "_")
def _abi3_applies(python_version: PythonVersion) -> bool: def _abi3_applies(python_version: PythonVersion) -> bool:

View file

@ -10,7 +10,7 @@
import collections import collections
import itertools import itertools
import re import re
from typing import Callable, Optional, SupportsInt, Tuple, Union from typing import Any, Callable, Optional, SupportsInt, Tuple, Union
from ._structures import Infinity, InfinityType, NegativeInfinity, NegativeInfinityType from ._structures import Infinity, InfinityType, NegativeInfinity, NegativeInfinityType
@ -63,7 +63,7 @@ class InvalidVersion(ValueError):
class _BaseVersion: class _BaseVersion:
_key: CmpKey _key: Tuple[Any, ...]
def __hash__(self) -> int: def __hash__(self) -> int:
return hash(self._key) return hash(self._key)
@ -179,6 +179,7 @@ class Version(_BaseVersion):
""" """
_regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE) _regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE)
_key: CmpKey
def __init__(self, version: str) -> None: def __init__(self, version: str) -> None:
"""Initialize a Version object. """Initialize a Version object.

View file

@ -66,10 +66,10 @@ def check_compatibility(urllib3_version, chardet_version, charset_normalizer_ver
# Check urllib3 for compatibility. # Check urllib3 for compatibility.
major, minor, patch = urllib3_version # noqa: F811 major, minor, patch = urllib3_version # noqa: F811
major, minor, patch = int(major), int(minor), int(patch) major, minor, patch = int(major), int(minor), int(patch)
# urllib3 >= 1.21.1, <= 1.26 # urllib3 >= 1.21.1
assert major == 1 assert major >= 1
if major == 1:
assert minor >= 21 assert minor >= 21
assert minor <= 26
# Check charset_normalizer for compatibility. # Check charset_normalizer for compatibility.
if chardet_version: if chardet_version:

View file

@ -5,8 +5,8 @@
__title__ = "requests" __title__ = "requests"
__description__ = "Python HTTP for Humans." __description__ = "Python HTTP for Humans."
__url__ = "https://requests.readthedocs.io" __url__ = "https://requests.readthedocs.io"
__version__ = "2.28.2" __version__ = "2.31.0"
__build__ = 0x022802 __build__ = 0x023100
__author__ = "Kenneth Reitz" __author__ = "Kenneth Reitz"
__author_email__ = "me@kennethreitz.org" __author_email__ = "me@kennethreitz.org"
__license__ = "Apache 2.0" __license__ = "Apache 2.0"

View file

@ -14,9 +14,11 @@ _VALID_HEADER_NAME_RE_STR = re.compile(r"^[^:\s][^:\r\n]*$")
_VALID_HEADER_VALUE_RE_BYTE = re.compile(rb"^\S[^\r\n]*$|^$") _VALID_HEADER_VALUE_RE_BYTE = re.compile(rb"^\S[^\r\n]*$|^$")
_VALID_HEADER_VALUE_RE_STR = re.compile(r"^\S[^\r\n]*$|^$") _VALID_HEADER_VALUE_RE_STR = re.compile(r"^\S[^\r\n]*$|^$")
_HEADER_VALIDATORS_STR = (_VALID_HEADER_NAME_RE_STR, _VALID_HEADER_VALUE_RE_STR)
_HEADER_VALIDATORS_BYTE = (_VALID_HEADER_NAME_RE_BYTE, _VALID_HEADER_VALUE_RE_BYTE)
HEADER_VALIDATORS = { HEADER_VALIDATORS = {
bytes: (_VALID_HEADER_NAME_RE_BYTE, _VALID_HEADER_VALUE_RE_BYTE), bytes: _HEADER_VALIDATORS_BYTE,
str: (_VALID_HEADER_NAME_RE_STR, _VALID_HEADER_VALUE_RE_STR), str: _HEADER_VALIDATORS_STR,
} }

View file

@ -22,7 +22,6 @@ from urllib3.exceptions import ProxyError as _ProxyError
from urllib3.exceptions import ReadTimeoutError, ResponseError from urllib3.exceptions import ReadTimeoutError, ResponseError
from urllib3.exceptions import SSLError as _SSLError from urllib3.exceptions import SSLError as _SSLError
from urllib3.poolmanager import PoolManager, proxy_from_url from urllib3.poolmanager import PoolManager, proxy_from_url
from urllib3.response import HTTPResponse
from urllib3.util import Timeout as TimeoutSauce from urllib3.util import Timeout as TimeoutSauce
from urllib3.util import parse_url from urllib3.util import parse_url
from urllib3.util.retry import Retry from urllib3.util.retry import Retry
@ -194,7 +193,6 @@ class HTTPAdapter(BaseAdapter):
num_pools=connections, num_pools=connections,
maxsize=maxsize, maxsize=maxsize,
block=block, block=block,
strict=True,
**pool_kwargs, **pool_kwargs,
) )
@ -485,7 +483,6 @@ class HTTPAdapter(BaseAdapter):
timeout = TimeoutSauce(connect=timeout, read=timeout) timeout = TimeoutSauce(connect=timeout, read=timeout)
try: try:
if not chunked:
resp = conn.urlopen( resp = conn.urlopen(
method=request.method, method=request.method,
url=url, url=url,
@ -497,52 +494,9 @@ class HTTPAdapter(BaseAdapter):
decode_content=False, decode_content=False,
retries=self.max_retries, retries=self.max_retries,
timeout=timeout, timeout=timeout,
chunked=chunked,
) )
# Send the request.
else:
if hasattr(conn, "proxy_pool"):
conn = conn.proxy_pool
low_conn = conn._get_conn(timeout=DEFAULT_POOL_TIMEOUT)
try:
skip_host = "Host" in request.headers
low_conn.putrequest(
request.method,
url,
skip_accept_encoding=True,
skip_host=skip_host,
)
for header, value in request.headers.items():
low_conn.putheader(header, value)
low_conn.endheaders()
for i in request.body:
low_conn.send(hex(len(i))[2:].encode("utf-8"))
low_conn.send(b"\r\n")
low_conn.send(i)
low_conn.send(b"\r\n")
low_conn.send(b"0\r\n\r\n")
# Receive the response from the server
r = low_conn.getresponse()
resp = HTTPResponse.from_httplib(
r,
pool=conn,
connection=low_conn,
preload_content=False,
decode_content=False,
)
except Exception:
# If we hit any problems here, clean up the connection.
# Then, raise so that we can handle the actual exception.
low_conn.close()
raise
except (ProtocolError, OSError) as err: except (ProtocolError, OSError) as err:
raise ConnectionError(err, request=request) raise ConnectionError(err, request=request)

View file

@ -106,7 +106,7 @@ def post(url, data=None, json=None, **kwargs):
:param url: URL for the new :class:`Request` object. :param url: URL for the new :class:`Request` object.
:param data: (optional) Dictionary, list of tuples, bytes, or file-like :param data: (optional) Dictionary, list of tuples, bytes, or file-like
object to send in the body of the :class:`Request`. object to send in the body of the :class:`Request`.
:param json: (optional) json data to send in the body of the :class:`Request`. :param json: (optional) A JSON serializable Python object to send in the body of the :class:`Request`.
:param \*\*kwargs: Optional arguments that ``request`` takes. :param \*\*kwargs: Optional arguments that ``request`` takes.
:return: :class:`Response <Response>` object :return: :class:`Response <Response>` object
:rtype: requests.Response :rtype: requests.Response
@ -121,7 +121,7 @@ def put(url, data=None, **kwargs):
:param url: URL for the new :class:`Request` object. :param url: URL for the new :class:`Request` object.
:param data: (optional) Dictionary, list of tuples, bytes, or file-like :param data: (optional) Dictionary, list of tuples, bytes, or file-like
object to send in the body of the :class:`Request`. object to send in the body of the :class:`Request`.
:param json: (optional) json data to send in the body of the :class:`Request`. :param json: (optional) A JSON serializable Python object to send in the body of the :class:`Request`.
:param \*\*kwargs: Optional arguments that ``request`` takes. :param \*\*kwargs: Optional arguments that ``request`` takes.
:return: :class:`Response <Response>` object :return: :class:`Response <Response>` object
:rtype: requests.Response :rtype: requests.Response
@ -136,7 +136,7 @@ def patch(url, data=None, **kwargs):
:param url: URL for the new :class:`Request` object. :param url: URL for the new :class:`Request` object.
:param data: (optional) Dictionary, list of tuples, bytes, or file-like :param data: (optional) Dictionary, list of tuples, bytes, or file-like
object to send in the body of the :class:`Request`. object to send in the body of the :class:`Request`.
:param json: (optional) json data to send in the body of the :class:`Request`. :param json: (optional) A JSON serializable Python object to send in the body of the :class:`Request`.
:param \*\*kwargs: Optional arguments that ``request`` takes. :param \*\*kwargs: Optional arguments that ``request`` takes.
:return: :class:`Response <Response>` object :return: :class:`Response <Response>` object
:rtype: requests.Response :rtype: requests.Response

View file

@ -324,7 +324,9 @@ class SessionRedirectMixin:
except KeyError: except KeyError:
username, password = None, None username, password = None, None
if username and password: # urllib3 handles proxy authorization for us in the standard adapter.
# Avoid appending this to TLS tunneled requests where it may be leaked.
if not scheme.startswith('https') and username and password:
headers["Proxy-Authorization"] = _basic_auth_str(username, password) headers["Proxy-Authorization"] = _basic_auth_str(username, password)
return new_proxies return new_proxies

View file

@ -25,7 +25,12 @@ from . import certs
from .__version__ import __version__ from .__version__ import __version__
# to_native_string is unused here, but imported here for backwards compatibility # to_native_string is unused here, but imported here for backwards compatibility
from ._internal_utils import HEADER_VALIDATORS, to_native_string # noqa: F401 from ._internal_utils import ( # noqa: F401
_HEADER_VALIDATORS_BYTE,
_HEADER_VALIDATORS_STR,
HEADER_VALIDATORS,
to_native_string,
)
from .compat import ( from .compat import (
Mapping, Mapping,
basestring, basestring,
@ -1031,20 +1036,23 @@ def check_header_validity(header):
:param header: tuple, in the format (name, value). :param header: tuple, in the format (name, value).
""" """
name, value = header name, value = header
_validate_header_part(header, name, 0)
_validate_header_part(header, value, 1)
for part in header:
if type(part) not in HEADER_VALIDATORS: def _validate_header_part(header, header_part, header_validator_index):
if isinstance(header_part, str):
validator = _HEADER_VALIDATORS_STR[header_validator_index]
elif isinstance(header_part, bytes):
validator = _HEADER_VALIDATORS_BYTE[header_validator_index]
else:
raise InvalidHeader( raise InvalidHeader(
f"Header part ({part!r}) from {{{name!r}: {value!r}}} must be " f"Header part ({header_part!r}) from {header} "
f"of type str or bytes, not {type(part)}" f"must be of type str or bytes, not {type(header_part)}"
) )
_validate_header_part(name, "name", HEADER_VALIDATORS[type(name)][0])
_validate_header_part(value, "value", HEADER_VALIDATORS[type(value)][1])
def _validate_header_part(header_part, header_kind, validator):
if not validator.match(header_part): if not validator.match(header_part):
header_kind = "name" if header_validator_index == 0 else "value"
raise InvalidHeader( raise InvalidHeader(
f"Invalid leading whitespace, reserved character(s), or return" f"Invalid leading whitespace, reserved character(s), or return"
f"character(s) in header {header_kind}: {header_part!r}" f"character(s) in header {header_kind}: {header_part!r}"

View file

@ -118,7 +118,7 @@ Serializing multiple objects to JSON lines (newline-delimited JSON)::
""" """
from __future__ import absolute_import from __future__ import absolute_import
__version__ = '3.18.3' __version__ = '3.19.1'
__all__ = [ __all__ = [
'dump', 'dumps', 'load', 'loads', 'dump', 'dumps', 'load', 'loads',
'JSONDecoder', 'JSONDecodeError', 'JSONEncoder', 'JSONDecoder', 'JSONDecodeError', 'JSONEncoder',
@ -149,28 +149,10 @@ def _import_c_make_encoder():
except ImportError: except ImportError:
return None return None
_default_encoder = JSONEncoder( _default_encoder = JSONEncoder()
skipkeys=False,
ensure_ascii=True,
check_circular=True,
allow_nan=True,
indent=None,
separators=None,
encoding='utf-8',
default=None,
use_decimal=True,
namedtuple_as_object=True,
tuple_as_array=True,
iterable_as_array=False,
bigint_as_string=False,
item_sort_key=None,
for_json=False,
ignore_nan=False,
int_as_string_bitcount=None,
)
def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
allow_nan=True, cls=None, indent=None, separators=None, allow_nan=False, cls=None, indent=None, separators=None,
encoding='utf-8', default=None, use_decimal=True, encoding='utf-8', default=None, use_decimal=True,
namedtuple_as_object=True, tuple_as_array=True, namedtuple_as_object=True, tuple_as_array=True,
bigint_as_string=False, sort_keys=False, item_sort_key=None, bigint_as_string=False, sort_keys=False, item_sort_key=None,
@ -187,10 +169,10 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
contain non-ASCII characters, so long as they do not need to be escaped contain non-ASCII characters, so long as they do not need to be escaped
by JSON. When it is true, all non-ASCII characters are escaped. by JSON. When it is true, all non-ASCII characters are escaped.
If *allow_nan* is false, then it will be a ``ValueError`` to If *allow_nan* is true (default: ``False``), then out of range ``float``
serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) values (``nan``, ``inf``, ``-inf``) will be serialized to
in strict compliance of the original JSON specification, instead of using their JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``)
the JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). See instead of raising a ValueError. See
*ignore_nan* for ECMA-262 compliant behavior. *ignore_nan* for ECMA-262 compliant behavior.
If *indent* is a string, then JSON array elements and object members If *indent* is a string, then JSON array elements and object members
@ -258,7 +240,7 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
""" """
# cached encoder # cached encoder
if (not skipkeys and ensure_ascii and if (not skipkeys and ensure_ascii and
check_circular and allow_nan and check_circular and not allow_nan and
cls is None and indent is None and separators is None and cls is None and indent is None and separators is None and
encoding == 'utf-8' and default is None and use_decimal encoding == 'utf-8' and default is None and use_decimal
and namedtuple_as_object and tuple_as_array and not iterable_as_array and namedtuple_as_object and tuple_as_array and not iterable_as_array
@ -292,7 +274,7 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
allow_nan=True, cls=None, indent=None, separators=None, allow_nan=False, cls=None, indent=None, separators=None,
encoding='utf-8', default=None, use_decimal=True, encoding='utf-8', default=None, use_decimal=True,
namedtuple_as_object=True, tuple_as_array=True, namedtuple_as_object=True, tuple_as_array=True,
bigint_as_string=False, sort_keys=False, item_sort_key=None, bigint_as_string=False, sort_keys=False, item_sort_key=None,
@ -312,10 +294,11 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
for container types will be skipped and a circular reference will for container types will be skipped and a circular reference will
result in an ``OverflowError`` (or worse). result in an ``OverflowError`` (or worse).
If ``allow_nan`` is false, then it will be a ``ValueError`` to If *allow_nan* is true (default: ``False``), then out of range ``float``
serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in values (``nan``, ``inf``, ``-inf``) will be serialized to
strict compliance of the JSON specification, instead of using the their JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``)
JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). instead of raising a ValueError. See
*ignore_nan* for ECMA-262 compliant behavior.
If ``indent`` is a string, then JSON array elements and object members If ``indent`` is a string, then JSON array elements and object members
will be pretty-printed with a newline followed by that string repeated will be pretty-printed with a newline followed by that string repeated
@ -383,7 +366,7 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
""" """
# cached encoder # cached encoder
if (not skipkeys and ensure_ascii and if (not skipkeys and ensure_ascii and
check_circular and allow_nan and check_circular and not allow_nan and
cls is None and indent is None and separators is None and cls is None and indent is None and separators is None and
encoding == 'utf-8' and default is None and use_decimal encoding == 'utf-8' and default is None and use_decimal
and namedtuple_as_object and tuple_as_array and not iterable_as_array and namedtuple_as_object and tuple_as_array and not iterable_as_array
@ -412,14 +395,12 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
**kw).encode(obj) **kw).encode(obj)
_default_decoder = JSONDecoder(encoding=None, object_hook=None, _default_decoder = JSONDecoder()
object_pairs_hook=None)
def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, object_pairs_hook=None, parse_int=None, parse_constant=None, object_pairs_hook=None,
use_decimal=False, namedtuple_as_object=True, tuple_as_array=True, use_decimal=False, allow_nan=False, **kw):
**kw):
"""Deserialize ``fp`` (a ``.read()``-supporting file-like object containing """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
a JSON document as `str` or `bytes`) to a Python object. a JSON document as `str` or `bytes`) to a Python object.
@ -451,14 +432,18 @@ def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
``int(num_str)``. This can be used to use another datatype or parser ``int(num_str)``. This can be used to use another datatype or parser
for JSON integers (e.g. :class:`float`). for JSON integers (e.g. :class:`float`).
*parse_constant*, if specified, will be called with one of the *allow_nan*, if True (default false), will allow the parser to
following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This accept the non-standard floats ``NaN``, ``Infinity``, and ``-Infinity``
can be used to raise an exception if invalid JSON numbers are and enable the use of the deprecated *parse_constant*.
encountered.
If *use_decimal* is true (default: ``False``) then it implies If *use_decimal* is true (default: ``False``) then it implies
parse_float=decimal.Decimal for parity with ``dump``. parse_float=decimal.Decimal for parity with ``dump``.
*parse_constant*, if specified, will be
called with one of the following strings: ``'-Infinity'``,
``'Infinity'``, ``'NaN'``. It is not recommended to use this feature,
as it is rare to parse non-compliant JSON containing these values.
To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead
of subclassing whenever possible. of subclassing whenever possible.
@ -468,12 +453,12 @@ def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
encoding=encoding, cls=cls, object_hook=object_hook, encoding=encoding, cls=cls, object_hook=object_hook,
parse_float=parse_float, parse_int=parse_int, parse_float=parse_float, parse_int=parse_int,
parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, parse_constant=parse_constant, object_pairs_hook=object_pairs_hook,
use_decimal=use_decimal, **kw) use_decimal=use_decimal, allow_nan=allow_nan, **kw)
def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, object_pairs_hook=None, parse_int=None, parse_constant=None, object_pairs_hook=None,
use_decimal=False, **kw): use_decimal=False, allow_nan=False, **kw):
"""Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON
document) to a Python object. document) to a Python object.
@ -505,14 +490,18 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
``int(num_str)``. This can be used to use another datatype or parser ``int(num_str)``. This can be used to use another datatype or parser
for JSON integers (e.g. :class:`float`). for JSON integers (e.g. :class:`float`).
*parse_constant*, if specified, will be called with one of the *allow_nan*, if True (default false), will allow the parser to
following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This accept the non-standard floats ``NaN``, ``Infinity``, and ``-Infinity``
can be used to raise an exception if invalid JSON numbers are and enable the use of the deprecated *parse_constant*.
encountered.
If *use_decimal* is true (default: ``False``) then it implies If *use_decimal* is true (default: ``False``) then it implies
parse_float=decimal.Decimal for parity with ``dump``. parse_float=decimal.Decimal for parity with ``dump``.
*parse_constant*, if specified, will be
called with one of the following strings: ``'-Infinity'``,
``'Infinity'``, ``'NaN'``. It is not recommended to use this feature,
as it is rare to parse non-compliant JSON containing these values.
To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead
of subclassing whenever possible. of subclassing whenever possible.
@ -521,7 +510,7 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
if (cls is None and encoding is None and object_hook is None and if (cls is None and encoding is None and object_hook is None and
parse_int is None and parse_float is None and parse_int is None and parse_float is None and
parse_constant is None and object_pairs_hook is None parse_constant is None and object_pairs_hook is None
and not use_decimal and not kw): and not use_decimal and not allow_nan and not kw):
return _default_decoder.decode(s) return _default_decoder.decode(s)
if cls is None: if cls is None:
cls = JSONDecoder cls = JSONDecoder
@ -539,6 +528,8 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
if parse_float is not None: if parse_float is not None:
raise TypeError("use_decimal=True implies parse_float=Decimal") raise TypeError("use_decimal=True implies parse_float=Decimal")
kw['parse_float'] = Decimal kw['parse_float'] = Decimal
if allow_nan:
kw['allow_nan'] = True
return cls(encoding=encoding, **kw).decode(s) return cls(encoding=encoding, **kw).decode(s)
@ -560,22 +551,9 @@ def _toggle_speedups(enabled):
scan.make_scanner = scan.py_make_scanner scan.make_scanner = scan.py_make_scanner
dec.make_scanner = scan.make_scanner dec.make_scanner = scan.make_scanner
global _default_decoder global _default_decoder
_default_decoder = JSONDecoder( _default_decoder = JSONDecoder()
encoding=None,
object_hook=None,
object_pairs_hook=None,
)
global _default_encoder global _default_encoder
_default_encoder = JSONEncoder( _default_encoder = JSONEncoder()
skipkeys=False,
ensure_ascii=True,
check_circular=True,
allow_nan=True,
indent=None,
separators=None,
encoding='utf-8',
default=None,
)
def simple_first(kv): def simple_first(kv):
"""Helper function to pass to item_sort_key to sort simple """Helper function to pass to item_sort_key to sort simple

View file

@ -46,9 +46,35 @@ BACKSLASH = {
DEFAULT_ENCODING = "utf-8" DEFAULT_ENCODING = "utf-8"
if hasattr(sys, 'get_int_max_str_digits'):
bounded_int = int
else:
def bounded_int(s, INT_MAX_STR_DIGITS=4300):
"""Backport of the integer string length conversion limitation
https://docs.python.org/3/library/stdtypes.html#int-max-str-digits
"""
if len(s) > INT_MAX_STR_DIGITS:
raise ValueError("Exceeds the limit (%s) for integer string conversion: value has %s digits" % (INT_MAX_STR_DIGITS, len(s)))
return int(s)
def scan_four_digit_hex(s, end, _m=re.compile(r'^[0-9a-fA-F]{4}$').match):
"""Scan a four digit hex number from s[end:end + 4]
"""
msg = "Invalid \\uXXXX escape sequence"
esc = s[end:end + 4]
if not _m(esc):
raise JSONDecodeError(msg, s, end - 2)
try:
return int(esc, 16), end + 4
except ValueError:
raise JSONDecodeError(msg, s, end - 2)
def py_scanstring(s, end, encoding=None, strict=True, def py_scanstring(s, end, encoding=None, strict=True,
_b=BACKSLASH, _m=STRINGCHUNK.match, _join=u''.join, _b=BACKSLASH, _m=STRINGCHUNK.match, _join=u''.join,
_PY3=PY3, _maxunicode=sys.maxunicode): _PY3=PY3, _maxunicode=sys.maxunicode,
_scan_four_digit_hex=scan_four_digit_hex):
"""Scan the string s for a JSON string. End is the index of the """Scan the string s for a JSON string. End is the index of the
character in s after the quote that started the JSON string. character in s after the quote that started the JSON string.
Unescapes all valid JSON string escape sequences and raises ValueError Unescapes all valid JSON string escape sequences and raises ValueError
@ -67,6 +93,7 @@ def py_scanstring(s, end, encoding=None, strict=True,
if chunk is None: if chunk is None:
raise JSONDecodeError( raise JSONDecodeError(
"Unterminated string starting at", s, begin) "Unterminated string starting at", s, begin)
prev_end = end
end = chunk.end() end = chunk.end()
content, terminator = chunk.groups() content, terminator = chunk.groups()
# Content is contains zero or more unescaped string characters # Content is contains zero or more unescaped string characters
@ -81,7 +108,7 @@ def py_scanstring(s, end, encoding=None, strict=True,
elif terminator != '\\': elif terminator != '\\':
if strict: if strict:
msg = "Invalid control character %r at" msg = "Invalid control character %r at"
raise JSONDecodeError(msg, s, end) raise JSONDecodeError(msg, s, prev_end)
else: else:
_append(terminator) _append(terminator)
continue continue
@ -100,35 +127,18 @@ def py_scanstring(s, end, encoding=None, strict=True,
end += 1 end += 1
else: else:
# Unicode escape sequence # Unicode escape sequence
msg = "Invalid \\uXXXX escape sequence" uni, end = _scan_four_digit_hex(s, end + 1)
esc = s[end + 1:end + 5]
escX = esc[1:2]
if len(esc) != 4 or escX == 'x' or escX == 'X':
raise JSONDecodeError(msg, s, end - 1)
try:
uni = int(esc, 16)
except ValueError:
raise JSONDecodeError(msg, s, end - 1)
if uni < 0 or uni > _maxunicode:
raise JSONDecodeError(msg, s, end - 1)
end += 5
# Check for surrogate pair on UCS-4 systems # Check for surrogate pair on UCS-4 systems
# Note that this will join high/low surrogate pairs # Note that this will join high/low surrogate pairs
# but will also pass unpaired surrogates through # but will also pass unpaired surrogates through
if (_maxunicode > 65535 and if (_maxunicode > 65535 and
uni & 0xfc00 == 0xd800 and uni & 0xfc00 == 0xd800 and
s[end:end + 2] == '\\u'): s[end:end + 2] == '\\u'):
esc2 = s[end + 2:end + 6] uni2, end2 = _scan_four_digit_hex(s, end + 2)
escX = esc2[1:2]
if len(esc2) == 4 and not (escX == 'x' or escX == 'X'):
try:
uni2 = int(esc2, 16)
except ValueError:
raise JSONDecodeError(msg, s, end)
if uni2 & 0xfc00 == 0xdc00: if uni2 & 0xfc00 == 0xdc00:
uni = 0x10000 + (((uni - 0xd800) << 10) | uni = 0x10000 + (((uni - 0xd800) << 10) |
(uni2 - 0xdc00)) (uni2 - 0xdc00))
end += 6 end = end2
char = unichr(uni) char = unichr(uni)
# Append the unescaped character # Append the unescaped character
_append(char) _append(char)
@ -169,7 +179,7 @@ def JSONObject(state, encoding, strict, scan_once, object_hook,
return pairs, end + 1 return pairs, end + 1
elif nextchar != '"': elif nextchar != '"':
raise JSONDecodeError( raise JSONDecodeError(
"Expecting property name enclosed in double quotes", "Expecting property name enclosed in double quotes or '}'",
s, end) s, end)
end += 1 end += 1
while True: while True:
@ -296,14 +306,15 @@ class JSONDecoder(object):
| null | None | | null | None |
+---------------+-------------------+ +---------------+-------------------+
It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as When allow_nan=True, it also understands
``NaN``, ``Infinity``, and ``-Infinity`` as
their corresponding ``float`` values, which is outside the JSON spec. their corresponding ``float`` values, which is outside the JSON spec.
""" """
def __init__(self, encoding=None, object_hook=None, parse_float=None, def __init__(self, encoding=None, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, strict=True, parse_int=None, parse_constant=None, strict=True,
object_pairs_hook=None): object_pairs_hook=None, allow_nan=False):
""" """
*encoding* determines the encoding used to interpret any *encoding* determines the encoding used to interpret any
:class:`str` objects decoded by this instance (``'utf-8'`` by :class:`str` objects decoded by this instance (``'utf-8'`` by
@ -336,10 +347,13 @@ class JSONDecoder(object):
``int(num_str)``. This can be used to use another datatype or parser ``int(num_str)``. This can be used to use another datatype or parser
for JSON integers (e.g. :class:`float`). for JSON integers (e.g. :class:`float`).
*parse_constant*, if specified, will be called with one of the *allow_nan*, if True (default false), will allow the parser to
following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This accept the non-standard floats ``NaN``, ``Infinity``, and ``-Infinity``.
can be used to raise an exception if invalid JSON numbers are
encountered. *parse_constant*, if specified, will be
called with one of the following strings: ``'-Infinity'``,
``'Infinity'``, ``'NaN'``. It is not recommended to use this feature,
as it is rare to parse non-compliant JSON containing these values.
*strict* controls the parser's behavior when it encounters an *strict* controls the parser's behavior when it encounters an
invalid control character in a string. The default setting of invalid control character in a string. The default setting of
@ -353,8 +367,8 @@ class JSONDecoder(object):
self.object_hook = object_hook self.object_hook = object_hook
self.object_pairs_hook = object_pairs_hook self.object_pairs_hook = object_pairs_hook
self.parse_float = parse_float or float self.parse_float = parse_float or float
self.parse_int = parse_int or int self.parse_int = parse_int or bounded_int
self.parse_constant = parse_constant or _CONSTANTS.__getitem__ self.parse_constant = parse_constant or (allow_nan and _CONSTANTS.__getitem__ or None)
self.strict = strict self.strict = strict
self.parse_object = JSONObject self.parse_object = JSONObject
self.parse_array = JSONArray self.parse_array = JSONArray

View file

@ -5,7 +5,7 @@ import re
from operator import itemgetter from operator import itemgetter
# Do not import Decimal directly to avoid reload issues # Do not import Decimal directly to avoid reload issues
import decimal import decimal
from .compat import unichr, binary_type, text_type, string_types, integer_types, PY3 from .compat import binary_type, text_type, string_types, integer_types, PY3
def _import_speedups(): def _import_speedups():
try: try:
from . import _speedups from . import _speedups
@ -140,7 +140,7 @@ class JSONEncoder(object):
key_separator = ': ' key_separator = ': '
def __init__(self, skipkeys=False, ensure_ascii=True, def __init__(self, skipkeys=False, ensure_ascii=True,
check_circular=True, allow_nan=True, sort_keys=False, check_circular=True, allow_nan=False, sort_keys=False,
indent=None, separators=None, encoding='utf-8', default=None, indent=None, separators=None, encoding='utf-8', default=None,
use_decimal=True, namedtuple_as_object=True, use_decimal=True, namedtuple_as_object=True,
tuple_as_array=True, bigint_as_string=False, tuple_as_array=True, bigint_as_string=False,
@ -161,10 +161,11 @@ class JSONEncoder(object):
prevent an infinite recursion (which would cause an OverflowError). prevent an infinite recursion (which would cause an OverflowError).
Otherwise, no such check takes place. Otherwise, no such check takes place.
If allow_nan is true, then NaN, Infinity, and -Infinity will be If allow_nan is true (default: False), then out of range float
encoded as such. This behavior is not JSON specification compliant, values (nan, inf, -inf) will be serialized to
but is consistent with most JavaScript based encoders and decoders. their JavaScript equivalents (NaN, Infinity, -Infinity)
Otherwise, it will be a ValueError to encode such floats. instead of raising a ValueError. See
ignore_nan for ECMA-262 compliant behavior.
If sort_keys is true, then the output of dictionaries will be If sort_keys is true, then the output of dictionaries will be
sorted by key; this is useful for regression tests to ensure sorted by key; this is useful for regression tests to ensure
@ -294,7 +295,7 @@ class JSONEncoder(object):
# This doesn't pass the iterator directly to ''.join() because the # This doesn't pass the iterator directly to ''.join() because the
# exceptions aren't as detailed. The list call should be roughly # exceptions aren't as detailed. The list call should be roughly
# equivalent to the PySequence_Fast that ''.join() would do. # equivalent to the PySequence_Fast that ''.join() would do.
chunks = self.iterencode(o, _one_shot=True) chunks = self.iterencode(o)
if not isinstance(chunks, (list, tuple)): if not isinstance(chunks, (list, tuple)):
chunks = list(chunks) chunks = list(chunks)
if self.ensure_ascii: if self.ensure_ascii:
@ -302,7 +303,7 @@ class JSONEncoder(object):
else: else:
return u''.join(chunks) return u''.join(chunks)
def iterencode(self, o, _one_shot=False): def iterencode(self, o):
"""Encode the given object and yield each string """Encode the given object and yield each string
representation as available. representation as available.
@ -356,8 +357,7 @@ class JSONEncoder(object):
key_memo = {} key_memo = {}
int_as_string_bitcount = ( int_as_string_bitcount = (
53 if self.bigint_as_string else self.int_as_string_bitcount) 53 if self.bigint_as_string else self.int_as_string_bitcount)
if (_one_shot and c_make_encoder is not None if (c_make_encoder is not None and self.indent is None):
and self.indent is None):
_iterencode = c_make_encoder( _iterencode = c_make_encoder(
markers, self.default, _encoder, self.indent, markers, self.default, _encoder, self.indent,
self.key_separator, self.item_separator, self.sort_keys, self.key_separator, self.item_separator, self.sort_keys,
@ -370,7 +370,7 @@ class JSONEncoder(object):
_iterencode = _make_iterencode( _iterencode = _make_iterencode(
markers, self.default, _encoder, self.indent, floatstr, markers, self.default, _encoder, self.indent, floatstr,
self.key_separator, self.item_separator, self.sort_keys, self.key_separator, self.item_separator, self.sort_keys,
self.skipkeys, _one_shot, self.use_decimal, self.skipkeys, self.use_decimal,
self.namedtuple_as_object, self.tuple_as_array, self.namedtuple_as_object, self.tuple_as_array,
int_as_string_bitcount, int_as_string_bitcount,
self.item_sort_key, self.encoding, self.for_json, self.item_sort_key, self.encoding, self.for_json,
@ -398,14 +398,14 @@ class JSONEncoderForHTML(JSONEncoder):
def encode(self, o): def encode(self, o):
# Override JSONEncoder.encode because it has hacks for # Override JSONEncoder.encode because it has hacks for
# performance that make things more complicated. # performance that make things more complicated.
chunks = self.iterencode(o, True) chunks = self.iterencode(o)
if self.ensure_ascii: if self.ensure_ascii:
return ''.join(chunks) return ''.join(chunks)
else: else:
return u''.join(chunks) return u''.join(chunks)
def iterencode(self, o, _one_shot=False): def iterencode(self, o):
chunks = super(JSONEncoderForHTML, self).iterencode(o, _one_shot) chunks = super(JSONEncoderForHTML, self).iterencode(o)
for chunk in chunks: for chunk in chunks:
chunk = chunk.replace('&', '\\u0026') chunk = chunk.replace('&', '\\u0026')
chunk = chunk.replace('<', '\\u003c') chunk = chunk.replace('<', '\\u003c')
@ -419,7 +419,7 @@ class JSONEncoderForHTML(JSONEncoder):
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
_key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, _key_separator, _item_separator, _sort_keys, _skipkeys,
_use_decimal, _namedtuple_as_object, _tuple_as_array, _use_decimal, _namedtuple_as_object, _tuple_as_array,
_int_as_string_bitcount, _item_sort_key, _int_as_string_bitcount, _item_sort_key,
_encoding,_for_json, _encoding,_for_json,

View file

@ -60,11 +60,11 @@ def py_make_scanner(context):
else: else:
res = parse_int(integer) res = parse_int(integer)
return res, m.end() return res, m.end()
elif nextchar == 'N' and string[idx:idx + 3] == 'NaN': elif parse_constant and nextchar == 'N' and string[idx:idx + 3] == 'NaN':
return parse_constant('NaN'), idx + 3 return parse_constant('NaN'), idx + 3
elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity': elif parse_constant and nextchar == 'I' and string[idx:idx + 8] == 'Infinity':
return parse_constant('Infinity'), idx + 8 return parse_constant('Infinity'), idx + 8
elif nextchar == '-' and string[idx:idx + 9] == '-Infinity': elif parse_constant and nextchar == '-' and string[idx:idx + 9] == '-Infinity':
return parse_constant('-Infinity'), idx + 9 return parse_constant('-Infinity'), idx + 9
else: else:
raise JSONDecodeError(errmsg, string, idx) raise JSONDecodeError(errmsg, string, idx)

View file

@ -2,6 +2,7 @@ from __future__ import absolute_import
import decimal import decimal
from unittest import TestCase from unittest import TestCase
import sys
import simplejson as json import simplejson as json
from simplejson.compat import StringIO, b, binary_type from simplejson.compat import StringIO, b, binary_type
from simplejson import OrderedDict from simplejson import OrderedDict
@ -117,3 +118,10 @@ class TestDecode(TestCase):
diff = id(x) - id(y) diff = id(x) - id(y)
self.assertRaises(ValueError, j.scan_once, y, diff) self.assertRaises(ValueError, j.scan_once, y, diff)
self.assertRaises(ValueError, j.raw_decode, y, i) self.assertRaises(ValueError, j.raw_decode, y, i)
def test_bounded_int(self):
# SJ-PT-23-03, limit quadratic number parsing per Python 3.11
max_str_digits = getattr(sys, 'get_int_max_str_digits', lambda: 4300)()
s = '1' + '0' * (max_str_digits - 1)
self.assertEqual(json.loads(s), int(s))
self.assertRaises(ValueError, json.loads, s + '0')

View file

@ -145,7 +145,7 @@ class TestFail(TestCase):
('["spam', 'Unterminated string starting at', 1), ('["spam', 'Unterminated string starting at', 1),
('["spam"', "Expecting ',' delimiter", 7), ('["spam"', "Expecting ',' delimiter", 7),
('["spam",', 'Expecting value', 8), ('["spam",', 'Expecting value', 8),
('{', 'Expecting property name enclosed in double quotes', 1), ('{', "Expecting property name enclosed in double quotes or '}'", 1),
('{"', 'Unterminated string starting at', 1), ('{"', 'Unterminated string starting at', 1),
('{"spam', 'Unterminated string starting at', 1), ('{"spam', 'Unterminated string starting at', 1),
('{"spam"', "Expecting ':' delimiter", 7), ('{"spam"', "Expecting ':' delimiter", 7),
@ -156,6 +156,8 @@ class TestFail(TestCase):
('"', 'Unterminated string starting at', 0), ('"', 'Unterminated string starting at', 0),
('"spam', 'Unterminated string starting at', 0), ('"spam', 'Unterminated string starting at', 0),
('[,', "Expecting value", 1), ('[,', "Expecting value", 1),
('--', 'Expecting value', 0),
('"\x18d', "Invalid control character %r", 1),
] ]
for data, msg, idx in test_cases: for data, msg, idx in test_cases:
try: try:

View file

@ -7,9 +7,9 @@ from simplejson.decoder import NaN, PosInf, NegInf
class TestFloat(TestCase): class TestFloat(TestCase):
def test_degenerates_allow(self): def test_degenerates_allow(self):
for inf in (PosInf, NegInf): for inf in (PosInf, NegInf):
self.assertEqual(json.loads(json.dumps(inf)), inf) self.assertEqual(json.loads(json.dumps(inf, allow_nan=True), allow_nan=True), inf)
# Python 2.5 doesn't have math.isnan # Python 2.5 doesn't have math.isnan
nan = json.loads(json.dumps(NaN)) nan = json.loads(json.dumps(NaN, allow_nan=True), allow_nan=True)
self.assertTrue((0 + nan) != nan) self.assertTrue((0 + nan) != nan)
def test_degenerates_ignore(self): def test_degenerates_ignore(self):
@ -19,6 +19,9 @@ class TestFloat(TestCase):
def test_degenerates_deny(self): def test_degenerates_deny(self):
for f in (PosInf, NegInf, NaN): for f in (PosInf, NegInf, NaN):
self.assertRaises(ValueError, json.dumps, f, allow_nan=False) self.assertRaises(ValueError, json.dumps, f, allow_nan=False)
for s in ('Infinity', '-Infinity', 'NaN'):
self.assertRaises(ValueError, json.loads, s, allow_nan=False)
self.assertRaises(ValueError, json.loads, s)
def test_floats(self): def test_floats(self):
for num in [1617161771.7650001, math.pi, math.pi**100, for num in [1617161771.7650001, math.pi, math.pi**100,

View file

@ -132,7 +132,9 @@ class TestScanString(TestCase):
self.assertRaises(ValueError, self.assertRaises(ValueError,
scanstring, '\\ud834\\x0123"', 0, None, True) scanstring, '\\ud834\\x0123"', 0, None, True)
self.assertRaises(json.JSONDecodeError, scanstring, "\\u-123", 0, None, True) self.assertRaises(json.JSONDecodeError, scanstring, '\\u-123"', 0, None, True)
# SJ-PT-23-01: Invalid Handling of Broken Unicode Escape Sequences
self.assertRaises(json.JSONDecodeError, scanstring, '\\u EDD"', 0, None, True)
def test_issue3623(self): def test_issue3623(self):
self.assertRaises(ValueError, json.decoder.scanstring, "xxx", 1, self.assertRaises(ValueError, json.decoder.scanstring, "xxx", 1,

View file

@ -32,7 +32,7 @@ from . import css_match as cm
from . import css_types as ct from . import css_types as ct
from .util import DEBUG, SelectorSyntaxError # noqa: F401 from .util import DEBUG, SelectorSyntaxError # noqa: F401
import bs4 # type: ignore[import] import bs4 # type: ignore[import]
from typing import Optional, Any, Iterator, Iterable from typing import Any, Iterator, Iterable
__all__ = ( __all__ = (
'DEBUG', 'SelectorSyntaxError', 'SoupSieve', 'DEBUG', 'SelectorSyntaxError', 'SoupSieve',
@ -45,10 +45,10 @@ SoupSieve = cm.SoupSieve
def compile( # noqa: A001 def compile( # noqa: A001
pattern: str, pattern: str,
namespaces: Optional[dict[str, str]] = None, namespaces: dict[str, str] | None = None,
flags: int = 0, flags: int = 0,
*, *,
custom: Optional[dict[str, str]] = None, custom: dict[str, str] | None = None,
**kwargs: Any **kwargs: Any
) -> cm.SoupSieve: ) -> cm.SoupSieve:
"""Compile CSS pattern.""" """Compile CSS pattern."""
@ -79,10 +79,10 @@ def purge() -> None:
def closest( def closest(
select: str, select: str,
tag: 'bs4.Tag', tag: 'bs4.Tag',
namespaces: Optional[dict[str, str]] = None, namespaces: dict[str, str] | None = None,
flags: int = 0, flags: int = 0,
*, *,
custom: Optional[dict[str, str]] = None, custom: dict[str, str] | None = None,
**kwargs: Any **kwargs: Any
) -> 'bs4.Tag': ) -> 'bs4.Tag':
"""Match closest ancestor.""" """Match closest ancestor."""
@ -93,10 +93,10 @@ def closest(
def match( def match(
select: str, select: str,
tag: 'bs4.Tag', tag: 'bs4.Tag',
namespaces: Optional[dict[str, str]] = None, namespaces: dict[str, str] | None = None,
flags: int = 0, flags: int = 0,
*, *,
custom: Optional[dict[str, str]] = None, custom: dict[str, str] | None = None,
**kwargs: Any **kwargs: Any
) -> bool: ) -> bool:
"""Match node.""" """Match node."""
@ -107,10 +107,10 @@ def match(
def filter( # noqa: A001 def filter( # noqa: A001
select: str, select: str,
iterable: Iterable['bs4.Tag'], iterable: Iterable['bs4.Tag'],
namespaces: Optional[dict[str, str]] = None, namespaces: dict[str, str] | None = None,
flags: int = 0, flags: int = 0,
*, *,
custom: Optional[dict[str, str]] = None, custom: dict[str, str] | None = None,
**kwargs: Any **kwargs: Any
) -> list['bs4.Tag']: ) -> list['bs4.Tag']:
"""Filter list of nodes.""" """Filter list of nodes."""
@ -121,10 +121,10 @@ def filter( # noqa: A001
def select_one( def select_one(
select: str, select: str,
tag: 'bs4.Tag', tag: 'bs4.Tag',
namespaces: Optional[dict[str, str]] = None, namespaces: dict[str, str] | None = None,
flags: int = 0, flags: int = 0,
*, *,
custom: Optional[dict[str, str]] = None, custom: dict[str, str] | None = None,
**kwargs: Any **kwargs: Any
) -> 'bs4.Tag': ) -> 'bs4.Tag':
"""Select a single tag.""" """Select a single tag."""
@ -135,11 +135,11 @@ def select_one(
def select( def select(
select: str, select: str,
tag: 'bs4.Tag', tag: 'bs4.Tag',
namespaces: Optional[dict[str, str]] = None, namespaces: dict[str, str] | None = None,
limit: int = 0, limit: int = 0,
flags: int = 0, flags: int = 0,
*, *,
custom: Optional[dict[str, str]] = None, custom: dict[str, str] | None = None,
**kwargs: Any **kwargs: Any
) -> list['bs4.Tag']: ) -> list['bs4.Tag']:
"""Select the specified tags.""" """Select the specified tags."""
@ -150,11 +150,11 @@ def select(
def iselect( def iselect(
select: str, select: str,
tag: 'bs4.Tag', tag: 'bs4.Tag',
namespaces: Optional[dict[str, str]] = None, namespaces: dict[str, str] | None = None,
limit: int = 0, limit: int = 0,
flags: int = 0, flags: int = 0,
*, *,
custom: Optional[dict[str, str]] = None, custom: dict[str, str] | None = None,
**kwargs: Any **kwargs: Any
) -> Iterator['bs4.Tag']: ) -> Iterator['bs4.Tag']:
"""Iterate the specified tags.""" """Iterate the specified tags."""

View file

@ -193,5 +193,5 @@ def parse_version(ver: str) -> Version:
return Version(major, minor, micro, release, pre, post, dev) return Version(major, minor, micro, release, pre, post, dev)
__version_info__ = Version(2, 4, 0, "final") __version_info__ = Version(2, 4, 1, "final")
__version__ = __version_info__._get_canonical() __version__ = __version_info__._get_canonical()

View file

@ -6,7 +6,7 @@ import re
from . import css_types as ct from . import css_types as ct
import unicodedata import unicodedata
import bs4 # type: ignore[import] import bs4 # type: ignore[import]
from typing import Iterator, Iterable, Any, Optional, Callable, Sequence, cast # noqa: F401 from typing import Iterator, Iterable, Any, Callable, Sequence, cast # noqa: F401
# Empty tag pattern (whitespace okay) # Empty tag pattern (whitespace okay)
RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]') RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]')
@ -171,7 +171,7 @@ class _DocumentNav:
def get_children( def get_children(
self, self,
el: bs4.Tag, el: bs4.Tag,
start: Optional[int] = None, start: int | None = None,
reverse: bool = False, reverse: bool = False,
tags: bool = True, tags: bool = True,
no_iframe: bool = False no_iframe: bool = False
@ -239,22 +239,22 @@ class _DocumentNav:
return parent return parent
@staticmethod @staticmethod
def get_tag_name(el: bs4.Tag) -> Optional[str]: def get_tag_name(el: bs4.Tag) -> str | None:
"""Get tag.""" """Get tag."""
return cast(Optional[str], el.name) return cast('str | None', el.name)
@staticmethod @staticmethod
def get_prefix_name(el: bs4.Tag) -> Optional[str]: def get_prefix_name(el: bs4.Tag) -> str | None:
"""Get prefix.""" """Get prefix."""
return cast(Optional[str], el.prefix) return cast('str | None', el.prefix)
@staticmethod @staticmethod
def get_uri(el: bs4.Tag) -> Optional[str]: def get_uri(el: bs4.Tag) -> str | None:
"""Get namespace `URI`.""" """Get namespace `URI`."""
return cast(Optional[str], el.namespace) return cast('str | None', el.namespace)
@classmethod @classmethod
def get_next(cls, el: bs4.Tag, tags: bool = True) -> bs4.PageElement: def get_next(cls, el: bs4.Tag, tags: bool = True) -> bs4.PageElement:
@ -287,7 +287,7 @@ class _DocumentNav:
return bool(ns and ns == NS_XHTML) return bool(ns and ns == NS_XHTML)
@staticmethod @staticmethod
def split_namespace(el: bs4.Tag, attr_name: str) -> tuple[Optional[str], Optional[str]]: def split_namespace(el: bs4.Tag, attr_name: str) -> tuple[str | None, str | None]:
"""Return namespace and attribute name without the prefix.""" """Return namespace and attribute name without the prefix."""
return getattr(attr_name, 'namespace', None), getattr(attr_name, 'name', None) return getattr(attr_name, 'namespace', None), getattr(attr_name, 'name', None)
@ -330,8 +330,8 @@ class _DocumentNav:
cls, cls,
el: bs4.Tag, el: bs4.Tag,
name: str, name: str,
default: Optional[str | Sequence[str]] = None default: str | Sequence[str] | None = None
) -> Optional[str | Sequence[str]]: ) -> str | Sequence[str] | None:
"""Get attribute by name.""" """Get attribute by name."""
value = default value = default
@ -348,7 +348,7 @@ class _DocumentNav:
return value return value
@classmethod @classmethod
def iter_attributes(cls, el: bs4.Tag) -> Iterator[tuple[str, Optional[str | Sequence[str]]]]: def iter_attributes(cls, el: bs4.Tag) -> Iterator[tuple[str, str | Sequence[str] | None]]:
"""Iterate attributes.""" """Iterate attributes."""
for k, v in el.attrs.items(): for k, v in el.attrs.items():
@ -424,10 +424,10 @@ class Inputs:
return 0 <= minutes <= 59 return 0 <= minutes <= 59
@classmethod @classmethod
def parse_value(cls, itype: str, value: Optional[str]) -> Optional[tuple[float, ...]]: def parse_value(cls, itype: str, value: str | None) -> tuple[float, ...] | None:
"""Parse the input value.""" """Parse the input value."""
parsed = None # type: Optional[tuple[float, ...]] parsed = None # type: tuple[float, ...] | None
if value is None: if value is None:
return value return value
if itype == "date": if itype == "date":
@ -486,7 +486,7 @@ class CSSMatch(_DocumentNav):
self, self,
selectors: ct.SelectorList, selectors: ct.SelectorList,
scope: bs4.Tag, scope: bs4.Tag,
namespaces: Optional[ct.Namespaces], namespaces: ct.Namespaces | None,
flags: int flags: int
) -> None: ) -> None:
"""Initialize.""" """Initialize."""
@ -545,19 +545,19 @@ class CSSMatch(_DocumentNav):
return self.get_tag_ns(el) == NS_XHTML return self.get_tag_ns(el) == NS_XHTML
def get_tag(self, el: bs4.Tag) -> Optional[str]: def get_tag(self, el: bs4.Tag) -> str | None:
"""Get tag.""" """Get tag."""
name = self.get_tag_name(el) name = self.get_tag_name(el)
return util.lower(name) if name is not None and not self.is_xml else name return util.lower(name) if name is not None and not self.is_xml else name
def get_prefix(self, el: bs4.Tag) -> Optional[str]: def get_prefix(self, el: bs4.Tag) -> str | None:
"""Get prefix.""" """Get prefix."""
prefix = self.get_prefix_name(el) prefix = self.get_prefix_name(el)
return util.lower(prefix) if prefix is not None and not self.is_xml else prefix return util.lower(prefix) if prefix is not None and not self.is_xml else prefix
def find_bidi(self, el: bs4.Tag) -> Optional[int]: def find_bidi(self, el: bs4.Tag) -> int | None:
"""Get directionality from element text.""" """Get directionality from element text."""
for node in self.get_children(el, tags=False): for node in self.get_children(el, tags=False):
@ -653,8 +653,8 @@ class CSSMatch(_DocumentNav):
self, self,
el: bs4.Tag, el: bs4.Tag,
attr: str, attr: str,
prefix: Optional[str] prefix: str | None
) -> Optional[str | Sequence[str]]: ) -> str | Sequence[str] | None:
"""Match attribute name and return value if it exists.""" """Match attribute name and return value if it exists."""
value = None value = None
@ -751,7 +751,7 @@ class CSSMatch(_DocumentNav):
name not in (self.get_tag(el), '*') name not in (self.get_tag(el), '*')
) )
def match_tag(self, el: bs4.Tag, tag: Optional[ct.SelectorTag]) -> bool: def match_tag(self, el: bs4.Tag, tag: ct.SelectorTag | None) -> bool:
"""Match the tag.""" """Match the tag."""
match = True match = True
@ -1030,7 +1030,7 @@ class CSSMatch(_DocumentNav):
"""Match element if it contains text.""" """Match element if it contains text."""
match = True match = True
content = None # type: Optional[str | Sequence[str]] content = None # type: str | Sequence[str] | None
for contain_list in contains: for contain_list in contains:
if content is None: if content is None:
if contain_list.own: if contain_list.own:
@ -1099,7 +1099,7 @@ class CSSMatch(_DocumentNav):
match = False match = False
name = cast(str, self.get_attribute_by_name(el, 'name')) name = cast(str, self.get_attribute_by_name(el, 'name'))
def get_parent_form(el: bs4.Tag) -> Optional[bs4.Tag]: def get_parent_form(el: bs4.Tag) -> bs4.Tag | None:
"""Find this input's form.""" """Find this input's form."""
form = None form = None
parent = self.get_parent(el, no_iframe=True) parent = self.get_parent(el, no_iframe=True)
@ -1478,7 +1478,7 @@ class CSSMatch(_DocumentNav):
if lim < 1: if lim < 1:
break break
def closest(self) -> Optional[bs4.Tag]: def closest(self) -> bs4.Tag | None:
"""Match closest ancestor.""" """Match closest ancestor."""
current = self.tag current = self.tag
@ -1506,7 +1506,7 @@ class SoupSieve(ct.Immutable):
pattern: str pattern: str
selectors: ct.SelectorList selectors: ct.SelectorList
namespaces: Optional[ct.Namespaces] namespaces: ct.Namespaces | None
custom: dict[str, str] custom: dict[str, str]
flags: int flags: int
@ -1516,8 +1516,8 @@ class SoupSieve(ct.Immutable):
self, self,
pattern: str, pattern: str,
selectors: ct.SelectorList, selectors: ct.SelectorList,
namespaces: Optional[ct.Namespaces], namespaces: ct.Namespaces | None,
custom: Optional[ct.CustomSelectors], custom: ct.CustomSelectors | None,
flags: int flags: int
): ):
"""Initialize.""" """Initialize."""

View file

@ -7,7 +7,7 @@ from . import css_match as cm
from . import css_types as ct from . import css_types as ct
from .util import SelectorSyntaxError from .util import SelectorSyntaxError
import warnings import warnings
from typing import Optional, Match, Any, Iterator, cast from typing import Match, Any, Iterator, cast
UNICODE_REPLACEMENT_CHAR = 0xFFFD UNICODE_REPLACEMENT_CHAR = 0xFFFD
@ -113,7 +113,7 @@ VALUE = r'''
'''.format(nl=NEWLINE, ident=IDENTIFIER) '''.format(nl=NEWLINE, ident=IDENTIFIER)
# Attribute value comparison. `!=` is handled special as it is non-standard. # Attribute value comparison. `!=` is handled special as it is non-standard.
ATTR = r''' ATTR = r'''
(?:{ws}*(?P<cmp>[!~^|*$]?=){ws}*(?P<value>{value})(?:{ws}+(?P<case>[is]))?)?{ws}*\] (?:{ws}*(?P<cmp>[!~^|*$]?=){ws}*(?P<value>{value})(?:{ws}*(?P<case>[is]))?)?{ws}*\]
'''.format(ws=WSC, value=VALUE) '''.format(ws=WSC, value=VALUE)
# Selector patterns # Selector patterns
@ -207,8 +207,8 @@ _MAXCACHE = 500
@lru_cache(maxsize=_MAXCACHE) @lru_cache(maxsize=_MAXCACHE)
def _cached_css_compile( def _cached_css_compile(
pattern: str, pattern: str,
namespaces: Optional[ct.Namespaces], namespaces: ct.Namespaces | None,
custom: Optional[ct.CustomSelectors], custom: ct.CustomSelectors | None,
flags: int flags: int
) -> cm.SoupSieve: ) -> cm.SoupSieve:
"""Cached CSS compile.""" """Cached CSS compile."""
@ -233,7 +233,7 @@ def _purge_cache() -> None:
_cached_css_compile.cache_clear() _cached_css_compile.cache_clear()
def process_custom(custom: Optional[ct.CustomSelectors]) -> dict[str, str | ct.SelectorList]: def process_custom(custom: ct.CustomSelectors | None) -> dict[str, str | ct.SelectorList]:
"""Process custom.""" """Process custom."""
custom_selectors = {} custom_selectors = {}
@ -317,7 +317,7 @@ class SelectorPattern:
return self.name return self.name
def match(self, selector: str, index: int, flags: int) -> Optional[Match[str]]: def match(self, selector: str, index: int, flags: int) -> Match[str] | None:
"""Match the selector.""" """Match the selector."""
return self.re_pattern.match(selector, index) return self.re_pattern.match(selector, index)
@ -336,7 +336,7 @@ class SpecialPseudoPattern(SelectorPattern):
for pseudo in p[1]: for pseudo in p[1]:
self.patterns[pseudo] = pattern self.patterns[pseudo] = pattern
self.matched_name = None # type: Optional[SelectorPattern] self.matched_name = None # type: SelectorPattern | None
self.re_pseudo_name = re.compile(PAT_PSEUDO_CLASS_SPECIAL, re.I | re.X | re.U) self.re_pseudo_name = re.compile(PAT_PSEUDO_CLASS_SPECIAL, re.I | re.X | re.U)
def get_name(self) -> str: def get_name(self) -> str:
@ -344,7 +344,7 @@ class SpecialPseudoPattern(SelectorPattern):
return '' if self.matched_name is None else self.matched_name.get_name() return '' if self.matched_name is None else self.matched_name.get_name()
def match(self, selector: str, index: int, flags: int) -> Optional[Match[str]]: def match(self, selector: str, index: int, flags: int) -> Match[str] | None:
"""Match the selector.""" """Match the selector."""
pseudo = None pseudo = None
@ -372,14 +372,14 @@ class _Selector:
def __init__(self, **kwargs: Any) -> None: def __init__(self, **kwargs: Any) -> None:
"""Initialize.""" """Initialize."""
self.tag = kwargs.get('tag', None) # type: Optional[ct.SelectorTag] self.tag = kwargs.get('tag', None) # type: ct.SelectorTag | None
self.ids = kwargs.get('ids', []) # type: list[str] self.ids = kwargs.get('ids', []) # type: list[str]
self.classes = kwargs.get('classes', []) # type: list[str] self.classes = kwargs.get('classes', []) # type: list[str]
self.attributes = kwargs.get('attributes', []) # type: list[ct.SelectorAttribute] self.attributes = kwargs.get('attributes', []) # type: list[ct.SelectorAttribute]
self.nth = kwargs.get('nth', []) # type: list[ct.SelectorNth] self.nth = kwargs.get('nth', []) # type: list[ct.SelectorNth]
self.selectors = kwargs.get('selectors', []) # type: list[ct.SelectorList] self.selectors = kwargs.get('selectors', []) # type: list[ct.SelectorList]
self.relations = kwargs.get('relations', []) # type: list[_Selector] self.relations = kwargs.get('relations', []) # type: list[_Selector]
self.rel_type = kwargs.get('rel_type', None) # type: Optional[str] self.rel_type = kwargs.get('rel_type', None) # type: str | None
self.contains = kwargs.get('contains', []) # type: list[ct.SelectorContains] self.contains = kwargs.get('contains', []) # type: list[ct.SelectorContains]
self.lang = kwargs.get('lang', []) # type: list[ct.SelectorLang] self.lang = kwargs.get('lang', []) # type: list[ct.SelectorLang]
self.flags = kwargs.get('flags', 0) # type: int self.flags = kwargs.get('flags', 0) # type: int
@ -462,7 +462,7 @@ class CSSParser:
def __init__( def __init__(
self, self,
selector: str, selector: str,
custom: Optional[dict[str, str | ct.SelectorList]] = None, custom: dict[str, str | ct.SelectorList] | None = None,
flags: int = 0 flags: int = 0
) -> None: ) -> None:
"""Initialize.""" """Initialize."""

View file

@ -2,7 +2,7 @@
from __future__ import annotations from __future__ import annotations
import copyreg import copyreg
from .pretty import pretty from .pretty import pretty
from typing import Any, Iterator, Hashable, Optional, Pattern, Iterable, Mapping from typing import Any, Iterator, Hashable, Pattern, Iterable, Mapping
__all__ = ( __all__ = (
'Selector', 'Selector',
@ -189,28 +189,28 @@ class Selector(Immutable):
'relation', 'rel_type', 'contains', 'lang', 'flags', '_hash' 'relation', 'rel_type', 'contains', 'lang', 'flags', '_hash'
) )
tag: Optional[SelectorTag] tag: SelectorTag | None
ids: tuple[str, ...] ids: tuple[str, ...]
classes: tuple[str, ...] classes: tuple[str, ...]
attributes: tuple[SelectorAttribute, ...] attributes: tuple[SelectorAttribute, ...]
nth: tuple[SelectorNth, ...] nth: tuple[SelectorNth, ...]
selectors: tuple[SelectorList, ...] selectors: tuple[SelectorList, ...]
relation: SelectorList relation: SelectorList
rel_type: Optional[str] rel_type: str | None
contains: tuple[SelectorContains, ...] contains: tuple[SelectorContains, ...]
lang: tuple[SelectorLang, ...] lang: tuple[SelectorLang, ...]
flags: int flags: int
def __init__( def __init__(
self, self,
tag: Optional[SelectorTag], tag: SelectorTag | None,
ids: tuple[str, ...], ids: tuple[str, ...],
classes: tuple[str, ...], classes: tuple[str, ...],
attributes: tuple[SelectorAttribute, ...], attributes: tuple[SelectorAttribute, ...],
nth: tuple[SelectorNth, ...], nth: tuple[SelectorNth, ...],
selectors: tuple[SelectorList, ...], selectors: tuple[SelectorList, ...],
relation: SelectorList, relation: SelectorList,
rel_type: Optional[str], rel_type: str | None,
contains: tuple[SelectorContains, ...], contains: tuple[SelectorContains, ...],
lang: tuple[SelectorLang, ...], lang: tuple[SelectorLang, ...],
flags: int flags: int
@ -247,9 +247,9 @@ class SelectorTag(Immutable):
__slots__ = ("name", "prefix", "_hash") __slots__ = ("name", "prefix", "_hash")
name: str name: str
prefix: Optional[str] prefix: str | None
def __init__(self, name: str, prefix: Optional[str]) -> None: def __init__(self, name: str, prefix: str | None) -> None:
"""Initialize.""" """Initialize."""
super().__init__(name=name, prefix=prefix) super().__init__(name=name, prefix=prefix)
@ -262,15 +262,15 @@ class SelectorAttribute(Immutable):
attribute: str attribute: str
prefix: str prefix: str
pattern: Optional[Pattern[str]] pattern: Pattern[str] | None
xml_type_pattern: Optional[Pattern[str]] xml_type_pattern: Pattern[str] | None
def __init__( def __init__(
self, self,
attribute: str, attribute: str,
prefix: str, prefix: str,
pattern: Optional[Pattern[str]], pattern: Pattern[str] | None,
xml_type_pattern: Optional[Pattern[str]] xml_type_pattern: Pattern[str] | None
) -> None: ) -> None:
"""Initialize.""" """Initialize."""
@ -360,7 +360,7 @@ class SelectorList(Immutable):
def __init__( def __init__(
self, self,
selectors: Optional[Iterable[Selector | SelectorNull]] = None, selectors: Iterable[Selector | SelectorNull] | None = None,
is_not: bool = False, is_not: bool = False,
is_html: bool = False is_html: bool = False
) -> None: ) -> None:

View file

@ -3,7 +3,7 @@ from __future__ import annotations
from functools import wraps, lru_cache from functools import wraps, lru_cache
import warnings import warnings
import re import re
from typing import Callable, Any, Optional from typing import Callable, Any
DEBUG = 0x00001 DEBUG = 0x00001
@ -27,7 +27,7 @@ def lower(string: str) -> str:
class SelectorSyntaxError(Exception): class SelectorSyntaxError(Exception):
"""Syntax error in a CSS selector.""" """Syntax error in a CSS selector."""
def __init__(self, msg: str, pattern: Optional[str] = None, index: Optional[int] = None) -> None: def __init__(self, msg: str, pattern: str | None = None, index: int | None = None) -> None:
"""Initialize.""" """Initialize."""
self.line = None self.line = None
@ -84,7 +84,7 @@ def get_pattern_context(pattern: str, index: int) -> tuple[str, int, int]:
col = 1 col = 1
text = [] # type: list[str] text = [] # type: list[str]
line = 1 line = 1
offset = None # type: Optional[int] offset = None # type: int | None
# Split pattern by newline and handle the text before the newline # Split pattern by newline and handle the text before the newline
for m in RE_PATTERN_LINE_SPLIT.finditer(pattern): for m in RE_PATTERN_LINE_SPLIT.finditer(pattern):

View file

@ -1,6 +1,6 @@
# IANA versions like 2020a are not valid PEP 440 identifiers; the recommended # IANA versions like 2020a are not valid PEP 440 identifiers; the recommended
# way to translate the version is to use YYYY.n where `n` is a 0-based index. # way to translate the version is to use YYYY.n where `n` is a 0-based index.
__version__ = "2022.7" __version__ = "2023.3"
# This exposes the original IANA version number. # This exposes the original IANA version number.
IANA_VERSION = "2022g" IANA_VERSION = "2023c"

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -238,7 +238,7 @@ SY Syria
SZ Eswatini (Swaziland) SZ Eswatini (Swaziland)
TC Turks & Caicos Is TC Turks & Caicos Is
TD Chad TD Chad
TF French Southern Territories TF French S. Terr.
TG Togo TG Togo
TH Thailand TH Thailand
TJ Tajikistan TJ Tajikistan

View file

@ -72,11 +72,11 @@ Leap 2016 Dec 31 23:59:60 + S
# Any additional leap seconds will come after this. # Any additional leap seconds will come after this.
# This Expires line is commented out for now, # This Expires line is commented out for now,
# so that pre-2020a zic implementations do not reject this file. # so that pre-2020a zic implementations do not reject this file.
#Expires 2023 Jun 28 00:00:00 #Expires 2023 Dec 28 00:00:00
# POSIX timestamps for the data in this file: # POSIX timestamps for the data in this file:
#updated 1467936000 (2016-07-08 00:00:00 UTC) #updated 1467936000 (2016-07-08 00:00:00 UTC)
#expires 1687910400 (2023-06-28 00:00:00 UTC) #expires 1703721600 (2023-12-28 00:00:00 UTC)
# Updated through IERS Bulletin C64 # Updated through IERS Bulletin C65
# File expires on: 28 June 2023 # File expires on: 28 December 2023

View file

@ -1,4 +1,4 @@
# version 2022g # version 2023c
# This zic input file is in the public domain. # This zic input file is in the public domain.
R d 1916 o - Jun 14 23s 1 S R d 1916 o - Jun 14 23s 1 S
R d 1916 1919 - O Su>=1 23s 0 - R d 1916 1919 - O Su>=1 23s 0 -
@ -75,6 +75,8 @@ R K 2014 o - May 15 24 1 S
R K 2014 o - Jun 26 24 0 - R K 2014 o - Jun 26 24 0 -
R K 2014 o - Jul 31 24 1 S R K 2014 o - Jul 31 24 1 S
R K 2014 o - S lastTh 24 0 - R K 2014 o - S lastTh 24 0 -
R K 2023 ma - Ap lastF 0 1 S
R K 2023 ma - O lastTh 24 0 -
Z Africa/Cairo 2:5:9 - LMT 1900 O Z Africa/Cairo 2:5:9 - LMT 1900 O
2 K EE%sT 2 K EE%sT
Z Africa/Bissau -1:2:20 - LMT 1912 Ja 1 1u Z Africa/Bissau -1:2:20 - LMT 1912 Ja 1 1u
@ -172,7 +174,7 @@ R M 2021 o - May 16 2 0 -
R M 2022 o - Mar 27 3 -1 - R M 2022 o - Mar 27 3 -1 -
R M 2022 o - May 8 2 0 - R M 2022 o - May 8 2 0 -
R M 2023 o - Mar 19 3 -1 - R M 2023 o - Mar 19 3 -1 -
R M 2023 o - Ap 30 2 0 - R M 2023 o - Ap 23 2 0 -
R M 2024 o - Mar 10 3 -1 - R M 2024 o - Mar 10 3 -1 -
R M 2024 o - Ap 14 2 0 - R M 2024 o - Ap 14 2 0 -
R M 2025 o - F 23 3 -1 - R M 2025 o - F 23 3 -1 -
@ -188,7 +190,7 @@ R M 2029 o - F 18 2 0 -
R M 2029 o - D 30 3 -1 - R M 2029 o - D 30 3 -1 -
R M 2030 o - F 10 2 0 - R M 2030 o - F 10 2 0 -
R M 2030 o - D 22 3 -1 - R M 2030 o - D 22 3 -1 -
R M 2031 o - F 2 2 0 - R M 2031 o - Ja 26 2 0 -
R M 2031 o - D 14 3 -1 - R M 2031 o - D 14 3 -1 -
R M 2032 o - Ja 18 2 0 - R M 2032 o - Ja 18 2 0 -
R M 2032 o - N 28 3 -1 - R M 2032 o - N 28 3 -1 -
@ -204,7 +206,7 @@ R M 2036 o - N 23 2 0 -
R M 2037 o - O 4 3 -1 - R M 2037 o - O 4 3 -1 -
R M 2037 o - N 15 2 0 - R M 2037 o - N 15 2 0 -
R M 2038 o - S 26 3 -1 - R M 2038 o - S 26 3 -1 -
R M 2038 o - N 7 2 0 - R M 2038 o - O 31 2 0 -
R M 2039 o - S 18 3 -1 - R M 2039 o - S 18 3 -1 -
R M 2039 o - O 23 2 0 - R M 2039 o - O 23 2 0 -
R M 2040 o - S 2 3 -1 - R M 2040 o - S 2 3 -1 -
@ -220,7 +222,7 @@ R M 2044 o - Au 28 2 0 -
R M 2045 o - Jul 9 3 -1 - R M 2045 o - Jul 9 3 -1 -
R M 2045 o - Au 20 2 0 - R M 2045 o - Au 20 2 0 -
R M 2046 o - Jul 1 3 -1 - R M 2046 o - Jul 1 3 -1 -
R M 2046 o - Au 12 2 0 - R M 2046 o - Au 5 2 0 -
R M 2047 o - Jun 23 3 -1 - R M 2047 o - Jun 23 3 -1 -
R M 2047 o - Jul 28 2 0 - R M 2047 o - Jul 28 2 0 -
R M 2048 o - Jun 7 3 -1 - R M 2048 o - Jun 7 3 -1 -
@ -236,7 +238,7 @@ R M 2052 o - Jun 2 2 0 -
R M 2053 o - Ap 13 3 -1 - R M 2053 o - Ap 13 3 -1 -
R M 2053 o - May 25 2 0 - R M 2053 o - May 25 2 0 -
R M 2054 o - Ap 5 3 -1 - R M 2054 o - Ap 5 3 -1 -
R M 2054 o - May 17 2 0 - R M 2054 o - May 10 2 0 -
R M 2055 o - Mar 28 3 -1 - R M 2055 o - Mar 28 3 -1 -
R M 2055 o - May 2 2 0 - R M 2055 o - May 2 2 0 -
R M 2056 o - Mar 12 3 -1 - R M 2056 o - Mar 12 3 -1 -
@ -252,7 +254,7 @@ R M 2060 o - Mar 7 2 0 -
R M 2061 o - Ja 16 3 -1 - R M 2061 o - Ja 16 3 -1 -
R M 2061 o - F 27 2 0 - R M 2061 o - F 27 2 0 -
R M 2062 o - Ja 8 3 -1 - R M 2062 o - Ja 8 3 -1 -
R M 2062 o - F 19 2 0 - R M 2062 o - F 12 2 0 -
R M 2062 o - D 31 3 -1 - R M 2062 o - D 31 3 -1 -
R M 2063 o - F 4 2 0 - R M 2063 o - F 4 2 0 -
R M 2063 o - D 16 3 -1 - R M 2063 o - D 16 3 -1 -
@ -268,7 +270,7 @@ R M 2067 o - D 11 2 0 -
R M 2068 o - O 21 3 -1 - R M 2068 o - O 21 3 -1 -
R M 2068 o - D 2 2 0 - R M 2068 o - D 2 2 0 -
R M 2069 o - O 13 3 -1 - R M 2069 o - O 13 3 -1 -
R M 2069 o - N 24 2 0 - R M 2069 o - N 17 2 0 -
R M 2070 o - O 5 3 -1 - R M 2070 o - O 5 3 -1 -
R M 2070 o - N 9 2 0 - R M 2070 o - N 9 2 0 -
R M 2071 o - S 20 3 -1 - R M 2071 o - S 20 3 -1 -
@ -284,7 +286,7 @@ R M 2075 o - S 15 2 0 -
R M 2076 o - Jul 26 3 -1 - R M 2076 o - Jul 26 3 -1 -
R M 2076 o - S 6 2 0 - R M 2076 o - S 6 2 0 -
R M 2077 o - Jul 18 3 -1 - R M 2077 o - Jul 18 3 -1 -
R M 2077 o - Au 29 2 0 - R M 2077 o - Au 22 2 0 -
R M 2078 o - Jul 10 3 -1 - R M 2078 o - Jul 10 3 -1 -
R M 2078 o - Au 14 2 0 - R M 2078 o - Au 14 2 0 -
R M 2079 o - Jun 25 3 -1 - R M 2079 o - Jun 25 3 -1 -
@ -294,13 +296,13 @@ R M 2080 o - Jul 21 2 0 -
R M 2081 o - Jun 1 3 -1 - R M 2081 o - Jun 1 3 -1 -
R M 2081 o - Jul 13 2 0 - R M 2081 o - Jul 13 2 0 -
R M 2082 o - May 24 3 -1 - R M 2082 o - May 24 3 -1 -
R M 2082 o - Jul 5 2 0 - R M 2082 o - Jun 28 2 0 -
R M 2083 o - May 16 3 -1 - R M 2083 o - May 16 3 -1 -
R M 2083 o - Jun 20 2 0 - R M 2083 o - Jun 20 2 0 -
R M 2084 o - Ap 30 3 -1 - R M 2084 o - Ap 30 3 -1 -
R M 2084 o - Jun 11 2 0 - R M 2084 o - Jun 11 2 0 -
R M 2085 o - Ap 22 3 -1 - R M 2085 o - Ap 22 3 -1 -
R M 2085 o - Jun 3 2 0 - R M 2085 o - May 27 2 0 -
R M 2086 o - Ap 14 3 -1 - R M 2086 o - Ap 14 3 -1 -
R M 2086 o - May 19 2 0 - R M 2086 o - May 19 2 0 -
R M 2087 o - Mar 30 3 -1 - R M 2087 o - Mar 30 3 -1 -
@ -997,8 +999,86 @@ R P 2020 2021 - Mar Sa<=30 0 1 S
R P 2020 o - O 24 1 0 - R P 2020 o - O 24 1 0 -
R P 2021 o - O 29 1 0 - R P 2021 o - O 29 1 0 -
R P 2022 o - Mar 27 0 1 S R P 2022 o - Mar 27 0 1 S
R P 2022 ma - O Sa<=30 2 0 - R P 2022 2035 - O Sa<=30 2 0 -
R P 2023 ma - Mar Sa<=30 2 1 S R P 2023 o - Ap 29 2 1 S
R P 2024 o - Ap 13 2 1 S
R P 2025 o - Ap 5 2 1 S
R P 2026 2054 - Mar Sa<=30 2 1 S
R P 2036 o - O 18 2 0 -
R P 2037 o - O 10 2 0 -
R P 2038 o - S 25 2 0 -
R P 2039 o - S 17 2 0 -
R P 2039 o - O 22 2 1 S
R P 2039 2067 - O Sa<=30 2 0 -
R P 2040 o - S 1 2 0 -
R P 2040 o - O 13 2 1 S
R P 2041 o - Au 24 2 0 -
R P 2041 o - S 28 2 1 S
R P 2042 o - Au 16 2 0 -
R P 2042 o - S 20 2 1 S
R P 2043 o - Au 1 2 0 -
R P 2043 o - S 12 2 1 S
R P 2044 o - Jul 23 2 0 -
R P 2044 o - Au 27 2 1 S
R P 2045 o - Jul 15 2 0 -
R P 2045 o - Au 19 2 1 S
R P 2046 o - Jun 30 2 0 -
R P 2046 o - Au 11 2 1 S
R P 2047 o - Jun 22 2 0 -
R P 2047 o - Jul 27 2 1 S
R P 2048 o - Jun 6 2 0 -
R P 2048 o - Jul 18 2 1 S
R P 2049 o - May 29 2 0 -
R P 2049 o - Jul 3 2 1 S
R P 2050 o - May 21 2 0 -
R P 2050 o - Jun 25 2 1 S
R P 2051 o - May 6 2 0 -
R P 2051 o - Jun 17 2 1 S
R P 2052 o - Ap 27 2 0 -
R P 2052 o - Jun 1 2 1 S
R P 2053 o - Ap 12 2 0 -
R P 2053 o - May 24 2 1 S
R P 2054 o - Ap 4 2 0 -
R P 2054 o - May 16 2 1 S
R P 2055 o - May 1 2 1 S
R P 2056 o - Ap 22 2 1 S
R P 2057 o - Ap 7 2 1 S
R P 2058 ma - Mar Sa<=30 2 1 S
R P 2068 o - O 20 2 0 -
R P 2069 o - O 12 2 0 -
R P 2070 o - O 4 2 0 -
R P 2071 o - S 19 2 0 -
R P 2072 o - S 10 2 0 -
R P 2072 o - O 15 2 1 S
R P 2073 o - S 2 2 0 -
R P 2073 o - O 7 2 1 S
R P 2074 o - Au 18 2 0 -
R P 2074 o - S 29 2 1 S
R P 2075 o - Au 10 2 0 -
R P 2075 o - S 14 2 1 S
R P 2075 ma - O Sa<=30 2 0 -
R P 2076 o - Jul 25 2 0 -
R P 2076 o - S 5 2 1 S
R P 2077 o - Jul 17 2 0 -
R P 2077 o - Au 28 2 1 S
R P 2078 o - Jul 9 2 0 -
R P 2078 o - Au 13 2 1 S
R P 2079 o - Jun 24 2 0 -
R P 2079 o - Au 5 2 1 S
R P 2080 o - Jun 15 2 0 -
R P 2080 o - Jul 20 2 1 S
R P 2081 o - Jun 7 2 0 -
R P 2081 o - Jul 12 2 1 S
R P 2082 o - May 23 2 0 -
R P 2082 o - Jul 4 2 1 S
R P 2083 o - May 15 2 0 -
R P 2083 o - Jun 19 2 1 S
R P 2084 o - Ap 29 2 0 -
R P 2084 o - Jun 10 2 1 S
R P 2085 o - Ap 21 2 0 -
R P 2085 o - Jun 2 2 1 S
R P 2086 o - Ap 13 2 0 -
R P 2086 o - May 18 2 1 S
Z Asia/Gaza 2:17:52 - LMT 1900 O Z Asia/Gaza 2:17:52 - LMT 1900 O
2 Z EET/EEST 1948 May 15 2 Z EET/EEST 1948 May 15
2 K EE%sT 1967 Jun 5 2 K EE%sT 1967 Jun 5
@ -1754,8 +1834,8 @@ Z America/Scoresbysund -1:27:52 - LMT 1916 Jul 28
-1 E -01/+00 -1 E -01/+00
Z America/Nuuk -3:26:56 - LMT 1916 Jul 28 Z America/Nuuk -3:26:56 - LMT 1916 Jul 28
-3 - -03 1980 Ap 6 2 -3 - -03 1980 Ap 6 2
-3 E -03/-02 2023 Mar 25 22 -3 E -03/-02 2023 O 29 1u
-2 - -02 -2 E -02/-01
Z America/Thule -4:35:8 - LMT 1916 Jul 28 Z America/Thule -4:35:8 - LMT 1916 Jul 28
-4 Th A%sT -4 Th A%sT
Z Europe/Tallinn 1:39 - LMT 1880 Z Europe/Tallinn 1:39 - LMT 1880
@ -2175,13 +2255,13 @@ Z Europe/Volgograd 2:57:40 - LMT 1920 Ja 3
3 - +03 1930 Jun 21 3 - +03 1930 Jun 21
4 - +04 1961 N 11 4 - +04 1961 N 11
4 R +04/+05 1988 Mar 27 2s 4 R +04/+05 1988 Mar 27 2s
3 R +03/+04 1991 Mar 31 2s 3 R MSK/MSD 1991 Mar 31 2s
4 - +04 1992 Mar 29 2s 4 - +04 1992 Mar 29 2s
3 R +03/+04 2011 Mar 27 2s 3 R MSK/MSD 2011 Mar 27 2s
4 - +04 2014 O 26 2s 4 - MSK 2014 O 26 2s
3 - +03 2018 O 28 2s 3 - MSK 2018 O 28 2s
4 - +04 2020 D 27 2s 4 - +04 2020 D 27 2s
3 - +03 3 - MSK
Z Europe/Saratov 3:4:18 - LMT 1919 Jul 1 0u Z Europe/Saratov 3:4:18 - LMT 1919 Jul 1 0u
3 - +03 1930 Jun 21 3 - +03 1930 Jun 21
4 R +04/+05 1988 Mar 27 2s 4 R +04/+05 1988 Mar 27 2s
@ -2194,11 +2274,11 @@ Z Europe/Saratov 3:4:18 - LMT 1919 Jul 1 0u
Z Europe/Kirov 3:18:48 - LMT 1919 Jul 1 0u Z Europe/Kirov 3:18:48 - LMT 1919 Jul 1 0u
3 - +03 1930 Jun 21 3 - +03 1930 Jun 21
4 R +04/+05 1989 Mar 26 2s 4 R +04/+05 1989 Mar 26 2s
3 R +03/+04 1991 Mar 31 2s 3 R MSK/MSD 1991 Mar 31 2s
4 - +04 1992 Mar 29 2s 4 - +04 1992 Mar 29 2s
3 R +03/+04 2011 Mar 27 2s 3 R MSK/MSD 2011 Mar 27 2s
4 - +04 2014 O 26 2s 4 - MSK 2014 O 26 2s
3 - +03 3 - MSK
Z Europe/Samara 3:20:20 - LMT 1919 Jul 1 0u Z Europe/Samara 3:20:20 - LMT 1919 Jul 1 0u
3 - +03 1930 Jun 21 3 - +03 1930 Jun 21
4 - +04 1935 Ja 27 4 - +04 1935 Ja 27
@ -3070,9 +3150,6 @@ Z America/Cambridge_Bay 0 - -00 1920
-5 - EST 2000 N 5 -5 - EST 2000 N 5
-6 - CST 2001 Ap 1 3 -6 - CST 2001 Ap 1 3
-7 C M%sT -7 C M%sT
Z America/Yellowknife 0 - -00 1935
-7 Y M%sT 1980
-7 C M%sT
Z America/Inuvik 0 - -00 1953 Z America/Inuvik 0 - -00 1953
-8 Y P%sT 1979 Ap lastSu 2 -8 Y P%sT 1979 Ap lastSu 2
-7 Y M%sT 1980 -7 Y M%sT 1980
@ -4171,6 +4248,7 @@ L America/Argentina/Cordoba America/Rosario
L America/Tijuana America/Santa_Isabel L America/Tijuana America/Santa_Isabel
L America/Denver America/Shiprock L America/Denver America/Shiprock
L America/Toronto America/Thunder_Bay L America/Toronto America/Thunder_Bay
L America/Edmonton America/Yellowknife
L Pacific/Auckland Antarctica/South_Pole L Pacific/Auckland Antarctica/South_Pole
L Asia/Shanghai Asia/Chongqing L Asia/Shanghai Asia/Chongqing
L Asia/Shanghai Asia/Harbin L Asia/Shanghai Asia/Harbin

View file

@ -121,9 +121,8 @@ CA +744144-0944945 America/Resolute Central - NU (Resolute)
CA +624900-0920459 America/Rankin_Inlet Central - NU (central) CA +624900-0920459 America/Rankin_Inlet Central - NU (central)
CA +5024-10439 America/Regina CST - SK (most areas) CA +5024-10439 America/Regina CST - SK (most areas)
CA +5017-10750 America/Swift_Current CST - SK (midwest) CA +5017-10750 America/Swift_Current CST - SK (midwest)
CA +5333-11328 America/Edmonton Mountain - AB; BC (E); SK (W) CA +5333-11328 America/Edmonton Mountain - AB; BC (E); NT (E); SK (W)
CA +690650-1050310 America/Cambridge_Bay Mountain - NU (west) CA +690650-1050310 America/Cambridge_Bay Mountain - NU (west)
CA +6227-11421 America/Yellowknife Mountain - NT (central)
CA +682059-1334300 America/Inuvik Mountain - NT (west) CA +682059-1334300 America/Inuvik Mountain - NT (west)
CA +4906-11631 America/Creston MST - BC (Creston) CA +4906-11631 America/Creston MST - BC (Creston)
CA +5546-12014 America/Dawson_Creek MST - BC (Dawson Cr, Ft St John) CA +5546-12014 America/Dawson_Creek MST - BC (Dawson Cr, Ft St John)
@ -139,7 +138,7 @@ CG -0416+01517 Africa/Brazzaville
CH +4723+00832 Europe/Zurich CH +4723+00832 Europe/Zurich
CI +0519-00402 Africa/Abidjan CI +0519-00402 Africa/Abidjan
CK -2114-15946 Pacific/Rarotonga CK -2114-15946 Pacific/Rarotonga
CL -3327-07040 America/Santiago Chile (most areas) CL -3327-07040 America/Santiago most of Chile
CL -5309-07055 America/Punta_Arenas Region of Magallanes CL -5309-07055 America/Punta_Arenas Region of Magallanes
CL -2709-10926 Pacific/Easter Easter Island CL -2709-10926 Pacific/Easter Easter Island
CM +0403+00942 Africa/Douala CM +0403+00942 Africa/Douala
@ -151,10 +150,10 @@ CU +2308-08222 America/Havana
CV +1455-02331 Atlantic/Cape_Verde CV +1455-02331 Atlantic/Cape_Verde
CW +1211-06900 America/Curacao CW +1211-06900 America/Curacao
CX -1025+10543 Indian/Christmas CX -1025+10543 Indian/Christmas
CY +3510+03322 Asia/Nicosia Cyprus (most areas) CY +3510+03322 Asia/Nicosia most of Cyprus
CY +3507+03357 Asia/Famagusta Northern Cyprus CY +3507+03357 Asia/Famagusta Northern Cyprus
CZ +5005+01426 Europe/Prague CZ +5005+01426 Europe/Prague
DE +5230+01322 Europe/Berlin Germany (most areas) DE +5230+01322 Europe/Berlin most of Germany
DE +4742+00841 Europe/Busingen Busingen DE +4742+00841 Europe/Busingen Busingen
DJ +1136+04309 Africa/Djibouti DJ +1136+04309 Africa/Djibouti
DK +5540+01235 Europe/Copenhagen DK +5540+01235 Europe/Copenhagen
@ -187,7 +186,7 @@ GF +0456-05220 America/Cayenne
GG +492717-0023210 Europe/Guernsey GG +492717-0023210 Europe/Guernsey
GH +0533-00013 Africa/Accra GH +0533-00013 Africa/Accra
GI +3608-00521 Europe/Gibraltar GI +3608-00521 Europe/Gibraltar
GL +6411-05144 America/Nuuk Greenland (most areas) GL +6411-05144 America/Nuuk most of Greenland
GL +7646-01840 America/Danmarkshavn National Park (east coast) GL +7646-01840 America/Danmarkshavn National Park (east coast)
GL +7029-02158 America/Scoresbysund Scoresbysund/Ittoqqortoormiit GL +7029-02158 America/Scoresbysund Scoresbysund/Ittoqqortoormiit
GL +7634-06847 America/Thule Thule/Pituffik GL +7634-06847 America/Thule Thule/Pituffik
@ -235,7 +234,7 @@ KP +3901+12545 Asia/Pyongyang
KR +3733+12658 Asia/Seoul KR +3733+12658 Asia/Seoul
KW +2920+04759 Asia/Kuwait KW +2920+04759 Asia/Kuwait
KY +1918-08123 America/Cayman KY +1918-08123 America/Cayman
KZ +4315+07657 Asia/Almaty Kazakhstan (most areas) KZ +4315+07657 Asia/Almaty most of Kazakhstan
KZ +4448+06528 Asia/Qyzylorda Qyzylorda/Kyzylorda/Kzyl-Orda KZ +4448+06528 Asia/Qyzylorda Qyzylorda/Kyzylorda/Kzyl-Orda
KZ +5312+06337 Asia/Qostanay Qostanay/Kostanay/Kustanay KZ +5312+06337 Asia/Qostanay Qostanay/Kostanay/Kustanay
KZ +5017+05710 Asia/Aqtobe Aqtobe/Aktobe KZ +5017+05710 Asia/Aqtobe Aqtobe/Aktobe
@ -259,12 +258,12 @@ MD +4700+02850 Europe/Chisinau
ME +4226+01916 Europe/Podgorica ME +4226+01916 Europe/Podgorica
MF +1804-06305 America/Marigot MF +1804-06305 America/Marigot
MG -1855+04731 Indian/Antananarivo MG -1855+04731 Indian/Antananarivo
MH +0709+17112 Pacific/Majuro Marshall Islands (most areas) MH +0709+17112 Pacific/Majuro most of Marshall Islands
MH +0905+16720 Pacific/Kwajalein Kwajalein MH +0905+16720 Pacific/Kwajalein Kwajalein
MK +4159+02126 Europe/Skopje MK +4159+02126 Europe/Skopje
ML +1239-00800 Africa/Bamako ML +1239-00800 Africa/Bamako
MM +1647+09610 Asia/Yangon MM +1647+09610 Asia/Yangon
MN +4755+10653 Asia/Ulaanbaatar Mongolia (most areas) MN +4755+10653 Asia/Ulaanbaatar most of Mongolia
MN +4801+09139 Asia/Hovd Bayan-Olgiy, Govi-Altai, Hovd, Uvs, Zavkhan MN +4801+09139 Asia/Hovd Bayan-Olgiy, Govi-Altai, Hovd, Uvs, Zavkhan
MN +4804+11430 Asia/Choibalsan Dornod, Sukhbaatar MN +4804+11430 Asia/Choibalsan Dornod, Sukhbaatar
MO +221150+1133230 Asia/Macau MO +221150+1133230 Asia/Macau
@ -302,7 +301,7 @@ NO +5955+01045 Europe/Oslo
NP +2743+08519 Asia/Kathmandu NP +2743+08519 Asia/Kathmandu
NR -0031+16655 Pacific/Nauru NR -0031+16655 Pacific/Nauru
NU -1901-16955 Pacific/Niue NU -1901-16955 Pacific/Niue
NZ -3652+17446 Pacific/Auckland New Zealand (most areas) NZ -3652+17446 Pacific/Auckland most of New Zealand
NZ -4357-17633 Pacific/Chatham Chatham Islands NZ -4357-17633 Pacific/Chatham Chatham Islands
OM +2336+05835 Asia/Muscat OM +2336+05835 Asia/Muscat
PA +0858-07932 America/Panama PA +0858-07932 America/Panama
@ -310,7 +309,7 @@ PE -1203-07703 America/Lima
PF -1732-14934 Pacific/Tahiti Society Islands PF -1732-14934 Pacific/Tahiti Society Islands
PF -0900-13930 Pacific/Marquesas Marquesas Islands PF -0900-13930 Pacific/Marquesas Marquesas Islands
PF -2308-13457 Pacific/Gambier Gambier Islands PF -2308-13457 Pacific/Gambier Gambier Islands
PG -0930+14710 Pacific/Port_Moresby Papua New Guinea (most areas) PG -0930+14710 Pacific/Port_Moresby most of Papua New Guinea
PG -0613+15534 Pacific/Bougainville Bougainville PG -0613+15534 Pacific/Bougainville Bougainville
PH +1435+12100 Asia/Manila PH +1435+12100 Asia/Manila
PK +2452+06703 Asia/Karachi PK +2452+06703 Asia/Karachi
@ -356,7 +355,7 @@ RU +4310+13156 Asia/Vladivostok MSK+07 - Amur River
RU +643337+1431336 Asia/Ust-Nera MSK+07 - Oymyakonsky RU +643337+1431336 Asia/Ust-Nera MSK+07 - Oymyakonsky
RU +5934+15048 Asia/Magadan MSK+08 - Magadan RU +5934+15048 Asia/Magadan MSK+08 - Magadan
RU +4658+14242 Asia/Sakhalin MSK+08 - Sakhalin Island RU +4658+14242 Asia/Sakhalin MSK+08 - Sakhalin Island
RU +6728+15343 Asia/Srednekolymsk MSK+08 - Sakha (E); North Kuril Is RU +6728+15343 Asia/Srednekolymsk MSK+08 - Sakha (E); N Kuril Is
RU +5301+15839 Asia/Kamchatka MSK+09 - Kamchatka RU +5301+15839 Asia/Kamchatka MSK+09 - Kamchatka
RU +6445+17729 Asia/Anadyr MSK+09 - Bering Sea RU +6445+17729 Asia/Anadyr MSK+09 - Bering Sea
RW -0157+03004 Africa/Kigali RW -0157+03004 Africa/Kigali
@ -397,7 +396,7 @@ TT +1039-06131 America/Port_of_Spain
TV -0831+17913 Pacific/Funafuti TV -0831+17913 Pacific/Funafuti
TW +2503+12130 Asia/Taipei TW +2503+12130 Asia/Taipei
TZ -0648+03917 Africa/Dar_es_Salaam TZ -0648+03917 Africa/Dar_es_Salaam
UA +5026+03031 Europe/Kyiv Ukraine (most areas) UA +5026+03031 Europe/Kyiv most of Ukraine
UG +0019+03225 Africa/Kampala UG +0019+03225 Africa/Kampala
UM +2813-17722 Pacific/Midway Midway Islands UM +2813-17722 Pacific/Midway Midway Islands
UM +1917+16637 Pacific/Wake Wake Island UM +1917+16637 Pacific/Wake Wake Island
@ -420,7 +419,7 @@ US +465042-1012439 America/North_Dakota/New_Salem Central - ND (Morton rural)
US +471551-1014640 America/North_Dakota/Beulah Central - ND (Mercer) US +471551-1014640 America/North_Dakota/Beulah Central - ND (Mercer)
US +394421-1045903 America/Denver Mountain (most areas) US +394421-1045903 America/Denver Mountain (most areas)
US +433649-1161209 America/Boise Mountain - ID (south); OR (east) US +433649-1161209 America/Boise Mountain - ID (south); OR (east)
US +332654-1120424 America/Phoenix MST - Arizona (except Navajo) US +332654-1120424 America/Phoenix MST - AZ (except Navajo)
US +340308-1181434 America/Los_Angeles Pacific US +340308-1181434 America/Los_Angeles Pacific
US +611305-1495401 America/Anchorage Alaska (most areas) US +611305-1495401 America/Anchorage Alaska (most areas)
US +581807-1342511 America/Juneau Alaska - Juneau area US +581807-1342511 America/Juneau Alaska - Juneau area
@ -428,7 +427,7 @@ US +571035-1351807 America/Sitka Alaska - Sitka area
US +550737-1313435 America/Metlakatla Alaska - Annette Island US +550737-1313435 America/Metlakatla Alaska - Annette Island
US +593249-1394338 America/Yakutat Alaska - Yakutat US +593249-1394338 America/Yakutat Alaska - Yakutat
US +643004-1652423 America/Nome Alaska (west) US +643004-1652423 America/Nome Alaska (west)
US +515248-1763929 America/Adak Aleutian Islands US +515248-1763929 America/Adak Alaska - western Aleutians
US +211825-1575130 Pacific/Honolulu Hawaii US +211825-1575130 Pacific/Honolulu Hawaii
UY -345433-0561245 America/Montevideo UY -345433-0561245 America/Montevideo
UZ +3940+06648 Asia/Samarkand Uzbekistan (west) UZ +3940+06648 Asia/Samarkand Uzbekistan (west)

View file

@ -18,7 +18,10 @@
# Please see the theory.html file for how these names are chosen. # Please see the theory.html file for how these names are chosen.
# If multiple timezones overlap a country, each has a row in the # If multiple timezones overlap a country, each has a row in the
# table, with each column 1 containing the country code. # table, with each column 1 containing the country code.
# 4. Comments; present if and only if a country has multiple timezones. # 4. Comments; present if and only if countries have multiple timezones,
# and useful only for those countries. For example, the comments
# for the row with countries CH,DE,LI and name Europe/Zurich
# are useful only for DE, since CH and LI have no other timezones.
# #
# If a timezone covers multiple countries, the most-populous city is used, # If a timezone covers multiple countries, the most-populous city is used,
# and that country is listed first in column 1; any other countries # and that country is listed first in column 1; any other countries
@ -34,7 +37,7 @@
#country- #country-
#codes coordinates TZ comments #codes coordinates TZ comments
AD +4230+00131 Europe/Andorra AD +4230+00131 Europe/Andorra
AE,OM,RE,SC,TF +2518+05518 Asia/Dubai UAE, Oman, Réunion, Seychelles, Crozet, Scattered Is AE,OM,RE,SC,TF +2518+05518 Asia/Dubai Crozet, Scattered Is
AF +3431+06912 Asia/Kabul AF +3431+06912 Asia/Kabul
AL +4120+01950 Europe/Tirane AL +4120+01950 Europe/Tirane
AM +4011+04430 Asia/Yerevan AM +4011+04430 Asia/Yerevan
@ -45,7 +48,7 @@ AQ -6448-06406 Antarctica/Palmer Palmer
AQ -6734-06808 Antarctica/Rothera Rothera AQ -6734-06808 Antarctica/Rothera Rothera
AQ -720041+0023206 Antarctica/Troll Troll AQ -720041+0023206 Antarctica/Troll Troll
AR -3436-05827 America/Argentina/Buenos_Aires Buenos Aires (BA, CF) AR -3436-05827 America/Argentina/Buenos_Aires Buenos Aires (BA, CF)
AR -3124-06411 America/Argentina/Cordoba Argentina (most areas: CB, CC, CN, ER, FM, MN, SE, SF) AR -3124-06411 America/Argentina/Cordoba most areas: CB, CC, CN, ER, FM, MN, SE, SF
AR -2447-06525 America/Argentina/Salta Salta (SA, LP, NQ, RN) AR -2447-06525 America/Argentina/Salta Salta (SA, LP, NQ, RN)
AR -2411-06518 America/Argentina/Jujuy Jujuy (JY) AR -2411-06518 America/Argentina/Jujuy Jujuy (JY)
AR -2649-06513 America/Argentina/Tucuman Tucumán (TM) AR -2649-06513 America/Argentina/Tucuman Tucumán (TM)
@ -56,7 +59,7 @@ AR -3253-06849 America/Argentina/Mendoza Mendoza (MZ)
AR -3319-06621 America/Argentina/San_Luis San Luis (SL) AR -3319-06621 America/Argentina/San_Luis San Luis (SL)
AR -5138-06913 America/Argentina/Rio_Gallegos Santa Cruz (SC) AR -5138-06913 America/Argentina/Rio_Gallegos Santa Cruz (SC)
AR -5448-06818 America/Argentina/Ushuaia Tierra del Fuego (TF) AR -5448-06818 America/Argentina/Ushuaia Tierra del Fuego (TF)
AS,UM -1416-17042 Pacific/Pago_Pago Samoa, Midway AS,UM -1416-17042 Pacific/Pago_Pago Midway
AT +4813+01620 Europe/Vienna AT +4813+01620 Europe/Vienna
AU -3133+15905 Australia/Lord_Howe Lord Howe Island AU -3133+15905 Australia/Lord_Howe Lord Howe Island
AU -5430+15857 Antarctica/Macquarie Macquarie Island AU -5430+15857 Antarctica/Macquarie Macquarie Island
@ -101,26 +104,25 @@ CA +4439-06336 America/Halifax Atlantic - NS (most areas); PE
CA +4612-05957 America/Glace_Bay Atlantic - NS (Cape Breton) CA +4612-05957 America/Glace_Bay Atlantic - NS (Cape Breton)
CA +4606-06447 America/Moncton Atlantic - New Brunswick CA +4606-06447 America/Moncton Atlantic - New Brunswick
CA +5320-06025 America/Goose_Bay Atlantic - Labrador (most areas) CA +5320-06025 America/Goose_Bay Atlantic - Labrador (most areas)
CA,BS +4339-07923 America/Toronto Eastern - ON, QC (most areas), Bahamas CA,BS +4339-07923 America/Toronto Eastern - ON, QC (most areas)
CA +6344-06828 America/Iqaluit Eastern - NU (most areas) CA +6344-06828 America/Iqaluit Eastern - NU (most areas)
CA +4953-09709 America/Winnipeg Central - ON (west); Manitoba CA +4953-09709 America/Winnipeg Central - ON (west); Manitoba
CA +744144-0944945 America/Resolute Central - NU (Resolute) CA +744144-0944945 America/Resolute Central - NU (Resolute)
CA +624900-0920459 America/Rankin_Inlet Central - NU (central) CA +624900-0920459 America/Rankin_Inlet Central - NU (central)
CA +5024-10439 America/Regina CST - SK (most areas) CA +5024-10439 America/Regina CST - SK (most areas)
CA +5017-10750 America/Swift_Current CST - SK (midwest) CA +5017-10750 America/Swift_Current CST - SK (midwest)
CA +5333-11328 America/Edmonton Mountain - AB; BC (E); SK (W) CA +5333-11328 America/Edmonton Mountain - AB; BC (E); NT (E); SK (W)
CA +690650-1050310 America/Cambridge_Bay Mountain - NU (west) CA +690650-1050310 America/Cambridge_Bay Mountain - NU (west)
CA +6227-11421 America/Yellowknife Mountain - NT (central)
CA +682059-1334300 America/Inuvik Mountain - NT (west) CA +682059-1334300 America/Inuvik Mountain - NT (west)
CA +5546-12014 America/Dawson_Creek MST - BC (Dawson Cr, Ft St John) CA +5546-12014 America/Dawson_Creek MST - BC (Dawson Cr, Ft St John)
CA +5848-12242 America/Fort_Nelson MST - BC (Ft Nelson) CA +5848-12242 America/Fort_Nelson MST - BC (Ft Nelson)
CA +6043-13503 America/Whitehorse MST - Yukon (east) CA +6043-13503 America/Whitehorse MST - Yukon (east)
CA +6404-13925 America/Dawson MST - Yukon (west) CA +6404-13925 America/Dawson MST - Yukon (west)
CA +4916-12307 America/Vancouver Pacific - BC (most areas) CA +4916-12307 America/Vancouver Pacific - BC (most areas)
CH,DE,LI +4723+00832 Europe/Zurich Swiss time CH,DE,LI +4723+00832 Europe/Zurich Büsingen
CI,BF,GH,GM,GN,IS,ML,MR,SH,SL,SN,TG +0519-00402 Africa/Abidjan CI,BF,GH,GM,GN,IS,ML,MR,SH,SL,SN,TG +0519-00402 Africa/Abidjan
CK -2114-15946 Pacific/Rarotonga CK -2114-15946 Pacific/Rarotonga
CL -3327-07040 America/Santiago Chile (most areas) CL -3327-07040 America/Santiago most of Chile
CL -5309-07055 America/Punta_Arenas Region of Magallanes CL -5309-07055 America/Punta_Arenas Region of Magallanes
CL -2709-10926 Pacific/Easter Easter Island CL -2709-10926 Pacific/Easter Easter Island
CN +3114+12128 Asia/Shanghai Beijing Time CN +3114+12128 Asia/Shanghai Beijing Time
@ -129,10 +131,10 @@ CO +0436-07405 America/Bogota
CR +0956-08405 America/Costa_Rica CR +0956-08405 America/Costa_Rica
CU +2308-08222 America/Havana CU +2308-08222 America/Havana
CV +1455-02331 Atlantic/Cape_Verde CV +1455-02331 Atlantic/Cape_Verde
CY +3510+03322 Asia/Nicosia Cyprus (most areas) CY +3510+03322 Asia/Nicosia most of Cyprus
CY +3507+03357 Asia/Famagusta Northern Cyprus CY +3507+03357 Asia/Famagusta Northern Cyprus
CZ,SK +5005+01426 Europe/Prague CZ,SK +5005+01426 Europe/Prague
DE,DK,NO,SE,SJ +5230+01322 Europe/Berlin Germany (most areas), Scandinavia DE,DK,NO,SE,SJ +5230+01322 Europe/Berlin most of Germany
DO +1828-06954 America/Santo_Domingo DO +1828-06954 America/Santo_Domingo
DZ +3647+00303 Africa/Algiers DZ +3647+00303 Africa/Algiers
EC -0210-07950 America/Guayaquil Ecuador (mainland) EC -0210-07950 America/Guayaquil Ecuador (mainland)
@ -153,7 +155,7 @@ GB,GG,IM,JE +513030-0000731 Europe/London
GE +4143+04449 Asia/Tbilisi GE +4143+04449 Asia/Tbilisi
GF +0456-05220 America/Cayenne GF +0456-05220 America/Cayenne
GI +3608-00521 Europe/Gibraltar GI +3608-00521 Europe/Gibraltar
GL +6411-05144 America/Nuuk Greenland (most areas) GL +6411-05144 America/Nuuk most of Greenland
GL +7646-01840 America/Danmarkshavn National Park (east coast) GL +7646-01840 America/Danmarkshavn National Park (east coast)
GL +7029-02158 America/Scoresbysund Scoresbysund/Ittoqqortoormiit GL +7029-02158 America/Scoresbysund Scoresbysund/Ittoqqortoormiit
GL +7634-06847 America/Thule Thule/Pituffik GL +7634-06847 America/Thule Thule/Pituffik
@ -183,12 +185,12 @@ JO +3157+03556 Asia/Amman
JP +353916+1394441 Asia/Tokyo JP +353916+1394441 Asia/Tokyo
KE,DJ,ER,ET,KM,MG,SO,TZ,UG,YT -0117+03649 Africa/Nairobi KE,DJ,ER,ET,KM,MG,SO,TZ,UG,YT -0117+03649 Africa/Nairobi
KG +4254+07436 Asia/Bishkek KG +4254+07436 Asia/Bishkek
KI,MH,TV,UM,WF +0125+17300 Pacific/Tarawa Gilberts, Marshalls, Tuvalu, Wallis & Futuna, Wake KI,MH,TV,UM,WF +0125+17300 Pacific/Tarawa Gilberts, Marshalls, Wake
KI -0247-17143 Pacific/Kanton Phoenix Islands KI -0247-17143 Pacific/Kanton Phoenix Islands
KI +0152-15720 Pacific/Kiritimati Line Islands KI +0152-15720 Pacific/Kiritimati Line Islands
KP +3901+12545 Asia/Pyongyang KP +3901+12545 Asia/Pyongyang
KR +3733+12658 Asia/Seoul KR +3733+12658 Asia/Seoul
KZ +4315+07657 Asia/Almaty Kazakhstan (most areas) KZ +4315+07657 Asia/Almaty most of Kazakhstan
KZ +4448+06528 Asia/Qyzylorda Qyzylorda/Kyzylorda/Kzyl-Orda KZ +4448+06528 Asia/Qyzylorda Qyzylorda/Kyzylorda/Kzyl-Orda
KZ +5312+06337 Asia/Qostanay Qostanay/Kostanay/Kustanay KZ +5312+06337 Asia/Qostanay Qostanay/Kostanay/Kustanay
KZ +5017+05710 Asia/Aqtobe Aqtöbe/Aktobe KZ +5017+05710 Asia/Aqtobe Aqtöbe/Aktobe
@ -205,14 +207,14 @@ MA +3339-00735 Africa/Casablanca
MD +4700+02850 Europe/Chisinau MD +4700+02850 Europe/Chisinau
MH +0905+16720 Pacific/Kwajalein Kwajalein MH +0905+16720 Pacific/Kwajalein Kwajalein
MM,CC +1647+09610 Asia/Yangon MM,CC +1647+09610 Asia/Yangon
MN +4755+10653 Asia/Ulaanbaatar Mongolia (most areas) MN +4755+10653 Asia/Ulaanbaatar most of Mongolia
MN +4801+09139 Asia/Hovd Bayan-Ölgii, Govi-Altai, Hovd, Uvs, Zavkhan MN +4801+09139 Asia/Hovd Bayan-Ölgii, Govi-Altai, Hovd, Uvs, Zavkhan
MN +4804+11430 Asia/Choibalsan Dornod, Sükhbaatar MN +4804+11430 Asia/Choibalsan Dornod, Sükhbaatar
MO +221150+1133230 Asia/Macau MO +221150+1133230 Asia/Macau
MQ +1436-06105 America/Martinique MQ +1436-06105 America/Martinique
MT +3554+01431 Europe/Malta MT +3554+01431 Europe/Malta
MU -2010+05730 Indian/Mauritius MU -2010+05730 Indian/Mauritius
MV,TF +0410+07330 Indian/Maldives Maldives, Kerguelen, St Paul I, Amsterdam I MV,TF +0410+07330 Indian/Maldives Kerguelen, St Paul I, Amsterdam I
MX +1924-09909 America/Mexico_City Central Mexico MX +1924-09909 America/Mexico_City Central Mexico
MX +2105-08646 America/Cancun Quintana Roo MX +2105-08646 America/Cancun Quintana Roo
MX +2058-08937 America/Merida Campeche, Yucatán MX +2058-08937 America/Merida Campeche, Yucatán
@ -225,7 +227,7 @@ MX +2313-10625 America/Mazatlan Baja California Sur, Nayarit (most areas), Sinal
MX +2048-10515 America/Bahia_Banderas Bahía de Banderas MX +2048-10515 America/Bahia_Banderas Bahía de Banderas
MX +2904-11058 America/Hermosillo Sonora MX +2904-11058 America/Hermosillo Sonora
MX +3232-11701 America/Tijuana Baja California MX +3232-11701 America/Tijuana Baja California
MY,BN +0133+11020 Asia/Kuching Sabah, Sarawak, Brunei MY,BN +0133+11020 Asia/Kuching Sabah, Sarawak
MZ,BI,BW,CD,MW,RW,ZM,ZW -2558+03235 Africa/Maputo Central Africa Time MZ,BI,BW,CD,MW,RW,ZM,ZW -2558+03235 Africa/Maputo Central Africa Time
NA -2234+01706 Africa/Windhoek NA -2234+01706 Africa/Windhoek
NC -2216+16627 Pacific/Noumea NC -2216+16627 Pacific/Noumea
@ -237,7 +239,7 @@ NR -0031+16655 Pacific/Nauru
NU -1901-16955 Pacific/Niue NU -1901-16955 Pacific/Niue
NZ,AQ -3652+17446 Pacific/Auckland New Zealand time NZ,AQ -3652+17446 Pacific/Auckland New Zealand time
NZ -4357-17633 Pacific/Chatham Chatham Islands NZ -4357-17633 Pacific/Chatham Chatham Islands
PA,CA,KY +0858-07932 America/Panama EST - Panama, Cayman, ON (Atikokan), NU (Coral H) PA,CA,KY +0858-07932 America/Panama EST - ON (Atikokan), NU (Coral H)
PE -1203-07703 America/Lima PE -1203-07703 America/Lima
PF -1732-14934 Pacific/Tahiti Society Islands PF -1732-14934 Pacific/Tahiti Society Islands
PF -0900-13930 Pacific/Marquesas Marquesas Islands PF -0900-13930 Pacific/Marquesas Marquesas Islands
@ -285,13 +287,13 @@ RU +4310+13156 Asia/Vladivostok MSK+07 - Amur River
RU +643337+1431336 Asia/Ust-Nera MSK+07 - Oymyakonsky RU +643337+1431336 Asia/Ust-Nera MSK+07 - Oymyakonsky
RU +5934+15048 Asia/Magadan MSK+08 - Magadan RU +5934+15048 Asia/Magadan MSK+08 - Magadan
RU +4658+14242 Asia/Sakhalin MSK+08 - Sakhalin Island RU +4658+14242 Asia/Sakhalin MSK+08 - Sakhalin Island
RU +6728+15343 Asia/Srednekolymsk MSK+08 - Sakha (E); North Kuril Is RU +6728+15343 Asia/Srednekolymsk MSK+08 - Sakha (E); N Kuril Is
RU +5301+15839 Asia/Kamchatka MSK+09 - Kamchatka RU +5301+15839 Asia/Kamchatka MSK+09 - Kamchatka
RU +6445+17729 Asia/Anadyr MSK+09 - Bering Sea RU +6445+17729 Asia/Anadyr MSK+09 - Bering Sea
SA,AQ,KW,YE +2438+04643 Asia/Riyadh Arabia, Syowa SA,AQ,KW,YE +2438+04643 Asia/Riyadh Syowa
SB,FM -0932+16012 Pacific/Guadalcanal Solomons, Pohnpei SB,FM -0932+16012 Pacific/Guadalcanal Pohnpei
SD +1536+03232 Africa/Khartoum SD +1536+03232 Africa/Khartoum
SG,MY +0117+10351 Asia/Singapore Singapore, peninsular Malaysia SG,MY +0117+10351 Asia/Singapore peninsular Malaysia
SR +0550-05510 America/Paramaribo SR +0550-05510 America/Paramaribo
SS +0451+03137 Africa/Juba SS +0451+03137 Africa/Juba
ST +0020+00644 Africa/Sao_Tome ST +0020+00644 Africa/Sao_Tome
@ -299,7 +301,7 @@ SV +1342-08912 America/El_Salvador
SY +3330+03618 Asia/Damascus SY +3330+03618 Asia/Damascus
TC +2128-07108 America/Grand_Turk TC +2128-07108 America/Grand_Turk
TD +1207+01503 Africa/Ndjamena TD +1207+01503 Africa/Ndjamena
TH,CX,KH,LA,VN +1345+10031 Asia/Bangkok Indochina (most areas) TH,CX,KH,LA,VN +1345+10031 Asia/Bangkok north Vietnam
TJ +3835+06848 Asia/Dushanbe TJ +3835+06848 Asia/Dushanbe
TK -0922-17114 Pacific/Fakaofo TK -0922-17114 Pacific/Fakaofo
TL -0833+12535 Asia/Dili TL -0833+12535 Asia/Dili
@ -308,7 +310,7 @@ TN +3648+01011 Africa/Tunis
TO -210800-1751200 Pacific/Tongatapu TO -210800-1751200 Pacific/Tongatapu
TR +4101+02858 Europe/Istanbul TR +4101+02858 Europe/Istanbul
TW +2503+12130 Asia/Taipei TW +2503+12130 Asia/Taipei
UA +5026+03031 Europe/Kyiv Ukraine (most areas) UA +5026+03031 Europe/Kyiv most of Ukraine
US +404251-0740023 America/New_York Eastern (most areas) US +404251-0740023 America/New_York Eastern (most areas)
US +421953-0830245 America/Detroit Eastern - MI (most areas) US +421953-0830245 America/Detroit Eastern - MI (most areas)
US +381515-0854534 America/Kentucky/Louisville Eastern - KY (Louisville area) US +381515-0854534 America/Kentucky/Louisville Eastern - KY (Louisville area)
@ -328,7 +330,7 @@ US +465042-1012439 America/North_Dakota/New_Salem Central - ND (Morton rural)
US +471551-1014640 America/North_Dakota/Beulah Central - ND (Mercer) US +471551-1014640 America/North_Dakota/Beulah Central - ND (Mercer)
US +394421-1045903 America/Denver Mountain (most areas) US +394421-1045903 America/Denver Mountain (most areas)
US +433649-1161209 America/Boise Mountain - ID (south); OR (east) US +433649-1161209 America/Boise Mountain - ID (south); OR (east)
US,CA +332654-1120424 America/Phoenix MST - Arizona (except Navajo), Creston BC US,CA +332654-1120424 America/Phoenix MST - AZ (most areas), Creston BC
US +340308-1181434 America/Los_Angeles Pacific US +340308-1181434 America/Los_Angeles Pacific
US +611305-1495401 America/Anchorage Alaska (most areas) US +611305-1495401 America/Anchorage Alaska (most areas)
US +581807-1342511 America/Juneau Alaska - Juneau area US +581807-1342511 America/Juneau Alaska - Juneau area
@ -336,13 +338,13 @@ US +571035-1351807 America/Sitka Alaska - Sitka area
US +550737-1313435 America/Metlakatla Alaska - Annette Island US +550737-1313435 America/Metlakatla Alaska - Annette Island
US +593249-1394338 America/Yakutat Alaska - Yakutat US +593249-1394338 America/Yakutat Alaska - Yakutat
US +643004-1652423 America/Nome Alaska (west) US +643004-1652423 America/Nome Alaska (west)
US +515248-1763929 America/Adak Aleutian Islands US +515248-1763929 America/Adak Alaska - western Aleutians
US,UM +211825-1575130 Pacific/Honolulu Hawaii US +211825-1575130 Pacific/Honolulu Hawaii
UY -345433-0561245 America/Montevideo UY -345433-0561245 America/Montevideo
UZ +3940+06648 Asia/Samarkand Uzbekistan (west) UZ +3940+06648 Asia/Samarkand Uzbekistan (west)
UZ +4120+06918 Asia/Tashkent Uzbekistan (east) UZ +4120+06918 Asia/Tashkent Uzbekistan (east)
VE +1030-06656 America/Caracas VE +1030-06656 America/Caracas
VN +1045+10640 Asia/Ho_Chi_Minh Vietnam (south) VN +1045+10640 Asia/Ho_Chi_Minh south Vietnam
VU -1740+16825 Pacific/Efate VU -1740+16825 Pacific/Efate
WS -1350-17144 Pacific/Apia WS -1350-17144 Pacific/Apia
ZA,LS,SZ -2615+02800 Africa/Johannesburg ZA,LS,SZ -2615+02800 Africa/Johannesburg

View file

@ -243,7 +243,6 @@ America/Iqaluit
America/Resolute America/Resolute
America/Rankin_Inlet America/Rankin_Inlet
America/Cambridge_Bay America/Cambridge_Bay
America/Yellowknife
America/Inuvik America/Inuvik
America/Whitehorse America/Whitehorse
America/Dawson America/Dawson
@ -561,6 +560,7 @@ America/Rosario
America/Santa_Isabel America/Santa_Isabel
America/Shiprock America/Shiprock
America/Thunder_Bay America/Thunder_Bay
America/Yellowknife
Antarctica/South_Pole Antarctica/South_Pole
Asia/Chongqing Asia/Chongqing
Asia/Harbin Asia/Harbin

View file

@ -1,23 +1,48 @@
""" """
Python HTTP library with thread-safe connection pooling, file post support, user friendly, and more Python HTTP library with thread-safe connection pooling, file post support, user friendly, and more
""" """
from __future__ import absolute_import
from __future__ import annotations
# Set default logging handler to avoid "No handler found" warnings. # Set default logging handler to avoid "No handler found" warnings.
import logging import logging
import typing
import warnings import warnings
from logging import NullHandler from logging import NullHandler
from . import exceptions from . import exceptions
from ._base_connection import _TYPE_BODY
from ._collections import HTTPHeaderDict
from ._version import __version__ from ._version import __version__
from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool, connection_from_url from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool, connection_from_url
from .filepost import encode_multipart_formdata from .filepost import _TYPE_FIELDS, encode_multipart_formdata
from .poolmanager import PoolManager, ProxyManager, proxy_from_url from .poolmanager import PoolManager, ProxyManager, proxy_from_url
from .response import HTTPResponse from .response import BaseHTTPResponse, HTTPResponse
from .util.request import make_headers from .util.request import make_headers
from .util.retry import Retry from .util.retry import Retry
from .util.timeout import Timeout from .util.timeout import Timeout
from .util.url import get_host
# Ensure that Python is compiled with OpenSSL 1.1.1+
# If the 'ssl' module isn't available at all that's
# fine, we only care if the module is available.
try:
import ssl
except ImportError:
pass
else:
if not ssl.OPENSSL_VERSION.startswith("OpenSSL "): # Defensive:
warnings.warn(
"urllib3 v2.0 only supports OpenSSL 1.1.1+, currently "
f"the 'ssl' module is compiled with {ssl.OPENSSL_VERSION!r}. "
"See: https://github.com/urllib3/urllib3/issues/3020",
exceptions.NotOpenSSLWarning,
)
elif ssl.OPENSSL_VERSION_INFO < (1, 1, 1): # Defensive:
raise ImportError(
"urllib3 v2.0 only supports OpenSSL 1.1.1+, currently "
f"the 'ssl' module is compiled with {ssl.OPENSSL_VERSION!r}. "
"See: https://github.com/urllib3/urllib3/issues/2168"
)
# === NOTE TO REPACKAGERS AND VENDORS === # === NOTE TO REPACKAGERS AND VENDORS ===
# Please delete this block, this logic is only # Please delete this block, this logic is only
@ -25,12 +50,12 @@ from .util.url import get_host
# See: https://github.com/urllib3/urllib3/issues/2680 # See: https://github.com/urllib3/urllib3/issues/2680
try: try:
import urllib3_secure_extra # type: ignore # noqa: F401 import urllib3_secure_extra # type: ignore # noqa: F401
except ImportError: except ModuleNotFoundError:
pass pass
else: else:
warnings.warn( warnings.warn(
"'urllib3[secure]' extra is deprecated and will be removed " "'urllib3[secure]' extra is deprecated and will be removed "
"in a future release of urllib3 2.x. Read more in this issue: " "in urllib3 v2.1.0. Read more in this issue: "
"https://github.com/urllib3/urllib3/issues/2680", "https://github.com/urllib3/urllib3/issues/2680",
category=DeprecationWarning, category=DeprecationWarning,
stacklevel=2, stacklevel=2,
@ -42,6 +67,7 @@ __version__ = __version__
__all__ = ( __all__ = (
"HTTPConnectionPool", "HTTPConnectionPool",
"HTTPHeaderDict",
"HTTPSConnectionPool", "HTTPSConnectionPool",
"PoolManager", "PoolManager",
"ProxyManager", "ProxyManager",
@ -52,15 +78,18 @@ __all__ = (
"connection_from_url", "connection_from_url",
"disable_warnings", "disable_warnings",
"encode_multipart_formdata", "encode_multipart_formdata",
"get_host",
"make_headers", "make_headers",
"proxy_from_url", "proxy_from_url",
"request",
"BaseHTTPResponse",
) )
logging.getLogger(__name__).addHandler(NullHandler()) logging.getLogger(__name__).addHandler(NullHandler())
def add_stderr_logger(level=logging.DEBUG): def add_stderr_logger(
level: int = logging.DEBUG,
) -> logging.StreamHandler[typing.TextIO]:
""" """
Helper for quickly adding a StreamHandler to the logger. Useful for Helper for quickly adding a StreamHandler to the logger. Useful for
debugging. debugging.
@ -87,16 +116,51 @@ del NullHandler
# mechanisms to silence them. # mechanisms to silence them.
# SecurityWarning's always go off by default. # SecurityWarning's always go off by default.
warnings.simplefilter("always", exceptions.SecurityWarning, append=True) warnings.simplefilter("always", exceptions.SecurityWarning, append=True)
# SubjectAltNameWarning's should go off once per host
warnings.simplefilter("default", exceptions.SubjectAltNameWarning, append=True)
# InsecurePlatformWarning's don't vary between requests, so we keep it default. # InsecurePlatformWarning's don't vary between requests, so we keep it default.
warnings.simplefilter("default", exceptions.InsecurePlatformWarning, append=True) warnings.simplefilter("default", exceptions.InsecurePlatformWarning, append=True)
# SNIMissingWarnings should go off only once.
warnings.simplefilter("default", exceptions.SNIMissingWarning, append=True)
def disable_warnings(category=exceptions.HTTPWarning): def disable_warnings(category: type[Warning] = exceptions.HTTPWarning) -> None:
""" """
Helper for quickly disabling all urllib3 warnings. Helper for quickly disabling all urllib3 warnings.
""" """
warnings.simplefilter("ignore", category) warnings.simplefilter("ignore", category)
_DEFAULT_POOL = PoolManager()
def request(
method: str,
url: str,
*,
body: _TYPE_BODY | None = None,
fields: _TYPE_FIELDS | None = None,
headers: typing.Mapping[str, str] | None = None,
preload_content: bool | None = True,
decode_content: bool | None = True,
redirect: bool | None = True,
retries: Retry | bool | int | None = None,
timeout: Timeout | float | int | None = 3,
json: typing.Any | None = None,
) -> BaseHTTPResponse:
"""
A convenience, top-level request method. It uses a module-global ``PoolManager`` instance.
Therefore, its side effects could be shared across dependencies relying on it.
To avoid side effects create a new ``PoolManager`` instance and use it instead.
The method does not accept low-level ``**urlopen_kw`` keyword arguments.
"""
return _DEFAULT_POOL.request(
method,
url,
body=body,
fields=fields,
headers=headers,
preload_content=preload_content,
decode_content=decode_content,
redirect=redirect,
retries=retries,
timeout=timeout,
json=json,
)

View file

@ -0,0 +1,173 @@
from __future__ import annotations
import typing
from .util.connection import _TYPE_SOCKET_OPTIONS
from .util.timeout import _DEFAULT_TIMEOUT, _TYPE_TIMEOUT
from .util.url import Url
_TYPE_BODY = typing.Union[bytes, typing.IO[typing.Any], typing.Iterable[bytes], str]
class ProxyConfig(typing.NamedTuple):
ssl_context: ssl.SSLContext | None
use_forwarding_for_https: bool
assert_hostname: None | str | Literal[False]
assert_fingerprint: str | None
class _ResponseOptions(typing.NamedTuple):
# TODO: Remove this in favor of a better
# HTTP request/response lifecycle tracking.
request_method: str
request_url: str
preload_content: bool
decode_content: bool
enforce_content_length: bool
if typing.TYPE_CHECKING:
import ssl
from typing_extensions import Literal, Protocol
from .response import BaseHTTPResponse
class BaseHTTPConnection(Protocol):
default_port: typing.ClassVar[int]
default_socket_options: typing.ClassVar[_TYPE_SOCKET_OPTIONS]
host: str
port: int
timeout: None | (
float
) # Instance doesn't store _DEFAULT_TIMEOUT, must be resolved.
blocksize: int
source_address: tuple[str, int] | None
socket_options: _TYPE_SOCKET_OPTIONS | None
proxy: Url | None
proxy_config: ProxyConfig | None
is_verified: bool
proxy_is_verified: bool | None
def __init__(
self,
host: str,
port: int | None = None,
*,
timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT,
source_address: tuple[str, int] | None = None,
blocksize: int = 8192,
socket_options: _TYPE_SOCKET_OPTIONS | None = ...,
proxy: Url | None = None,
proxy_config: ProxyConfig | None = None,
) -> None:
...
def set_tunnel(
self,
host: str,
port: int | None = None,
headers: typing.Mapping[str, str] | None = None,
scheme: str = "http",
) -> None:
...
def connect(self) -> None:
...
def request(
self,
method: str,
url: str,
body: _TYPE_BODY | None = None,
headers: typing.Mapping[str, str] | None = None,
# We know *at least* botocore is depending on the order of the
# first 3 parameters so to be safe we only mark the later ones
# as keyword-only to ensure we have space to extend.
*,
chunked: bool = False,
preload_content: bool = True,
decode_content: bool = True,
enforce_content_length: bool = True,
) -> None:
...
def getresponse(self) -> BaseHTTPResponse:
...
def close(self) -> None:
...
@property
def is_closed(self) -> bool:
"""Whether the connection either is brand new or has been previously closed.
If this property is True then both ``is_connected`` and ``has_connected_to_proxy``
properties must be False.
"""
@property
def is_connected(self) -> bool:
"""Whether the connection is actively connected to any origin (proxy or target)"""
@property
def has_connected_to_proxy(self) -> bool:
"""Whether the connection has successfully connected to its proxy.
This returns False if no proxy is in use. Used to determine whether
errors are coming from the proxy layer or from tunnelling to the target origin.
"""
class BaseHTTPSConnection(BaseHTTPConnection, Protocol):
default_port: typing.ClassVar[int]
default_socket_options: typing.ClassVar[_TYPE_SOCKET_OPTIONS]
# Certificate verification methods
cert_reqs: int | str | None
assert_hostname: None | str | Literal[False]
assert_fingerprint: str | None
ssl_context: ssl.SSLContext | None
# Trusted CAs
ca_certs: str | None
ca_cert_dir: str | None
ca_cert_data: None | str | bytes
# TLS version
ssl_minimum_version: int | None
ssl_maximum_version: int | None
ssl_version: int | str | None # Deprecated
# Client certificates
cert_file: str | None
key_file: str | None
key_password: str | None
def __init__(
self,
host: str,
port: int | None = None,
*,
timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT,
source_address: tuple[str, int] | None = None,
blocksize: int = 8192,
socket_options: _TYPE_SOCKET_OPTIONS | None = ...,
proxy: Url | None = None,
proxy_config: ProxyConfig | None = None,
cert_reqs: int | str | None = None,
assert_hostname: None | str | Literal[False] = None,
assert_fingerprint: str | None = None,
server_hostname: str | None = None,
ssl_context: ssl.SSLContext | None = None,
ca_certs: str | None = None,
ca_cert_dir: str | None = None,
ca_cert_data: None | str | bytes = None,
ssl_minimum_version: int | None = None,
ssl_maximum_version: int | None = None,
ssl_version: int | str | None = None, # Deprecated
cert_file: str | None = None,
key_file: str | None = None,
key_password: str | None = None,
) -> None:
...

View file

@ -1,34 +1,66 @@
from __future__ import absolute_import from __future__ import annotations
try:
from collections.abc import Mapping, MutableMapping
except ImportError:
from collections import Mapping, MutableMapping
try:
from threading import RLock
except ImportError: # Platform-specific: No threads available
class RLock:
def __enter__(self):
pass
def __exit__(self, exc_type, exc_value, traceback):
pass
import typing
from collections import OrderedDict from collections import OrderedDict
from enum import Enum, auto
from threading import RLock
if typing.TYPE_CHECKING:
# We can only import Protocol if TYPE_CHECKING because it's a development
# dependency, and is not available at runtime.
from typing_extensions import Protocol
class HasGettableStringKeys(Protocol):
def keys(self) -> typing.Iterator[str]:
...
def __getitem__(self, key: str) -> str:
...
from .exceptions import InvalidHeader
from .packages import six
from .packages.six import iterkeys, itervalues
__all__ = ["RecentlyUsedContainer", "HTTPHeaderDict"] __all__ = ["RecentlyUsedContainer", "HTTPHeaderDict"]
_Null = object() # Key type
_KT = typing.TypeVar("_KT")
# Value type
_VT = typing.TypeVar("_VT")
# Default type
_DT = typing.TypeVar("_DT")
ValidHTTPHeaderSource = typing.Union[
"HTTPHeaderDict",
typing.Mapping[str, str],
typing.Iterable[typing.Tuple[str, str]],
"HasGettableStringKeys",
]
class RecentlyUsedContainer(MutableMapping): class _Sentinel(Enum):
not_passed = auto()
def ensure_can_construct_http_header_dict(
potential: object,
) -> ValidHTTPHeaderSource | None:
if isinstance(potential, HTTPHeaderDict):
return potential
elif isinstance(potential, typing.Mapping):
# Full runtime checking of the contents of a Mapping is expensive, so for the
# purposes of typechecking, we assume that any Mapping is the right shape.
return typing.cast(typing.Mapping[str, str], potential)
elif isinstance(potential, typing.Iterable):
# Similarly to Mapping, full runtime checking of the contents of an Iterable is
# expensive, so for the purposes of typechecking, we assume that any Iterable
# is the right shape.
return typing.cast(typing.Iterable[typing.Tuple[str, str]], potential)
elif hasattr(potential, "keys") and hasattr(potential, "__getitem__"):
return typing.cast("HasGettableStringKeys", potential)
else:
return None
class RecentlyUsedContainer(typing.Generic[_KT, _VT], typing.MutableMapping[_KT, _VT]):
""" """
Provides a thread-safe dict-like container which maintains up to Provides a thread-safe dict-like container which maintains up to
``maxsize`` keys while throwing away the least-recently-used keys beyond ``maxsize`` keys while throwing away the least-recently-used keys beyond
@ -42,69 +74,134 @@ class RecentlyUsedContainer(MutableMapping):
``dispose_func(value)`` is called. Callback which will get called ``dispose_func(value)`` is called. Callback which will get called
""" """
ContainerCls = OrderedDict _container: typing.OrderedDict[_KT, _VT]
_maxsize: int
dispose_func: typing.Callable[[_VT], None] | None
lock: RLock
def __init__(self, maxsize=10, dispose_func=None): def __init__(
self,
maxsize: int = 10,
dispose_func: typing.Callable[[_VT], None] | None = None,
) -> None:
super().__init__()
self._maxsize = maxsize self._maxsize = maxsize
self.dispose_func = dispose_func self.dispose_func = dispose_func
self._container = OrderedDict()
self._container = self.ContainerCls()
self.lock = RLock() self.lock = RLock()
def __getitem__(self, key): def __getitem__(self, key: _KT) -> _VT:
# Re-insert the item, moving it to the end of the eviction line. # Re-insert the item, moving it to the end of the eviction line.
with self.lock: with self.lock:
item = self._container.pop(key) item = self._container.pop(key)
self._container[key] = item self._container[key] = item
return item return item
def __setitem__(self, key, value): def __setitem__(self, key: _KT, value: _VT) -> None:
evicted_value = _Null evicted_item = None
with self.lock: with self.lock:
# Possibly evict the existing value of 'key' # Possibly evict the existing value of 'key'
evicted_value = self._container.get(key, _Null) try:
# If the key exists, we'll overwrite it, which won't change the
# size of the pool. Because accessing a key should move it to
# the end of the eviction line, we pop it out first.
evicted_item = key, self._container.pop(key)
self._container[key] = value
except KeyError:
# When the key does not exist, we insert the value first so that
# evicting works in all cases, including when self._maxsize is 0
self._container[key] = value self._container[key] = value
# If we didn't evict an existing value, we might have to evict the
# least recently used item from the beginning of the container.
if len(self._container) > self._maxsize: if len(self._container) > self._maxsize:
_key, evicted_value = self._container.popitem(last=False) # If we didn't evict an existing value, and we've hit our maximum
# size, then we have to evict the least recently used item from
# the beginning of the container.
evicted_item = self._container.popitem(last=False)
if self.dispose_func and evicted_value is not _Null: # After releasing the lock on the pool, dispose of any evicted value.
if evicted_item is not None and self.dispose_func:
_, evicted_value = evicted_item
self.dispose_func(evicted_value) self.dispose_func(evicted_value)
def __delitem__(self, key): def __delitem__(self, key: _KT) -> None:
with self.lock: with self.lock:
value = self._container.pop(key) value = self._container.pop(key)
if self.dispose_func: if self.dispose_func:
self.dispose_func(value) self.dispose_func(value)
def __len__(self): def __len__(self) -> int:
with self.lock: with self.lock:
return len(self._container) return len(self._container)
def __iter__(self): def __iter__(self) -> typing.NoReturn:
raise NotImplementedError( raise NotImplementedError(
"Iteration over this class is unlikely to be threadsafe." "Iteration over this class is unlikely to be threadsafe."
) )
def clear(self): def clear(self) -> None:
with self.lock: with self.lock:
# Copy pointers to all values, then wipe the mapping # Copy pointers to all values, then wipe the mapping
values = list(itervalues(self._container)) values = list(self._container.values())
self._container.clear() self._container.clear()
if self.dispose_func: if self.dispose_func:
for value in values: for value in values:
self.dispose_func(value) self.dispose_func(value)
def keys(self): def keys(self) -> set[_KT]: # type: ignore[override]
with self.lock: with self.lock:
return list(iterkeys(self._container)) return set(self._container.keys())
class HTTPHeaderDict(MutableMapping): class HTTPHeaderDictItemView(typing.Set[typing.Tuple[str, str]]):
"""
HTTPHeaderDict is unusual for a Mapping[str, str] in that it has two modes of
address.
If we directly try to get an item with a particular name, we will get a string
back that is the concatenated version of all the values:
>>> d['X-Header-Name']
'Value1, Value2, Value3'
However, if we iterate over an HTTPHeaderDict's items, we will optionally combine
these values based on whether combine=True was called when building up the dictionary
>>> d = HTTPHeaderDict({"A": "1", "B": "foo"})
>>> d.add("A", "2", combine=True)
>>> d.add("B", "bar")
>>> list(d.items())
[
('A', '1, 2'),
('B', 'foo'),
('B', 'bar'),
]
This class conforms to the interface required by the MutableMapping ABC while
also giving us the nonstandard iteration behavior we want; items with duplicate
keys, ordered by time of first insertion.
"""
_headers: HTTPHeaderDict
def __init__(self, headers: HTTPHeaderDict) -> None:
self._headers = headers
def __len__(self) -> int:
return len(list(self._headers.iteritems()))
def __iter__(self) -> typing.Iterator[tuple[str, str]]:
return self._headers.iteritems()
def __contains__(self, item: object) -> bool:
if isinstance(item, tuple) and len(item) == 2:
passed_key, passed_val = item
if isinstance(passed_key, str) and isinstance(passed_val, str):
return self._headers._has_value_for_header(passed_key, passed_val)
return False
class HTTPHeaderDict(typing.MutableMapping[str, str]):
""" """
:param headers: :param headers:
An iterable of field-value pairs. Must not contain multiple field names An iterable of field-value pairs. Must not contain multiple field names
@ -138,9 +235,11 @@ class HTTPHeaderDict(MutableMapping):
'7' '7'
""" """
def __init__(self, headers=None, **kwargs): _container: typing.MutableMapping[str, list[str]]
super(HTTPHeaderDict, self).__init__()
self._container = OrderedDict() def __init__(self, headers: ValidHTTPHeaderSource | None = None, **kwargs: str):
super().__init__()
self._container = {} # 'dict' is insert-ordered in Python 3.7+
if headers is not None: if headers is not None:
if isinstance(headers, HTTPHeaderDict): if isinstance(headers, HTTPHeaderDict):
self._copy_from(headers) self._copy_from(headers)
@ -149,123 +248,147 @@ class HTTPHeaderDict(MutableMapping):
if kwargs: if kwargs:
self.extend(kwargs) self.extend(kwargs)
def __setitem__(self, key, val): def __setitem__(self, key: str, val: str) -> None:
# avoid a bytes/str comparison by decoding before httplib
if isinstance(key, bytes):
key = key.decode("latin-1")
self._container[key.lower()] = [key, val] self._container[key.lower()] = [key, val]
return self._container[key.lower()]
def __getitem__(self, key): def __getitem__(self, key: str) -> str:
val = self._container[key.lower()] val = self._container[key.lower()]
return ", ".join(val[1:]) return ", ".join(val[1:])
def __delitem__(self, key): def __delitem__(self, key: str) -> None:
del self._container[key.lower()] del self._container[key.lower()]
def __contains__(self, key): def __contains__(self, key: object) -> bool:
if isinstance(key, str):
return key.lower() in self._container return key.lower() in self._container
def __eq__(self, other):
if not isinstance(other, Mapping) and not hasattr(other, "keys"):
return False return False
if not isinstance(other, type(self)):
other = type(self)(other)
return dict((k.lower(), v) for k, v in self.itermerged()) == dict(
(k.lower(), v) for k, v in other.itermerged()
)
def __ne__(self, other): def setdefault(self, key: str, default: str = "") -> str:
return super().setdefault(key, default)
def __eq__(self, other: object) -> bool:
maybe_constructable = ensure_can_construct_http_header_dict(other)
if maybe_constructable is None:
return False
else:
other_as_http_header_dict = type(self)(maybe_constructable)
return {k.lower(): v for k, v in self.itermerged()} == {
k.lower(): v for k, v in other_as_http_header_dict.itermerged()
}
def __ne__(self, other: object) -> bool:
return not self.__eq__(other) return not self.__eq__(other)
if six.PY2: # Python 2 def __len__(self) -> int:
iterkeys = MutableMapping.iterkeys
itervalues = MutableMapping.itervalues
__marker = object()
def __len__(self):
return len(self._container) return len(self._container)
def __iter__(self): def __iter__(self) -> typing.Iterator[str]:
# Only provide the originally cased names # Only provide the originally cased names
for vals in self._container.values(): for vals in self._container.values():
yield vals[0] yield vals[0]
def pop(self, key, default=__marker): def discard(self, key: str) -> None:
"""D.pop(k[,d]) -> v, remove specified key and return the corresponding value.
If key is not found, d is returned if given, otherwise KeyError is raised.
"""
# Using the MutableMapping function directly fails due to the private marker.
# Using ordinary dict.pop would expose the internal structures.
# So let's reinvent the wheel.
try:
value = self[key]
except KeyError:
if default is self.__marker:
raise
return default
else:
del self[key]
return value
def discard(self, key):
try: try:
del self[key] del self[key]
except KeyError: except KeyError:
pass pass
def add(self, key, val): def add(self, key: str, val: str, *, combine: bool = False) -> None:
"""Adds a (name, value) pair, doesn't overwrite the value if it already """Adds a (name, value) pair, doesn't overwrite the value if it already
exists. exists.
If this is called with combine=True, instead of adding a new header value
as a distinct item during iteration, this will instead append the value to
any existing header value with a comma. If no existing header value exists
for the key, then the value will simply be added, ignoring the combine parameter.
>>> headers = HTTPHeaderDict(foo='bar') >>> headers = HTTPHeaderDict(foo='bar')
>>> headers.add('Foo', 'baz') >>> headers.add('Foo', 'baz')
>>> headers['foo'] >>> headers['foo']
'bar, baz' 'bar, baz'
>>> list(headers.items())
[('foo', 'bar'), ('foo', 'baz')]
>>> headers.add('foo', 'quz', combine=True)
>>> list(headers.items())
[('foo', 'bar, baz, quz')]
""" """
# avoid a bytes/str comparison by decoding before httplib
if isinstance(key, bytes):
key = key.decode("latin-1")
key_lower = key.lower() key_lower = key.lower()
new_vals = [key, val] new_vals = [key, val]
# Keep the common case aka no item present as fast as possible # Keep the common case aka no item present as fast as possible
vals = self._container.setdefault(key_lower, new_vals) vals = self._container.setdefault(key_lower, new_vals)
if new_vals is not vals: if new_vals is not vals:
# if there are values here, then there is at least the initial
# key/value pair
assert len(vals) >= 2
if combine:
vals[-1] = vals[-1] + ", " + val
else:
vals.append(val) vals.append(val)
def extend(self, *args, **kwargs): def extend(self, *args: ValidHTTPHeaderSource, **kwargs: str) -> None:
"""Generic import function for any type of header-like object. """Generic import function for any type of header-like object.
Adapted version of MutableMapping.update in order to insert items Adapted version of MutableMapping.update in order to insert items
with self.add instead of self.__setitem__ with self.add instead of self.__setitem__
""" """
if len(args) > 1: if len(args) > 1:
raise TypeError( raise TypeError(
"extend() takes at most 1 positional " f"extend() takes at most 1 positional arguments ({len(args)} given)"
"arguments ({0} given)".format(len(args))
) )
other = args[0] if len(args) >= 1 else () other = args[0] if len(args) >= 1 else ()
if isinstance(other, HTTPHeaderDict): if isinstance(other, HTTPHeaderDict):
for key, val in other.iteritems(): for key, val in other.iteritems():
self.add(key, val) self.add(key, val)
elif isinstance(other, Mapping): elif isinstance(other, typing.Mapping):
for key in other: for key, val in other.items():
self.add(key, other[key]) self.add(key, val)
elif hasattr(other, "keys"): elif isinstance(other, typing.Iterable):
for key in other.keys(): other = typing.cast(typing.Iterable[typing.Tuple[str, str]], other)
self.add(key, other[key])
else:
for key, value in other: for key, value in other:
self.add(key, value) self.add(key, value)
elif hasattr(other, "keys") and hasattr(other, "__getitem__"):
# THIS IS NOT A TYPESAFE BRANCH
# In this branch, the object has a `keys` attr but is not a Mapping or any of
# the other types indicated in the method signature. We do some stuff with
# it as though it partially implements the Mapping interface, but we're not
# doing that stuff safely AT ALL.
for key in other.keys():
self.add(key, other[key])
for key, value in kwargs.items(): for key, value in kwargs.items():
self.add(key, value) self.add(key, value)
def getlist(self, key, default=__marker): @typing.overload
def getlist(self, key: str) -> list[str]:
...
@typing.overload
def getlist(self, key: str, default: _DT) -> list[str] | _DT:
...
def getlist(
self, key: str, default: _Sentinel | _DT = _Sentinel.not_passed
) -> list[str] | _DT:
"""Returns a list of all the values for the named field. Returns an """Returns a list of all the values for the named field. Returns an
empty list if the key doesn't exist.""" empty list if the key doesn't exist."""
try: try:
vals = self._container[key.lower()] vals = self._container[key.lower()]
except KeyError: except KeyError:
if default is self.__marker: if default is _Sentinel.not_passed:
# _DT is unbound; empty list is instance of List[str]
return [] return []
# _DT is bound; default is instance of _DT
return default return default
else: else:
# _DT may or may not be bound; vals[1:] is instance of List[str], which
# meets our external interface requirement of `Union[List[str], _DT]`.
return vals[1:] return vals[1:]
# Backwards compatibility for httplib # Backwards compatibility for httplib
@ -276,62 +399,65 @@ class HTTPHeaderDict(MutableMapping):
# Backwards compatibility for http.cookiejar # Backwards compatibility for http.cookiejar
get_all = getlist get_all = getlist
def __repr__(self): def __repr__(self) -> str:
return "%s(%s)" % (type(self).__name__, dict(self.itermerged())) return f"{type(self).__name__}({dict(self.itermerged())})"
def _copy_from(self, other): def _copy_from(self, other: HTTPHeaderDict) -> None:
for key in other: for key in other:
val = other.getlist(key) val = other.getlist(key)
if isinstance(val, list): self._container[key.lower()] = [key, *val]
# Don't need to convert tuples
val = list(val)
self._container[key.lower()] = [key] + val
def copy(self): def copy(self) -> HTTPHeaderDict:
clone = type(self)() clone = type(self)()
clone._copy_from(self) clone._copy_from(self)
return clone return clone
def iteritems(self): def iteritems(self) -> typing.Iterator[tuple[str, str]]:
"""Iterate over all header lines, including duplicate ones.""" """Iterate over all header lines, including duplicate ones."""
for key in self: for key in self:
vals = self._container[key.lower()] vals = self._container[key.lower()]
for val in vals[1:]: for val in vals[1:]:
yield vals[0], val yield vals[0], val
def itermerged(self): def itermerged(self) -> typing.Iterator[tuple[str, str]]:
"""Iterate over all headers, merging duplicate ones together.""" """Iterate over all headers, merging duplicate ones together."""
for key in self: for key in self:
val = self._container[key.lower()] val = self._container[key.lower()]
yield val[0], ", ".join(val[1:]) yield val[0], ", ".join(val[1:])
def items(self): def items(self) -> HTTPHeaderDictItemView: # type: ignore[override]
return list(self.iteritems()) return HTTPHeaderDictItemView(self)
@classmethod def _has_value_for_header(self, header_name: str, potential_value: str) -> bool:
def from_httplib(cls, message): # Python 2 if header_name in self:
"""Read headers from a Python 2 httplib message object.""" return potential_value in self._container[header_name.lower()][1:]
# python2.7 does not expose a proper API for exporting multiheaders return False
# efficiently. This function re-reads raw lines from the message
# object and extracts the multiheaders properly.
obs_fold_continued_leaders = (" ", "\t")
headers = []
for line in message.headers: def __ior__(self, other: object) -> HTTPHeaderDict:
if line.startswith(obs_fold_continued_leaders): # Supports extending a header dict in-place using operator |=
if not headers: # combining items with add instead of __setitem__
# We received a header line that starts with OWS as described maybe_constructable = ensure_can_construct_http_header_dict(other)
# in RFC-7230 S3.2.4. This indicates a multiline header, but if maybe_constructable is None:
# there exists no previous header to which we can attach it. return NotImplemented
raise InvalidHeader( self.extend(maybe_constructable)
"Header continuation with no previous header: %s" % line return self
)
else:
key, value = headers[-1]
headers[-1] = (key, value + " " + line.strip())
continue
key, value = line.split(":", 1) def __or__(self, other: object) -> HTTPHeaderDict:
headers.append((key, value.strip())) # Supports merging header dicts using operator |
# combining items with add instead of __setitem__
maybe_constructable = ensure_can_construct_http_header_dict(other)
if maybe_constructable is None:
return NotImplemented
result = self.copy()
result.extend(maybe_constructable)
return result
return cls(headers) def __ror__(self, other: object) -> HTTPHeaderDict:
# Supports merging header dicts using operator | when other is on left side
# combining items with add instead of __setitem__
maybe_constructable = ensure_can_construct_http_header_dict(other)
if maybe_constructable is None:
return NotImplemented
result = type(self)(maybe_constructable)
result.extend(self)
return result

View file

@ -1,12 +1,23 @@
from __future__ import absolute_import from __future__ import annotations
from .filepost import encode_multipart_formdata import json as _json
from .packages.six.moves.urllib.parse import urlencode import typing
from urllib.parse import urlencode
from ._base_connection import _TYPE_BODY
from ._collections import HTTPHeaderDict
from .filepost import _TYPE_FIELDS, encode_multipart_formdata
from .response import BaseHTTPResponse
__all__ = ["RequestMethods"] __all__ = ["RequestMethods"]
_TYPE_ENCODE_URL_FIELDS = typing.Union[
typing.Sequence[typing.Tuple[str, typing.Union[str, bytes]]],
typing.Mapping[str, typing.Union[str, bytes]],
]
class RequestMethods(object):
class RequestMethods:
""" """
Convenience mixin for classes who implement a :meth:`urlopen` method, such Convenience mixin for classes who implement a :meth:`urlopen` method, such
as :class:`urllib3.HTTPConnectionPool` and as :class:`urllib3.HTTPConnectionPool` and
@ -37,25 +48,34 @@ class RequestMethods(object):
_encode_url_methods = {"DELETE", "GET", "HEAD", "OPTIONS"} _encode_url_methods = {"DELETE", "GET", "HEAD", "OPTIONS"}
def __init__(self, headers=None): def __init__(self, headers: typing.Mapping[str, str] | None = None) -> None:
self.headers = headers or {} self.headers = headers or {}
def urlopen( def urlopen(
self, self,
method, method: str,
url, url: str,
body=None, body: _TYPE_BODY | None = None,
headers=None, headers: typing.Mapping[str, str] | None = None,
encode_multipart=True, encode_multipart: bool = True,
multipart_boundary=None, multipart_boundary: str | None = None,
**kw **kw: typing.Any,
): # Abstract ) -> BaseHTTPResponse: # Abstract
raise NotImplementedError( raise NotImplementedError(
"Classes extending RequestMethods must implement " "Classes extending RequestMethods must implement "
"their own ``urlopen`` method." "their own ``urlopen`` method."
) )
def request(self, method, url, fields=None, headers=None, **urlopen_kw): def request(
self,
method: str,
url: str,
body: _TYPE_BODY | None = None,
fields: _TYPE_FIELDS | None = None,
headers: typing.Mapping[str, str] | None = None,
json: typing.Any | None = None,
**urlopen_kw: typing.Any,
) -> BaseHTTPResponse:
""" """
Make a request using :meth:`urlopen` with the appropriate encoding of Make a request using :meth:`urlopen` with the appropriate encoding of
``fields`` based on the ``method`` used. ``fields`` based on the ``method`` used.
@ -68,18 +88,45 @@ class RequestMethods(object):
""" """
method = method.upper() method = method.upper()
urlopen_kw["request_url"] = url if json is not None and body is not None:
raise TypeError(
"request got values for both 'body' and 'json' parameters which are mutually exclusive"
)
if json is not None:
if headers is None:
headers = self.headers.copy() # type: ignore
if not ("content-type" in map(str.lower, headers.keys())):
headers["Content-Type"] = "application/json" # type: ignore
body = _json.dumps(json, separators=(",", ":"), ensure_ascii=False).encode(
"utf-8"
)
if body is not None:
urlopen_kw["body"] = body
if method in self._encode_url_methods: if method in self._encode_url_methods:
return self.request_encode_url( return self.request_encode_url(
method, url, fields=fields, headers=headers, **urlopen_kw method,
url,
fields=fields, # type: ignore[arg-type]
headers=headers,
**urlopen_kw,
) )
else: else:
return self.request_encode_body( return self.request_encode_body(
method, url, fields=fields, headers=headers, **urlopen_kw method, url, fields=fields, headers=headers, **urlopen_kw
) )
def request_encode_url(self, method, url, fields=None, headers=None, **urlopen_kw): def request_encode_url(
self,
method: str,
url: str,
fields: _TYPE_ENCODE_URL_FIELDS | None = None,
headers: typing.Mapping[str, str] | None = None,
**urlopen_kw: str,
) -> BaseHTTPResponse:
""" """
Make a request using :meth:`urlopen` with the ``fields`` encoded in Make a request using :meth:`urlopen` with the ``fields`` encoded in
the url. This is useful for request methods like GET, HEAD, DELETE, etc. the url. This is useful for request methods like GET, HEAD, DELETE, etc.
@ -87,7 +134,7 @@ class RequestMethods(object):
if headers is None: if headers is None:
headers = self.headers headers = self.headers
extra_kw = {"headers": headers} extra_kw: dict[str, typing.Any] = {"headers": headers}
extra_kw.update(urlopen_kw) extra_kw.update(urlopen_kw)
if fields: if fields:
@ -97,14 +144,14 @@ class RequestMethods(object):
def request_encode_body( def request_encode_body(
self, self,
method, method: str,
url, url: str,
fields=None, fields: _TYPE_FIELDS | None = None,
headers=None, headers: typing.Mapping[str, str] | None = None,
encode_multipart=True, encode_multipart: bool = True,
multipart_boundary=None, multipart_boundary: str | None = None,
**urlopen_kw **urlopen_kw: str,
): ) -> BaseHTTPResponse:
""" """
Make a request using :meth:`urlopen` with the ``fields`` encoded in Make a request using :meth:`urlopen` with the ``fields`` encoded in
the body. This is useful for request methods like POST, PUT, PATCH, etc. the body. This is useful for request methods like POST, PUT, PATCH, etc.
@ -143,7 +190,8 @@ class RequestMethods(object):
if headers is None: if headers is None:
headers = self.headers headers = self.headers
extra_kw = {"headers": {}} extra_kw: dict[str, typing.Any] = {"headers": HTTPHeaderDict(headers)}
body: bytes | str
if fields: if fields:
if "body" in urlopen_kw: if "body" in urlopen_kw:
@ -157,14 +205,13 @@ class RequestMethods(object):
) )
else: else:
body, content_type = ( body, content_type = (
urlencode(fields), urlencode(fields), # type: ignore[arg-type]
"application/x-www-form-urlencoded", "application/x-www-form-urlencoded",
) )
extra_kw["body"] = body extra_kw["body"] = body
extra_kw["headers"] = {"Content-Type": content_type} extra_kw["headers"].setdefault("Content-Type", content_type)
extra_kw["headers"].update(headers)
extra_kw.update(urlopen_kw) extra_kw.update(urlopen_kw)
return self.urlopen(method, url, **extra_kw) return self.urlopen(method, url, **extra_kw)

View file

@ -1,2 +1,4 @@
# This file is protected via CODEOWNERS # This file is protected via CODEOWNERS
__version__ = "1.26.15" from __future__ import annotations
__version__ = "2.0.4"

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,36 +0,0 @@
"""
This module provides means to detect the App Engine environment.
"""
import os
def is_appengine():
return is_local_appengine() or is_prod_appengine()
def is_appengine_sandbox():
"""Reports if the app is running in the first generation sandbox.
The second generation runtimes are technically still in a sandbox, but it
is much less restrictive, so generally you shouldn't need to check for it.
see https://cloud.google.com/appengine/docs/standard/runtimes
"""
return is_appengine() and os.environ["APPENGINE_RUNTIME"] == "python27"
def is_local_appengine():
return "APPENGINE_RUNTIME" in os.environ and os.environ.get(
"SERVER_SOFTWARE", ""
).startswith("Development/")
def is_prod_appengine():
return "APPENGINE_RUNTIME" in os.environ and os.environ.get(
"SERVER_SOFTWARE", ""
).startswith("Google App Engine/")
def is_prod_appengine_mvms():
"""Deprecated."""
return False

View file

@ -1,3 +1,5 @@
# type: ignore
""" """
This module uses ctypes to bind a whole bunch of functions and constants from This module uses ctypes to bind a whole bunch of functions and constants from
SecureTransport. The goal here is to provide the low-level API to SecureTransport. The goal here is to provide the low-level API to
@ -29,7 +31,8 @@ license and by oscrypto's:
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. DEALINGS IN THE SOFTWARE.
""" """
from __future__ import absolute_import
from __future__ import annotations
import platform import platform
from ctypes import ( from ctypes import (
@ -48,8 +51,6 @@ from ctypes import (
) )
from ctypes.util import find_library from ctypes.util import find_library
from ...packages.six import raise_from
if platform.system() != "Darwin": if platform.system() != "Darwin":
raise ImportError("Only macOS is supported") raise ImportError("Only macOS is supported")
@ -57,16 +58,16 @@ version = platform.mac_ver()[0]
version_info = tuple(map(int, version.split("."))) version_info = tuple(map(int, version.split(".")))
if version_info < (10, 8): if version_info < (10, 8):
raise OSError( raise OSError(
"Only OS X 10.8 and newer are supported, not %s.%s" f"Only OS X 10.8 and newer are supported, not {version_info[0]}.{version_info[1]}"
% (version_info[0], version_info[1])
) )
def load_cdll(name, macos10_16_path): def load_cdll(name: str, macos10_16_path: str) -> CDLL:
"""Loads a CDLL by name, falling back to known path on 10.16+""" """Loads a CDLL by name, falling back to known path on 10.16+"""
try: try:
# Big Sur is technically 11 but we use 10.16 due to the Big Sur # Big Sur is technically 11 but we use 10.16 due to the Big Sur
# beta being labeled as 10.16. # beta being labeled as 10.16.
path: str | None
if version_info >= (10, 16): if version_info >= (10, 16):
path = macos10_16_path path = macos10_16_path
else: else:
@ -75,7 +76,7 @@ def load_cdll(name, macos10_16_path):
raise OSError # Caught and reraised as 'ImportError' raise OSError # Caught and reraised as 'ImportError'
return CDLL(path, use_errno=True) return CDLL(path, use_errno=True)
except OSError: except OSError:
raise_from(ImportError("The library %s failed to load" % name), None) raise ImportError(f"The library {name} failed to load") from None
Security = load_cdll( Security = load_cdll(
@ -416,104 +417,14 @@ try:
CoreFoundation.CFStringRef = CFStringRef CoreFoundation.CFStringRef = CFStringRef
CoreFoundation.CFDictionaryRef = CFDictionaryRef CoreFoundation.CFDictionaryRef = CFDictionaryRef
except (AttributeError): except AttributeError:
raise ImportError("Error initializing ctypes") raise ImportError("Error initializing ctypes") from None
class CFConst(object): class CFConst:
""" """
A class object that acts as essentially a namespace for CoreFoundation A class object that acts as essentially a namespace for CoreFoundation
constants. constants.
""" """
kCFStringEncodingUTF8 = CFStringEncoding(0x08000100) kCFStringEncodingUTF8 = CFStringEncoding(0x08000100)
class SecurityConst(object):
"""
A class object that acts as essentially a namespace for Security constants.
"""
kSSLSessionOptionBreakOnServerAuth = 0
kSSLProtocol2 = 1
kSSLProtocol3 = 2
kTLSProtocol1 = 4
kTLSProtocol11 = 7
kTLSProtocol12 = 8
# SecureTransport does not support TLS 1.3 even if there's a constant for it
kTLSProtocol13 = 10
kTLSProtocolMaxSupported = 999
kSSLClientSide = 1
kSSLStreamType = 0
kSecFormatPEMSequence = 10
kSecTrustResultInvalid = 0
kSecTrustResultProceed = 1
# This gap is present on purpose: this was kSecTrustResultConfirm, which
# is deprecated.
kSecTrustResultDeny = 3
kSecTrustResultUnspecified = 4
kSecTrustResultRecoverableTrustFailure = 5
kSecTrustResultFatalTrustFailure = 6
kSecTrustResultOtherError = 7
errSSLProtocol = -9800
errSSLWouldBlock = -9803
errSSLClosedGraceful = -9805
errSSLClosedNoNotify = -9816
errSSLClosedAbort = -9806
errSSLXCertChainInvalid = -9807
errSSLCrypto = -9809
errSSLInternal = -9810
errSSLCertExpired = -9814
errSSLCertNotYetValid = -9815
errSSLUnknownRootCert = -9812
errSSLNoRootCert = -9813
errSSLHostNameMismatch = -9843
errSSLPeerHandshakeFail = -9824
errSSLPeerUserCancelled = -9839
errSSLWeakPeerEphemeralDHKey = -9850
errSSLServerAuthCompleted = -9841
errSSLRecordOverflow = -9847
errSecVerifyFailed = -67808
errSecNoTrustSettings = -25263
errSecItemNotFound = -25300
errSecInvalidTrustSettings = -25262
# Cipher suites. We only pick the ones our default cipher string allows.
# Source: https://developer.apple.com/documentation/security/1550981-ssl_cipher_suite_values
TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 = 0xC02C
TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 = 0xC030
TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 = 0xC02B
TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 = 0xC02F
TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256 = 0xCCA9
TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256 = 0xCCA8
TLS_DHE_RSA_WITH_AES_256_GCM_SHA384 = 0x009F
TLS_DHE_RSA_WITH_AES_128_GCM_SHA256 = 0x009E
TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384 = 0xC024
TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384 = 0xC028
TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA = 0xC00A
TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA = 0xC014
TLS_DHE_RSA_WITH_AES_256_CBC_SHA256 = 0x006B
TLS_DHE_RSA_WITH_AES_256_CBC_SHA = 0x0039
TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256 = 0xC023
TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 = 0xC027
TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA = 0xC009
TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA = 0xC013
TLS_DHE_RSA_WITH_AES_128_CBC_SHA256 = 0x0067
TLS_DHE_RSA_WITH_AES_128_CBC_SHA = 0x0033
TLS_RSA_WITH_AES_256_GCM_SHA384 = 0x009D
TLS_RSA_WITH_AES_128_GCM_SHA256 = 0x009C
TLS_RSA_WITH_AES_256_CBC_SHA256 = 0x003D
TLS_RSA_WITH_AES_128_CBC_SHA256 = 0x003C
TLS_RSA_WITH_AES_256_CBC_SHA = 0x0035
TLS_RSA_WITH_AES_128_CBC_SHA = 0x002F
TLS_AES_128_GCM_SHA256 = 0x1301
TLS_AES_256_GCM_SHA384 = 0x1302
TLS_AES_128_CCM_8_SHA256 = 0x1305
TLS_AES_128_CCM_SHA256 = 0x1304

View file

@ -7,6 +7,8 @@ CoreFoundation messing about and memory management. The concerns in this module
are almost entirely about trying to avoid memory leaks and providing are almost entirely about trying to avoid memory leaks and providing
appropriate and useful assistance to the higher-level code. appropriate and useful assistance to the higher-level code.
""" """
from __future__ import annotations
import base64 import base64
import ctypes import ctypes
import itertools import itertools
@ -15,8 +17,20 @@ import re
import ssl import ssl
import struct import struct
import tempfile import tempfile
import typing
from .bindings import CFConst, CoreFoundation, Security from .bindings import ( # type: ignore[attr-defined]
CFArray,
CFConst,
CFData,
CFDictionary,
CFMutableArray,
CFString,
CFTypeRef,
CoreFoundation,
SecKeychainRef,
Security,
)
# This regular expression is used to grab PEM data out of a PEM bundle. # This regular expression is used to grab PEM data out of a PEM bundle.
_PEM_CERTS_RE = re.compile( _PEM_CERTS_RE = re.compile(
@ -24,7 +38,7 @@ _PEM_CERTS_RE = re.compile(
) )
def _cf_data_from_bytes(bytestring): def _cf_data_from_bytes(bytestring: bytes) -> CFData:
""" """
Given a bytestring, create a CFData object from it. This CFData object must Given a bytestring, create a CFData object from it. This CFData object must
be CFReleased by the caller. be CFReleased by the caller.
@ -34,7 +48,9 @@ def _cf_data_from_bytes(bytestring):
) )
def _cf_dictionary_from_tuples(tuples): def _cf_dictionary_from_tuples(
tuples: list[tuple[typing.Any, typing.Any]]
) -> CFDictionary:
""" """
Given a list of Python tuples, create an associated CFDictionary. Given a list of Python tuples, create an associated CFDictionary.
""" """
@ -56,7 +72,7 @@ def _cf_dictionary_from_tuples(tuples):
) )
def _cfstr(py_bstr): def _cfstr(py_bstr: bytes) -> CFString:
""" """
Given a Python binary data, create a CFString. Given a Python binary data, create a CFString.
The string must be CFReleased by the caller. The string must be CFReleased by the caller.
@ -70,7 +86,7 @@ def _cfstr(py_bstr):
return cf_str return cf_str
def _create_cfstring_array(lst): def _create_cfstring_array(lst: list[bytes]) -> CFMutableArray:
""" """
Given a list of Python binary data, create an associated CFMutableArray. Given a list of Python binary data, create an associated CFMutableArray.
The array must be CFReleased by the caller. The array must be CFReleased by the caller.
@ -97,11 +113,11 @@ def _create_cfstring_array(lst):
except BaseException as e: except BaseException as e:
if cf_arr: if cf_arr:
CoreFoundation.CFRelease(cf_arr) CoreFoundation.CFRelease(cf_arr)
raise ssl.SSLError("Unable to allocate array: %s" % (e,)) raise ssl.SSLError(f"Unable to allocate array: {e}") from None
return cf_arr return cf_arr
def _cf_string_to_unicode(value): def _cf_string_to_unicode(value: CFString) -> str | None:
""" """
Creates a Unicode string from a CFString object. Used entirely for error Creates a Unicode string from a CFString object. Used entirely for error
reporting. reporting.
@ -123,10 +139,12 @@ def _cf_string_to_unicode(value):
string = buffer.value string = buffer.value
if string is not None: if string is not None:
string = string.decode("utf-8") string = string.decode("utf-8")
return string return string # type: ignore[no-any-return]
def _assert_no_error(error, exception_class=None): def _assert_no_error(
error: int, exception_class: type[BaseException] | None = None
) -> None:
""" """
Checks the return code and throws an exception if there is an error to Checks the return code and throws an exception if there is an error to
report report
@ -138,8 +156,8 @@ def _assert_no_error(error, exception_class=None):
output = _cf_string_to_unicode(cf_error_string) output = _cf_string_to_unicode(cf_error_string)
CoreFoundation.CFRelease(cf_error_string) CoreFoundation.CFRelease(cf_error_string)
if output is None or output == u"": if output is None or output == "":
output = u"OSStatus %s" % error output = f"OSStatus {error}"
if exception_class is None: if exception_class is None:
exception_class = ssl.SSLError exception_class = ssl.SSLError
@ -147,7 +165,7 @@ def _assert_no_error(error, exception_class=None):
raise exception_class(output) raise exception_class(output)
def _cert_array_from_pem(pem_bundle): def _cert_array_from_pem(pem_bundle: bytes) -> CFArray:
""" """
Given a bundle of certs in PEM format, turns them into a CFArray of certs Given a bundle of certs in PEM format, turns them into a CFArray of certs
that can be used to validate a cert chain. that can be used to validate a cert chain.
@ -193,23 +211,23 @@ def _cert_array_from_pem(pem_bundle):
return cert_array return cert_array
def _is_cert(item): def _is_cert(item: CFTypeRef) -> bool:
""" """
Returns True if a given CFTypeRef is a certificate. Returns True if a given CFTypeRef is a certificate.
""" """
expected = Security.SecCertificateGetTypeID() expected = Security.SecCertificateGetTypeID()
return CoreFoundation.CFGetTypeID(item) == expected return CoreFoundation.CFGetTypeID(item) == expected # type: ignore[no-any-return]
def _is_identity(item): def _is_identity(item: CFTypeRef) -> bool:
""" """
Returns True if a given CFTypeRef is an identity. Returns True if a given CFTypeRef is an identity.
""" """
expected = Security.SecIdentityGetTypeID() expected = Security.SecIdentityGetTypeID()
return CoreFoundation.CFGetTypeID(item) == expected return CoreFoundation.CFGetTypeID(item) == expected # type: ignore[no-any-return]
def _temporary_keychain(): def _temporary_keychain() -> tuple[SecKeychainRef, str]:
""" """
This function creates a temporary Mac keychain that we can use to work with This function creates a temporary Mac keychain that we can use to work with
credentials. This keychain uses a one-time password and a temporary file to credentials. This keychain uses a one-time password and a temporary file to
@ -244,7 +262,9 @@ def _temporary_keychain():
return keychain, tempdirectory return keychain, tempdirectory
def _load_items_from_file(keychain, path): def _load_items_from_file(
keychain: SecKeychainRef, path: str
) -> tuple[list[CFTypeRef], list[CFTypeRef]]:
""" """
Given a single file, loads all the trust objects from it into arrays and Given a single file, loads all the trust objects from it into arrays and
the keychain. the keychain.
@ -299,7 +319,7 @@ def _load_items_from_file(keychain, path):
return (identities, certificates) return (identities, certificates)
def _load_client_cert_chain(keychain, *paths): def _load_client_cert_chain(keychain: SecKeychainRef, *paths: str | None) -> CFArray:
""" """
Load certificates and maybe keys from a number of files. Has the end goal Load certificates and maybe keys from a number of files. Has the end goal
of returning a CFArray containing one SecIdentityRef, and then zero or more of returning a CFArray containing one SecIdentityRef, and then zero or more
@ -335,10 +355,10 @@ def _load_client_cert_chain(keychain, *paths):
identities = [] identities = []
# Filter out bad paths. # Filter out bad paths.
paths = (path for path in paths if path) filtered_paths = (path for path in paths if path)
try: try:
for file_path in paths: for file_path in filtered_paths:
new_identities, new_certs = _load_items_from_file(keychain, file_path) new_identities, new_certs = _load_items_from_file(keychain, file_path)
identities.extend(new_identities) identities.extend(new_identities)
certificates.extend(new_certs) certificates.extend(new_certs)
@ -383,7 +403,7 @@ TLS_PROTOCOL_VERSIONS = {
} }
def _build_tls_unknown_ca_alert(version): def _build_tls_unknown_ca_alert(version: str) -> bytes:
""" """
Builds a TLS alert record for an unknown CA. Builds a TLS alert record for an unknown CA.
""" """
@ -395,3 +415,60 @@ def _build_tls_unknown_ca_alert(version):
record_type_alert = 0x15 record_type_alert = 0x15
record = struct.pack(">BBBH", record_type_alert, ver_maj, ver_min, msg_len) + msg record = struct.pack(">BBBH", record_type_alert, ver_maj, ver_min, msg_len) + msg
return record return record
class SecurityConst:
"""
A class object that acts as essentially a namespace for Security constants.
"""
kSSLSessionOptionBreakOnServerAuth = 0
kSSLProtocol2 = 1
kSSLProtocol3 = 2
kTLSProtocol1 = 4
kTLSProtocol11 = 7
kTLSProtocol12 = 8
# SecureTransport does not support TLS 1.3 even if there's a constant for it
kTLSProtocol13 = 10
kTLSProtocolMaxSupported = 999
kSSLClientSide = 1
kSSLStreamType = 0
kSecFormatPEMSequence = 10
kSecTrustResultInvalid = 0
kSecTrustResultProceed = 1
# This gap is present on purpose: this was kSecTrustResultConfirm, which
# is deprecated.
kSecTrustResultDeny = 3
kSecTrustResultUnspecified = 4
kSecTrustResultRecoverableTrustFailure = 5
kSecTrustResultFatalTrustFailure = 6
kSecTrustResultOtherError = 7
errSSLProtocol = -9800
errSSLWouldBlock = -9803
errSSLClosedGraceful = -9805
errSSLClosedNoNotify = -9816
errSSLClosedAbort = -9806
errSSLXCertChainInvalid = -9807
errSSLCrypto = -9809
errSSLInternal = -9810
errSSLCertExpired = -9814
errSSLCertNotYetValid = -9815
errSSLUnknownRootCert = -9812
errSSLNoRootCert = -9813
errSSLHostNameMismatch = -9843
errSSLPeerHandshakeFail = -9824
errSSLPeerUserCancelled = -9839
errSSLWeakPeerEphemeralDHKey = -9850
errSSLServerAuthCompleted = -9841
errSSLRecordOverflow = -9847
errSecVerifyFailed = -67808
errSecNoTrustSettings = -25263
errSecItemNotFound = -25300
errSecInvalidTrustSettings = -25262

View file

@ -1,314 +0,0 @@
"""
This module provides a pool manager that uses Google App Engine's
`URLFetch Service <https://cloud.google.com/appengine/docs/python/urlfetch>`_.
Example usage::
from urllib3 import PoolManager
from urllib3.contrib.appengine import AppEngineManager, is_appengine_sandbox
if is_appengine_sandbox():
# AppEngineManager uses AppEngine's URLFetch API behind the scenes
http = AppEngineManager()
else:
# PoolManager uses a socket-level API behind the scenes
http = PoolManager()
r = http.request('GET', 'https://google.com/')
There are `limitations <https://cloud.google.com/appengine/docs/python/\
urlfetch/#Python_Quotas_and_limits>`_ to the URLFetch service and it may not be
the best choice for your application. There are three options for using
urllib3 on Google App Engine:
1. You can use :class:`AppEngineManager` with URLFetch. URLFetch is
cost-effective in many circumstances as long as your usage is within the
limitations.
2. You can use a normal :class:`~urllib3.PoolManager` by enabling sockets.
Sockets also have `limitations and restrictions
<https://cloud.google.com/appengine/docs/python/sockets/\
#limitations-and-restrictions>`_ and have a lower free quota than URLFetch.
To use sockets, be sure to specify the following in your ``app.yaml``::
env_variables:
GAE_USE_SOCKETS_HTTPLIB : 'true'
3. If you are using `App Engine Flexible
<https://cloud.google.com/appengine/docs/flexible/>`_, you can use the standard
:class:`PoolManager` without any configuration or special environment variables.
"""
from __future__ import absolute_import
import io
import logging
import warnings
from ..exceptions import (
HTTPError,
HTTPWarning,
MaxRetryError,
ProtocolError,
SSLError,
TimeoutError,
)
from ..packages.six.moves.urllib.parse import urljoin
from ..request import RequestMethods
from ..response import HTTPResponse
from ..util.retry import Retry
from ..util.timeout import Timeout
from . import _appengine_environ
try:
from google.appengine.api import urlfetch
except ImportError:
urlfetch = None
log = logging.getLogger(__name__)
class AppEnginePlatformWarning(HTTPWarning):
pass
class AppEnginePlatformError(HTTPError):
pass
class AppEngineManager(RequestMethods):
    """
    Connection manager for Google App Engine sandbox applications.

    This manager uses the URLFetch service directly instead of using the
    emulated httplib, and is subject to URLFetch limitations as described in
    the App Engine documentation `here
    <https://cloud.google.com/appengine/docs/python/urlfetch>`_.

    Notably it will raise an :class:`AppEnginePlatformError` if:
        * URLFetch is not available.
        * If you attempt to use this on App Engine Flexible, as full socket
          support is available.
        * If a request size is more than 10 megabytes.
        * If a response size is more than 32 megabytes.
        * If you use an unsupported request method such as OPTIONS.

    Beyond those cases, it will raise normal urllib3 errors.
    """

    def __init__(
        self,
        headers=None,
        retries=None,
        validate_certificate=True,
        urlfetch_retries=True,
    ):
        """
        :param headers: Default headers sent with every request.
        :param retries: Retry configuration; falls back to ``Retry.DEFAULT``.
        :param validate_certificate: Forwarded to ``urlfetch.fetch`` to
            control TLS certificate validation.
        :param urlfetch_retries: When True, URLFetch follows redirects
            natively; when False, redirects are replayed by :meth:`urlopen`.
        :raises AppEnginePlatformError: if URLFetch is unavailable here.
        """
        if not urlfetch:
            raise AppEnginePlatformError(
                "URLFetch is not available in this environment."
            )

        warnings.warn(
            "urllib3 is using URLFetch on Google App Engine sandbox instead "
            "of sockets. To use sockets directly instead of URLFetch see "
            "https://urllib3.readthedocs.io/en/1.26.x/reference/urllib3.contrib.html.",
            AppEnginePlatformWarning,
        )

        RequestMethods.__init__(self, headers)
        self.validate_certificate = validate_certificate
        self.urlfetch_retries = urlfetch_retries
        self.retries = retries or Retry.DEFAULT

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Return False to re-raise any potential exceptions
        return False

    def urlopen(
        self,
        method,
        url,
        body=None,
        headers=None,
        retries=None,
        redirect=True,
        timeout=Timeout.DEFAULT_TIMEOUT,
        **response_kw
    ):
        """
        Perform a request via ``urlfetch.fetch``, translating URLFetch
        exceptions into urllib3 exceptions, and recursing through this same
        method to service redirects and status-based retries.

        :raises TimeoutError: if URLFetch hits its deadline.
        :raises AppEnginePlatformError: for URLFetch size/method limits.
        :raises MaxRetryError: on redirect loops or exhausted retries.
        :raises ProtocolError: for other URL/download failures.
        :raises SSLError: on certificate validation failure.
        """

        retries = self._get_retries(retries, redirect)

        try:
            # Only let URLFetch follow redirects itself when redirects are
            # requested AND the Retry policy still permits them.
            follow_redirects = redirect and retries.redirect != 0 and retries.total
            response = urlfetch.fetch(
                url,
                payload=body,
                method=method,
                headers=headers or {},
                allow_truncated=False,
                follow_redirects=self.urlfetch_retries and follow_redirects,
                deadline=self._get_absolute_timeout(timeout),
                validate_certificate=self.validate_certificate,
            )
        except urlfetch.DeadlineExceededError as e:
            raise TimeoutError(self, e)

        except urlfetch.InvalidURLError as e:
            if "too large" in str(e):
                raise AppEnginePlatformError(
                    "URLFetch request too large, URLFetch only "
                    "supports requests up to 10mb in size.",
                    e,
                )
            raise ProtocolError(e)

        except urlfetch.DownloadError as e:
            if "Too many redirects" in str(e):
                raise MaxRetryError(self, url, reason=e)
            raise ProtocolError(e)

        except urlfetch.ResponseTooLargeError as e:
            raise AppEnginePlatformError(
                # Fixed: the two fragments previously concatenated to
                # "supportsresponses" (missing separating space).
                "URLFetch response too large, URLFetch only supports "
                "responses up to 32mb in size.",
                e,
            )

        except urlfetch.SSLCertificateError as e:
            raise SSLError(e)

        except urlfetch.InvalidMethodError as e:
            raise AppEnginePlatformError(
                "URLFetch does not support method: %s" % method, e
            )

        http_response = self._urlfetch_response_to_http_response(
            response, retries=retries, **response_kw
        )

        # Handle redirect?
        redirect_location = redirect and http_response.get_redirect_location()
        if redirect_location:
            # Check for redirect response
            if self.urlfetch_retries and retries.raise_on_redirect:
                raise MaxRetryError(self, url, "too many redirects")
            else:
                # Per RFC 7231, a 303 redirect is always replayed as GET.
                if http_response.status == 303:
                    method = "GET"

                try:
                    retries = retries.increment(
                        method, url, response=http_response, _pool=self
                    )
                except MaxRetryError:
                    if retries.raise_on_redirect:
                        raise MaxRetryError(self, url, "too many redirects")
                    return http_response

                retries.sleep_for_retry(http_response)
                log.debug("Redirecting %s -> %s", url, redirect_location)
                redirect_url = urljoin(url, redirect_location)
                return self.urlopen(
                    method,
                    redirect_url,
                    body,
                    headers,
                    retries=retries,
                    redirect=redirect,
                    timeout=timeout,
                    **response_kw
                )

        # Check if we should retry the HTTP response.
        has_retry_after = bool(http_response.headers.get("Retry-After"))
        if retries.is_retry(method, http_response.status, has_retry_after):
            retries = retries.increment(method, url, response=http_response, _pool=self)
            log.debug("Retry: %s", url)
            retries.sleep(http_response)
            return self.urlopen(
                method,
                url,
                body=body,
                headers=headers,
                retries=retries,
                redirect=redirect,
                timeout=timeout,
                **response_kw
            )

        return http_response

    def _urlfetch_response_to_http_response(self, urlfetch_resp, **response_kw):
        """Adapt a URLFetch response object into an urllib3 ``HTTPResponse``."""

        if is_prod_appengine():
            # Production GAE handles deflate encoding automatically, but does
            # not remove the encoding header.
            content_encoding = urlfetch_resp.headers.get("content-encoding")

            if content_encoding == "deflate":
                del urlfetch_resp.headers["content-encoding"]

        transfer_encoding = urlfetch_resp.headers.get("transfer-encoding")
        # We have a full response's content,
        # so let's make sure we don't report ourselves as chunked data.
        if transfer_encoding == "chunked":
            encodings = transfer_encoding.split(",")
            encodings.remove("chunked")
            urlfetch_resp.headers["transfer-encoding"] = ",".join(encodings)

        original_response = HTTPResponse(
            # In order for decoding to work, we must present the content as
            # a file-like object.
            body=io.BytesIO(urlfetch_resp.content),
            msg=urlfetch_resp.header_msg,
            headers=urlfetch_resp.headers,
            status=urlfetch_resp.status_code,
            **response_kw
        )

        return HTTPResponse(
            body=io.BytesIO(urlfetch_resp.content),
            headers=urlfetch_resp.headers,
            status=urlfetch_resp.status_code,
            original_response=original_response,
            **response_kw
        )

    def _get_absolute_timeout(self, timeout):
        """Collapse a ``Timeout`` object into the single total deadline
        (seconds or None) that ``urlfetch.fetch`` accepts, warning when
        connect/read granularity would be lost."""
        if timeout is Timeout.DEFAULT_TIMEOUT:
            return None  # Defer to URLFetch's default.
        if isinstance(timeout, Timeout):
            if timeout._read is not None or timeout._connect is not None:
                warnings.warn(
                    "URLFetch does not support granular timeout settings, "
                    "reverting to total or default URLFetch timeout.",
                    AppEnginePlatformWarning,
                )
            return timeout.total
        return timeout

    def _get_retries(self, retries, redirect):
        """Normalize *retries* (int, None, or ``Retry``) into a ``Retry``,
        warning about per-phase knobs URLFetch cannot honor."""
        if not isinstance(retries, Retry):
            retries = Retry.from_int(retries, redirect=redirect, default=self.retries)

        if retries.connect or retries.read or retries.redirect:
            warnings.warn(
                "URLFetch only supports total retries and does not "
                "recognize connect, read, or redirect retry parameters.",
                AppEnginePlatformWarning,
            )

        return retries
# Alias methods from _appengine_environ to maintain public API interface.
# These were historically defined in this module; re-exporting keeps
# `urllib3.contrib.appengine.is_appengine()` etc. importable by callers.
is_appengine = _appengine_environ.is_appengine
is_appengine_sandbox = _appengine_environ.is_appengine_sandbox
is_local_appengine = _appengine_environ.is_local_appengine
is_prod_appengine = _appengine_environ.is_prod_appengine
is_prod_appengine_mvms = _appengine_environ.is_prod_appengine_mvms

View file

@ -1,130 +0,0 @@
"""
NTLM authenticating pool, contributed by erikcederstran
Issue #10, see: http://code.google.com/p/urllib3/issues/detail?id=10
"""
from __future__ import absolute_import
import warnings
from logging import getLogger
from ntlm import ntlm
from .. import HTTPSConnectionPool
from ..packages.six.moves.http_client import HTTPSConnection
# Emit the deprecation notice once, at import time, so any consumer of this
# module sees it before using NTLMConnectionPool.
warnings.warn(
    "The 'urllib3.contrib.ntlmpool' module is deprecated and will be removed "
    "in urllib3 v2.0 release, urllib3 is not able to support it properly due "
    "to reasons listed in issue: https://github.com/urllib3/urllib3/issues/2282. "
    "If you are a user of this module please comment in the mentioned issue.",
    DeprecationWarning,
)
log = getLogger(__name__)
class NTLMConnectionPool(HTTPSConnectionPool):
    """
    Implements an NTLM authentication version of an urllib3 connection pool.

    Each new connection performs the three-message NTLM handshake
    (negotiate -> challenge -> authenticate) against ``authurl`` and then
    keeps the socket open, since NTLM authenticates the connection rather
    than individual requests.
    """
    # NTLM pools are HTTPS-only.
    scheme = "https"
    def __init__(self, user, pw, authurl, *args, **kwargs):
        """
        authurl is a random URL on the server that is protected by NTLM.
        user is the Windows user, probably in the DOMAIN\\username format.
        pw is the password for the user.
        """
        super(NTLMConnectionPool, self).__init__(*args, **kwargs)
        self.authurl = authurl
        self.rawuser = user
        # Split "DOMAIN\\username" into its parts; domain is upper-cased as
        # NTLM treats it case-insensitively.
        # NOTE(review): assumes *user* always contains a backslash — a bare
        # username would raise IndexError here; confirm with callers.
        user_parts = user.split("\\", 1)
        self.domain = user_parts[0].upper()
        self.user = user_parts[1]
        self.pw = pw
    def _new_conn(self):
        # Performs the NTLM handshake that secures the connection. The socket
        # must be kept open while requests are performed.
        self.num_connections += 1
        log.debug(
            "Starting NTLM HTTPS connection no. %d: https://%s%s",
            self.num_connections,
            self.host,
            self.authurl,
        )
        # Keep-Alive is required: the NTLM session is bound to this socket.
        headers = {"Connection": "Keep-Alive"}
        req_header = "Authorization"
        resp_header = "www-authenticate"
        conn = HTTPSConnection(host=self.host, port=self.port)
        # Send negotiation message
        headers[req_header] = "NTLM %s" % ntlm.create_NTLM_NEGOTIATE_MESSAGE(
            self.rawuser
        )
        log.debug("Request headers: %s", headers)
        conn.request("GET", self.authurl, None, headers)
        res = conn.getresponse()
        reshdr = dict(res.headers)
        log.debug("Response status: %s %s", res.status, res.reason)
        log.debug("Response headers: %s", reshdr)
        log.debug("Response data: %s [...]", res.read(100))
        # Remove the reference to the socket, so that it can not be closed by
        # the response object (we want to keep the socket open)
        res.fp = None
        # Server should respond with a challenge message
        auth_header_values = reshdr[resp_header].split(", ")
        auth_header_value = None
        for s in auth_header_values:
            if s[:5] == "NTLM ":
                auth_header_value = s[5:]
        if auth_header_value is None:
            raise Exception(
                "Unexpected %s response header: %s" % (resp_header, reshdr[resp_header])
            )
        # Send authentication message
        ServerChallenge, NegotiateFlags = ntlm.parse_NTLM_CHALLENGE_MESSAGE(
            auth_header_value
        )
        auth_msg = ntlm.create_NTLM_AUTHENTICATE_MESSAGE(
            ServerChallenge, self.user, self.domain, self.pw, NegotiateFlags
        )
        headers[req_header] = "NTLM %s" % auth_msg
        log.debug("Request headers: %s", headers)
        conn.request("GET", self.authurl, None, headers)
        res = conn.getresponse()
        log.debug("Response status: %s %s", res.status, res.reason)
        log.debug("Response headers: %s", dict(res.headers))
        log.debug("Response data: %s [...]", res.read()[:100])
        if res.status != 200:
            if res.status == 401:
                raise Exception("Server rejected request: wrong username or password")
            raise Exception("Wrong server response: %s %s" % (res.status, res.reason))
        # Again detach the socket from the response so the authenticated
        # connection stays open for subsequent requests.
        res.fp = None
        log.debug("Connection established")
        return conn
    def urlopen(
        self,
        method,
        url,
        body=None,
        headers=None,
        retries=3,
        redirect=True,
        assert_same_host=True,
    ):
        # Force Keep-Alive on every request so the NTLM-authenticated socket
        # is never closed between requests.
        if headers is None:
            headers = {}
        headers["Connection"] = "Keep-Alive"
        return super(NTLMConnectionPool, self).urlopen(
            method, url, body, headers, retries, redirect, assert_same_host
        )

View file

@ -1,8 +1,8 @@
""" """
TLS with SNI_-support for Python 2. Follow these instructions if you would Module for using pyOpenSSL as a TLS backend. This module was relevant before
like to verify TLS certificates in Python 2. Note, the default libraries do the standard library ``ssl`` module supported SNI, but now that we've dropped
*not* do certificate checking; you need to do additional work to validate support for Python 2.7 all relevant Python versions support SNI so
certificates yourself. **this module is no longer recommended**.
This needs the following packages installed: This needs the following packages installed:
@ -10,7 +10,7 @@ This needs the following packages installed:
* `cryptography`_ (minimum 1.3.4, from pyopenssl) * `cryptography`_ (minimum 1.3.4, from pyopenssl)
* `idna`_ (minimum 2.0, from cryptography) * `idna`_ (minimum 2.0, from cryptography)
However, pyopenssl depends on cryptography, which depends on idna, so while we However, pyOpenSSL depends on cryptography, which depends on idna, so while we
use all three directly here we end up having relatively few packages required. use all three directly here we end up having relatively few packages required.
You can install them with the following command: You can install them with the following command:
@ -33,75 +33,55 @@ like this:
except ImportError: except ImportError:
pass pass
Now you can use :mod:`urllib3` as you normally would, and it will support SNI
when the required modules are installed.
Activating this module also has the positive side effect of disabling SSL/TLS
compression in Python 2 (see `CRIME attack`_).
.. _sni: https://en.wikipedia.org/wiki/Server_Name_Indication
.. _crime attack: https://en.wikipedia.org/wiki/CRIME_(security_exploit)
.. _pyopenssl: https://www.pyopenssl.org .. _pyopenssl: https://www.pyopenssl.org
.. _cryptography: https://cryptography.io .. _cryptography: https://cryptography.io
.. _idna: https://github.com/kjd/idna .. _idna: https://github.com/kjd/idna
""" """
from __future__ import absolute_import
import OpenSSL.crypto from __future__ import annotations
import OpenSSL.SSL
import OpenSSL.SSL # type: ignore[import]
from cryptography import x509 from cryptography import x509
from cryptography.hazmat.backends.openssl import backend as openssl_backend
try: try:
from cryptography.x509 import UnsupportedExtension from cryptography.x509 import UnsupportedExtension # type: ignore[attr-defined]
except ImportError: except ImportError:
# UnsupportedExtension is gone in cryptography >= 2.1.0 # UnsupportedExtension is gone in cryptography >= 2.1.0
class UnsupportedExtension(Exception): class UnsupportedExtension(Exception): # type: ignore[no-redef]
pass pass
from io import BytesIO
from socket import error as SocketError
from socket import timeout
try: # Platform-specific: Python 2
from socket import _fileobject
except ImportError: # Platform-specific: Python 3
_fileobject = None
from ..packages.backports.makefile import backport_makefile
import logging import logging
import ssl import ssl
import sys import typing
import warnings import warnings
from io import BytesIO
from socket import socket as socket_cls
from socket import timeout
from .. import util from .. import util
from ..packages import six
from ..util.ssl_ import PROTOCOL_TLS_CLIENT
warnings.warn( warnings.warn(
"'urllib3.contrib.pyopenssl' module is deprecated and will be removed " "'urllib3.contrib.pyopenssl' module is deprecated and will be removed "
"in a future release of urllib3 2.x. Read more in this issue: " "in urllib3 v2.1.0. Read more in this issue: "
"https://github.com/urllib3/urllib3/issues/2680", "https://github.com/urllib3/urllib3/issues/2680",
category=DeprecationWarning, category=DeprecationWarning,
stacklevel=2, stacklevel=2,
) )
__all__ = ["inject_into_urllib3", "extract_from_urllib3"] if typing.TYPE_CHECKING:
from OpenSSL.crypto import X509 # type: ignore[import]
# SNI always works.
HAS_SNI = True __all__ = ["inject_into_urllib3", "extract_from_urllib3"]
# Map from urllib3 to PyOpenSSL compatible parameter-values. # Map from urllib3 to PyOpenSSL compatible parameter-values.
_openssl_versions = { _openssl_versions = {
util.PROTOCOL_TLS: OpenSSL.SSL.SSLv23_METHOD, util.ssl_.PROTOCOL_TLS: OpenSSL.SSL.SSLv23_METHOD, # type: ignore[attr-defined]
PROTOCOL_TLS_CLIENT: OpenSSL.SSL.SSLv23_METHOD, util.ssl_.PROTOCOL_TLS_CLIENT: OpenSSL.SSL.SSLv23_METHOD, # type: ignore[attr-defined]
ssl.PROTOCOL_TLSv1: OpenSSL.SSL.TLSv1_METHOD, ssl.PROTOCOL_TLSv1: OpenSSL.SSL.TLSv1_METHOD,
} }
if hasattr(ssl, "PROTOCOL_SSLv3") and hasattr(OpenSSL.SSL, "SSLv3_METHOD"):
_openssl_versions[ssl.PROTOCOL_SSLv3] = OpenSSL.SSL.SSLv3_METHOD
if hasattr(ssl, "PROTOCOL_TLSv1_1") and hasattr(OpenSSL.SSL, "TLSv1_1_METHOD"): if hasattr(ssl, "PROTOCOL_TLSv1_1") and hasattr(OpenSSL.SSL, "TLSv1_1_METHOD"):
_openssl_versions[ssl.PROTOCOL_TLSv1_1] = OpenSSL.SSL.TLSv1_1_METHOD _openssl_versions[ssl.PROTOCOL_TLSv1_1] = OpenSSL.SSL.TLSv1_1_METHOD
@ -115,43 +95,77 @@ _stdlib_to_openssl_verify = {
ssl.CERT_REQUIRED: OpenSSL.SSL.VERIFY_PEER ssl.CERT_REQUIRED: OpenSSL.SSL.VERIFY_PEER
+ OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT, + OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT,
} }
_openssl_to_stdlib_verify = dict((v, k) for k, v in _stdlib_to_openssl_verify.items()) _openssl_to_stdlib_verify = {v: k for k, v in _stdlib_to_openssl_verify.items()}
# The SSLvX values are the most likely to be missing in the future
# but we check them all just to be sure.
_OP_NO_SSLv2_OR_SSLv3: int = getattr(OpenSSL.SSL, "OP_NO_SSLv2", 0) | getattr(
OpenSSL.SSL, "OP_NO_SSLv3", 0
)
_OP_NO_TLSv1: int = getattr(OpenSSL.SSL, "OP_NO_TLSv1", 0)
_OP_NO_TLSv1_1: int = getattr(OpenSSL.SSL, "OP_NO_TLSv1_1", 0)
_OP_NO_TLSv1_2: int = getattr(OpenSSL.SSL, "OP_NO_TLSv1_2", 0)
_OP_NO_TLSv1_3: int = getattr(OpenSSL.SSL, "OP_NO_TLSv1_3", 0)
_openssl_to_ssl_minimum_version: dict[int, int] = {
ssl.TLSVersion.MINIMUM_SUPPORTED: _OP_NO_SSLv2_OR_SSLv3,
ssl.TLSVersion.TLSv1: _OP_NO_SSLv2_OR_SSLv3,
ssl.TLSVersion.TLSv1_1: _OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1,
ssl.TLSVersion.TLSv1_2: _OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1 | _OP_NO_TLSv1_1,
ssl.TLSVersion.TLSv1_3: (
_OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1 | _OP_NO_TLSv1_1 | _OP_NO_TLSv1_2
),
ssl.TLSVersion.MAXIMUM_SUPPORTED: (
_OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1 | _OP_NO_TLSv1_1 | _OP_NO_TLSv1_2
),
}
_openssl_to_ssl_maximum_version: dict[int, int] = {
ssl.TLSVersion.MINIMUM_SUPPORTED: (
_OP_NO_SSLv2_OR_SSLv3
| _OP_NO_TLSv1
| _OP_NO_TLSv1_1
| _OP_NO_TLSv1_2
| _OP_NO_TLSv1_3
),
ssl.TLSVersion.TLSv1: (
_OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1_1 | _OP_NO_TLSv1_2 | _OP_NO_TLSv1_3
),
ssl.TLSVersion.TLSv1_1: _OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1_2 | _OP_NO_TLSv1_3,
ssl.TLSVersion.TLSv1_2: _OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1_3,
ssl.TLSVersion.TLSv1_3: _OP_NO_SSLv2_OR_SSLv3,
ssl.TLSVersion.MAXIMUM_SUPPORTED: _OP_NO_SSLv2_OR_SSLv3,
}
# OpenSSL will only write 16K at a time # OpenSSL will only write 16K at a time
SSL_WRITE_BLOCKSIZE = 16384 SSL_WRITE_BLOCKSIZE = 16384
orig_util_HAS_SNI = util.HAS_SNI
orig_util_SSLContext = util.ssl_.SSLContext orig_util_SSLContext = util.ssl_.SSLContext
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
def inject_into_urllib3(): def inject_into_urllib3() -> None:
"Monkey-patch urllib3 with PyOpenSSL-backed SSL-support." "Monkey-patch urllib3 with PyOpenSSL-backed SSL-support."
_validate_dependencies_met() _validate_dependencies_met()
util.SSLContext = PyOpenSSLContext util.SSLContext = PyOpenSSLContext # type: ignore[assignment]
util.ssl_.SSLContext = PyOpenSSLContext util.ssl_.SSLContext = PyOpenSSLContext # type: ignore[assignment]
util.HAS_SNI = HAS_SNI
util.ssl_.HAS_SNI = HAS_SNI
util.IS_PYOPENSSL = True util.IS_PYOPENSSL = True
util.ssl_.IS_PYOPENSSL = True util.ssl_.IS_PYOPENSSL = True
def extract_from_urllib3(): def extract_from_urllib3() -> None:
"Undo monkey-patching by :func:`inject_into_urllib3`." "Undo monkey-patching by :func:`inject_into_urllib3`."
util.SSLContext = orig_util_SSLContext util.SSLContext = orig_util_SSLContext
util.ssl_.SSLContext = orig_util_SSLContext util.ssl_.SSLContext = orig_util_SSLContext
util.HAS_SNI = orig_util_HAS_SNI
util.ssl_.HAS_SNI = orig_util_HAS_SNI
util.IS_PYOPENSSL = False util.IS_PYOPENSSL = False
util.ssl_.IS_PYOPENSSL = False util.ssl_.IS_PYOPENSSL = False
def _validate_dependencies_met(): def _validate_dependencies_met() -> None:
""" """
Verifies that PyOpenSSL's package-level dependencies have been met. Verifies that PyOpenSSL's package-level dependencies have been met.
Throws `ImportError` if they are not met. Throws `ImportError` if they are not met.
@ -177,7 +191,7 @@ def _validate_dependencies_met():
) )
def _dnsname_to_stdlib(name): def _dnsname_to_stdlib(name: str) -> str | None:
""" """
Converts a dNSName SubjectAlternativeName field to the form used by the Converts a dNSName SubjectAlternativeName field to the form used by the
standard library on the given Python version. standard library on the given Python version.
@ -191,7 +205,7 @@ def _dnsname_to_stdlib(name):
the name given should be skipped. the name given should be skipped.
""" """
def idna_encode(name): def idna_encode(name: str) -> bytes | None:
""" """
Borrowed wholesale from the Python Cryptography Project. It turns out Borrowed wholesale from the Python Cryptography Project. It turns out
that we can't just safely call `idna.encode`: it can explode for that we can't just safely call `idna.encode`: it can explode for
@ -200,7 +214,7 @@ def _dnsname_to_stdlib(name):
import idna import idna
try: try:
for prefix in [u"*.", u"."]: for prefix in ["*.", "."]:
if name.startswith(prefix): if name.startswith(prefix):
name = name[len(prefix) :] name = name[len(prefix) :]
return prefix.encode("ascii") + idna.encode(name) return prefix.encode("ascii") + idna.encode(name)
@ -212,24 +226,17 @@ def _dnsname_to_stdlib(name):
if ":" in name: if ":" in name:
return name return name
name = idna_encode(name) encoded_name = idna_encode(name)
if name is None: if encoded_name is None:
return None return None
elif sys.version_info >= (3, 0): return encoded_name.decode("utf-8")
name = name.decode("utf-8")
return name
def get_subj_alt_name(peer_cert): def get_subj_alt_name(peer_cert: X509) -> list[tuple[str, str]]:
""" """
Given an PyOpenSSL certificate, provides all the subject alternative names. Given an PyOpenSSL certificate, provides all the subject alternative names.
""" """
# Pass the cert to cryptography, which has much better APIs for this.
if hasattr(peer_cert, "to_cryptography"):
cert = peer_cert.to_cryptography() cert = peer_cert.to_cryptography()
else:
der = OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_ASN1, peer_cert)
cert = x509.load_der_x509_certificate(der, openssl_backend)
# We want to find the SAN extension. Ask Cryptography to locate it (it's # We want to find the SAN extension. Ask Cryptography to locate it (it's
# faster than looping in Python) # faster than looping in Python)
@ -273,93 +280,94 @@ def get_subj_alt_name(peer_cert):
return names return names
class WrappedSocket(object): class WrappedSocket:
"""API-compatibility wrapper for Python OpenSSL's Connection-class. """API-compatibility wrapper for Python OpenSSL's Connection-class."""
Note: _makefile_refs, _drop() and _reuse() are needed for the garbage def __init__(
collector of pypy. self,
""" connection: OpenSSL.SSL.Connection,
socket: socket_cls,
def __init__(self, connection, socket, suppress_ragged_eofs=True): suppress_ragged_eofs: bool = True,
) -> None:
self.connection = connection self.connection = connection
self.socket = socket self.socket = socket
self.suppress_ragged_eofs = suppress_ragged_eofs self.suppress_ragged_eofs = suppress_ragged_eofs
self._makefile_refs = 0 self._io_refs = 0
self._closed = False self._closed = False
def fileno(self): def fileno(self) -> int:
return self.socket.fileno() return self.socket.fileno()
# Copy-pasted from Python 3.5 source code # Copy-pasted from Python 3.5 source code
def _decref_socketios(self): def _decref_socketios(self) -> None:
if self._makefile_refs > 0: if self._io_refs > 0:
self._makefile_refs -= 1 self._io_refs -= 1
if self._closed: if self._closed:
self.close() self.close()
def recv(self, *args, **kwargs): def recv(self, *args: typing.Any, **kwargs: typing.Any) -> bytes:
try: try:
data = self.connection.recv(*args, **kwargs) data = self.connection.recv(*args, **kwargs)
except OpenSSL.SSL.SysCallError as e: except OpenSSL.SSL.SysCallError as e:
if self.suppress_ragged_eofs and e.args == (-1, "Unexpected EOF"): if self.suppress_ragged_eofs and e.args == (-1, "Unexpected EOF"):
return b"" return b""
else: else:
raise SocketError(str(e)) raise OSError(e.args[0], str(e)) from e
except OpenSSL.SSL.ZeroReturnError: except OpenSSL.SSL.ZeroReturnError:
if self.connection.get_shutdown() == OpenSSL.SSL.RECEIVED_SHUTDOWN: if self.connection.get_shutdown() == OpenSSL.SSL.RECEIVED_SHUTDOWN:
return b"" return b""
else: else:
raise raise
except OpenSSL.SSL.WantReadError: except OpenSSL.SSL.WantReadError as e:
if not util.wait_for_read(self.socket, self.socket.gettimeout()): if not util.wait_for_read(self.socket, self.socket.gettimeout()):
raise timeout("The read operation timed out") raise timeout("The read operation timed out") from e
else: else:
return self.recv(*args, **kwargs) return self.recv(*args, **kwargs)
# TLS 1.3 post-handshake authentication # TLS 1.3 post-handshake authentication
except OpenSSL.SSL.Error as e: except OpenSSL.SSL.Error as e:
raise ssl.SSLError("read error: %r" % e) raise ssl.SSLError(f"read error: {e!r}") from e
else: else:
return data return data # type: ignore[no-any-return]
def recv_into(self, *args, **kwargs): def recv_into(self, *args: typing.Any, **kwargs: typing.Any) -> int:
try: try:
return self.connection.recv_into(*args, **kwargs) return self.connection.recv_into(*args, **kwargs) # type: ignore[no-any-return]
except OpenSSL.SSL.SysCallError as e: except OpenSSL.SSL.SysCallError as e:
if self.suppress_ragged_eofs and e.args == (-1, "Unexpected EOF"): if self.suppress_ragged_eofs and e.args == (-1, "Unexpected EOF"):
return 0 return 0
else: else:
raise SocketError(str(e)) raise OSError(e.args[0], str(e)) from e
except OpenSSL.SSL.ZeroReturnError: except OpenSSL.SSL.ZeroReturnError:
if self.connection.get_shutdown() == OpenSSL.SSL.RECEIVED_SHUTDOWN: if self.connection.get_shutdown() == OpenSSL.SSL.RECEIVED_SHUTDOWN:
return 0 return 0
else: else:
raise raise
except OpenSSL.SSL.WantReadError: except OpenSSL.SSL.WantReadError as e:
if not util.wait_for_read(self.socket, self.socket.gettimeout()): if not util.wait_for_read(self.socket, self.socket.gettimeout()):
raise timeout("The read operation timed out") raise timeout("The read operation timed out") from e
else: else:
return self.recv_into(*args, **kwargs) return self.recv_into(*args, **kwargs)
# TLS 1.3 post-handshake authentication # TLS 1.3 post-handshake authentication
except OpenSSL.SSL.Error as e: except OpenSSL.SSL.Error as e:
raise ssl.SSLError("read error: %r" % e) raise ssl.SSLError(f"read error: {e!r}") from e
def settimeout(self, timeout): def settimeout(self, timeout: float) -> None:
return self.socket.settimeout(timeout) return self.socket.settimeout(timeout)
def _send_until_done(self, data): def _send_until_done(self, data: bytes) -> int:
while True: while True:
try: try:
return self.connection.send(data) return self.connection.send(data) # type: ignore[no-any-return]
except OpenSSL.SSL.WantWriteError: except OpenSSL.SSL.WantWriteError as e:
if not util.wait_for_write(self.socket, self.socket.gettimeout()): if not util.wait_for_write(self.socket, self.socket.gettimeout()):
raise timeout() raise timeout() from e
continue continue
except OpenSSL.SSL.SysCallError as e: except OpenSSL.SSL.SysCallError as e:
raise SocketError(str(e)) raise OSError(e.args[0], str(e)) from e
def sendall(self, data): def sendall(self, data: bytes) -> None:
total_sent = 0 total_sent = 0
while total_sent < len(data): while total_sent < len(data):
sent = self._send_until_done( sent = self._send_until_done(
@ -367,135 +375,135 @@ class WrappedSocket(object):
) )
total_sent += sent total_sent += sent
def shutdown(self): def shutdown(self) -> None:
# FIXME rethrow compatible exceptions should we ever use this # FIXME rethrow compatible exceptions should we ever use this
self.connection.shutdown() self.connection.shutdown()
def close(self): def close(self) -> None:
if self._makefile_refs < 1:
try:
self._closed = True self._closed = True
return self.connection.close() if self._io_refs <= 0:
self._real_close()
def _real_close(self) -> None:
try:
return self.connection.close() # type: ignore[no-any-return]
except OpenSSL.SSL.Error: except OpenSSL.SSL.Error:
return return
else:
self._makefile_refs -= 1
def getpeercert(self, binary_form=False): def getpeercert(
self, binary_form: bool = False
) -> dict[str, list[typing.Any]] | None:
x509 = self.connection.get_peer_certificate() x509 = self.connection.get_peer_certificate()
if not x509: if not x509:
return x509 return x509 # type: ignore[no-any-return]
if binary_form: if binary_form:
return OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_ASN1, x509) return OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_ASN1, x509) # type: ignore[no-any-return]
return { return {
"subject": ((("commonName", x509.get_subject().CN),),), "subject": ((("commonName", x509.get_subject().CN),),), # type: ignore[dict-item]
"subjectAltName": get_subj_alt_name(x509), "subjectAltName": get_subj_alt_name(x509),
} }
def version(self): def version(self) -> str:
return self.connection.get_protocol_version_name() return self.connection.get_protocol_version_name() # type: ignore[no-any-return]
def _reuse(self):
self._makefile_refs += 1
def _drop(self):
if self._makefile_refs < 1:
self.close()
else:
self._makefile_refs -= 1
if _fileobject: # Platform-specific: Python 2 WrappedSocket.makefile = socket_cls.makefile # type: ignore[attr-defined]
def makefile(self, mode, bufsize=-1):
self._makefile_refs += 1
return _fileobject(self, mode, bufsize, close=True)
else: # Platform-specific: Python 3
makefile = backport_makefile
WrappedSocket.makefile = makefile
class PyOpenSSLContext(object): class PyOpenSSLContext:
""" """
I am a wrapper class for the PyOpenSSL ``Context`` object. I am responsible I am a wrapper class for the PyOpenSSL ``Context`` object. I am responsible
for translating the interface of the standard library ``SSLContext`` object for translating the interface of the standard library ``SSLContext`` object
to calls into PyOpenSSL. to calls into PyOpenSSL.
""" """
def __init__(self, protocol): def __init__(self, protocol: int) -> None:
self.protocol = _openssl_versions[protocol] self.protocol = _openssl_versions[protocol]
self._ctx = OpenSSL.SSL.Context(self.protocol) self._ctx = OpenSSL.SSL.Context(self.protocol)
self._options = 0 self._options = 0
self.check_hostname = False self.check_hostname = False
self._minimum_version: int = ssl.TLSVersion.MINIMUM_SUPPORTED
self._maximum_version: int = ssl.TLSVersion.MAXIMUM_SUPPORTED
@property @property
def options(self): def options(self) -> int:
return self._options return self._options
@options.setter @options.setter
def options(self, value): def options(self, value: int) -> None:
self._options = value self._options = value
self._ctx.set_options(value) self._set_ctx_options()
@property @property
def verify_mode(self): def verify_mode(self) -> int:
return _openssl_to_stdlib_verify[self._ctx.get_verify_mode()] return _openssl_to_stdlib_verify[self._ctx.get_verify_mode()]
@verify_mode.setter @verify_mode.setter
def verify_mode(self, value): def verify_mode(self, value: ssl.VerifyMode) -> None:
self._ctx.set_verify(_stdlib_to_openssl_verify[value], _verify_callback) self._ctx.set_verify(_stdlib_to_openssl_verify[value], _verify_callback)
def set_default_verify_paths(self): def set_default_verify_paths(self) -> None:
self._ctx.set_default_verify_paths() self._ctx.set_default_verify_paths()
def set_ciphers(self, ciphers): def set_ciphers(self, ciphers: bytes | str) -> None:
if isinstance(ciphers, six.text_type): if isinstance(ciphers, str):
ciphers = ciphers.encode("utf-8") ciphers = ciphers.encode("utf-8")
self._ctx.set_cipher_list(ciphers) self._ctx.set_cipher_list(ciphers)
def load_verify_locations(self, cafile=None, capath=None, cadata=None): def load_verify_locations(
self,
cafile: str | None = None,
capath: str | None = None,
cadata: bytes | None = None,
) -> None:
if cafile is not None: if cafile is not None:
cafile = cafile.encode("utf-8") cafile = cafile.encode("utf-8") # type: ignore[assignment]
if capath is not None: if capath is not None:
capath = capath.encode("utf-8") capath = capath.encode("utf-8") # type: ignore[assignment]
try: try:
self._ctx.load_verify_locations(cafile, capath) self._ctx.load_verify_locations(cafile, capath)
if cadata is not None: if cadata is not None:
self._ctx.load_verify_locations(BytesIO(cadata)) self._ctx.load_verify_locations(BytesIO(cadata))
except OpenSSL.SSL.Error as e: except OpenSSL.SSL.Error as e:
raise ssl.SSLError("unable to load trusted certificates: %r" % e) raise ssl.SSLError(f"unable to load trusted certificates: {e!r}") from e
def load_cert_chain(self, certfile, keyfile=None, password=None): def load_cert_chain(
self,
certfile: str,
keyfile: str | None = None,
password: str | None = None,
) -> None:
try:
self._ctx.use_certificate_chain_file(certfile) self._ctx.use_certificate_chain_file(certfile)
if password is not None: if password is not None:
if not isinstance(password, six.binary_type): if not isinstance(password, bytes):
password = password.encode("utf-8") password = password.encode("utf-8") # type: ignore[assignment]
self._ctx.set_passwd_cb(lambda *_: password) self._ctx.set_passwd_cb(lambda *_: password)
self._ctx.use_privatekey_file(keyfile or certfile) self._ctx.use_privatekey_file(keyfile or certfile)
except OpenSSL.SSL.Error as e:
raise ssl.SSLError(f"Unable to load certificate chain: {e!r}") from e
def set_alpn_protocols(self, protocols): def set_alpn_protocols(self, protocols: list[bytes | str]) -> None:
protocols = [six.ensure_binary(p) for p in protocols] protocols = [util.util.to_bytes(p, "ascii") for p in protocols]
return self._ctx.set_alpn_protos(protocols) return self._ctx.set_alpn_protos(protocols) # type: ignore[no-any-return]
def wrap_socket( def wrap_socket(
self, self,
sock, sock: socket_cls,
server_side=False, server_side: bool = False,
do_handshake_on_connect=True, do_handshake_on_connect: bool = True,
suppress_ragged_eofs=True, suppress_ragged_eofs: bool = True,
server_hostname=None, server_hostname: bytes | str | None = None,
): ) -> WrappedSocket:
cnx = OpenSSL.SSL.Connection(self._ctx, sock) cnx = OpenSSL.SSL.Connection(self._ctx, sock)
if isinstance(server_hostname, six.text_type): # Platform-specific: Python 3 # If server_hostname is an IP, don't use it for SNI, per RFC6066 Section 3
if server_hostname and not util.ssl_.is_ipaddress(server_hostname):
if isinstance(server_hostname, str):
server_hostname = server_hostname.encode("utf-8") server_hostname = server_hostname.encode("utf-8")
if server_hostname is not None:
cnx.set_tlsext_host_name(server_hostname) cnx.set_tlsext_host_name(server_hostname)
cnx.set_connect_state() cnx.set_connect_state()
@ -503,16 +511,47 @@ class PyOpenSSLContext(object):
while True: while True:
try: try:
cnx.do_handshake() cnx.do_handshake()
except OpenSSL.SSL.WantReadError: except OpenSSL.SSL.WantReadError as e:
if not util.wait_for_read(sock, sock.gettimeout()): if not util.wait_for_read(sock, sock.gettimeout()):
raise timeout("select timed out") raise timeout("select timed out") from e
continue continue
except OpenSSL.SSL.Error as e: except OpenSSL.SSL.Error as e:
raise ssl.SSLError("bad handshake: %r" % e) raise ssl.SSLError(f"bad handshake: {e!r}") from e
break break
return WrappedSocket(cnx, sock) return WrappedSocket(cnx, sock)
def _set_ctx_options(self) -> None:
self._ctx.set_options(
self._options
| _openssl_to_ssl_minimum_version[self._minimum_version]
| _openssl_to_ssl_maximum_version[self._maximum_version]
)
def _verify_callback(cnx, x509, err_no, err_depth, return_code): @property
def minimum_version(self) -> int:
return self._minimum_version
@minimum_version.setter
def minimum_version(self, minimum_version: int) -> None:
self._minimum_version = minimum_version
self._set_ctx_options()
@property
def maximum_version(self) -> int:
return self._maximum_version
@maximum_version.setter
def maximum_version(self, maximum_version: int) -> None:
self._maximum_version = maximum_version
self._set_ctx_options()
def _verify_callback(
cnx: OpenSSL.SSL.Connection,
x509: X509,
err_no: int,
err_depth: int,
return_code: int,
) -> bool:
return err_no == 0 return err_no == 0

View file

@ -51,7 +51,8 @@ license and by oscrypto's:
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. DEALINGS IN THE SOFTWARE.
""" """
from __future__ import absolute_import
from __future__ import annotations
import contextlib import contextlib
import ctypes import ctypes
@ -62,14 +63,18 @@ import socket
import ssl import ssl
import struct import struct
import threading import threading
import typing
import warnings
import weakref import weakref
from socket import socket as socket_cls
import six
from .. import util from .. import util
from ..util.ssl_ import PROTOCOL_TLS_CLIENT from ._securetransport.bindings import ( # type: ignore[attr-defined]
from ._securetransport.bindings import CoreFoundation, Security, SecurityConst CoreFoundation,
Security,
)
from ._securetransport.low_level import ( from ._securetransport.low_level import (
SecurityConst,
_assert_no_error, _assert_no_error,
_build_tls_unknown_ca_alert, _build_tls_unknown_ca_alert,
_cert_array_from_pem, _cert_array_from_pem,
@ -78,18 +83,19 @@ from ._securetransport.low_level import (
_temporary_keychain, _temporary_keychain,
) )
try: # Platform-specific: Python 2 warnings.warn(
from socket import _fileobject "'urllib3.contrib.securetransport' module is deprecated and will be removed "
except ImportError: # Platform-specific: Python 3 "in urllib3 v2.1.0. Read more in this issue: "
_fileobject = None "https://github.com/urllib3/urllib3/issues/2681",
from ..packages.backports.makefile import backport_makefile category=DeprecationWarning,
stacklevel=2,
)
if typing.TYPE_CHECKING:
from typing_extensions import Literal
__all__ = ["inject_into_urllib3", "extract_from_urllib3"] __all__ = ["inject_into_urllib3", "extract_from_urllib3"]
# SNI always works
HAS_SNI = True
orig_util_HAS_SNI = util.HAS_SNI
orig_util_SSLContext = util.ssl_.SSLContext orig_util_SSLContext = util.ssl_.SSLContext
# This dictionary is used by the read callback to obtain a handle to the # This dictionary is used by the read callback to obtain a handle to the
@ -108,55 +114,24 @@ orig_util_SSLContext = util.ssl_.SSLContext
# #
# This is good: if we had to lock in the callbacks we'd drastically slow down # This is good: if we had to lock in the callbacks we'd drastically slow down
# the performance of this code. # the performance of this code.
_connection_refs = weakref.WeakValueDictionary() _connection_refs: weakref.WeakValueDictionary[
int, WrappedSocket
] = weakref.WeakValueDictionary()
_connection_ref_lock = threading.Lock() _connection_ref_lock = threading.Lock()
# Limit writes to 16kB. This is OpenSSL's limit, but we'll cargo-cult it over # Limit writes to 16kB. This is OpenSSL's limit, but we'll cargo-cult it over
# for no better reason than we need *a* limit, and this one is right there. # for no better reason than we need *a* limit, and this one is right there.
SSL_WRITE_BLOCKSIZE = 16384 SSL_WRITE_BLOCKSIZE = 16384
# This is our equivalent of util.ssl_.DEFAULT_CIPHERS, but expanded out to
# individual cipher suites. We need to do this because this is how
# SecureTransport wants them.
CIPHER_SUITES = [
SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,
SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
SecurityConst.TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,
SecurityConst.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
SecurityConst.TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256,
SecurityConst.TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256,
SecurityConst.TLS_DHE_RSA_WITH_AES_256_GCM_SHA384,
SecurityConst.TLS_DHE_RSA_WITH_AES_128_GCM_SHA256,
SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384,
SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA,
SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,
SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA,
SecurityConst.TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384,
SecurityConst.TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA,
SecurityConst.TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256,
SecurityConst.TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,
SecurityConst.TLS_DHE_RSA_WITH_AES_256_CBC_SHA256,
SecurityConst.TLS_DHE_RSA_WITH_AES_256_CBC_SHA,
SecurityConst.TLS_DHE_RSA_WITH_AES_128_CBC_SHA256,
SecurityConst.TLS_DHE_RSA_WITH_AES_128_CBC_SHA,
SecurityConst.TLS_AES_256_GCM_SHA384,
SecurityConst.TLS_AES_128_GCM_SHA256,
SecurityConst.TLS_RSA_WITH_AES_256_GCM_SHA384,
SecurityConst.TLS_RSA_WITH_AES_128_GCM_SHA256,
SecurityConst.TLS_AES_128_CCM_8_SHA256,
SecurityConst.TLS_AES_128_CCM_SHA256,
SecurityConst.TLS_RSA_WITH_AES_256_CBC_SHA256,
SecurityConst.TLS_RSA_WITH_AES_128_CBC_SHA256,
SecurityConst.TLS_RSA_WITH_AES_256_CBC_SHA,
SecurityConst.TLS_RSA_WITH_AES_128_CBC_SHA,
]
# Basically this is simple: for PROTOCOL_SSLv23 we turn it into a low of # Basically this is simple: for PROTOCOL_SSLv23 we turn it into a low of
# TLSv1 and a high of TLSv1.2. For everything else, we pin to that version. # TLSv1 and a high of TLSv1.2. For everything else, we pin to that version.
# TLSv1 to 1.2 are supported on macOS 10.8+ # TLSv1 to 1.2 are supported on macOS 10.8+
_protocol_to_min_max = { _protocol_to_min_max = {
util.PROTOCOL_TLS: (SecurityConst.kTLSProtocol1, SecurityConst.kTLSProtocol12), util.ssl_.PROTOCOL_TLS: (SecurityConst.kTLSProtocol1, SecurityConst.kTLSProtocol12), # type: ignore[attr-defined]
PROTOCOL_TLS_CLIENT: (SecurityConst.kTLSProtocol1, SecurityConst.kTLSProtocol12), util.ssl_.PROTOCOL_TLS_CLIENT: ( # type: ignore[attr-defined]
SecurityConst.kTLSProtocol1,
SecurityConst.kTLSProtocol12,
),
} }
if hasattr(ssl, "PROTOCOL_SSLv2"): if hasattr(ssl, "PROTOCOL_SSLv2"):
@ -186,31 +161,38 @@ if hasattr(ssl, "PROTOCOL_TLSv1_2"):
) )
def inject_into_urllib3(): _tls_version_to_st: dict[int, int] = {
ssl.TLSVersion.MINIMUM_SUPPORTED: SecurityConst.kTLSProtocol1,
ssl.TLSVersion.TLSv1: SecurityConst.kTLSProtocol1,
ssl.TLSVersion.TLSv1_1: SecurityConst.kTLSProtocol11,
ssl.TLSVersion.TLSv1_2: SecurityConst.kTLSProtocol12,
ssl.TLSVersion.MAXIMUM_SUPPORTED: SecurityConst.kTLSProtocol12,
}
def inject_into_urllib3() -> None:
""" """
Monkey-patch urllib3 with SecureTransport-backed SSL-support. Monkey-patch urllib3 with SecureTransport-backed SSL-support.
""" """
util.SSLContext = SecureTransportContext util.SSLContext = SecureTransportContext # type: ignore[assignment]
util.ssl_.SSLContext = SecureTransportContext util.ssl_.SSLContext = SecureTransportContext # type: ignore[assignment]
util.HAS_SNI = HAS_SNI
util.ssl_.HAS_SNI = HAS_SNI
util.IS_SECURETRANSPORT = True util.IS_SECURETRANSPORT = True
util.ssl_.IS_SECURETRANSPORT = True util.ssl_.IS_SECURETRANSPORT = True
def extract_from_urllib3(): def extract_from_urllib3() -> None:
""" """
Undo monkey-patching by :func:`inject_into_urllib3`. Undo monkey-patching by :func:`inject_into_urllib3`.
""" """
util.SSLContext = orig_util_SSLContext util.SSLContext = orig_util_SSLContext
util.ssl_.SSLContext = orig_util_SSLContext util.ssl_.SSLContext = orig_util_SSLContext
util.HAS_SNI = orig_util_HAS_SNI
util.ssl_.HAS_SNI = orig_util_HAS_SNI
util.IS_SECURETRANSPORT = False util.IS_SECURETRANSPORT = False
util.ssl_.IS_SECURETRANSPORT = False util.ssl_.IS_SECURETRANSPORT = False
def _read_callback(connection_id, data_buffer, data_length_pointer): def _read_callback(
connection_id: int, data_buffer: int, data_length_pointer: bytearray
) -> int:
""" """
SecureTransport read callback. This is called by ST to request that data SecureTransport read callback. This is called by ST to request that data
be returned from the socket. be returned from the socket.
@ -232,7 +214,7 @@ def _read_callback(connection_id, data_buffer, data_length_pointer):
while read_count < requested_length: while read_count < requested_length:
if timeout is None or timeout >= 0: if timeout is None or timeout >= 0:
if not util.wait_for_read(base_socket, timeout): if not util.wait_for_read(base_socket, timeout):
raise socket.error(errno.EAGAIN, "timed out") raise OSError(errno.EAGAIN, "timed out")
remaining = requested_length - read_count remaining = requested_length - read_count
buffer = (ctypes.c_char * remaining).from_address( buffer = (ctypes.c_char * remaining).from_address(
@ -244,7 +226,7 @@ def _read_callback(connection_id, data_buffer, data_length_pointer):
if not read_count: if not read_count:
return SecurityConst.errSSLClosedGraceful return SecurityConst.errSSLClosedGraceful
break break
except (socket.error) as e: except OSError as e:
error = e.errno error = e.errno
if error is not None and error != errno.EAGAIN: if error is not None and error != errno.EAGAIN:
@ -265,7 +247,9 @@ def _read_callback(connection_id, data_buffer, data_length_pointer):
return SecurityConst.errSSLInternal return SecurityConst.errSSLInternal
def _write_callback(connection_id, data_buffer, data_length_pointer): def _write_callback(
connection_id: int, data_buffer: int, data_length_pointer: bytearray
) -> int:
""" """
SecureTransport write callback. This is called by ST to request that data SecureTransport write callback. This is called by ST to request that data
actually be sent on the network. actually be sent on the network.
@ -288,14 +272,14 @@ def _write_callback(connection_id, data_buffer, data_length_pointer):
while sent < bytes_to_write: while sent < bytes_to_write:
if timeout is None or timeout >= 0: if timeout is None or timeout >= 0:
if not util.wait_for_write(base_socket, timeout): if not util.wait_for_write(base_socket, timeout):
raise socket.error(errno.EAGAIN, "timed out") raise OSError(errno.EAGAIN, "timed out")
chunk_sent = base_socket.send(data) chunk_sent = base_socket.send(data)
sent += chunk_sent sent += chunk_sent
# This has some needless copying here, but I'm not sure there's # This has some needless copying here, but I'm not sure there's
# much value in optimising this data path. # much value in optimising this data path.
data = data[chunk_sent:] data = data[chunk_sent:]
except (socket.error) as e: except OSError as e:
error = e.errno error = e.errno
if error is not None and error != errno.EAGAIN: if error is not None and error != errno.EAGAIN:
@ -323,22 +307,20 @@ _read_callback_pointer = Security.SSLReadFunc(_read_callback)
_write_callback_pointer = Security.SSLWriteFunc(_write_callback) _write_callback_pointer = Security.SSLWriteFunc(_write_callback)
class WrappedSocket(object): class WrappedSocket:
""" """
API-compatibility wrapper for Python's OpenSSL wrapped socket object. API-compatibility wrapper for Python's OpenSSL wrapped socket object.
Note: _makefile_refs, _drop(), and _reuse() are needed for the garbage
collector of PyPy.
""" """
def __init__(self, socket): def __init__(self, socket: socket_cls) -> None:
self.socket = socket self.socket = socket
self.context = None self.context = None
self._makefile_refs = 0 self._io_refs = 0
self._closed = False self._closed = False
self._exception = None self._real_closed = False
self._exception: Exception | None = None
self._keychain = None self._keychain = None
self._keychain_dir = None self._keychain_dir: str | None = None
self._client_cert_chain = None self._client_cert_chain = None
# We save off the previously-configured timeout and then set it to # We save off the previously-configured timeout and then set it to
@ -350,7 +332,7 @@ class WrappedSocket(object):
self.socket.settimeout(0) self.socket.settimeout(0)
@contextlib.contextmanager @contextlib.contextmanager
def _raise_on_error(self): def _raise_on_error(self) -> typing.Generator[None, None, None]:
""" """
A context manager that can be used to wrap calls that do I/O from A context manager that can be used to wrap calls that do I/O from
SecureTransport. If any of the I/O callbacks hit an exception, this SecureTransport. If any of the I/O callbacks hit an exception, this
@ -367,23 +349,10 @@ class WrappedSocket(object):
yield yield
if self._exception is not None: if self._exception is not None:
exception, self._exception = self._exception, None exception, self._exception = self._exception, None
self.close() self._real_close()
raise exception raise exception
def _set_ciphers(self): def _set_alpn_protocols(self, protocols: list[bytes] | None) -> None:
"""
Sets up the allowed ciphers. By default this matches the set in
util.ssl_.DEFAULT_CIPHERS, at least as supported by macOS. This is done
custom and doesn't allow changing at this time, mostly because parsing
OpenSSL cipher strings is going to be a freaking nightmare.
"""
ciphers = (Security.SSLCipherSuite * len(CIPHER_SUITES))(*CIPHER_SUITES)
result = Security.SSLSetEnabledCiphers(
self.context, ciphers, len(CIPHER_SUITES)
)
_assert_no_error(result)
def _set_alpn_protocols(self, protocols):
""" """
Sets up the ALPN protocols on the context. Sets up the ALPN protocols on the context.
""" """
@ -396,7 +365,7 @@ class WrappedSocket(object):
finally: finally:
CoreFoundation.CFRelease(protocols_arr) CoreFoundation.CFRelease(protocols_arr)
def _custom_validate(self, verify, trust_bundle): def _custom_validate(self, verify: bool, trust_bundle: bytes | None) -> None:
""" """
Called when we have set custom validation. We do this in two cases: Called when we have set custom validation. We do this in two cases:
first, when cert validation is entirely disabled; and second, when first, when cert validation is entirely disabled; and second, when
@ -404,7 +373,7 @@ class WrappedSocket(object):
Raises an SSLError if the connection is not trusted. Raises an SSLError if the connection is not trusted.
""" """
# If we disabled cert validation, just say: cool. # If we disabled cert validation, just say: cool.
if not verify: if not verify or trust_bundle is None:
return return
successes = ( successes = (
@ -415,10 +384,12 @@ class WrappedSocket(object):
trust_result = self._evaluate_trust(trust_bundle) trust_result = self._evaluate_trust(trust_bundle)
if trust_result in successes: if trust_result in successes:
return return
reason = "error code: %d" % (trust_result,) reason = f"error code: {int(trust_result)}"
exc = None
except Exception as e: except Exception as e:
# Do not trust on error # Do not trust on error
reason = "exception: %r" % (e,) reason = f"exception: {e!r}"
exc = e
# SecureTransport does not send an alert nor shuts down the connection. # SecureTransport does not send an alert nor shuts down the connection.
rec = _build_tls_unknown_ca_alert(self.version()) rec = _build_tls_unknown_ca_alert(self.version())
@ -428,10 +399,10 @@ class WrappedSocket(object):
# l_linger = 0, linger for 0 seoncds # l_linger = 0, linger for 0 seoncds
opts = struct.pack("ii", 1, 0) opts = struct.pack("ii", 1, 0)
self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_LINGER, opts) self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_LINGER, opts)
self.close() self._real_close()
raise ssl.SSLError("certificate verify failed, %s" % reason) raise ssl.SSLError(f"certificate verify failed, {reason}") from exc
def _evaluate_trust(self, trust_bundle): def _evaluate_trust(self, trust_bundle: bytes) -> int:
# We want data in memory, so load it up. # We want data in memory, so load it up.
if os.path.isfile(trust_bundle): if os.path.isfile(trust_bundle):
with open(trust_bundle, "rb") as f: with open(trust_bundle, "rb") as f:
@ -469,20 +440,20 @@ class WrappedSocket(object):
if cert_array is not None: if cert_array is not None:
CoreFoundation.CFRelease(cert_array) CoreFoundation.CFRelease(cert_array)
return trust_result.value return trust_result.value # type: ignore[no-any-return]
def handshake( def handshake(
self, self,
server_hostname, server_hostname: bytes | str | None,
verify, verify: bool,
trust_bundle, trust_bundle: bytes | None,
min_version, min_version: int,
max_version, max_version: int,
client_cert, client_cert: str | None,
client_key, client_key: str | None,
client_key_passphrase, client_key_passphrase: typing.Any,
alpn_protocols, alpn_protocols: list[bytes] | None,
): ) -> None:
""" """
Actually performs the TLS handshake. This is run automatically by Actually performs the TLS handshake. This is run automatically by
wrapped socket, and shouldn't be needed in user code. wrapped socket, and shouldn't be needed in user code.
@ -510,6 +481,8 @@ class WrappedSocket(object):
_assert_no_error(result) _assert_no_error(result)
# If we have a server hostname, we should set that too. # If we have a server hostname, we should set that too.
# RFC6066 Section 3 tells us not to use SNI when the host is an IP, but we have
# to do it anyway to match server_hostname against the server certificate
if server_hostname: if server_hostname:
if not isinstance(server_hostname, bytes): if not isinstance(server_hostname, bytes):
server_hostname = server_hostname.encode("utf-8") server_hostname = server_hostname.encode("utf-8")
@ -519,9 +492,6 @@ class WrappedSocket(object):
) )
_assert_no_error(result) _assert_no_error(result)
# Setup the ciphers.
self._set_ciphers()
# Setup the ALPN protocols. # Setup the ALPN protocols.
self._set_alpn_protocols(alpn_protocols) self._set_alpn_protocols(alpn_protocols)
@ -564,25 +534,27 @@ class WrappedSocket(object):
_assert_no_error(result) _assert_no_error(result)
break break
def fileno(self): def fileno(self) -> int:
return self.socket.fileno() return self.socket.fileno()
# Copy-pasted from Python 3.5 source code # Copy-pasted from Python 3.5 source code
def _decref_socketios(self): def _decref_socketios(self) -> None:
if self._makefile_refs > 0: if self._io_refs > 0:
self._makefile_refs -= 1 self._io_refs -= 1
if self._closed: if self._closed:
self.close() self.close()
def recv(self, bufsiz): def recv(self, bufsiz: int) -> bytes:
buffer = ctypes.create_string_buffer(bufsiz) buffer = ctypes.create_string_buffer(bufsiz)
bytes_read = self.recv_into(buffer, bufsiz) bytes_read = self.recv_into(buffer, bufsiz)
data = buffer[:bytes_read] data = buffer[:bytes_read]
return data return typing.cast(bytes, data)
def recv_into(self, buffer, nbytes=None): def recv_into(
self, buffer: ctypes.Array[ctypes.c_char], nbytes: int | None = None
) -> int:
# Read short on EOF. # Read short on EOF.
if self._closed: if self._real_closed:
return 0 return 0
if nbytes is None: if nbytes is None:
@ -615,7 +587,7 @@ class WrappedSocket(object):
# well. Note that we don't actually return here because in # well. Note that we don't actually return here because in
# principle this could actually be fired along with return data. # principle this could actually be fired along with return data.
# It's unlikely though. # It's unlikely though.
self.close() self._real_close()
else: else:
_assert_no_error(result) _assert_no_error(result)
@ -623,13 +595,13 @@ class WrappedSocket(object):
# was actually read. # was actually read.
return processed_bytes.value return processed_bytes.value
def settimeout(self, timeout): def settimeout(self, timeout: float) -> None:
self._timeout = timeout self._timeout = timeout
def gettimeout(self): def gettimeout(self) -> float | None:
return self._timeout return self._timeout
def send(self, data): def send(self, data: bytes) -> int:
processed_bytes = ctypes.c_size_t(0) processed_bytes = ctypes.c_size_t(0)
with self._raise_on_error(): with self._raise_on_error():
@ -646,20 +618,24 @@ class WrappedSocket(object):
# We sent, and probably succeeded. Tell them how much we sent. # We sent, and probably succeeded. Tell them how much we sent.
return processed_bytes.value return processed_bytes.value
def sendall(self, data): def sendall(self, data: bytes) -> None:
total_sent = 0 total_sent = 0
while total_sent < len(data): while total_sent < len(data):
sent = self.send(data[total_sent : total_sent + SSL_WRITE_BLOCKSIZE]) sent = self.send(data[total_sent : total_sent + SSL_WRITE_BLOCKSIZE])
total_sent += sent total_sent += sent
def shutdown(self): def shutdown(self) -> None:
with self._raise_on_error(): with self._raise_on_error():
Security.SSLClose(self.context) Security.SSLClose(self.context)
def close(self): def close(self) -> None:
# TODO: should I do clean shutdown here? Do I have to?
if self._makefile_refs < 1:
self._closed = True self._closed = True
# TODO: should I do clean shutdown here? Do I have to?
if self._io_refs <= 0:
self._real_close()
def _real_close(self) -> None:
self._real_closed = True
if self.context: if self.context:
CoreFoundation.CFRelease(self.context) CoreFoundation.CFRelease(self.context)
self.context = None self.context = None
@ -672,10 +648,8 @@ class WrappedSocket(object):
shutil.rmtree(self._keychain_dir) shutil.rmtree(self._keychain_dir)
self._keychain = self._keychain_dir = None self._keychain = self._keychain_dir = None
return self.socket.close() return self.socket.close()
else:
self._makefile_refs -= 1
def getpeercert(self, binary_form=False): def getpeercert(self, binary_form: bool = False) -> bytes | None:
# Urgh, annoying. # Urgh, annoying.
# #
# Here's how we do this: # Here's how we do this:
@ -733,7 +707,7 @@ class WrappedSocket(object):
return der_bytes return der_bytes
def version(self): def version(self) -> str:
protocol = Security.SSLProtocol() protocol = Security.SSLProtocol()
result = Security.SSLGetNegotiatedProtocolVersion( result = Security.SSLGetNegotiatedProtocolVersion(
self.context, ctypes.byref(protocol) self.context, ctypes.byref(protocol)
@ -752,55 +726,50 @@ class WrappedSocket(object):
elif protocol.value == SecurityConst.kSSLProtocol2: elif protocol.value == SecurityConst.kSSLProtocol2:
return "SSLv2" return "SSLv2"
else: else:
raise ssl.SSLError("Unknown TLS version: %r" % protocol) raise ssl.SSLError(f"Unknown TLS version: {protocol!r}")
def _reuse(self):
self._makefile_refs += 1
def _drop(self):
if self._makefile_refs < 1:
self.close()
else:
self._makefile_refs -= 1
if _fileobject: # Platform-specific: Python 2 def makefile(
self: socket_cls,
def makefile(self, mode, bufsize=-1): mode: (
self._makefile_refs += 1 Literal["r"] | Literal["w"] | Literal["rw"] | Literal["wr"] | Literal[""]
return _fileobject(self, mode, bufsize, close=True) ) = "r",
buffering: int | None = None,
else: # Platform-specific: Python 3 *args: typing.Any,
**kwargs: typing.Any,
def makefile(self, mode="r", buffering=None, *args, **kwargs): ) -> typing.BinaryIO | typing.TextIO:
# We disable buffering with SecureTransport because it conflicts with # We disable buffering with SecureTransport because it conflicts with
# the buffering that ST does internally (see issue #1153 for more). # the buffering that ST does internally (see issue #1153 for more).
buffering = 0 buffering = 0
return backport_makefile(self, mode, buffering, *args, **kwargs) return socket_cls.makefile(self, mode, buffering, *args, **kwargs)
WrappedSocket.makefile = makefile WrappedSocket.makefile = makefile # type: ignore[attr-defined]
class SecureTransportContext(object): class SecureTransportContext:
""" """
I am a wrapper class for the SecureTransport library, to translate the I am a wrapper class for the SecureTransport library, to translate the
interface of the standard library ``SSLContext`` object to calls into interface of the standard library ``SSLContext`` object to calls into
SecureTransport. SecureTransport.
""" """
def __init__(self, protocol): def __init__(self, protocol: int) -> None:
self._minimum_version: int = ssl.TLSVersion.MINIMUM_SUPPORTED
self._maximum_version: int = ssl.TLSVersion.MAXIMUM_SUPPORTED
if protocol not in (None, ssl.PROTOCOL_TLS, ssl.PROTOCOL_TLS_CLIENT):
self._min_version, self._max_version = _protocol_to_min_max[protocol] self._min_version, self._max_version = _protocol_to_min_max[protocol]
self._options = 0 self._options = 0
self._verify = False self._verify = False
self._trust_bundle = None self._trust_bundle: bytes | None = None
self._client_cert = None self._client_cert: str | None = None
self._client_key = None self._client_key: str | None = None
self._client_key_passphrase = None self._client_key_passphrase = None
self._alpn_protocols = None self._alpn_protocols: list[bytes] | None = None
@property @property
def check_hostname(self): def check_hostname(self) -> Literal[True]:
""" """
SecureTransport cannot have its hostname checking disabled. For more, SecureTransport cannot have its hostname checking disabled. For more,
see the comment on getpeercert() in this file. see the comment on getpeercert() in this file.
@ -808,15 +777,14 @@ class SecureTransportContext(object):
return True return True
@check_hostname.setter @check_hostname.setter
def check_hostname(self, value): def check_hostname(self, value: typing.Any) -> None:
""" """
SecureTransport cannot have its hostname checking disabled. For more, SecureTransport cannot have its hostname checking disabled. For more,
see the comment on getpeercert() in this file. see the comment on getpeercert() in this file.
""" """
pass
@property @property
def options(self): def options(self) -> int:
# TODO: Well, crap. # TODO: Well, crap.
# #
# So this is the bit of the code that is the most likely to cause us # So this is the bit of the code that is the most likely to cause us
@ -826,19 +794,19 @@ class SecureTransportContext(object):
return self._options return self._options
@options.setter @options.setter
def options(self, value): def options(self, value: int) -> None:
# TODO: Update in line with above. # TODO: Update in line with above.
self._options = value self._options = value
@property @property
def verify_mode(self): def verify_mode(self) -> int:
return ssl.CERT_REQUIRED if self._verify else ssl.CERT_NONE return ssl.CERT_REQUIRED if self._verify else ssl.CERT_NONE
@verify_mode.setter @verify_mode.setter
def verify_mode(self, value): def verify_mode(self, value: int) -> None:
self._verify = True if value == ssl.CERT_REQUIRED else False self._verify = value == ssl.CERT_REQUIRED
def set_default_verify_paths(self): def set_default_verify_paths(self) -> None:
# So, this has to do something a bit weird. Specifically, what it does # So, this has to do something a bit weird. Specifically, what it does
# is nothing. # is nothing.
# #
@ -850,15 +818,18 @@ class SecureTransportContext(object):
# ignoring it. # ignoring it.
pass pass
def load_default_certs(self): def load_default_certs(self) -> None:
return self.set_default_verify_paths() return self.set_default_verify_paths()
def set_ciphers(self, ciphers): def set_ciphers(self, ciphers: typing.Any) -> None:
# For now, we just require the default cipher string.
if ciphers != util.ssl_.DEFAULT_CIPHERS:
raise ValueError("SecureTransport doesn't support custom cipher strings") raise ValueError("SecureTransport doesn't support custom cipher strings")
def load_verify_locations(self, cafile=None, capath=None, cadata=None): def load_verify_locations(
self,
cafile: str | None = None,
capath: str | None = None,
cadata: bytes | None = None,
) -> None:
# OK, we only really support cadata and cafile. # OK, we only really support cadata and cafile.
if capath is not None: if capath is not None:
raise ValueError("SecureTransport does not support cert directories") raise ValueError("SecureTransport does not support cert directories")
@ -868,14 +839,19 @@ class SecureTransportContext(object):
with open(cafile): with open(cafile):
pass pass
self._trust_bundle = cafile or cadata self._trust_bundle = cafile or cadata # type: ignore[assignment]
def load_cert_chain(self, certfile, keyfile=None, password=None): def load_cert_chain(
self,
certfile: str,
keyfile: str | None = None,
password: str | None = None,
) -> None:
self._client_cert = certfile self._client_cert = certfile
self._client_key = keyfile self._client_key = keyfile
self._client_cert_passphrase = password self._client_cert_passphrase = password
def set_alpn_protocols(self, protocols): def set_alpn_protocols(self, protocols: list[str | bytes]) -> None:
""" """
Sets the ALPN protocols that will later be set on the context. Sets the ALPN protocols that will later be set on the context.
@ -885,16 +861,16 @@ class SecureTransportContext(object):
raise NotImplementedError( raise NotImplementedError(
"SecureTransport supports ALPN only in macOS 10.12+" "SecureTransport supports ALPN only in macOS 10.12+"
) )
self._alpn_protocols = [six.ensure_binary(p) for p in protocols] self._alpn_protocols = [util.util.to_bytes(p, "ascii") for p in protocols]
def wrap_socket( def wrap_socket(
self, self,
sock, sock: socket_cls,
server_side=False, server_side: bool = False,
do_handshake_on_connect=True, do_handshake_on_connect: bool = True,
suppress_ragged_eofs=True, suppress_ragged_eofs: bool = True,
server_hostname=None, server_hostname: bytes | str | None = None,
): ) -> WrappedSocket:
# So, what do we do here? Firstly, we assert some properties. This is a # So, what do we do here? Firstly, we assert some properties. This is a
# stripped down shim, so there is some functionality we don't support. # stripped down shim, so there is some functionality we don't support.
# See PEP 543 for the real deal. # See PEP 543 for the real deal.
@ -911,11 +887,27 @@ class SecureTransportContext(object):
server_hostname, server_hostname,
self._verify, self._verify,
self._trust_bundle, self._trust_bundle,
self._min_version, _tls_version_to_st[self._minimum_version],
self._max_version, _tls_version_to_st[self._maximum_version],
self._client_cert, self._client_cert,
self._client_key, self._client_key,
self._client_key_passphrase, self._client_key_passphrase,
self._alpn_protocols, self._alpn_protocols,
) )
return wrapped_socket return wrapped_socket
@property
def minimum_version(self) -> int:
return self._minimum_version
@minimum_version.setter
def minimum_version(self, minimum_version: int) -> None:
self._minimum_version = minimum_version
@property
def maximum_version(self) -> int:
return self._maximum_version
@maximum_version.setter
def maximum_version(self, maximum_version: int) -> None:
self._maximum_version = maximum_version

View file

@ -1,4 +1,3 @@
# -*- coding: utf-8 -*-
""" """
This module contains provisional support for SOCKS proxies from within This module contains provisional support for SOCKS proxies from within
urllib3. This module supports SOCKS4, SOCKS4A (an extension of SOCKS4), and urllib3. This module supports SOCKS4, SOCKS4A (an extension of SOCKS4), and
@ -38,10 +37,11 @@ with the proxy:
proxy_url="socks5h://<username>:<password>@proxy-host" proxy_url="socks5h://<username>:<password>@proxy-host"
""" """
from __future__ import absolute_import
from __future__ import annotations
try: try:
import socks import socks # type: ignore[import]
except ImportError: except ImportError:
import warnings import warnings
@ -51,13 +51,13 @@ except ImportError:
( (
"SOCKS support in urllib3 requires the installation of optional " "SOCKS support in urllib3 requires the installation of optional "
"dependencies: specifically, PySocks. For more information, see " "dependencies: specifically, PySocks. For more information, see "
"https://urllib3.readthedocs.io/en/1.26.x/contrib.html#socks-proxies" "https://urllib3.readthedocs.io/en/latest/contrib.html#socks-proxies"
), ),
DependencyWarning, DependencyWarning,
) )
raise raise
from socket import error as SocketError import typing
from socket import timeout as SocketTimeout from socket import timeout as SocketTimeout
from ..connection import HTTPConnection, HTTPSConnection from ..connection import HTTPConnection, HTTPSConnection
@ -69,7 +69,21 @@ from ..util.url import parse_url
try: try:
import ssl import ssl
except ImportError: except ImportError:
ssl = None ssl = None # type: ignore[assignment]
try:
from typing import TypedDict
class _TYPE_SOCKS_OPTIONS(TypedDict):
socks_version: int
proxy_host: str | None
proxy_port: str | None
username: str | None
password: str | None
rdns: bool
except ImportError: # Python 3.7
_TYPE_SOCKS_OPTIONS = typing.Dict[str, typing.Any] # type: ignore[misc, assignment]
class SOCKSConnection(HTTPConnection): class SOCKSConnection(HTTPConnection):
@ -77,15 +91,20 @@ class SOCKSConnection(HTTPConnection):
A plain-text HTTP connection that connects via a SOCKS proxy. A plain-text HTTP connection that connects via a SOCKS proxy.
""" """
def __init__(self, *args, **kwargs): def __init__(
self._socks_options = kwargs.pop("_socks_options") self,
super(SOCKSConnection, self).__init__(*args, **kwargs) _socks_options: _TYPE_SOCKS_OPTIONS,
*args: typing.Any,
**kwargs: typing.Any,
) -> None:
self._socks_options = _socks_options
super().__init__(*args, **kwargs)
def _new_conn(self): def _new_conn(self) -> socks.socksocket:
""" """
Establish a new connection via the SOCKS proxy. Establish a new connection via the SOCKS proxy.
""" """
extra_kw = {} extra_kw: dict[str, typing.Any] = {}
if self.source_address: if self.source_address:
extra_kw["source_address"] = self.source_address extra_kw["source_address"] = self.source_address
@ -102,15 +121,14 @@ class SOCKSConnection(HTTPConnection):
proxy_password=self._socks_options["password"], proxy_password=self._socks_options["password"],
proxy_rdns=self._socks_options["rdns"], proxy_rdns=self._socks_options["rdns"],
timeout=self.timeout, timeout=self.timeout,
**extra_kw **extra_kw,
) )
except SocketTimeout: except SocketTimeout as e:
raise ConnectTimeoutError( raise ConnectTimeoutError(
self, self,
"Connection to %s timed out. (connect timeout=%s)" f"Connection to {self.host} timed out. (connect timeout={self.timeout})",
% (self.host, self.timeout), ) from e
)
except socks.ProxyError as e: except socks.ProxyError as e:
# This is fragile as hell, but it seems to be the only way to raise # This is fragile as hell, but it seems to be the only way to raise
@ -120,22 +138,23 @@ class SOCKSConnection(HTTPConnection):
if isinstance(error, SocketTimeout): if isinstance(error, SocketTimeout):
raise ConnectTimeoutError( raise ConnectTimeoutError(
self, self,
"Connection to %s timed out. (connect timeout=%s)" f"Connection to {self.host} timed out. (connect timeout={self.timeout})",
% (self.host, self.timeout), ) from e
else:
# Adding `from e` messes with coverage somehow, so it's omitted.
# See #2386.
raise NewConnectionError(
self, f"Failed to establish a new connection: {error}"
) )
else: else:
raise NewConnectionError( raise NewConnectionError(
self, "Failed to establish a new connection: %s" % error self, f"Failed to establish a new connection: {e}"
) ) from e
else:
raise NewConnectionError(
self, "Failed to establish a new connection: %s" % e
)
except SocketError as e: # Defensive: PySocks should catch all these. except OSError as e: # Defensive: PySocks should catch all these.
raise NewConnectionError( raise NewConnectionError(
self, "Failed to establish a new connection: %s" % e self, f"Failed to establish a new connection: {e}"
) ) from e
return conn return conn
@ -169,12 +188,12 @@ class SOCKSProxyManager(PoolManager):
def __init__( def __init__(
self, self,
proxy_url, proxy_url: str,
username=None, username: str | None = None,
password=None, password: str | None = None,
num_pools=10, num_pools: int = 10,
headers=None, headers: typing.Mapping[str, str] | None = None,
**connection_pool_kw **connection_pool_kw: typing.Any,
): ):
parsed = parse_url(proxy_url) parsed = parse_url(proxy_url)
@ -195,7 +214,7 @@ class SOCKSProxyManager(PoolManager):
socks_version = socks.PROXY_TYPE_SOCKS4 socks_version = socks.PROXY_TYPE_SOCKS4
rdns = True rdns = True
else: else:
raise ValueError("Unable to determine SOCKS version from %s" % proxy_url) raise ValueError(f"Unable to determine SOCKS version from {proxy_url}")
self.proxy_url = proxy_url self.proxy_url = proxy_url
@ -209,8 +228,6 @@ class SOCKSProxyManager(PoolManager):
} }
connection_pool_kw["_socks_options"] = socks_options connection_pool_kw["_socks_options"] = socks_options
super(SOCKSProxyManager, self).__init__( super().__init__(num_pools, headers, **connection_pool_kw)
num_pools, headers, **connection_pool_kw
)
self.pool_classes_by_scheme = SOCKSProxyManager.pool_classes_by_scheme self.pool_classes_by_scheme = SOCKSProxyManager.pool_classes_by_scheme

View file

@ -1,6 +1,16 @@
from __future__ import absolute_import from __future__ import annotations
from .packages.six.moves.http_client import IncompleteRead as httplib_IncompleteRead import socket
import typing
import warnings
from email.errors import MessageDefect
from http.client import IncompleteRead as httplib_IncompleteRead
if typing.TYPE_CHECKING:
from .connection import HTTPConnection
from .connectionpool import ConnectionPool
from .response import HTTPResponse
from .util.retry import Retry
# Base Exceptions # Base Exceptions
@ -8,23 +18,24 @@ from .packages.six.moves.http_client import IncompleteRead as httplib_Incomplete
class HTTPError(Exception): class HTTPError(Exception):
"""Base exception used by this module.""" """Base exception used by this module."""
pass
class HTTPWarning(Warning): class HTTPWarning(Warning):
"""Base warning used by this module.""" """Base warning used by this module."""
pass
_TYPE_REDUCE_RESULT = typing.Tuple[
typing.Callable[..., object], typing.Tuple[object, ...]
]
class PoolError(HTTPError): class PoolError(HTTPError):
"""Base exception for errors caused within a pool.""" """Base exception for errors caused within a pool."""
def __init__(self, pool, message): def __init__(self, pool: ConnectionPool, message: str) -> None:
self.pool = pool self.pool = pool
HTTPError.__init__(self, "%s: %s" % (pool, message)) super().__init__(f"{pool}: {message}")
def __reduce__(self): def __reduce__(self) -> _TYPE_REDUCE_RESULT:
# For pickling purposes. # For pickling purposes.
return self.__class__, (None, None) return self.__class__, (None, None)
@ -32,11 +43,11 @@ class PoolError(HTTPError):
class RequestError(PoolError): class RequestError(PoolError):
"""Base exception for PoolErrors that have associated URLs.""" """Base exception for PoolErrors that have associated URLs."""
def __init__(self, pool, url, message): def __init__(self, pool: ConnectionPool, url: str, message: str) -> None:
self.url = url self.url = url
PoolError.__init__(self, pool, message) super().__init__(pool, message)
def __reduce__(self): def __reduce__(self) -> _TYPE_REDUCE_RESULT:
# For pickling purposes. # For pickling purposes.
return self.__class__, (None, self.url, None) return self.__class__, (None, self.url, None)
@ -44,28 +55,25 @@ class RequestError(PoolError):
class SSLError(HTTPError): class SSLError(HTTPError):
"""Raised when SSL certificate fails in an HTTPS connection.""" """Raised when SSL certificate fails in an HTTPS connection."""
pass
class ProxyError(HTTPError): class ProxyError(HTTPError):
"""Raised when the connection to a proxy fails.""" """Raised when the connection to a proxy fails."""
def __init__(self, message, error, *args): # The original error is also available as __cause__.
super(ProxyError, self).__init__(message, error, *args) original_error: Exception
def __init__(self, message: str, error: Exception) -> None:
super().__init__(message, error)
self.original_error = error self.original_error = error
class DecodeError(HTTPError): class DecodeError(HTTPError):
"""Raised when automatic decoding based on Content-Type fails.""" """Raised when automatic decoding based on Content-Type fails."""
pass
class ProtocolError(HTTPError): class ProtocolError(HTTPError):
"""Raised when something unexpected happens mid-request/response.""" """Raised when something unexpected happens mid-request/response."""
pass
#: Renamed to ProtocolError but aliased for backwards compatibility. #: Renamed to ProtocolError but aliased for backwards compatibility.
ConnectionError = ProtocolError ConnectionError = ProtocolError
@ -79,33 +87,36 @@ class MaxRetryError(RequestError):
:param pool: The connection pool :param pool: The connection pool
:type pool: :class:`~urllib3.connectionpool.HTTPConnectionPool` :type pool: :class:`~urllib3.connectionpool.HTTPConnectionPool`
:param string url: The requested Url :param str url: The requested Url
:param exceptions.Exception reason: The underlying error :param reason: The underlying error
:type reason: :class:`Exception`
""" """
def __init__(self, pool, url, reason=None): def __init__(
self, pool: ConnectionPool, url: str, reason: Exception | None = None
) -> None:
self.reason = reason self.reason = reason
message = "Max retries exceeded with url: %s (Caused by %r)" % (url, reason) message = f"Max retries exceeded with url: {url} (Caused by {reason!r})"
RequestError.__init__(self, pool, url, message) super().__init__(pool, url, message)
class HostChangedError(RequestError): class HostChangedError(RequestError):
"""Raised when an existing pool gets a request for a foreign host.""" """Raised when an existing pool gets a request for a foreign host."""
def __init__(self, pool, url, retries=3): def __init__(
message = "Tried to open a foreign host with url: %s" % url self, pool: ConnectionPool, url: str, retries: Retry | int = 3
RequestError.__init__(self, pool, url, message) ) -> None:
message = f"Tried to open a foreign host with url: {url}"
super().__init__(pool, url, message)
self.retries = retries self.retries = retries
class TimeoutStateError(HTTPError): class TimeoutStateError(HTTPError):
"""Raised when passing an invalid state to a timeout""" """Raised when passing an invalid state to a timeout"""
pass
class TimeoutError(HTTPError): class TimeoutError(HTTPError):
"""Raised when a socket timeout error occurs. """Raised when a socket timeout error occurs.
@ -114,53 +125,66 @@ class TimeoutError(HTTPError):
<ReadTimeoutError>` and :exc:`ConnectTimeoutErrors <ConnectTimeoutError>`. <ReadTimeoutError>` and :exc:`ConnectTimeoutErrors <ConnectTimeoutError>`.
""" """
pass
class ReadTimeoutError(TimeoutError, RequestError): class ReadTimeoutError(TimeoutError, RequestError):
"""Raised when a socket timeout occurs while receiving data from a server""" """Raised when a socket timeout occurs while receiving data from a server"""
pass
# This timeout error does not have a URL attached and needs to inherit from the # This timeout error does not have a URL attached and needs to inherit from the
# base HTTPError # base HTTPError
class ConnectTimeoutError(TimeoutError): class ConnectTimeoutError(TimeoutError):
"""Raised when a socket timeout occurs while connecting to a server""" """Raised when a socket timeout occurs while connecting to a server"""
pass
class NewConnectionError(ConnectTimeoutError, HTTPError):
class NewConnectionError(ConnectTimeoutError, PoolError):
"""Raised when we fail to establish a new connection. Usually ECONNREFUSED.""" """Raised when we fail to establish a new connection. Usually ECONNREFUSED."""
pass def __init__(self, conn: HTTPConnection, message: str) -> None:
self.conn = conn
super().__init__(f"{conn}: {message}")
@property
def pool(self) -> HTTPConnection:
warnings.warn(
"The 'pool' property is deprecated and will be removed "
"in urllib3 v2.1.0. Use 'conn' instead.",
DeprecationWarning,
stacklevel=2,
)
return self.conn
class NameResolutionError(NewConnectionError):
"""Raised when host name resolution fails."""
def __init__(self, host: str, conn: HTTPConnection, reason: socket.gaierror):
message = f"Failed to resolve '{host}' ({reason})"
super().__init__(conn, message)
class EmptyPoolError(PoolError): class EmptyPoolError(PoolError):
"""Raised when a pool runs out of connections and no more are allowed.""" """Raised when a pool runs out of connections and no more are allowed."""
pass
class FullPoolError(PoolError):
"""Raised when we try to add a connection to a full pool in blocking mode."""
class ClosedPoolError(PoolError): class ClosedPoolError(PoolError):
"""Raised when a request enters a pool after the pool has been closed.""" """Raised when a request enters a pool after the pool has been closed."""
pass
class LocationValueError(ValueError, HTTPError): class LocationValueError(ValueError, HTTPError):
"""Raised when there is something wrong with a given URL input.""" """Raised when there is something wrong with a given URL input."""
pass
class LocationParseError(LocationValueError): class LocationParseError(LocationValueError):
"""Raised when get_host or similar fails to parse the URL input.""" """Raised when get_host or similar fails to parse the URL input."""
def __init__(self, location): def __init__(self, location: str) -> None:
message = "Failed to parse: %s" % location message = f"Failed to parse: {location}"
HTTPError.__init__(self, message) super().__init__(message)
self.location = location self.location = location
@ -168,9 +192,9 @@ class LocationParseError(LocationValueError):
class URLSchemeUnknown(LocationValueError): class URLSchemeUnknown(LocationValueError):
"""Raised when a URL input has an unsupported scheme.""" """Raised when a URL input has an unsupported scheme."""
def __init__(self, scheme): def __init__(self, scheme: str):
message = "Not supported URL scheme %s" % scheme message = f"Not supported URL scheme {scheme}"
super(URLSchemeUnknown, self).__init__(message) super().__init__(message)
self.scheme = scheme self.scheme = scheme
@ -185,38 +209,22 @@ class ResponseError(HTTPError):
class SecurityWarning(HTTPWarning): class SecurityWarning(HTTPWarning):
"""Warned when performing security reducing actions""" """Warned when performing security reducing actions"""
pass
class SubjectAltNameWarning(SecurityWarning):
"""Warned when connecting to a host with a certificate missing a SAN."""
pass
class InsecureRequestWarning(SecurityWarning): class InsecureRequestWarning(SecurityWarning):
"""Warned when making an unverified HTTPS request.""" """Warned when making an unverified HTTPS request."""
pass
class NotOpenSSLWarning(SecurityWarning):
"""Warned when using unsupported SSL library"""
class SystemTimeWarning(SecurityWarning): class SystemTimeWarning(SecurityWarning):
"""Warned when system time is suspected to be wrong""" """Warned when system time is suspected to be wrong"""
pass
class InsecurePlatformWarning(SecurityWarning): class InsecurePlatformWarning(SecurityWarning):
"""Warned when certain TLS/SSL configuration is not available on a platform.""" """Warned when certain TLS/SSL configuration is not available on a platform."""
pass
class SNIMissingWarning(HTTPWarning):
"""Warned when making a HTTPS request without SNI available."""
pass
class DependencyWarning(HTTPWarning): class DependencyWarning(HTTPWarning):
""" """
@ -224,14 +232,10 @@ class DependencyWarning(HTTPWarning):
dependencies. dependencies.
""" """
pass
class ResponseNotChunked(ProtocolError, ValueError): class ResponseNotChunked(ProtocolError, ValueError):
"""Response needs to be chunked in order to read it as chunks.""" """Response needs to be chunked in order to read it as chunks."""
pass
class BodyNotHttplibCompatible(HTTPError): class BodyNotHttplibCompatible(HTTPError):
""" """
@ -239,8 +243,6 @@ class BodyNotHttplibCompatible(HTTPError):
(have an fp attribute which returns raw chunks) for read_chunked(). (have an fp attribute which returns raw chunks) for read_chunked().
""" """
pass
class IncompleteRead(HTTPError, httplib_IncompleteRead): class IncompleteRead(HTTPError, httplib_IncompleteRead):
""" """
@ -250,12 +252,13 @@ class IncompleteRead(HTTPError, httplib_IncompleteRead):
for ``partial`` to avoid creating large objects on streamed reads. for ``partial`` to avoid creating large objects on streamed reads.
""" """
def __init__(self, partial, expected): def __init__(self, partial: int, expected: int) -> None:
super(IncompleteRead, self).__init__(partial, expected) self.partial = partial # type: ignore[assignment]
self.expected = expected
def __repr__(self): def __repr__(self) -> str:
return "IncompleteRead(%i bytes read, %i more expected)" % ( return "IncompleteRead(%i bytes read, %i more expected)" % (
self.partial, self.partial, # type: ignore[str-format]
self.expected, self.expected,
) )
@ -263,14 +266,13 @@ class IncompleteRead(HTTPError, httplib_IncompleteRead):
class InvalidChunkLength(HTTPError, httplib_IncompleteRead): class InvalidChunkLength(HTTPError, httplib_IncompleteRead):
"""Invalid chunk length in a chunked response.""" """Invalid chunk length in a chunked response."""
def __init__(self, response, length): def __init__(self, response: HTTPResponse, length: bytes) -> None:
super(InvalidChunkLength, self).__init__( self.partial: int = response.tell() # type: ignore[assignment]
response.tell(), response.length_remaining self.expected: int | None = response.length_remaining
)
self.response = response self.response = response
self.length = length self.length = length
def __repr__(self): def __repr__(self) -> str:
return "InvalidChunkLength(got length %r, %i bytes read)" % ( return "InvalidChunkLength(got length %r, %i bytes read)" % (
self.length, self.length,
self.partial, self.partial,
@ -280,15 +282,13 @@ class InvalidChunkLength(HTTPError, httplib_IncompleteRead):
class InvalidHeader(HTTPError): class InvalidHeader(HTTPError):
"""The header provided was somehow invalid.""" """The header provided was somehow invalid."""
pass
class ProxySchemeUnknown(AssertionError, URLSchemeUnknown): class ProxySchemeUnknown(AssertionError, URLSchemeUnknown):
"""ProxyManager does not support the supplied scheme""" """ProxyManager does not support the supplied scheme"""
# TODO(t-8ch): Stop inheriting from AssertionError in v2.0. # TODO(t-8ch): Stop inheriting from AssertionError in v2.0.
def __init__(self, scheme): def __init__(self, scheme: str | None) -> None:
# 'localhost' is here because our URL parser parses # 'localhost' is here because our URL parser parses
# localhost:8080 -> scheme=localhost, remove if we fix this. # localhost:8080 -> scheme=localhost, remove if we fix this.
if scheme == "localhost": if scheme == "localhost":
@ -296,28 +296,23 @@ class ProxySchemeUnknown(AssertionError, URLSchemeUnknown):
if scheme is None: if scheme is None:
message = "Proxy URL had no scheme, should start with http:// or https://" message = "Proxy URL had no scheme, should start with http:// or https://"
else: else:
message = ( message = f"Proxy URL had unsupported scheme {scheme}, should use http:// or https://"
"Proxy URL had unsupported scheme %s, should use http:// or https://" super().__init__(message)
% scheme
)
super(ProxySchemeUnknown, self).__init__(message)
class ProxySchemeUnsupported(ValueError): class ProxySchemeUnsupported(ValueError):
"""Fetching HTTPS resources through HTTPS proxies is unsupported""" """Fetching HTTPS resources through HTTPS proxies is unsupported"""
pass
class HeaderParsingError(HTTPError): class HeaderParsingError(HTTPError):
"""Raised by assert_header_parsing, but we convert it to a log.warning statement.""" """Raised by assert_header_parsing, but we convert it to a log.warning statement."""
def __init__(self, defects, unparsed_data): def __init__(
message = "%s, unparsed data: %r" % (defects or "Unknown", unparsed_data) self, defects: list[MessageDefect], unparsed_data: bytes | str | None
super(HeaderParsingError, self).__init__(message) ) -> None:
message = f"{defects or 'Unknown'}, unparsed data: {unparsed_data!r}"
super().__init__(message)
class UnrewindableBodyError(HTTPError): class UnrewindableBodyError(HTTPError):
"""urllib3 encountered an error when trying to rewind a body""" """urllib3 encountered an error when trying to rewind a body"""
pass

View file

@ -1,13 +1,20 @@
from __future__ import absolute_import from __future__ import annotations
import email.utils import email.utils
import mimetypes import mimetypes
import re import typing
from .packages import six _TYPE_FIELD_VALUE = typing.Union[str, bytes]
_TYPE_FIELD_VALUE_TUPLE = typing.Union[
_TYPE_FIELD_VALUE,
typing.Tuple[str, _TYPE_FIELD_VALUE],
typing.Tuple[str, _TYPE_FIELD_VALUE, str],
]
def guess_content_type(filename, default="application/octet-stream"): def guess_content_type(
filename: str | None, default: str = "application/octet-stream"
) -> str:
""" """
Guess the "Content-Type" of a file. Guess the "Content-Type" of a file.
@ -21,7 +28,7 @@ def guess_content_type(filename, default="application/octet-stream"):
return default return default
def format_header_param_rfc2231(name, value): def format_header_param_rfc2231(name: str, value: _TYPE_FIELD_VALUE) -> str:
""" """
Helper function to format and quote a single header parameter using the Helper function to format and quote a single header parameter using the
strategy defined in RFC 2231. strategy defined in RFC 2231.
@ -34,14 +41,28 @@ def format_header_param_rfc2231(name, value):
The name of the parameter, a string expected to be ASCII only. The name of the parameter, a string expected to be ASCII only.
:param value: :param value:
The value of the parameter, provided as ``bytes`` or `str``. The value of the parameter, provided as ``bytes`` or `str``.
:ret: :returns:
An RFC-2231-formatted unicode string. An RFC-2231-formatted unicode string.
.. deprecated:: 2.0.0
Will be removed in urllib3 v2.1.0. This is not valid for
``multipart/form-data`` header parameters.
""" """
if isinstance(value, six.binary_type): import warnings
warnings.warn(
"'format_header_param_rfc2231' is deprecated and will be "
"removed in urllib3 v2.1.0. This is not valid for "
"multipart/form-data header parameters.",
DeprecationWarning,
stacklevel=2,
)
if isinstance(value, bytes):
value = value.decode("utf-8") value = value.decode("utf-8")
if not any(ch in value for ch in '"\\\r\n'): if not any(ch in value for ch in '"\\\r\n'):
result = u'%s="%s"' % (name, value) result = f'{name}="{value}"'
try: try:
result.encode("ascii") result.encode("ascii")
except (UnicodeEncodeError, UnicodeDecodeError): except (UnicodeEncodeError, UnicodeDecodeError):
@ -49,81 +70,87 @@ def format_header_param_rfc2231(name, value):
else: else:
return result return result
if six.PY2: # Python 2:
value = value.encode("utf-8")
# encode_rfc2231 accepts an encoded string and returns an ascii-encoded
# string in Python 2 but accepts and returns unicode strings in Python 3
value = email.utils.encode_rfc2231(value, "utf-8") value = email.utils.encode_rfc2231(value, "utf-8")
value = "%s*=%s" % (name, value) value = f"{name}*={value}"
if six.PY2: # Python 2:
value = value.decode("utf-8")
return value return value
_HTML5_REPLACEMENTS = { def format_multipart_header_param(name: str, value: _TYPE_FIELD_VALUE) -> str:
u"\u0022": u"%22",
# Replace "\" with "\\".
u"\u005C": u"\u005C\u005C",
}
# All control characters from 0x00 to 0x1F *except* 0x1B.
_HTML5_REPLACEMENTS.update(
{
six.unichr(cc): u"%{:02X}".format(cc)
for cc in range(0x00, 0x1F + 1)
if cc not in (0x1B,)
}
)
def _replace_multiple(value, needles_and_replacements):
def replacer(match):
return needles_and_replacements[match.group(0)]
pattern = re.compile(
r"|".join([re.escape(needle) for needle in needles_and_replacements.keys()])
)
result = pattern.sub(replacer, value)
return result
def format_header_param_html5(name, value):
""" """
Helper function to format and quote a single header parameter using the Format and quote a single multipart header parameter.
HTML5 strategy.
Particularly useful for header parameters which might contain This follows the `WHATWG HTML Standard`_ as of 2021/06/10, matching
non-ASCII values, like file names. This follows the `HTML5 Working Draft the behavior of current browser and curl versions. Values are
Section 4.10.22.7`_ and matches the behavior of curl and modern browsers. assumed to be UTF-8. The ``\\n``, ``\\r``, and ``"`` characters are
percent encoded.
.. _HTML5 Working Draft Section 4.10.22.7: .. _WHATWG HTML Standard:
https://w3c.github.io/html/sec-forms.html#multipart-form-data https://html.spec.whatwg.org/multipage/
form-control-infrastructure.html#multipart-form-data
:param name: :param name:
The name of the parameter, a string expected to be ASCII only. The name of the parameter, an ASCII-only ``str``.
:param value: :param value:
The value of the parameter, provided as ``bytes`` or `str``. The value of the parameter, a ``str`` or UTF-8 encoded
:ret: ``bytes``.
A unicode string, stripped of troublesome characters. :returns:
A string ``name="value"`` with the escaped value.
.. versionchanged:: 2.0.0
Matches the WHATWG HTML Standard as of 2021/06/10. Control
characters are no longer percent encoded.
.. versionchanged:: 2.0.0
Renamed from ``format_header_param_html5`` and
``format_header_param``. The old names will be removed in
urllib3 v2.1.0.
""" """
if isinstance(value, six.binary_type): if isinstance(value, bytes):
value = value.decode("utf-8") value = value.decode("utf-8")
value = _replace_multiple(value, _HTML5_REPLACEMENTS) # percent encode \n \r "
value = value.translate({10: "%0A", 13: "%0D", 34: "%22"})
return u'%s="%s"' % (name, value) return f'{name}="{value}"'
# For backwards-compatibility. def format_header_param_html5(name: str, value: _TYPE_FIELD_VALUE) -> str:
format_header_param = format_header_param_html5 """
.. deprecated:: 2.0.0
Renamed to :func:`format_multipart_header_param`. Will be
removed in urllib3 v2.1.0.
"""
import warnings
warnings.warn(
"'format_header_param_html5' has been renamed to "
"'format_multipart_header_param'. The old name will be "
"removed in urllib3 v2.1.0.",
DeprecationWarning,
stacklevel=2,
)
return format_multipart_header_param(name, value)
class RequestField(object): def format_header_param(name: str, value: _TYPE_FIELD_VALUE) -> str:
"""
.. deprecated:: 2.0.0
Renamed to :func:`format_multipart_header_param`. Will be
removed in urllib3 v2.1.0.
"""
import warnings
warnings.warn(
"'format_header_param' has been renamed to "
"'format_multipart_header_param'. The old name will be "
"removed in urllib3 v2.1.0.",
DeprecationWarning,
stacklevel=2,
)
return format_multipart_header_param(name, value)
class RequestField:
""" """
A data container for request body parameters. A data container for request body parameters.
@ -135,29 +162,47 @@ class RequestField(object):
An optional filename of the request field. Must be unicode. An optional filename of the request field. Must be unicode.
:param headers: :param headers:
An optional dict-like object of headers to initially use for the field. An optional dict-like object of headers to initially use for the field.
:param header_formatter:
An optional callable that is used to encode and format the headers. By .. versionchanged:: 2.0.0
default, this is :func:`format_header_param_html5`. The ``header_formatter`` parameter is deprecated and will
be removed in urllib3 v2.1.0.
""" """
def __init__( def __init__(
self, self,
name, name: str,
data, data: _TYPE_FIELD_VALUE,
filename=None, filename: str | None = None,
headers=None, headers: typing.Mapping[str, str] | None = None,
header_formatter=format_header_param_html5, header_formatter: typing.Callable[[str, _TYPE_FIELD_VALUE], str] | None = None,
): ):
self._name = name self._name = name
self._filename = filename self._filename = filename
self.data = data self.data = data
self.headers = {} self.headers: dict[str, str | None] = {}
if headers: if headers:
self.headers = dict(headers) self.headers = dict(headers)
if header_formatter is not None:
import warnings
warnings.warn(
"The 'header_formatter' parameter is deprecated and "
"will be removed in urllib3 v2.1.0.",
DeprecationWarning,
stacklevel=2,
)
self.header_formatter = header_formatter self.header_formatter = header_formatter
else:
self.header_formatter = format_multipart_header_param
@classmethod @classmethod
def from_tuples(cls, fieldname, value, header_formatter=format_header_param_html5): def from_tuples(
cls,
fieldname: str,
value: _TYPE_FIELD_VALUE_TUPLE,
header_formatter: typing.Callable[[str, _TYPE_FIELD_VALUE], str] | None = None,
) -> RequestField:
""" """
A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters. A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters.
@ -174,11 +219,19 @@ class RequestField(object):
Field names and filenames must be unicode. Field names and filenames must be unicode.
""" """
filename: str | None
content_type: str | None
data: _TYPE_FIELD_VALUE
if isinstance(value, tuple): if isinstance(value, tuple):
if len(value) == 3: if len(value) == 3:
filename, data, content_type = value filename, data, content_type = typing.cast(
typing.Tuple[str, _TYPE_FIELD_VALUE, str], value
)
else: else:
filename, data = value filename, data = typing.cast(
typing.Tuple[str, _TYPE_FIELD_VALUE], value
)
content_type = guess_content_type(filename) content_type = guess_content_type(filename)
else: else:
filename = None filename = None
@ -192,20 +245,29 @@ class RequestField(object):
return request_param return request_param
def _render_part(self, name, value): def _render_part(self, name: str, value: _TYPE_FIELD_VALUE) -> str:
""" """
Overridable helper function to format a single header parameter. By Override this method to change how each multipart header
default, this calls ``self.header_formatter``. parameter is formatted. By default, this calls
:func:`format_multipart_header_param`.
:param name: :param name:
The name of the parameter, a string expected to be ASCII only. The name of the parameter, an ASCII-only ``str``.
:param value: :param value:
The value of the parameter, provided as a unicode string. The value of the parameter, a ``str`` or UTF-8 encoded
""" ``bytes``.
:meta public:
"""
return self.header_formatter(name, value) return self.header_formatter(name, value)
def _render_parts(self, header_parts): def _render_parts(
self,
header_parts: (
dict[str, _TYPE_FIELD_VALUE | None]
| typing.Sequence[tuple[str, _TYPE_FIELD_VALUE | None]]
),
) -> str:
""" """
Helper function to format and quote a single header. Helper function to format and quote a single header.
@ -216,18 +278,21 @@ class RequestField(object):
A sequence of (k, v) tuples or a :class:`dict` of (k, v) to format A sequence of (k, v) tuples or a :class:`dict` of (k, v) to format
as `k1="v1"; k2="v2"; ...`. as `k1="v1"; k2="v2"; ...`.
""" """
iterable: typing.Iterable[tuple[str, _TYPE_FIELD_VALUE | None]]
parts = [] parts = []
iterable = header_parts
if isinstance(header_parts, dict): if isinstance(header_parts, dict):
iterable = header_parts.items() iterable = header_parts.items()
else:
iterable = header_parts
for name, value in iterable: for name, value in iterable:
if value is not None: if value is not None:
parts.append(self._render_part(name, value)) parts.append(self._render_part(name, value))
return u"; ".join(parts) return "; ".join(parts)
def render_headers(self): def render_headers(self) -> str:
""" """
Renders the headers for this request field. Renders the headers for this request field.
""" """
@ -236,39 +301,45 @@ class RequestField(object):
sort_keys = ["Content-Disposition", "Content-Type", "Content-Location"] sort_keys = ["Content-Disposition", "Content-Type", "Content-Location"]
for sort_key in sort_keys: for sort_key in sort_keys:
if self.headers.get(sort_key, False): if self.headers.get(sort_key, False):
lines.append(u"%s: %s" % (sort_key, self.headers[sort_key])) lines.append(f"{sort_key}: {self.headers[sort_key]}")
for header_name, header_value in self.headers.items(): for header_name, header_value in self.headers.items():
if header_name not in sort_keys: if header_name not in sort_keys:
if header_value: if header_value:
lines.append(u"%s: %s" % (header_name, header_value)) lines.append(f"{header_name}: {header_value}")
lines.append(u"\r\n") lines.append("\r\n")
return u"\r\n".join(lines) return "\r\n".join(lines)
def make_multipart( def make_multipart(
self, content_disposition=None, content_type=None, content_location=None self,
): content_disposition: str | None = None,
content_type: str | None = None,
content_location: str | None = None,
) -> None:
""" """
Makes this request field into a multipart request field. Makes this request field into a multipart request field.
This method overrides "Content-Disposition", "Content-Type" and This method overrides "Content-Disposition", "Content-Type" and
"Content-Location" headers to the request parameter. "Content-Location" headers to the request parameter.
:param content_disposition:
The 'Content-Disposition' of the request body. Defaults to 'form-data'
:param content_type: :param content_type:
The 'Content-Type' of the request body. The 'Content-Type' of the request body.
:param content_location: :param content_location:
The 'Content-Location' of the request body. The 'Content-Location' of the request body.
""" """
self.headers["Content-Disposition"] = content_disposition or u"form-data" content_disposition = (content_disposition or "form-data") + "; ".join(
self.headers["Content-Disposition"] += u"; ".join(
[ [
u"", "",
self._render_parts( self._render_parts(
((u"name", self._name), (u"filename", self._filename)) (("name", self._name), ("filename", self._filename))
), ),
] ]
) )
self.headers["Content-Disposition"] = content_disposition
self.headers["Content-Type"] = content_type self.headers["Content-Type"] = content_type
self.headers["Content-Location"] = content_location self.headers["Content-Location"] = content_location

View file

@ -1,28 +1,32 @@
from __future__ import absolute_import from __future__ import annotations
import binascii import binascii
import codecs import codecs
import os import os
import typing
from io import BytesIO from io import BytesIO
from .fields import RequestField from .fields import _TYPE_FIELD_VALUE_TUPLE, RequestField
from .packages import six
from .packages.six import b
writer = codecs.lookup("utf-8")[3] writer = codecs.lookup("utf-8")[3]
_TYPE_FIELDS_SEQUENCE = typing.Sequence[
typing.Union[typing.Tuple[str, _TYPE_FIELD_VALUE_TUPLE], RequestField]
]
_TYPE_FIELDS = typing.Union[
_TYPE_FIELDS_SEQUENCE,
typing.Mapping[str, _TYPE_FIELD_VALUE_TUPLE],
]
def choose_boundary():
def choose_boundary() -> str:
""" """
Our embarrassingly-simple replacement for mimetools.choose_boundary. Our embarrassingly-simple replacement for mimetools.choose_boundary.
""" """
boundary = binascii.hexlify(os.urandom(16)) return binascii.hexlify(os.urandom(16)).decode()
if not six.PY2:
boundary = boundary.decode("ascii")
return boundary
def iter_field_objects(fields): def iter_field_objects(fields: _TYPE_FIELDS) -> typing.Iterable[RequestField]:
""" """
Iterate over fields. Iterate over fields.
@ -30,42 +34,29 @@ def iter_field_objects(fields):
:class:`~urllib3.fields.RequestField`. :class:`~urllib3.fields.RequestField`.
""" """
if isinstance(fields, dict): iterable: typing.Iterable[RequestField | tuple[str, _TYPE_FIELD_VALUE_TUPLE]]
i = six.iteritems(fields)
else:
i = iter(fields)
for field in i: if isinstance(fields, typing.Mapping):
iterable = fields.items()
else:
iterable = fields
for field in iterable:
if isinstance(field, RequestField): if isinstance(field, RequestField):
yield field yield field
else: else:
yield RequestField.from_tuples(*field) yield RequestField.from_tuples(*field)
def iter_fields(fields): def encode_multipart_formdata(
""" fields: _TYPE_FIELDS, boundary: str | None = None
.. deprecated:: 1.6 ) -> tuple[bytes, str]:
Iterate over fields.
The addition of :class:`~urllib3.fields.RequestField` makes this function
obsolete. Instead, use :func:`iter_field_objects`, which returns
:class:`~urllib3.fields.RequestField` objects.
Supports list of (k, v) tuples and dicts.
"""
if isinstance(fields, dict):
return ((k, v) for k, v in six.iteritems(fields))
return ((k, v) for k, v in fields)
def encode_multipart_formdata(fields, boundary=None):
""" """
Encode a dictionary of ``fields`` using the multipart/form-data MIME format. Encode a dictionary of ``fields`` using the multipart/form-data MIME format.
:param fields: :param fields:
Dictionary of fields or list of (key, :class:`~urllib3.fields.RequestField`). Dictionary of fields or list of (key, :class:`~urllib3.fields.RequestField`).
Values are processed by :func:`urllib3.fields.RequestField.from_tuples`.
:param boundary: :param boundary:
If not specified, then a random boundary will be generated using If not specified, then a random boundary will be generated using
@ -76,7 +67,7 @@ def encode_multipart_formdata(fields, boundary=None):
boundary = choose_boundary() boundary = choose_boundary()
for field in iter_field_objects(fields): for field in iter_field_objects(fields):
body.write(b("--%s\r\n" % (boundary))) body.write(f"--{boundary}\r\n".encode("latin-1"))
writer(body).write(field.render_headers()) writer(body).write(field.render_headers())
data = field.data data = field.data
@ -84,15 +75,15 @@ def encode_multipart_formdata(fields, boundary=None):
if isinstance(data, int): if isinstance(data, int):
data = str(data) # Backwards compatibility data = str(data) # Backwards compatibility
if isinstance(data, six.text_type): if isinstance(data, str):
writer(body).write(data) writer(body).write(data)
else: else:
body.write(data) body.write(data)
body.write(b"\r\n") body.write(b"\r\n")
body.write(b("--%s--\r\n" % (boundary))) body.write(f"--{boundary}--\r\n".encode("latin-1"))
content_type = str("multipart/form-data; boundary=%s" % boundary) content_type = f"multipart/form-data; boundary={boundary}"
return body.getvalue(), content_type return body.getvalue(), content_type

Some files were not shown because too many files have changed in this diff Show more