mirror of
https://github.com/Tautulli/Tautulli.git
synced 2025-08-20 05:13:21 -07:00
Bump beautifulsoup4 from 4.12.2 to 4.12.3 (#2267)
* Bump beautifulsoup4 from 4.12.2 to 4.12.3 Bumps [beautifulsoup4](https://www.crummy.com/software/BeautifulSoup/bs4/) from 4.12.2 to 4.12.3. --- updated-dependencies: - dependency-name: beautifulsoup4 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] <support@github.com> * Update beautifulsoup4==4.12.3 --------- Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: JonnyWong16 <9099342+JonnyWong16@users.noreply.github.com> [skip ci]
This commit is contained in:
parent
faef9a94c4
commit
a0170a6f3d
25 changed files with 263 additions and 173 deletions
|
@ -15,8 +15,8 @@ documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
|
|||
"""
|
||||
|
||||
__author__ = "Leonard Richardson (leonardr@segfault.org)"
|
||||
__version__ = "4.12.2"
|
||||
__copyright__ = "Copyright (c) 2004-2023 Leonard Richardson"
|
||||
__version__ = "4.12.3"
|
||||
__copyright__ = "Copyright (c) 2004-2024 Leonard Richardson"
|
||||
# Use of this source code is governed by the MIT license.
|
||||
__license__ = "MIT"
|
||||
|
||||
|
|
|
@ -514,15 +514,19 @@ class DetectsXMLParsedAsHTML(object):
|
|||
XML_PREFIX_B = b'<?xml'
|
||||
|
||||
@classmethod
|
||||
def warn_if_markup_looks_like_xml(cls, markup):
|
||||
def warn_if_markup_looks_like_xml(cls, markup, stacklevel=3):
|
||||
"""Perform a check on some markup to see if it looks like XML
|
||||
that's not XHTML. If so, issue a warning.
|
||||
|
||||
This is much less reliable than doing the check while parsing,
|
||||
but some of the tree builders can't do that.
|
||||
|
||||
:param stacklevel: The stacklevel of the code calling this
|
||||
function.
|
||||
|
||||
:return: True if the markup looks like non-XHTML XML, False
|
||||
otherwise.
|
||||
|
||||
"""
|
||||
if isinstance(markup, bytes):
|
||||
prefix = cls.XML_PREFIX_B
|
||||
|
@ -535,15 +539,16 @@ class DetectsXMLParsedAsHTML(object):
|
|||
and markup.startswith(prefix)
|
||||
and not looks_like_html.search(markup[:500])
|
||||
):
|
||||
cls._warn()
|
||||
cls._warn(stacklevel=stacklevel+2)
|
||||
return True
|
||||
return False
|
||||
|
||||
@classmethod
|
||||
def _warn(cls):
|
||||
def _warn(cls, stacklevel=5):
|
||||
"""Issue a warning about XML being parsed as HTML."""
|
||||
warnings.warn(
|
||||
XMLParsedAsHTMLWarning.MESSAGE, XMLParsedAsHTMLWarning
|
||||
XMLParsedAsHTMLWarning.MESSAGE, XMLParsedAsHTMLWarning,
|
||||
stacklevel=stacklevel
|
||||
)
|
||||
|
||||
def _initialize_xml_detector(self):
|
||||
|
|
|
@ -77,7 +77,9 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
|
|||
|
||||
# html5lib only parses HTML, so if it's given XML that's worth
|
||||
# noting.
|
||||
DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml(markup)
|
||||
DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml(
|
||||
markup, stacklevel=3
|
||||
)
|
||||
|
||||
yield (markup, None, None, False)
|
||||
|
||||
|
|
|
@ -378,10 +378,10 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
|
|||
parser.soup = self.soup
|
||||
try:
|
||||
parser.feed(markup)
|
||||
parser.close()
|
||||
except AssertionError as e:
|
||||
# html.parser raises AssertionError in rare cases to
|
||||
# indicate a fatal problem with the markup, especially
|
||||
# when there's an error in the doctype declaration.
|
||||
raise ParserRejectedMarkup(e)
|
||||
parser.close()
|
||||
parser.already_closed_empty_element = []
|
||||
|
|
|
@ -179,7 +179,9 @@ class LXMLTreeBuilderForXML(TreeBuilder):
|
|||
self.processing_instruction_class = ProcessingInstruction
|
||||
# We're in HTML mode, so if we're given XML, that's worth
|
||||
# noting.
|
||||
DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml(markup)
|
||||
DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml(
|
||||
markup, stacklevel=3
|
||||
)
|
||||
else:
|
||||
self.processing_instruction_class = XMLProcessingInstruction
|
||||
|
||||
|
|
|
@ -1356,7 +1356,7 @@ class Tag(PageElement):
|
|||
This is the first step in the deepcopy process.
|
||||
"""
|
||||
clone = type(self)(
|
||||
None, self.builder, self.name, self.namespace,
|
||||
None, None, self.name, self.namespace,
|
||||
self.prefix, self.attrs, is_xml=self._is_xml,
|
||||
sourceline=self.sourceline, sourcepos=self.sourcepos,
|
||||
can_be_empty_element=self.can_be_empty_element,
|
||||
|
@ -1845,6 +1845,11 @@ class Tag(PageElement):
|
|||
return space_before + s + space_after
|
||||
|
||||
def _format_tag(self, eventual_encoding, formatter, opening):
|
||||
if self.hidden:
|
||||
# A hidden tag is invisible, although its contents
|
||||
# are visible.
|
||||
return ''
|
||||
|
||||
# A tag starts with the < character (see below).
|
||||
|
||||
# Then the / character, if this is a closing tag.
|
||||
|
|
|
@ -51,7 +51,7 @@ class Formatter(EntitySubstitution):
|
|||
void_element_close_prefix='/', cdata_containing_tags=None,
|
||||
empty_attributes_are_booleans=False, indent=1,
|
||||
):
|
||||
"""Constructor.
|
||||
r"""Constructor.
|
||||
|
||||
:param language: This should be Formatter.XML if you are formatting
|
||||
XML markup and Formatter.HTML if you are formatting HTML markup.
|
||||
|
@ -76,7 +76,7 @@ class Formatter(EntitySubstitution):
|
|||
negative, or "" will only insert newlines. Using a
|
||||
positive integer indent indents that many spaces per
|
||||
level. If indent is a string (such as "\t"), that string
|
||||
is used to indent each level. The default behavior to
|
||||
is used to indent each level. The default behavior is to
|
||||
indent one space per level.
|
||||
"""
|
||||
self.language = language
|
||||
|
|
|
@ -1105,7 +1105,7 @@ class XMLTreeBuilderSmokeTest(TreeBuilderSmokeTest):
|
|||
doc = """<?xml version="1.0" encoding="utf-8"?>
|
||||
<Document xmlns="http://example.com/ns0"
|
||||
xmlns:ns1="http://example.com/ns1"
|
||||
xmlns:ns2="http://example.com/ns2"
|
||||
xmlns:ns2="http://example.com/ns2">
|
||||
<ns1:tag>foo</ns1:tag>
|
||||
<ns1:tag>bar</ns1:tag>
|
||||
<ns2:tag key="value">baz</ns2:tag>
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
<20><> <20> <css
|
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1 @@
|
|||
˙ ><applet></applet><applet></applet><apple|><applet><applet><appl›„><applet><applet></applet></applet></applet></applet><applet></applet><apple>t<applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet>et><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><azplet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><plet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet></applet></applet></applet></applet></appt></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet><<meta charset=utf-8>
|
Binary file not shown.
|
@ -0,0 +1 @@
|
|||
- ˙˙ <math><select><mi><select><select>t
|
Binary file not shown.
|
@ -14,30 +14,75 @@ from bs4 import (
|
|||
BeautifulSoup,
|
||||
ParserRejectedMarkup,
|
||||
)
|
||||
try:
|
||||
from soupsieve.util import SelectorSyntaxError
|
||||
import lxml
|
||||
import html5lib
|
||||
fully_fuzzable = True
|
||||
except ImportError:
|
||||
fully_fuzzable = False
|
||||
|
||||
|
||||
@pytest.mark.skipif(not fully_fuzzable, reason="Prerequisites for fuzz tests are not installed.")
|
||||
class TestFuzz(object):
|
||||
|
||||
# Test case markup files from fuzzers are given this extension so
|
||||
# they can be included in builds.
|
||||
TESTCASE_SUFFIX = ".testcase"
|
||||
|
||||
# Copied 20230512 from
|
||||
# https://github.com/google/oss-fuzz/blob/4ac6a645a197a695fe76532251feb5067076b3f3/projects/bs4/bs4_fuzzer.py
|
||||
#
|
||||
# Copying the code lets us precisely duplicate the behavior of
|
||||
# oss-fuzz. The downside is that this code changes over time, so
|
||||
# multiple copies of the code must be kept around to run against
|
||||
# older tests. I'm not sure what to do about this, but I may
|
||||
# retire old tests after a time.
|
||||
def fuzz_test_with_css(self, filename):
|
||||
data = self.__markup(filename)
|
||||
parsers = ['lxml-xml', 'html5lib', 'html.parser', 'lxml']
|
||||
try:
|
||||
idx = int(data[0]) % len(parsers)
|
||||
except ValueError:
|
||||
return
|
||||
|
||||
css_selector, data = data[1:10], data[10:]
|
||||
|
||||
try:
|
||||
soup = BeautifulSoup(data[1:], features=parsers[idx])
|
||||
except ParserRejectedMarkup:
|
||||
return
|
||||
except ValueError:
|
||||
return
|
||||
|
||||
list(soup.find_all(True))
|
||||
try:
|
||||
soup.css.select(css_selector.decode('utf-8', 'replace'))
|
||||
except SelectorSyntaxError:
|
||||
return
|
||||
soup.prettify()
|
||||
|
||||
# This class of error has been fixed by catching a less helpful
|
||||
# exception from html.parser and raising ParserRejectedMarkup
|
||||
# instead.
|
||||
@pytest.mark.parametrize(
|
||||
"filename", [
|
||||
"clusterfuzz-testcase-minimized-bs4_fuzzer-5703933063462912",
|
||||
"crash-ffbdfa8a2b26f13537b68d3794b0478a4090ee4a",
|
||||
]
|
||||
)
|
||||
def test_rejected_markup(self, filename):
|
||||
markup = self.__markup(filename)
|
||||
with pytest.raises(ParserRejectedMarkup):
|
||||
BeautifulSoup(markup, 'html.parser')
|
||||
|
||||
|
||||
# This class of error has to do with very deeply nested documents
|
||||
# which overflow the Python call stack when the tree is converted
|
||||
# to a string. This is an issue with Beautiful Soup which was fixed
|
||||
# as part of [bug=1471755].
|
||||
#
|
||||
# These test cases are in the older format that doesn't specify
|
||||
# which parser to use or give a CSS selector.
|
||||
@pytest.mark.parametrize(
|
||||
"filename", [
|
||||
"clusterfuzz-testcase-minimized-bs4_fuzzer-5984173902397440",
|
||||
|
@ -46,18 +91,44 @@ class TestFuzz(object):
|
|||
"clusterfuzz-testcase-minimized-bs4_fuzzer-6450958476902400",
|
||||
]
|
||||
)
|
||||
def test_deeply_nested_document(self, filename):
|
||||
def test_deeply_nested_document_without_css(self, filename):
|
||||
# Parsing the document and encoding it back to a string is
|
||||
# sufficient to demonstrate that the overflow problem has
|
||||
# been fixed.
|
||||
markup = self.__markup(filename)
|
||||
BeautifulSoup(markup, 'html.parser').encode()
|
||||
|
||||
# This class of error has to do with very deeply nested documents
|
||||
# which overflow the Python call stack when the tree is converted
|
||||
# to a string. This is an issue with Beautiful Soup which was fixed
|
||||
# as part of [bug=1471755].
|
||||
@pytest.mark.parametrize(
|
||||
"filename", [
|
||||
"clusterfuzz-testcase-minimized-bs4_fuzzer-5000587759190016",
|
||||
"clusterfuzz-testcase-minimized-bs4_fuzzer-5375146639360000",
|
||||
"clusterfuzz-testcase-minimized-bs4_fuzzer-5492400320282624",
|
||||
]
|
||||
)
|
||||
def test_deeply_nested_document(self, filename):
|
||||
self.fuzz_test_with_css(filename)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"filename", [
|
||||
"clusterfuzz-testcase-minimized-bs4_fuzzer-4670634698080256",
|
||||
"clusterfuzz-testcase-minimized-bs4_fuzzer-5270998950477824",
|
||||
]
|
||||
)
|
||||
def test_soupsieve_errors(self, filename):
|
||||
self.fuzz_test_with_css(filename)
|
||||
|
||||
# This class of error represents problems with html5lib's parser,
|
||||
# not Beautiful Soup. I use
|
||||
# https://github.com/html5lib/html5lib-python/issues/568 to notify
|
||||
# the html5lib developers of these issues.
|
||||
@pytest.mark.skip("html5lib problems")
|
||||
#
|
||||
# These test cases are in the older format that doesn't specify
|
||||
# which parser to use or give a CSS selector.
|
||||
@pytest.mark.skip(reason="html5lib-specific problems")
|
||||
@pytest.mark.parametrize(
|
||||
"filename", [
|
||||
# b"""ÿ<!DOCTyPEV PUBLIC'''Ð'"""
|
||||
|
@ -68,7 +139,7 @@ class TestFuzz(object):
|
|||
|
||||
# b'-<math><sElect><mi><sElect><sElect>'
|
||||
"clusterfuzz-testcase-minimized-bs4_fuzzer-5843991618256896",
|
||||
|
||||
|
||||
# b'ñ<table><svg><html>'
|
||||
"clusterfuzz-testcase-minimized-bs4_fuzzer-6241471367348224",
|
||||
|
||||
|
@ -79,10 +150,24 @@ class TestFuzz(object):
|
|||
"crash-0d306a50c8ed8bcd0785b67000fcd5dea1d33f08"
|
||||
]
|
||||
)
|
||||
def test_html5lib_parse_errors(self, filename):
|
||||
def test_html5lib_parse_errors_without_css(self, filename):
|
||||
markup = self.__markup(filename)
|
||||
print(BeautifulSoup(markup, 'html5lib').encode())
|
||||
|
||||
# This class of error represents problems with html5lib's parser,
|
||||
# not Beautiful Soup. I use
|
||||
# https://github.com/html5lib/html5lib-python/issues/568 to notify
|
||||
# the html5lib developers of these issues.
|
||||
@pytest.mark.skip(reason="html5lib-specific problems")
|
||||
@pytest.mark.parametrize(
|
||||
"filename", [
|
||||
# b'- \xff\xff <math>\x10<select><mi><select><select>t'
|
||||
"clusterfuzz-testcase-minimized-bs4_fuzzer-6306874195312640",
|
||||
]
|
||||
)
|
||||
def test_html5lib_parse_errors(self, filename):
|
||||
self.fuzz_test_with_css(filename)
|
||||
|
||||
def __markup(self, filename):
|
||||
if not filename.endswith(self.TESTCASE_SUFFIX):
|
||||
filename += self.TESTCASE_SUFFIX
|
||||
|
|
|
@ -219,3 +219,16 @@ class TestMultiValuedAttributes(SoupTest):
|
|||
)
|
||||
assert soup.a['class'] == 'foo'
|
||||
assert soup.a['id'] == ['bar']
|
||||
|
||||
def test_hidden_tag_is_invisible(self):
|
||||
# Setting .hidden on a tag makes it invisible in output, but
|
||||
# leaves its contents visible.
|
||||
#
|
||||
# This is not a documented or supported feature of Beautiful
|
||||
# Soup (e.g. NavigableString doesn't support .hidden even
|
||||
# though it could), but some people use it and it's not
|
||||
# hurting anything to verify that it keeps working.
|
||||
#
|
||||
soup = self.soup('<div id="1"><span id="2">a string</span></div>')
|
||||
soup.span.hidden = True
|
||||
assert '<div id="1">a string</div>' == str(soup.div)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue