Bump beautifulsoup4 from 4.12.2 to 4.12.3 (#2267)

* Bump beautifulsoup4 from 4.12.2 to 4.12.3

Bumps [beautifulsoup4](https://www.crummy.com/software/BeautifulSoup/bs4/) from 4.12.2 to 4.12.3.

---
updated-dependencies:
- dependency-name: beautifulsoup4
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

* Update beautifulsoup4==4.12.3

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: JonnyWong16 <9099342+JonnyWong16@users.noreply.github.com>

[skip ci]
This commit is contained in:
dependabot[bot] 2024-03-24 15:26:22 -07:00 committed by GitHub
commit a0170a6f3d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
25 changed files with 263 additions and 173 deletions

View file

@ -1105,7 +1105,7 @@ class XMLTreeBuilderSmokeTest(TreeBuilderSmokeTest):
doc = """<?xml version="1.0" encoding="utf-8"?>
<Document xmlns="http://example.com/ns0"
xmlns:ns1="http://example.com/ns1"
xmlns:ns2="http://example.com/ns2"
xmlns:ns2="http://example.com/ns2">
<ns1:tag>foo</ns1:tag>
<ns1:tag>bar</ns1:tag>
<ns2:tag key="value">baz</ns2:tag>

View file

@ -0,0 +1 @@
˙ ><applet></applet><applet></applet><apple|><applet><applet><appl„><applet><applet></applet></applet></applet></applet><applet></applet><apple>t<applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet>et><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><azplet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><app
let><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><plet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><apple
t><applet><applet><applet><applet><applet><applet><applet><applet><applet></applet></applet></applet></applet></appt></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet><<meta charset=utf-8>

View file

@ -0,0 +1 @@
- ˙˙ <math><select><mi><select><select>t

View file

@ -14,30 +14,75 @@ from bs4 import (
BeautifulSoup,
ParserRejectedMarkup,
)
try:
from soupsieve.util import SelectorSyntaxError
import lxml
import html5lib
fully_fuzzable = True
except ImportError:
fully_fuzzable = False
@pytest.mark.skipif(not fully_fuzzable, reason="Prerequisites for fuzz tests are not installed.")
class TestFuzz(object):
# Test case markup files from fuzzers are given this extension so
# they can be included in builds.
TESTCASE_SUFFIX = ".testcase"
# Copied 20230512 from
# https://github.com/google/oss-fuzz/blob/4ac6a645a197a695fe76532251feb5067076b3f3/projects/bs4/bs4_fuzzer.py
#
# Copying the code lets us precisely duplicate the behavior of
# oss-fuzz. The downside is that this code changes over time, so
# multiple copies of the code must be kept around to run against
# older tests. I'm not sure what to do about this, but I may
# retire old tests after a time.
def fuzz_test_with_css(self, filename):
# Replay a single fuzzer test case: load it via self.__markup(), parse
# it with the parser the test case itself selects, run the CSS selector
# embedded in the test case, then pretty-print the resulting tree.
#
# filename: name of the test case file, passed to self.__markup().
#
# Returns None. The method returns early when the parser choice cannot
# be computed (ValueError), when the parser rejects the markup
# (ParserRejectedMarkup or ValueError), or when the selector is not
# valid (SelectorSyntaxError). Any other exception propagates to the
# caller and fails the test.
data = self.__markup(filename)
parsers = ['lxml-xml', 'html5lib', 'html.parser', 'lxml']
# The first element of the test case picks the parser, wrapped around
# the number of available parsers.
try:
idx = int(data[0]) % len(parsers)
except ValueError:
return
# Elements 1 through 9 are the CSS selector; everything from index 10
# onward is treated as the markup.
css_selector, data = data[1:10], data[10:]
# NOTE(review): data[1:] drops one more leading element after the split
# above. Presumably this mirrors the copied upstream harness; it is left
# untouched so the behavior stays identical to the copy.
try:
soup = BeautifulSoup(data[1:], features=parsers[idx])
except ParserRejectedMarkup:
return
except ValueError:
return
# Materialize the full result of find_all(True) so the search actually
# runs to completion.
list(soup.find_all(True))
# The selector is decoded with errors='replace', so undecodable input
# does not raise at the decode step.
try:
soup.css.select(css_selector.decode('utf-8', 'replace'))
except SelectorSyntaxError:
return
# Finally, render the tree; the return value is discarded.
soup.prettify()
# This class of error has been fixed by catching a less helpful
# exception from html.parser and raising ParserRejectedMarkup
# instead.
@pytest.mark.parametrize(
    "filename",
    [
        "clusterfuzz-testcase-minimized-bs4_fuzzer-5703933063462912",
        "crash-ffbdfa8a2b26f13537b68d3794b0478a4090ee4a",
    ],
)
def test_rejected_markup(self, filename):
    # Each listed test case must make the html.parser tree builder
    # raise ParserRejectedMarkup; the test fails if parsing succeeds
    # or if a different exception escapes.
    rejected_markup = self.__markup(filename)
    with pytest.raises(ParserRejectedMarkup):
        BeautifulSoup(rejected_markup, features='html.parser')
# This class of error has to do with very deeply nested documents
# which overflow the Python call stack when the tree is converted
# to a string. This is an issue with Beautiful Soup which was fixed
# as part of [bug=1471755].
#
# These test cases are in the older format that doesn't specify
# which parser to use or give a CSS selector.
@pytest.mark.parametrize(
"filename", [
"clusterfuzz-testcase-minimized-bs4_fuzzer-5984173902397440",
@ -46,18 +91,44 @@ class TestFuzz(object):
"clusterfuzz-testcase-minimized-bs4_fuzzer-6450958476902400",
]
)
def test_deeply_nested_document(self, filename):
def test_deeply_nested_document_without_css(self, filename):
# Parsing the document and encoding it back to a string is
# sufficient to demonstrate that the overflow problem has
# been fixed.
markup = self.__markup(filename)
BeautifulSoup(markup, 'html.parser').encode()
# This class of error has to do with very deeply nested documents
# which overflow the Python call stack when the tree is converted
# to a string. This is an issue with Beautiful Soup which was fixed
# as part of [bug=1471755].
@pytest.mark.parametrize(
    "filename",
    [
        "clusterfuzz-testcase-minimized-bs4_fuzzer-5000587759190016",
        "clusterfuzz-testcase-minimized-bs4_fuzzer-5375146639360000",
        "clusterfuzz-testcase-minimized-bs4_fuzzer-5492400320282624",
    ],
)
def test_deeply_nested_document(self, filename):
    # Replay each test case through the CSS-aware fuzz harness. The
    # test passes as long as the harness finishes without an exception
    # escaping.
    self.fuzz_test_with_css(filename)
@pytest.mark.parametrize(
    "filename",
    [
        "clusterfuzz-testcase-minimized-bs4_fuzzer-4670634698080256",
        "clusterfuzz-testcase-minimized-bs4_fuzzer-5270998950477824",
    ],
)
def test_soupsieve_errors(self, filename):
    # Replay each test case through the CSS-aware fuzz harness. The
    # test passes as long as the harness finishes without an exception
    # escaping.
    self.fuzz_test_with_css(filename)
# This class of error represents problems with html5lib's parser,
# not Beautiful Soup. I use
# https://github.com/html5lib/html5lib-python/issues/568 to notify
# the html5lib developers of these issues.
@pytest.mark.skip("html5lib problems")
#
# These test cases are in the older format that doesn't specify
# which parser to use or give a CSS selector.
@pytest.mark.skip(reason="html5lib-specific problems")
@pytest.mark.parametrize(
"filename", [
# b"""ÿ<!DOCTyPEV PUBLIC'''Ð'"""
@ -68,7 +139,7 @@ class TestFuzz(object):
# b'-<math><sElect><mi><sElect><sElect>'
"clusterfuzz-testcase-minimized-bs4_fuzzer-5843991618256896",
# b'ñ<table><svg><html>'
"clusterfuzz-testcase-minimized-bs4_fuzzer-6241471367348224",
@ -79,10 +150,24 @@ class TestFuzz(object):
"crash-0d306a50c8ed8bcd0785b67000fcd5dea1d33f08"
]
)
def test_html5lib_parse_errors(self, filename):
def test_html5lib_parse_errors_without_css(self, filename):
markup = self.__markup(filename)
print(BeautifulSoup(markup, 'html5lib').encode())
# This class of error represents problems with html5lib's parser,
# not Beautiful Soup. I use
# https://github.com/html5lib/html5lib-python/issues/568 to notify
# the html5lib developers of these issues.
@pytest.mark.skip(reason="html5lib-specific problems")
@pytest.mark.parametrize(
    "filename",
    [
        # b'- \xff\xff <math>\x10<select><mi><select><select>t'
        "clusterfuzz-testcase-minimized-bs4_fuzzer-6306874195312640",
    ],
)
def test_html5lib_parse_errors(self, filename):
    # Currently skipped. When enabled, this replays the test case
    # through the CSS-aware fuzz harness and passes if no exception
    # escapes.
    self.fuzz_test_with_css(filename)
def __markup(self, filename):
if not filename.endswith(self.TESTCASE_SUFFIX):
filename += self.TESTCASE_SUFFIX

View file

@ -219,3 +219,16 @@ class TestMultiValuedAttributes(SoupTest):
)
assert soup.a['class'] == 'foo'
assert soup.a['id'] == ['bar']
def test_hidden_tag_is_invisible(self):
    # A tag whose .hidden attribute is set is left out of the output,
    # while whatever the tag contains is still rendered.
    #
    # Beautiful Soup neither documents nor supports this (for
    # instance, NavigableString has no .hidden support even though it
    # could), but people rely on it, and checking that it keeps
    # working does no harm.
    document = self.soup('<div id="1"><span id="2">a string</span></div>')
    document.span.hidden = True
    rendered_div = str(document.div)
    assert rendered_div == '<div id="1">a string</div>'