Bump beautifulsoup4 from 4.11.2 to 4.12.2 (#2037)

* Bump beautifulsoup4 from 4.11.2 to 4.12.2 Bumps [beautifulsoup4](https://www.crummy.com/software/BeautifulSoup/bs4/) from 4.11.2 to 4.12.2. --- updated-dependencies: - dependency-name: beautifulsoup4 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] <support@github.com> * Update beautifulsoup4==4.12.2 --------- Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: JonnyWong16 <9099342+JonnyWong16@users.noreply.github.com> [skip ci]
2025-07-06 13:11:15 -07:00 · 2023-08-23 21:38:49 -07:00 · 2023-08-23 21:38:49 -07:00 · e70e08c3f5
commit e70e08c3f5
parent 1798594569
32 changed files with 1439 additions and 755 deletions
--- a/lib/bs4/builder/_htmlparser.py
+++ b/lib/bs4/builder/_htmlparser.py
@@ -24,6 +24,7 @@ from bs4.dammit import EntitySubstitution, UnicodeDammit

 from bs4.builder import (
    DetectsXMLParsedAsHTML,
+    ParserRejectedMarkup,
    HTML,
    HTMLTreeBuilder,
    STRICT,
@@ -70,6 +71,22 @@ class BeautifulSoupHTMLParser(HTMLParser, DetectsXMLParsedAsHTML):

        self._initialize_xml_detector()

+    def error(self, message):
+        # NOTE: This method is required for as long as Python 3.9 is
+        # supported. The corresponding code was removed from HTMLParser
+        # in 3.5, but was not removed from ParserBase until 3.10.
+        # https://github.com/python/cpython/issues/76025
+        #
+        # The original implementation turned the error into a warning,
+        # but in every case I discovered, this made HTMLParser
+        # immediately crash with an error message that was less
+        # helpful than the warning. The new implementation makes it
+        # clearer that html.parser just can't parse this markup. The
+        # 3.10 implementation does the same, though it raises
+        # AssertionError rather than calling a method. (We catch that
+        # AssertionError and wrap it in a ParserRejectedMarkup too.)
+        raise ParserRejectedMarkup(message)
+
    def handle_startendtag(self, name, attrs):
        """Handle an incoming empty-element tag.

@@ -359,6 +376,12 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
        args, kwargs = self.parser_args
        parser = BeautifulSoupHTMLParser(*args, **kwargs)
        parser.soup = self.soup
-        parser.feed(markup)
+        try:
+            parser.feed(markup)
+        except AssertionError as e:
+            # html.parser raises AssertionError in rare cases to
+            # indicate a fatal problem with the markup, especially
+            # when there's an error in the doctype declaration.
+            raise ParserRejectedMarkup(e)
        parser.close()
        parser.already_closed_empty_element = []