Update beautifulsoup4-4.10.0

This commit is contained in:
JonnyWong16 2021-10-14 20:46:06 -07:00
parent b581460b51
commit ab8fa4d5b3
No known key found for this signature in database
GPG key ID: B1F1F9807184697A
16 changed files with 4599 additions and 743 deletions

View file

@@ -182,3 +182,45 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
soup = self.soup(markup, store_line_numbers=False)
self.assertEqual("sourceline", soup.p.sourceline.name)
self.assertEqual("sourcepos", soup.p.sourcepos.name)
def test_special_string_containers(self):
# The html5lib tree builder doesn't support this standard feature,
# because there's no way of knowing, when a string is created,
# where in the tree it will eventually end up.
pass
def test_html5_attributes(self):
    # The html5lib TreeBuilder can convert any entity named in
    # the HTML5 spec to a sequence of Unicode characters, and
    # convert those Unicode characters to a (potentially
    # different) named entity on the way out.
    #
    # This is a copy of the same test from
    # HTMLParserTreeBuilderSmokeTest. It's not in the superclass
    # because the lxml HTML TreeBuilder _doesn't_ work this way.
    #
    # Each case is (entity reference placed in the markup,
    # Unicode text it should parse to, bytes expected when the tag is
    # re-encoded with formatter="html").
    #
    # The entity references are spelled out as ASCII on purpose: a
    # bytes literal may only contain ASCII characters, and writing the
    # decoded character in the input column would bypass the entity
    # handling this test exists to check.
    # NOTE(review): the entity names were reconstructed from the
    # expected code points -- confirm against the upstream test suite.
    entity_cases = (
        ("&RightArrowLeftArrow;", '\u21c4', b'&rlarr;'),
        ('&models;', '\u22a7', b'&models;'),
        ('&Nfr;', '\U0001d511', b'&Nfr;'),
        ('&ngeqq;', '\u2267\u0338', b'&ngeqq;'),
        ('&not;', '\xac', b'&not;'),
        ('&Not;', '\u2aec', b'&Not;'),
        ('&quot;', '"', b'"'),
        ('&there4;', '\u2234', b'&there4;'),
        ('&Therefore;', '\u2234', b'&there4;'),
        ('&therefore;', '\u2234', b'&there4;'),
        ("&fjlig;", 'fj', b'fj'),
        ("&sqcup;", '\u2294', b'&sqcup;'),
        ("&sqcups;", '\u2294\ufe00', b'&sqcups;'),
        ("&apos;", "'", b"'"),
        ("&verbar;", "|", b"|"),
    )
    for input_element, output_unicode, output_element in entity_cases:
        markup = '<div>%s</div>' % input_element
        div = self.soup(markup).div

        # With no formatter argument the output is expected to carry the
        # parsed character(s) themselves, encoded as UTF-8.
        without_element = div.encode()
        expect = b"<div>%s</div>" % output_unicode.encode("utf8")
        self.assertEqual(without_element, expect)

        # With formatter="html" the output is expected to carry the
        # listed named entity (or the plain character, where the
        # expected bytes are plain).
        with_element = div.encode(formatter="html")
        expect = b"<div>%s</div>" % output_element
        self.assertEqual(with_element, expect)