Update bleach==6.1.0

This commit is contained in:
JonnyWong16 2024-03-24 15:00:46 -07:00
commit b28128a7cb
No known key found for this signature in database
GPG key ID: B1F1F9807184697A
3 changed files with 16 additions and 9 deletions

View file

@ -11,9 +11,9 @@ from bleach.sanitizer import (
# yyyymmdd # yyyymmdd
__releasedate__ = "20230123" __releasedate__ = "20231006"
# x.y.z or x.y.z.dev0 -- semver # x.y.z or x.y.z.dev0 -- semver
__version__ = "6.0.0" __version__ = "6.1.0"
__all__ = ["clean", "linkify"] __all__ = ["clean", "linkify"]

View file

@ -395,10 +395,17 @@ class BleachHTMLTokenizer(HTMLTokenizer):
# followed by a series of characters. It's treated as a tag # followed by a series of characters. It's treated as a tag
# name that abruptly ends, but we should treat that like # name that abruptly ends, but we should treat that like
# character data # character data
yield { yield {"type": TAG_TOKEN_TYPE_CHARACTERS, "data": self.stream.get_tag()}
"type": TAG_TOKEN_TYPE_CHARACTERS, elif last_error_token["data"] in (
"data": "<" + self.currentToken["name"], "eof-in-attribute-name",
} "eof-in-attribute-value-no-quotes",
):
# Handle the case where the text being parsed ends with <
# followed by a series of characters and then space and then
# more characters. It's treated as a tag name followed by an
# attribute that abruptly ends, but we should treat that like
# character data.
yield {"type": TAG_TOKEN_TYPE_CHARACTERS, "data": self.stream.get_tag()}
else: else:
yield last_error_token yield last_error_token

View file

@ -45,8 +45,8 @@ def build_url_re(tlds=TLDS, protocols=html5lib_shim.allowed_protocols):
r"""\(* # Match any opening parentheses. r"""\(* # Match any opening parentheses.
\b(?<![@.])(?:(?:{0}):/{{0,3}}(?:(?:\w+:)?\w+@)?)? # http:// \b(?<![@.])(?:(?:{0}):/{{0,3}}(?:(?:\w+:)?\w+@)?)? # http://
([\w-]+\.)+(?:{1})(?:\:[0-9]+)?(?!\.\w)\b # xx.yy.tld(:##)? ([\w-]+\.)+(?:{1})(?:\:[0-9]+)?(?!\.\w)\b # xx.yy.tld(:##)?
(?:[/?][^\s\{{\}}\|\\\^\[\]`<>"]*)? (?:[/?][^\s\{{\}}\|\\\^`<>"]*)?
# /path/zz (excluding "unsafe" chars from RFC 1738, # /path/zz (excluding "unsafe" chars from RFC 3986,
# except for # and ~, which happen in practice) # except for # and ~, which happen in practice)
""".format( """.format(
"|".join(sorted(protocols)), "|".join(sorted(tlds)) "|".join(sorted(protocols)), "|".join(sorted(tlds))
@ -591,7 +591,7 @@ class LinkifyFilter(html5lib_shim.Filter):
in_a = False in_a = False
token_buffer = [] token_buffer = []
else: else:
token_buffer.append(token) token_buffer.extend(list(self.extract_entities(token)))
continue continue
if token["type"] in ["StartTag", "EmptyTag"]: if token["type"] in ["StartTag", "EmptyTag"]: