Update bleach==6.1.0

2025-08-22 06:13:25 -07:00 · 2024-03-24 15:00:46 -07:00 · 2024-03-24 15:00:46 -07:00 · b28128a7cb
commit b28128a7cb
parent 6b5202baac
3 changed files with 16 additions and 9 deletions
--- a/lib/bleach/__init__.py
+++ b/lib/bleach/__init__.py
@@ -11,9 +11,9 @@ from bleach.sanitizer import (
 # yyyymmdd
-__releasedate__ = "20230123"
+__releasedate__ = "20231006"
 # x.y.z or x.y.z.dev0 -- semver
-__version__ = "6.0.0"
+__version__ = "6.1.0"
 __all__ = ["clean", "linkify"]
--- a/lib/bleach/html5lib_shim.py
+++ b/lib/bleach/html5lib_shim.py
@@ -395,10 +395,17 @@ class BleachHTMLTokenizer(HTMLTokenizer):
                # followed by a series of characters. It's treated as a tag
                # name that abruptly ends, but we should treat that like
                # character data
-                yield {
+                yield {"type": TAG_TOKEN_TYPE_CHARACTERS, "data": self.stream.get_tag()}
-                    "type": TAG_TOKEN_TYPE_CHARACTERS,
+            elif last_error_token["data"] in (
-                    "data": "<" + self.currentToken["name"],
+                "eof-in-attribute-name",
-                }
+                "eof-in-attribute-value-no-quotes",
            ):
                # Handle the case where the text being parsed ends with <
                # followed by a series of characters and then space and then
                # more characters. It's treated as a tag name followed by an
                # attribute that abruptly ends, but we should treat that like
                # character data.
                yield {"type": TAG_TOKEN_TYPE_CHARACTERS, "data": self.stream.get_tag()}
            else:
                yield last_error_token
--- a/lib/bleach/linkifier.py
+++ b/lib/bleach/linkifier.py
@@ -45,8 +45,8 @@ def build_url_re(tlds=TLDS, protocols=html5lib_shim.allowed_protocols):
        r"""\(*  # Match any opening parentheses.
        \b(?<![@.])(?:(?:{0}):/{{0,3}}(?:(?:\w+:)?\w+@)?)?  # http://
        ([\w-]+\.)+(?:{1})(?:\:[0-9]+)?(?!\.\w)\b   # xx.yy.tld(:##)?
-        (?:[/?][^\s\{{\}}\|\\\^\[\]`<>"]*)?
+        (?:[/?][^\s\{{\}}\|\\\^`<>"]*)?
-            # /path/zz (excluding "unsafe" chars from RFC 1738,
+            # /path/zz (excluding "unsafe" chars from RFC 3986,
            # except for # and ~, which happen in practice)
        """.format(
            "|".join(sorted(protocols)), "|".join(sorted(tlds))
@@ -591,7 +591,7 @@ class LinkifyFilter(html5lib_shim.Filter):
                    in_a = False
                    token_buffer = []
                else:
-                    token_buffer.append(token)
+                    token_buffer.extend(list(self.extract_entities(token)))
                continue
            if token["type"] in ["StartTag", "EmptyTag"]: