Bump bleach from 6.0.0 to 6.1.0 (#2177)

* Bump bleach from 6.0.0 to 6.1.0

Bumps [bleach](https://github.com/mozilla/bleach) from 6.0.0 to 6.1.0.
- [Changelog](https://github.com/mozilla/bleach/blob/main/CHANGES)
- [Commits](https://github.com/mozilla/bleach/compare/v6.0.0...v6.1.0)

---
updated-dependencies:
- dependency-name: bleach
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Update bleach==6.1.0

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: JonnyWong16 <9099342+JonnyWong16@users.noreply.github.com>

[skip ci]
Author: dependabot[bot], committed by GitHub on 2024-03-24 15:21:33 -07:00
Commit: dbffb519f5 (parent: e307796475)
4 changed files with 17 additions and 10 deletions


@@ -11,9 +11,9 @@ from bleach.sanitizer import (
 # yyyymmdd
-__releasedate__ = "20230123"
+__releasedate__ = "20231006"
 # x.y.z or x.y.z.dev0 -- semver
-__version__ = "6.0.0"
+__version__ = "6.1.0"
 __all__ = ["clean", "linkify"]
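
As a quick sanity check (not part of the commit), the bumped metadata is importable at runtime; this sketch assumes bleach 6.1.0 is installed:

    import bleach

    # Both values come from the hunk above.
    assert bleach.__version__ == "6.1.0"
    assert bleach.__releasedate__ == "20231006"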


@@ -395,10 +395,17 @@ class BleachHTMLTokenizer(HTMLTokenizer):
             # Handle the case where the text being parsed ends with <
             # followed by a series of characters. It's treated as a tag
             # name that abruptly ends, but we should treat that like
             # character data
-            yield {
-                "type": TAG_TOKEN_TYPE_CHARACTERS,
-                "data": "<" + self.currentToken["name"],
-            }
+            yield {"type": TAG_TOKEN_TYPE_CHARACTERS, "data": self.stream.get_tag()}
+        elif last_error_token["data"] in (
+            "eof-in-attribute-name",
+            "eof-in-attribute-value-no-quotes",
+        ):
+            # Handle the case where the text being parsed ends with <
+            # followed by a series of characters and then space and then
+            # more characters. It's treated as a tag name followed by an
+            # attribute that abruptly ends, but we should treat that like
+            # character data.
+            yield {"type": TAG_TOKEN_TYPE_CHARACTERS, "data": self.stream.get_tag()}
         else:
             yield last_error_token
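
For illustration, a minimal sketch of the parsing case this hunk targets, driven through the public bleach.clean() API; the exact escaped output is an assumption based on the new character-data fallback:

    import bleach

    # The input hits end-of-stream inside an attribute name
    # ("eof-in-attribute-name"), so the tokenizer now yields the raw
    # source text as character data instead of dropping it; clean()
    # then escapes the stray "<".
    print(bleach.clean("some text <tag attr"))
    # expected (assumption): some text &lt;tag attr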


@@ -45,8 +45,8 @@ def build_url_re(tlds=TLDS, protocols=html5lib_shim.allowed_protocols):
         r"""\(*  # Match any opening parentheses.
         \b(?<![@.])(?:(?:{0}):/{{0,3}}(?:(?:\w+:)?\w+@)?)?  # http://
         ([\w-]+\.)+(?:{1})(?:\:[0-9]+)?(?!\.\w)\b   # xx.yy.tld(:##)?
-        (?:[/?][^\s\{{\}}\|\\\^\[\]`<>"]*)?
-        # /path/zz (excluding "unsafe" chars from RFC 1738,
+        (?:[/?][^\s\{{\}}\|\\\^`<>"]*)?
+        # /path/zz (excluding "unsafe" chars from RFC 3986,
         # except for # and ~, which happen in practice)
         """.format(
             "|".join(sorted(protocols)), "|".join(sorted(tlds))
@@ -591,7 +591,7 @@ class LinkifyFilter(html5lib_shim.Filter):
                 in_a = False
                 token_buffer = []
             else:
-                token_buffer.append(token)
+                token_buffer.extend(list(self.extract_entities(token)))
             continue

         if token["type"] in ["StartTag", "EmptyTag"]:


@@ -4,7 +4,7 @@ backports.csv==1.0.7
 backports.functools-lru-cache==1.6.6
 backports.zoneinfo==0.2.1;python_version<"3.9"
 beautifulsoup4==4.12.2
-bleach==6.0.0
+bleach==6.1.0
 certifi==2023.7.22
 cheroot==10.0.0
 cherrypy==18.8.0