From dbffb519f53fbd5565a55566a54e29a280608669 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 24 Mar 2024 15:21:33 -0700 Subject: [PATCH] Bump bleach from 6.0.0 to 6.1.0 (#2177) * Bump bleach from 6.0.0 to 6.1.0 Bumps [bleach](https://github.com/mozilla/bleach) from 6.0.0 to 6.1.0. - [Changelog](https://github.com/mozilla/bleach/blob/main/CHANGES) - [Commits](https://github.com/mozilla/bleach/compare/v6.0.0...v6.1.0) --- updated-dependencies: - dependency-name: bleach dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] * Update bleach==6.1.0 --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: JonnyWong16 <9099342+JonnyWong16@users.noreply.github.com> [skip ci] --- lib/bleach/__init__.py | 4 ++-- lib/bleach/html5lib_shim.py | 15 +++++++++++---- lib/bleach/linkifier.py | 6 +++--- requirements.txt | 2 +- 4 files changed, 17 insertions(+), 10 deletions(-) diff --git a/lib/bleach/__init__.py b/lib/bleach/__init__.py index 4e87eb80..12e93b4d 100644 --- a/lib/bleach/__init__.py +++ b/lib/bleach/__init__.py @@ -11,9 +11,9 @@ from bleach.sanitizer import ( # yyyymmdd -__releasedate__ = "20230123" +__releasedate__ = "20231006" # x.y.z or x.y.z.dev0 -- semver -__version__ = "6.0.0" +__version__ = "6.1.0" __all__ = ["clean", "linkify"] diff --git a/lib/bleach/html5lib_shim.py b/lib/bleach/html5lib_shim.py index aa5189b1..ca1cc8c8 100644 --- a/lib/bleach/html5lib_shim.py +++ b/lib/bleach/html5lib_shim.py @@ -395,10 +395,17 @@ class BleachHTMLTokenizer(HTMLTokenizer): # followed by a series of characters. 
It's treated as a tag # name that abruptly ends, but we should treat that like # character data - yield { - "type": TAG_TOKEN_TYPE_CHARACTERS, - "data": "<" + self.currentToken["name"], - } + yield {"type": TAG_TOKEN_TYPE_CHARACTERS, "data": self.stream.get_tag()} + elif last_error_token["data"] in ( + "eof-in-attribute-name", + "eof-in-attribute-value-no-quotes", + ): + # Handle the case where the text being parsed ends with < + # followed by a series of characters and then space and then + # more characters. It's treated as a tag name followed by an + # attribute that abruptly ends, but we should treat that like + # character data. + yield {"type": TAG_TOKEN_TYPE_CHARACTERS, "data": self.stream.get_tag()} else: yield last_error_token diff --git a/lib/bleach/linkifier.py b/lib/bleach/linkifier.py index 679d7ead..8fcefb2c 100644 --- a/lib/bleach/linkifier.py +++ b/lib/bleach/linkifier.py @@ -45,8 +45,8 @@ def build_url_re(tlds=TLDS, protocols=html5lib_shim.allowed_protocols): r"""\(* # Match any opening parentheses. \b(?<![@.])(?:(?:{0}):/{{0,3}}(?:(?:\w+:)?\w+@)?)? # http:// ([\w-]+\.)+(?:{1})(?:\:[0-9]+)?(?!\.\w)\b # xx.yy.tld(:##)? - (?:[/?][^\s\{{\}}\|\\\^\[\]`<>"]*)? - # /path/zz (excluding "unsafe" chars from RFC 1738, + (?:[/?][^\s\{{\}}\|\\\^`<>"]*)? + # /path/zz (excluding "unsafe" chars from RFC 3986, # except for # and ~, which happen in practice) """.format( "|".join(sorted(protocols)), "|".join(sorted(tlds)) @@ -591,7 +591,7 @@ class LinkifyFilter(html5lib_shim.Filter): in_a = False token_buffer = [] else: - token_buffer.append(token) + token_buffer.extend(list(self.extract_entities(token))) continue if token["type"] in ["StartTag", "EmptyTag"]: diff --git a/requirements.txt b/requirements.txt index 7b0b98a8..8c8e91f3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ backports.csv==1.0.7 backports.functools-lru-cache==1.6.6 backports.zoneinfo==0.2.1;python_version<"3.9" beautifulsoup4==4.12.2 -bleach==6.0.0 +bleach==6.1.0 certifi==2023.7.22 cheroot==10.0.0 cherrypy==18.8.0