Mirror of https://github.com/Tautulli/Tautulli.git (synced 2025-07-07)
Bump bleach from 4.1.0 to 5.0.0 (#1708)
* Bump bleach from 4.1.0 to 5.0.0

Bumps [bleach](https://github.com/mozilla/bleach) from 4.1.0 to 5.0.0.
- [Release notes](https://github.com/mozilla/bleach/releases)
- [Changelog](https://github.com/mozilla/bleach/blob/main/CHANGES)
- [Commits](https://github.com/mozilla/bleach/compare/v4.1.0...v5.0.0)

---
updated-dependencies:
- dependency-name: bleach
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>

* Update bleach==5.0.0

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: JonnyWong16 <9099342+JonnyWong16@users.noreply.github.com>
[skip ci]
parent d510e0f600
commit a1fe0b04d7
10 changed files with 264 additions and 151 deletions
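The substance of this bump is bleach's breaking 5.0.0 API change, visible throughout the diff below: the `styles` argument and `ALLOWED_STYLES` are gone, and CSS sanitizing now goes through an explicit `CSSSanitizer` backed by tinycss2. A minimal before/after sketch of the migration (assuming tinycss2, bleach's `css` extra, is importable; the HTML snippet is just an illustration):

import bleach
from bleach.css_sanitizer import CSSSanitizer

# bleach 4.1.0 (keyword removed in 5.0.0):
#   bleach.clean(html, tags=["p"], attributes={"p": ["style"]}, styles=["color"])

# bleach 5.0.0:
cleaned = bleach.clean(
    '<p style="color: red; position: fixed">hi</p>',
    tags=["p"],
    attributes={"p": ["style"]},
    css_sanitizer=CSSSanitizer(allowed_css_properties=["color"]),
)
# "color" is in the allow-list and survives; "position" is dropped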
lib/bleach/__init__.py
@@ -1,7 +1,3 @@
-# -*- coding: utf-8 -*-
-
-import packaging.version
-
 from bleach.linkifier import (
     DEFAULT_CALLBACKS,
     Linker,
@@ -9,17 +5,15 @@ from bleach.linkifier import (
 from bleach.sanitizer import (
     ALLOWED_ATTRIBUTES,
     ALLOWED_PROTOCOLS,
-    ALLOWED_STYLES,
     ALLOWED_TAGS,
     Cleaner,
 )
 
 
 # yyyymmdd
-__releasedate__ = "20210825"
+__releasedate__ = "20220407"
 # x.y.z or x.y.z.dev0 -- semver
-__version__ = "4.1.0"
-VERSION = packaging.version.Version(__version__)
+__version__ = "5.0.0"
 
 
 __all__ = ["clean", "linkify"]
@@ -29,10 +23,10 @@ def clean(
     text,
     tags=ALLOWED_TAGS,
     attributes=ALLOWED_ATTRIBUTES,
-    styles=ALLOWED_STYLES,
     protocols=ALLOWED_PROTOCOLS,
     strip=False,
     strip_comments=True,
+    css_sanitizer=None,
 ):
     """Clean an HTML fragment of malicious content and return it
 
@@ -64,9 +58,6 @@ def clean(
     :arg dict attributes: allowed attributes; can be a callable, list or dict;
         defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``
 
-    :arg list styles: allowed list of css styles; defaults to
-        ``bleach.sanitizer.ALLOWED_STYLES``
-
     :arg list protocols: allowed list of protocols for links; defaults
         to ``bleach.sanitizer.ALLOWED_PROTOCOLS``
 
@@ -74,16 +65,19 @@ def clean(
 
     :arg bool strip_comments: whether or not to strip HTML comments
 
+    :arg CSSSanitizer css_sanitizer: instance with a "sanitize_css" method for
+        sanitizing style attribute values and style text; defaults to None
+
     :returns: cleaned text as unicode
 
     """
     cleaner = Cleaner(
         tags=tags,
         attributes=attributes,
-        styles=styles,
         protocols=protocols,
         strip=strip,
         strip_comments=strip_comments,
+        css_sanitizer=css_sanitizer,
     )
     return cleaner.clean(text)
 
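Note that `bleach.VERSION`, a `packaging.version.Version`, disappears along with the `packaging.version` import; only the `__version__` string remains. A sketch of what callers that compared against `bleach.VERSION` have to do themselves now:

import bleach
import packaging.version

version = packaging.version.Version(bleach.__version__)
assert version >= packaging.version.Version("5.0.0")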
lib/bleach/_vendor/html5lib-1.1.dist-info/LICENSE (new file, 20 lines)
@@ -0,0 +1,20 @@
+Copyright (c) 2006-2013 James Graham and other contributors
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
lib/bleach/_vendor/html5lib-1.1.dist-info/REQUESTED (new file, empty)
lib/bleach/_vendor/parse.py.SHA256SUM (new file, 1 line)
@@ -0,0 +1 @@
+46af966e33b6247ae1d57d9459115a3eb46cda9f809c9f14e052abc2fe8dacb2 parse.py
lib/bleach/css_sanitizer.py (new file, 104 lines)
@@ -0,0 +1,104 @@
+import tinycss2
+
+
+ALLOWED_CSS_PROPERTIES = frozenset(
+    (
+        "azimuth",
+        "background-color",
+        "border-bottom-color",
+        "border-collapse",
+        "border-color",
+        "border-left-color",
+        "border-right-color",
+        "border-top-color",
+        "clear",
+        "color",
+        "cursor",
+        "direction",
+        "display",
+        "elevation",
+        "float",
+        "font",
+        "font-family",
+        "font-size",
+        "font-style",
+        "font-variant",
+        "font-weight",
+        "height",
+        "letter-spacing",
+        "line-height",
+        "overflow",
+        "pause",
+        "pause-after",
+        "pause-before",
+        "pitch",
+        "pitch-range",
+        "richness",
+        "speak",
+        "speak-header",
+        "speak-numeral",
+        "speak-punctuation",
+        "speech-rate",
+        "stress",
+        "text-align",
+        "text-decoration",
+        "text-indent",
+        "unicode-bidi",
+        "vertical-align",
+        "voice-family",
+        "volume",
+        "white-space",
+        "width",
+    )
+)
+
+
+ALLOWED_SVG_PROPERTIES = frozenset(
+    (
+        "fill",
+        "fill-opacity",
+        "fill-rule",
+        "stroke",
+        "stroke-width",
+        "stroke-linecap",
+        "stroke-linejoin",
+        "stroke-opacity",
+    )
+)
+
+
+class CSSSanitizer:
+    def __init__(
+        self,
+        allowed_css_properties=ALLOWED_CSS_PROPERTIES,
+        allowed_svg_properties=ALLOWED_SVG_PROPERTIES,
+    ):
+        self.allowed_css_properties = allowed_css_properties
+        self.allowed_svg_properties = allowed_svg_properties
+
+    def sanitize_css(self, style):
+        """Sanitizes css in style tags"""
+        parsed = tinycss2.parse_declaration_list(style)
+
+        if not parsed:
+            return ""
+
+        new_tokens = []
+        for token in parsed:
+            if token.type == "declaration":
+                if (
+                    token.lower_name in self.allowed_css_properties
+                    or token.lower_name in self.allowed_svg_properties
+                ):
+                    new_tokens.append(token)
+            elif token.type in ("comment", "whitespace"):
+                if new_tokens and new_tokens[-1].type != token.type:
+                    new_tokens.append(token)
+
+            # NOTE(willkg): We currently don't handle AtRule or ParseError and
+            # so both get silently thrown out
+
+        if not new_tokens:
+            return ""
+
+        return tinycss2.serialize(new_tokens).strip()
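The new module can be exercised on its own; a small usage sketch (tinycss2 must be importable, since the module imports it at the top):

from bleach.css_sanitizer import CSSSanitizer

sanitizer = CSSSanitizer()  # defaults to the ALLOWED_CSS_PROPERTIES / ALLOWED_SVG_PROPERTIES above
print(sanitizer.sanitize_css("color: red; behavior: url(evil.htc)"))
# keeps the "color" declaration; drops "behavior", which is in neither allow-list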
lib/bleach/html5lib_shim.py
@@ -36,6 +36,8 @@ from bleach._vendor.html5lib.filters.base import (
 )  # noqa: E402 module level import not at top of file
 from bleach._vendor.html5lib.filters.sanitizer import (
     allowed_protocols,
+    allowed_css_properties,
+    allowed_svg_properties,
 )  # noqa: E402 module level import not at top of file
 from bleach._vendor.html5lib.filters.sanitizer import (
     Filter as SanitizerFilter,
@@ -68,8 +70,10 @@ TAG_TOKEN_TYPES = {
     constants.tokenTypes["EndTag"],
     constants.tokenTypes["EmptyTag"],
 }
-CHARACTERS_TYPE = constants.tokenTypes["Characters"]
-PARSEERROR_TYPE = constants.tokenTypes["ParseError"]
+TAG_TOKEN_TYPE_START = constants.tokenTypes["StartTag"]
+TAG_TOKEN_TYPE_END = constants.tokenTypes["EndTag"]
+TAG_TOKEN_TYPE_CHARACTERS = constants.tokenTypes["Characters"]
+TAG_TOKEN_TYPE_PARSEERROR = constants.tokenTypes["ParseError"]
 
 
 #: List of valid HTML tags, from WHATWG HTML Living Standard as of 2018-10-17
@@ -190,6 +194,48 @@ HTML_TAGS = [
 ]
 
 
+#: List of block level HTML tags, as per https://github.com/mozilla/bleach/issues/369
+#: from mozilla on 2019.07.11
+#: https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements#Elements
+HTML_TAGS_BLOCK_LEVEL = frozenset(
+    [
+        "address",
+        "article",
+        "aside",
+        "blockquote",
+        "details",
+        "dialog",
+        "dd",
+        "div",
+        "dl",
+        "dt",
+        "fieldset",
+        "figcaption",
+        "figure",
+        "footer",
+        "form",
+        "h1",
+        "h2",
+        "h3",
+        "h4",
+        "h5",
+        "h6",
+        "header",
+        "hgroup",
+        "hr",
+        "li",
+        "main",
+        "nav",
+        "ol",
+        "p",
+        "pre",
+        "section",
+        "table",
+        "ul",
+    ]
+)
+
+
 class InputStreamWithMemory:
     """Wraps an HTMLInputStream to remember characters since last <
 
@@ -257,17 +303,20 @@ class BleachHTMLTokenizer(HTMLTokenizer):
     """Tokenizer that doesn't consume character entities"""
 
     def __init__(self, consume_entities=False, **kwargs):
-        super(BleachHTMLTokenizer, self).__init__(**kwargs)
+        super().__init__(**kwargs)
 
         self.consume_entities = consume_entities
 
         # Wrap the stream with one that remembers the history
         self.stream = InputStreamWithMemory(self.stream)
 
+        # Remember the last token emitted; needed for block element spacing
+        self.emitted_last_token = None
+
     def __iter__(self):
         last_error_token = None
 
-        for token in super(BleachHTMLTokenizer, self).__iter__():
+        for token in super().__iter__():
             if last_error_token is not None:
                 if (
                     last_error_token["data"] == "invalid-character-in-attribute-name"
@@ -309,12 +358,12 @@ class BleachHTMLTokenizer(HTMLTokenizer):
                     # If this is not an allowed tag, then we convert it to
                     # characters and it'll get escaped in the sanitizer.
                     token["data"] = self.stream.get_tag()
-                    token["type"] = CHARACTERS_TYPE
+                    token["type"] = TAG_TOKEN_TYPE_CHARACTERS
 
                 last_error_token = None
                 yield token
 
-            elif token["type"] == PARSEERROR_TYPE:
+            elif token["type"] == TAG_TOKEN_TYPE_PARSEERROR:
                 # If the token is a parse error, then let the last_error_token
                 # go, and make token the new last_error_token
                 yield last_error_token
@@ -329,7 +378,7 @@ class BleachHTMLTokenizer(HTMLTokenizer):
 
             # If the token is a ParseError, we hold on to it so we can get the
             # next token and potentially fix it.
-            if token["type"] == PARSEERROR_TYPE:
+            if token["type"] == TAG_TOKEN_TYPE_PARSEERROR:
                 last_error_token = token
                 continue
 
@@ -342,9 +391,7 @@ class BleachHTMLTokenizer(HTMLTokenizer):
         # If this tokenizer is set to consume entities, then we can let the
         # superclass do its thing.
         if self.consume_entities:
-            return super(BleachHTMLTokenizer, self).consumeEntity(
-                allowedChar, fromAttribute
-            )
+            return super().consumeEntity(allowedChar, fromAttribute)
 
         # If this tokenizer is set to not consume entities, then we don't want
         # to consume and convert them, so this overrides the html5lib tokenizer's
@@ -356,7 +403,7 @@ class BleachHTMLTokenizer(HTMLTokenizer):
             self.currentToken["data"][-1][1] += "&"
 
         else:
-            self.tokenQueue.append({"type": CHARACTERS_TYPE, "data": "&"})
+            self.tokenQueue.append({"type": TAG_TOKEN_TYPE_CHARACTERS, "data": "&"})
 
     def tagOpenState(self):
         # This state marks a < that is either a StartTag, EndTag, EmptyTag,
@@ -364,7 +411,7 @@ class BleachHTMLTokenizer(HTMLTokenizer):
         # we've collected so far and we do that by calling start_tag() on
         # the input stream wrapper.
         self.stream.start_tag()
-        return super(BleachHTMLTokenizer, self).tagOpenState()
+        return super().tagOpenState()
 
     def emitCurrentToken(self):
         token = self.currentToken
@@ -378,9 +425,19 @@ class BleachHTMLTokenizer(HTMLTokenizer):
             # allowed list, then it gets stripped or escaped. In both of these
             # cases it gets converted to a Characters token.
             if self.parser.strip:
-                # If we're stripping the token, we just throw in an empty
-                # string token.
-                new_data = ""
+                if (
+                    self.emitted_last_token
+                    and token["type"] == TAG_TOKEN_TYPE_START
+                    and token["name"].lower() in HTML_TAGS_BLOCK_LEVEL
+                ):
+                    # If this is a block level tag we're stripping, we drop it
+                    # for a newline because that's what a browser would parse
+                    # it as
+                    new_data = "\n"
+                else:
+                    # For all other things being stripped, we throw in an empty
+                    # string token
+                    new_data = ""
 
             else:
                 # If we're escaping the token, we want to escape the exact
@@ -390,14 +447,15 @@ class BleachHTMLTokenizer(HTMLTokenizer):
                 # string and use that.
                 new_data = self.stream.get_tag()
 
-            new_token = {"type": CHARACTERS_TYPE, "data": new_data}
+            new_token = {"type": TAG_TOKEN_TYPE_CHARACTERS, "data": new_data}
 
-            self.currentToken = new_token
+            self.currentToken = self.emitted_last_token = new_token
             self.tokenQueue.append(new_token)
             self.state = self.dataState
             return
 
-        super(BleachHTMLTokenizer, self).emitCurrentToken()
+        self.emitted_last_token = self.currentToken
+        super().emitCurrentToken()
 
 
 class BleachHTMLParser(HTMLParser):
@@ -416,7 +474,7 @@ class BleachHTMLParser(HTMLParser):
         self.tags = [tag.lower() for tag in tags] if tags is not None else None
         self.strip = strip
         self.consume_entities = consume_entities
-        super(BleachHTMLParser, self).__init__(**kwargs)
+        super().__init__(**kwargs)
 
     def _parse(
         self, stream, innerHTML=False, container="div", scripting=True, **kwargs
@@ -514,13 +572,13 @@ def convert_entities(text):
 def match_entity(stream):
     """Returns first entity in stream or None if no entity exists
 
-    Note: For Bleach purposes, entities must start with a "&" and end with
-    a ";". This ignoresambiguous character entities that have no ";" at the
-    end.
+    Note: For Bleach purposes, entities must start with a "&" and end with a
+    ";". This ignores ambiguous character entities that have no ";" at the end.
 
     :arg stream: the character stream
 
-    :returns: ``None`` or the entity string without "&" or ";"
+    :returns: the entity string without "&" or ";" if it's a valid character
+        entity; ``None`` otherwise
 
     """
     # Nix the & at the beginning
@@ -559,9 +617,11 @@ def match_entity(stream):
     # Handle character entities
     while stream and stream[0] not in end_characters:
         c = stream.pop(0)
-        if not ENTITIES_TRIE.has_keys_with_prefix(possible_entity):
-            break
         possible_entity += c
+        if not ENTITIES_TRIE.has_keys_with_prefix(possible_entity):
+            # If it's not a prefix, then it's not an entity and we're
+            # out
+            return None
 
     if possible_entity and stream and stream[0] == ";":
         return possible_entity
@@ -642,15 +702,14 @@ class BleachHTMLSerializer(HTMLSerializer):
         in_tag = False
         after_equals = False
 
-        for stoken in super(BleachHTMLSerializer, self).serialize(treewalker, encoding):
+        for stoken in super().serialize(treewalker, encoding):
             if in_tag:
                 if stoken == ">":
                     in_tag = False
 
                 elif after_equals:
                     if stoken != '"':
-                        for part in self.escape_base_amp(stoken):
-                            yield part
+                        yield from self.escape_base_amp(stoken)
 
                     after_equals = False
                     continue
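The `HTML_TAGS_BLOCK_LEVEL` / `emitted_last_token` machinery above changes stripping behavior: a stripped block-level start tag that follows an already-emitted token now becomes a newline instead of vanishing, roughly matching how a browser would break the text. A sketch of the expected difference:

import bleach

bleach.clean("<p>Te<b>st</b>!</p><p>Hello</p>", tags=[], strip=True)
# bleach 4.1.0: "Test!Hello"
# bleach 5.0.0: "Test!\nHello"  (the second stripped <p> turns into a newline)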
lib/bleach/linkifier.py
@@ -2,7 +2,6 @@ import re
 
 from bleach import callbacks as linkify_callbacks
 from bleach import html5lib_shim
-from bleach.utils import alphabetize_attributes
 
 
 #: List of default callbacks
@@ -155,7 +154,7 @@ class Linker:
             omit_optional_tags=False,
             # linkify does not sanitize
             sanitize=False,
-            # linkify alphabetizes
+            # linkify preserves attr order
             alphabetical_attributes=False,
         )
 
@@ -228,7 +227,7 @@ class LinkifyFilter(html5lib_shim.Filter):
         :arg re email_re: email matching regex
 
         """
-        super(LinkifyFilter, self).__init__(source)
+        super().__init__(source)
 
         self.callbacks = callbacks or []
         self.skip_tags = skip_tags or []
@@ -316,7 +315,6 @@ class LinkifyFilter(html5lib_shim.Filter):
             else:
                 # Add an "a" tag for the new link
                 _text = attrs.pop("_text", "")
-                attrs = alphabetize_attributes(attrs)
                 new_tokens.extend(
                     [
                         {"type": "StartTag", "name": "a", "data": attrs},
@@ -332,8 +330,7 @@ class LinkifyFilter(html5lib_shim.Filter):
             if end < len(text):
                 new_tokens.append({"type": "Characters", "data": text[end:]})
 
-            for new_token in new_tokens:
-                yield new_token
+            yield from new_tokens
 
             continue
 
@@ -439,8 +436,6 @@ class LinkifyFilter(html5lib_shim.Filter):
                 new_tokens.append({"type": "Characters", "data": prefix})
 
             _text = attrs.pop("_text", "")
-            attrs = alphabetize_attributes(attrs)
-
             new_tokens.extend(
                 [
                     {"type": "StartTag", "name": "a", "data": attrs},
@@ -460,8 +455,7 @@ class LinkifyFilter(html5lib_shim.Filter):
             if end < len(text):
                 new_tokens.append({"type": "Characters", "data": text[end:]})
 
-            for new_token in new_tokens:
-                yield new_token
+            yield from new_tokens
 
             continue
 
@@ -493,14 +487,13 @@ class LinkifyFilter(html5lib_shim.Filter):
 
         else:
             new_text = attrs.pop("_text", "")
-            a_token["data"] = alphabetize_attributes(attrs)
+            a_token["data"] = attrs
 
             if text == new_text:
                 # The callbacks didn't change the text, so we yield the new "a"
                 # token, then whatever else was there, then the end "a" token
                 yield a_token
-                for mem in token_buffer[1:]:
-                    yield mem
+                yield from token_buffer[1:]
 
             else:
                 # If the callbacks changed the text, then we're going to drop
@@ -516,7 +509,7 @@ class LinkifyFilter(html5lib_shim.Filter):
 
         token_buffer = []
 
-        for token in super(LinkifyFilter, self).__iter__():
+        for token in super().__iter__():
             if in_a:
                 # Handle the case where we're in an "a" tag--we want to buffer tokens
                 # until we hit an end "a" tag.
@@ -524,8 +517,7 @@ class LinkifyFilter(html5lib_shim.Filter):
                 # Add the end tag to the token buffer and then handle them
                 # and yield anything returned
                 token_buffer.append(token)
-                for new_token in self.handle_a_tag(token_buffer):
-                    yield new_token
+                yield from self.handle_a_tag(token_buffer)
 
                 # Clear "a" related state and continue since we've yielded all
                 # the tokens we're going to yield
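On the linkifier side, dropping `alphabetize_attributes` means attributes on `<a>` tags flow through in source order, with callback-added attributes (like `rel="nofollow"`) appended, instead of being re-sorted. A sketch of the expected difference:

import bleach

bleach.linkify('<a title="docs" href="https://example.com">docs</a>')
# bleach 4.1.0 re-sorts:    '<a href="https://example.com" rel="nofollow" title="docs">docs</a>'
# bleach 5.0.0 keeps order: '<a title="docs" href="https://example.com" rel="nofollow">docs</a>'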
lib/bleach/sanitizer.py
@@ -6,7 +6,6 @@ from bleach._vendor.parse import urlparse
 from xml.sax.saxutils import unescape
 
 from bleach import html5lib_shim
-from bleach.utils import alphabetize_attributes
 
 
 #: List of allowed tags
@@ -33,9 +32,6 @@ ALLOWED_ATTRIBUTES = {
     "acronym": ["title"],
 }
 
-#: List of allowed styles
-ALLOWED_STYLES = []
-
 #: List of allowed protocols
 ALLOWED_PROTOCOLS = ["http", "https", "mailto"]
 
@@ -85,11 +81,11 @@ class Cleaner:
         self,
         tags=ALLOWED_TAGS,
         attributes=ALLOWED_ATTRIBUTES,
-        styles=ALLOWED_STYLES,
         protocols=ALLOWED_PROTOCOLS,
         strip=False,
         strip_comments=True,
         filters=None,
+        css_sanitizer=None,
     ):
         """Initializes a Cleaner
 
@@ -99,9 +95,6 @@ class Cleaner:
         :arg dict attributes: allowed attributes; can be a callable, list or dict;
             defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``
 
-        :arg list styles: allowed list of css styles; defaults to
-            ``bleach.sanitizer.ALLOWED_STYLES``
-
         :arg list protocols: allowed list of protocols for links; defaults
             to ``bleach.sanitizer.ALLOWED_PROTOCOLS``
 
@@ -118,14 +111,17 @@ class Cleaner:
             Using filters changes the output of ``bleach.Cleaner.clean``.
             Make sure the way the filters change the output are secure.
 
+        :arg CSSSanitizer css_sanitizer: instance with a "sanitize_css" method for
+            sanitizing style attribute values and style text; defaults to None
+
         """
         self.tags = tags
         self.attributes = attributes
-        self.styles = styles
         self.protocols = protocols
         self.strip = strip
         self.strip_comments = strip_comments
         self.filters = filters or []
+        self.css_sanitizer = css_sanitizer
 
         self.parser = html5lib_shim.BleachHTMLParser(
             tags=self.tags,
@@ -143,7 +139,7 @@ class Cleaner:
             resolve_entities=False,
             # Bleach has its own sanitizer, so don't use the html5lib one
             sanitize=False,
-            # Bleach sanitizer alphabetizes already, so don't use the html5lib one
+            # clean preserves attr order
             alphabetical_attributes=False,
         )
 
@@ -175,11 +171,10 @@ class Cleaner:
             attributes=self.attributes,
             strip_disallowed_elements=self.strip,
             strip_html_comments=self.strip_comments,
+            css_sanitizer=self.css_sanitizer,
             # html5lib-sanitizer things
             allowed_elements=self.tags,
-            allowed_css_properties=self.styles,
             allowed_protocols=self.protocols,
-            allowed_svg_properties=[],
         )
 
         # Apply any filters after the BleachSanitizerFilter
@@ -242,25 +237,25 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
     def __init__(
         self,
         source,
+        allowed_elements=ALLOWED_TAGS,
         attributes=ALLOWED_ATTRIBUTES,
+        allowed_protocols=ALLOWED_PROTOCOLS,
         strip_disallowed_elements=False,
         strip_html_comments=True,
+        css_sanitizer=None,
         **kwargs,
     ):
         """Creates a BleachSanitizerFilter instance
 
         :arg Treewalker source: stream
 
-        :arg list tags: allowed list of tags; defaults to
+        :arg list allowed_elements: allowed list of tags; defaults to
             ``bleach.sanitizer.ALLOWED_TAGS``
 
         :arg dict attributes: allowed attributes; can be a callable, list or dict;
             defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``
 
-        :arg list styles: allowed list of css styles; defaults to
-            ``bleach.sanitizer.ALLOWED_STYLES``
-
-        :arg list protocols: allowed list of protocols for links; defaults
+        :arg list allowed_protocols: allowed list of protocols for links; defaults
             to ``bleach.sanitizer.ALLOWED_PROTOCOLS``
 
         :arg bool strip_disallowed_elements: whether or not to strip disallowed
@@ -268,10 +263,14 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
 
         :arg bool strip_html_comments: whether or not to strip HTML comments
 
+        :arg CSSSanitizer css_sanitizer: instance with a "sanitize_css" method for
+            sanitizing style attribute values and style text; defaults to None
+
         """
         self.attr_filter = attribute_filter_factory(attributes)
         self.strip_disallowed_elements = strip_disallowed_elements
         self.strip_html_comments = strip_html_comments
+        self.css_sanitizer = css_sanitizer
 
         # filter out html5lib deprecation warnings to use bleach from BleachSanitizerFilter init
         warnings.filterwarnings(
@@ -280,7 +279,12 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
             category=DeprecationWarning,
             module="bleach._vendor.html5lib",
         )
-        return super(BleachSanitizerFilter, self).__init__(source, **kwargs)
+        return super().__init__(
+            source,
+            allowed_elements=allowed_elements,
+            allowed_protocols=allowed_protocols,
+            **kwargs,
+        )
 
     def sanitize_stream(self, token_iterator):
         for token in token_iterator:
@@ -290,8 +294,7 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
                 continue
 
             if isinstance(ret, list):
-                for subtoken in ret:
-                    yield subtoken
+                yield from ret
             else:
                 yield ret
 
@@ -358,10 +361,6 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
                 return None
 
             else:
-                if "data" in token:
-                    # Alphabetize the attributes before calling .disallowed_token()
-                    # so that the resulting string is stable
-                    token["data"] = alphabetize_attributes(token["data"])
                 return self.disallowed_token(token)
 
         elif token_type == "Comment":
@@ -547,12 +546,21 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
 
             # If it's a style attribute, sanitize it
            if namespaced_name == (None, "style"):
-                val = self.sanitize_css(val)
+                if self.css_sanitizer:
+                    val = self.css_sanitizer.sanitize_css(val)
+                else:
+                    # FIXME(willkg): if style is allowed, but no
+                    # css_sanitizer was set up, then this is probably a
+                    # mistake and we should raise an error here
+                    #
+                    # For now, we're going to set the value to "" because
+                    # there was no sanitizer set
+                    val = ""
 
             # At this point, we want to keep the attribute, so add it in
             attrs[namespaced_name] = val
 
-        token["data"] = alphabetize_attributes(attrs)
+        token["data"] = attrs
 
         return token
 
@@ -575,7 +583,7 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
             if ns is None or ns not in html5lib_shim.prefixes:
                 namespaced_name = name
             else:
-                namespaced_name = "%s:%s" % (html5lib_shim.prefixes[ns], name)
+                namespaced_name = "{}:{}".format(html5lib_shim.prefixes[ns], name)
 
             attrs.append(
                 ' %s="%s"'
@@ -587,7 +595,7 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
                     v,
                 )
             )
-        token["data"] = "<%s%s>" % (token["name"], "".join(attrs))
+        token["data"] = "<{}{}>".format(token["name"], "".join(attrs))
 
     else:
         token["data"] = "<%s>" % token["name"]
@@ -599,47 +607,3 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
 
         del token["name"]
         return token
-
-    def sanitize_css(self, style):
-        """Sanitizes css in style tags"""
-        # Convert entities in the style so that it can be parsed as CSS
-        style = html5lib_shim.convert_entities(style)
-
-        # Drop any url values before we do anything else
-        style = re.compile(r"url\s*\(\s*[^\s)]+?\s*\)\s*").sub(" ", style)
-
-        # The gauntlet of sanitization
-
-        # Validate the css in the style tag and if it's not valid, then drop
-        # the whole thing.
-        parts = style.split(";")
-        gauntlet = re.compile(
-            r"""^(  # consider a style attribute value as composed of:
-[/:,#%!.\s\w]       # a non-newline character
-|\w-\w              # 3 characters in the form \w-\w
-|'[\s\w]+'\s*       # a single quoted string of [\s\w]+ with trailing space
-|"[\s\w]+"          # a double quoted string of [\s\w]+
-|\([\d,%\.\s]+\)    # a parenthesized string of one or more digits, commas, periods, ...
-)*$""",  # ... percent signs, or whitespace e.g. from 'color: hsl(30,100%,50%)'
-            flags=re.U | re.VERBOSE,
-        )
-
-        for part in parts:
-            if not gauntlet.match(part):
-                return ""
-
-        if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
-            return ""
-
-        clean = []
-        for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style):
-            if not value:
-                continue
-
-            if prop.lower() in self.allowed_css_properties:
-                clean.append(prop + ": " + value + ";")
-
-            elif prop.lower() in self.allowed_svg_properties:
-                clean.append(prop + ": " + value + ";")
-
-        return " ".join(clean)
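One edge case worth noting from the style-attribute hunk above: if `style` is allowed but no `css_sanitizer` was configured, the attribute survives with its value blanked (see the FIXME) rather than being run through the old regex gauntlet. A sketch:

import bleach
from bleach.css_sanitizer import CSSSanitizer

bleach.clean('<p style="color: red">x</p>', tags=["p"], attributes={"p": ["style"]})
# no css_sanitizer configured -> '<p style="">x</p>'

bleach.clean(
    '<p style="color: red">x</p>',
    tags=["p"],
    attributes={"p": ["style"]},
    css_sanitizer=CSSSanitizer(allowed_css_properties=["color"]),
)
# with a sanitizer, the allowed "color" declaration is kept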
lib/bleach/utils.py (deleted, 21 lines)
@@ -1,21 +0,0 @@
-from collections import OrderedDict
-
-
-def _attr_key(attr):
-    """Returns appropriate key for sorting attribute names
-
-    Attribute names are a tuple of ``(namespace, name)`` where namespace can be
-    ``None`` or a string. These can't be compared in Python 3, so we conver the
-    ``None`` to an empty string.
-
-    """
-    key = (attr[0][0] or ""), attr[0][1]
-    return key
-
-
-def alphabetize_attributes(attrs):
-    """Takes a dict of attributes (or None) and returns them alphabetized"""
-    if not attrs:
-        return attrs
-
-    return OrderedDict([(k, v) for k, v in sorted(attrs.items(), key=_attr_key)])
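With `bleach/utils.py` deleted, nothing alphabetizes attributes anymore (matching the "clean preserves attr order" comments earlier), so `clean` keeps source attribute order too. A sketch using the default allow-list, where `<a>` permits `href` and `title`:

import bleach

bleach.clean('<a title="t" href="https://example.com">x</a>')
# bleach 4.1.0 alphabetized:        '<a href="https://example.com" title="t">x</a>'
# bleach 5.0.0 preserves the order: '<a title="t" href="https://example.com">x</a>'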
requirements.txt
@@ -5,7 +5,7 @@ backports.csv==1.0.7
 backports.functools-lru-cache==1.6.4
 backports.zoneinfo==0.2.1
 beautifulsoup4==4.10.0
-bleach==4.1.0
+bleach==5.0.0
 certifi==2021.10.8
 cheroot==8.6.0
 cherrypy==18.6.1