Bump bleach from 4.1.0 to 5.0.0 (#1708)

* Bump bleach from 4.1.0 to 5.0.0

Bumps [bleach](https://github.com/mozilla/bleach) from 4.1.0 to 5.0.0.
- [Release notes](https://github.com/mozilla/bleach/releases)
- [Changelog](https://github.com/mozilla/bleach/blob/main/CHANGES)
- [Commits](https://github.com/mozilla/bleach/compare/v4.1.0...v5.0.0)

---
updated-dependencies:
- dependency-name: bleach
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>

* Update bleach==5.0.0

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: JonnyWong16 <9099342+JonnyWong16@users.noreply.github.com>

[skip ci]
dependabot[bot] 2022-05-16 20:41:47 -07:00 committed by GitHub
parent d510e0f600
commit a1fe0b04d7
10 changed files with 264 additions and 151 deletions
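
The substantive API change in this bump, visible throughout the diff below: bleach 5.0.0 removes the styles=/ALLOWED_STYLES arguments from clean() and Cleaner and replaces them with an explicit css_sanitizer object backed by tinycss2 (the new lib/bleach/css_sanitizer.py). A minimal migration sketch for callers of this vendored copy; the allowed property list here is illustrative, not something this commit configures:

    import bleach
    from bleach.css_sanitizer import CSSSanitizer  # new in 5.0.0; requires tinycss2

    dirty = '<p style="color: red; behavior: url(evil.htc)">hi</p>'

    # bleach 4.1.0 (no longer accepted by 5.0.0):
    #     bleach.clean(dirty, tags=["p"], attributes={"p": ["style"]}, styles=["color"])

    # bleach 5.0.0: pass a CSSSanitizer instead of a styles list
    css_sanitizer = CSSSanitizer(allowed_css_properties=["color"])
    cleaned = bleach.clean(
        dirty,
        tags=["p"],
        attributes={"p": ["style"]},
        css_sanitizer=css_sanitizer,
    )
    # keeps the color declaration, drops the disallowed "behavior" property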


@@ -1,7 +1,3 @@
-# -*- coding: utf-8 -*-
-import packaging.version
-
 from bleach.linkifier import (
     DEFAULT_CALLBACKS,
     Linker,
@@ -9,17 +5,15 @@ from bleach.linkifier import (
 from bleach.sanitizer import (
     ALLOWED_ATTRIBUTES,
     ALLOWED_PROTOCOLS,
-    ALLOWED_STYLES,
     ALLOWED_TAGS,
     Cleaner,
 )

 # yyyymmdd
-__releasedate__ = "20210825"
+__releasedate__ = "20220407"
 # x.y.z or x.y.z.dev0 -- semver
-__version__ = "4.1.0"
-VERSION = packaging.version.Version(__version__)
+__version__ = "5.0.0"

 __all__ = ["clean", "linkify"]
@@ -29,10 +23,10 @@ def clean(
     text,
     tags=ALLOWED_TAGS,
     attributes=ALLOWED_ATTRIBUTES,
-    styles=ALLOWED_STYLES,
     protocols=ALLOWED_PROTOCOLS,
     strip=False,
     strip_comments=True,
+    css_sanitizer=None,
 ):
     """Clean an HTML fragment of malicious content and return it
@@ -64,9 +58,6 @@ def clean(
     :arg dict attributes: allowed attributes; can be a callable, list or dict;
         defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``

-    :arg list styles: allowed list of css styles; defaults to
-        ``bleach.sanitizer.ALLOWED_STYLES``
-
     :arg list protocols: allowed list of protocols for links; defaults
         to ``bleach.sanitizer.ALLOWED_PROTOCOLS``
@@ -74,16 +65,19 @@ def clean(
     :arg bool strip_comments: whether or not to strip HTML comments

+    :arg CSSSanitizer css_sanitizer: instance with a "sanitize_css" method for
+        sanitizing style attribute values and style text; defaults to None
+
     :returns: cleaned text as unicode

     """
     cleaner = Cleaner(
         tags=tags,
         attributes=attributes,
-        styles=styles,
         protocols=protocols,
         strip=strip,
         strip_comments=strip_comments,
+        css_sanitizer=css_sanitizer,
     )
     return cleaner.clean(text)


@@ -0,0 +1,20 @@
+Copyright (c) 2006-2013 James Graham and other contributors
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


@@ -0,0 +1 @@
+46af966e33b6247ae1d57d9459115a3eb46cda9f809c9f14e052abc2fe8dacb2 parse.py

lib/bleach/css_sanitizer.py (new file)

@@ -0,0 +1,104 @@
+import tinycss2
+
+
+ALLOWED_CSS_PROPERTIES = frozenset(
+    (
+        "azimuth",
+        "background-color",
+        "border-bottom-color",
+        "border-collapse",
+        "border-color",
+        "border-left-color",
+        "border-right-color",
+        "border-top-color",
+        "clear",
+        "color",
+        "cursor",
+        "direction",
+        "display",
+        "elevation",
+        "float",
+        "font",
+        "font-family",
+        "font-size",
+        "font-style",
+        "font-variant",
+        "font-weight",
+        "height",
+        "letter-spacing",
+        "line-height",
+        "overflow",
+        "pause",
+        "pause-after",
+        "pause-before",
+        "pitch",
+        "pitch-range",
+        "richness",
+        "speak",
+        "speak-header",
+        "speak-numeral",
+        "speak-punctuation",
+        "speech-rate",
+        "stress",
+        "text-align",
+        "text-decoration",
+        "text-indent",
+        "unicode-bidi",
+        "vertical-align",
+        "voice-family",
+        "volume",
+        "white-space",
+        "width",
+    )
+)
+
+ALLOWED_SVG_PROPERTIES = frozenset(
+    (
+        "fill",
+        "fill-opacity",
+        "fill-rule",
+        "stroke",
+        "stroke-width",
+        "stroke-linecap",
+        "stroke-linejoin",
+        "stroke-opacity",
+    )
+)
+
+
+class CSSSanitizer:
+    def __init__(
+        self,
+        allowed_css_properties=ALLOWED_CSS_PROPERTIES,
+        allowed_svg_properties=ALLOWED_SVG_PROPERTIES,
+    ):
+        self.allowed_css_properties = allowed_css_properties
+        self.allowed_svg_properties = allowed_svg_properties
+
+    def sanitize_css(self, style):
+        """Sanitizes css in style tags"""
+        parsed = tinycss2.parse_declaration_list(style)
+
+        if not parsed:
+            return ""
+
+        new_tokens = []
+        for token in parsed:
+            if token.type == "declaration":
+                if (
+                    token.lower_name in self.allowed_css_properties
+                    or token.lower_name in self.allowed_svg_properties
+                ):
+                    new_tokens.append(token)
+            elif token.type in ("comment", "whitespace"):
+                if new_tokens and new_tokens[-1].type != token.type:
+                    new_tokens.append(token)
+
+            # NOTE(willkg): We currently don't handle AtRule or ParseError and
+            # so both get silently thrown out
+
+        if not new_tokens:
+            return ""
+
+        return tinycss2.serialize(new_tokens).strip()
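
A quick sketch of the new class in isolation (my example, not part of the commit): declarations survive or get dropped purely by property name, checked against the frozensets above.

    from bleach.css_sanitizer import CSSSanitizer

    sanitizer = CSSSanitizer()  # defaults to ALLOWED_CSS_PROPERTIES / ALLOWED_SVG_PROPERTIES
    sanitizer.sanitize_css("color: red; float: left")       # both allowed -> kept
    sanitizer.sanitize_css("position: fixed; color: blue")  # "position" dropped, "color" kept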


@@ -36,6 +36,8 @@ from bleach._vendor.html5lib.filters.base import (
 )  # noqa: E402 module level import not at top of file
 from bleach._vendor.html5lib.filters.sanitizer import (
     allowed_protocols,
+    allowed_css_properties,
+    allowed_svg_properties,
 )  # noqa: E402 module level import not at top of file
 from bleach._vendor.html5lib.filters.sanitizer import (
     Filter as SanitizerFilter,
@@ -68,8 +70,10 @@ TAG_TOKEN_TYPES = {
     constants.tokenTypes["EndTag"],
     constants.tokenTypes["EmptyTag"],
 }
-CHARACTERS_TYPE = constants.tokenTypes["Characters"]
-PARSEERROR_TYPE = constants.tokenTypes["ParseError"]
+TAG_TOKEN_TYPE_START = constants.tokenTypes["StartTag"]
+TAG_TOKEN_TYPE_END = constants.tokenTypes["EndTag"]
+TAG_TOKEN_TYPE_CHARACTERS = constants.tokenTypes["Characters"]
+TAG_TOKEN_TYPE_PARSEERROR = constants.tokenTypes["ParseError"]

 #: List of valid HTML tags, from WHATWG HTML Living Standard as of 2018-10-17
@@ -190,6 +194,48 @@ HTML_TAGS = [
 ]

+#: List of block level HTML tags, as per https://github.com/mozilla/bleach/issues/369
+#: from mozilla on 2019.07.11
+#: https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements#Elements
+HTML_TAGS_BLOCK_LEVEL = frozenset(
+    [
+        "address",
+        "article",
+        "aside",
+        "blockquote",
+        "details",
+        "dialog",
+        "dd",
+        "div",
+        "dl",
+        "dt",
+        "fieldset",
+        "figcaption",
+        "figure",
+        "footer",
+        "form",
+        "h1",
+        "h2",
+        "h3",
+        "h4",
+        "h5",
+        "h6",
+        "header",
+        "hgroup",
+        "hr",
+        "li",
+        "main",
+        "nav",
+        "ol",
+        "p",
+        "pre",
+        "section",
+        "table",
+        "ul",
+    ]
+)
+

 class InputStreamWithMemory:
     """Wraps an HTMLInputStream to remember characters since last <
@@ -257,17 +303,20 @@ class BleachHTMLTokenizer(HTMLTokenizer):
     """Tokenizer that doesn't consume character entities"""

     def __init__(self, consume_entities=False, **kwargs):
-        super(BleachHTMLTokenizer, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.consume_entities = consume_entities

         # Wrap the stream with one that remembers the history
         self.stream = InputStreamWithMemory(self.stream)

+        # Remember the last token emitted; needed for block element spacing
+        self.emitted_last_token = None
+
     def __iter__(self):
         last_error_token = None

-        for token in super(BleachHTMLTokenizer, self).__iter__():
+        for token in super().__iter__():
             if last_error_token is not None:
                 if (
                     last_error_token["data"] == "invalid-character-in-attribute-name"
@@ -309,12 +358,12 @@ class BleachHTMLTokenizer(HTMLTokenizer):
                     # If this is not an allowed tag, then we convert it to
                     # characters and it'll get escaped in the sanitizer.
                     token["data"] = self.stream.get_tag()
-                    token["type"] = CHARACTERS_TYPE
+                    token["type"] = TAG_TOKEN_TYPE_CHARACTERS

                     last_error_token = None
                     yield token

-                elif token["type"] == PARSEERROR_TYPE:
+                elif token["type"] == TAG_TOKEN_TYPE_PARSEERROR:
                     # If the token is a parse error, then let the last_error_token
                     # go, and make token the new last_error_token
                     yield last_error_token
@@ -329,7 +378,7 @@ class BleachHTMLTokenizer(HTMLTokenizer):
             # If the token is a ParseError, we hold on to it so we can get the
             # next token and potentially fix it.
-            if token["type"] == PARSEERROR_TYPE:
+            if token["type"] == TAG_TOKEN_TYPE_PARSEERROR:
                 last_error_token = token
                 continue
@@ -342,9 +391,7 @@ class BleachHTMLTokenizer(HTMLTokenizer):
         # If this tokenizer is set to consume entities, then we can let the
         # superclass do its thing.
         if self.consume_entities:
-            return super(BleachHTMLTokenizer, self).consumeEntity(
-                allowedChar, fromAttribute
-            )
+            return super().consumeEntity(allowedChar, fromAttribute)

         # If this tokenizer is set to not consume entities, then we don't want
         # to consume and convert them, so this overrides the html5lib tokenizer's
@@ -356,7 +403,7 @@ class BleachHTMLTokenizer(HTMLTokenizer):
             self.currentToken["data"][-1][1] += "&"

         else:
-            self.tokenQueue.append({"type": CHARACTERS_TYPE, "data": "&"})
+            self.tokenQueue.append({"type": TAG_TOKEN_TYPE_CHARACTERS, "data": "&"})

     def tagOpenState(self):
         # This state marks a < that is either a StartTag, EndTag, EmptyTag,
@@ -364,7 +411,7 @@ class BleachHTMLTokenizer(HTMLTokenizer):
         # we've collected so far and we do that by calling start_tag() on
         # the input stream wrapper.
         self.stream.start_tag()
-        return super(BleachHTMLTokenizer, self).tagOpenState()
+        return super().tagOpenState()

     def emitCurrentToken(self):
         token = self.currentToken
@@ -378,8 +425,18 @@ class BleachHTMLTokenizer(HTMLTokenizer):
             # allowed list, then it gets stripped or escaped. In both of these
             # cases it gets converted to a Characters token.
             if self.parser.strip:
-                # If we're stripping the token, we just throw in an empty
-                # string token.
+                if (
+                    self.emitted_last_token
+                    and token["type"] == TAG_TOKEN_TYPE_START
+                    and token["name"].lower() in HTML_TAGS_BLOCK_LEVEL
+                ):
+                    # If this is a block level tag we're stripping, we drop it
+                    # for a newline because that's what a browser would parse
+                    # it as
+                    new_data = "\n"
+                else:
+                    # For all other things being stripped, we throw in an empty
+                    # string token
                     new_data = ""

             else:
@@ -390,14 +447,15 @@ class BleachHTMLTokenizer(HTMLTokenizer):
                # string and use that.
                 new_data = self.stream.get_tag()

-            new_token = {"type": CHARACTERS_TYPE, "data": new_data}
+            new_token = {"type": TAG_TOKEN_TYPE_CHARACTERS, "data": new_data}

-            self.currentToken = new_token
+            self.currentToken = self.emitted_last_token = new_token
             self.tokenQueue.append(new_token)
             self.state = self.dataState
             return

-        super(BleachHTMLTokenizer, self).emitCurrentToken()
+        self.emitted_last_token = self.currentToken
+        super().emitCurrentToken()


 class BleachHTMLParser(HTMLParser):
@@ -416,7 +474,7 @@ class BleachHTMLParser(HTMLParser):
         self.tags = [tag.lower() for tag in tags] if tags is not None else None
         self.strip = strip
         self.consume_entities = consume_entities
-        super(BleachHTMLParser, self).__init__(**kwargs)
+        super().__init__(**kwargs)

     def _parse(
         self, stream, innerHTML=False, container="div", scripting=True, **kwargs
@@ -514,13 +572,13 @@ def convert_entities(text):
 def match_entity(stream):
     """Returns first entity in stream or None if no entity exists

-    Note: For Bleach purposes, entities must start with a "&" and end with
-    a ";". This ignores ambiguous character entities that have no ";" at the
-    end.
+    Note: For Bleach purposes, entities must start with a "&" and end with a
+    ";". This ignores ambiguous character entities that have no ";" at the end.

     :arg stream: the character stream

-    :returns: ``None`` or the entity string without "&" or ";"
+    :returns: the entity string without "&" or ";" if it's a valid character
+        entity; ``None`` otherwise

     """
     # Nix the & at the beginning
@@ -559,9 +617,11 @@ def match_entity(stream):
     # Handle character entities
     while stream and stream[0] not in end_characters:
         c = stream.pop(0)
-        if not ENTITIES_TRIE.has_keys_with_prefix(possible_entity):
-            break
         possible_entity += c
+        if not ENTITIES_TRIE.has_keys_with_prefix(possible_entity):
+            # If it's not a prefix, then it's not an entity and we're
+            # out
+            return None

     if possible_entity and stream and stream[0] == ";":
         return possible_entity
@@ -642,15 +702,14 @@ class BleachHTMLSerializer(HTMLSerializer):
         in_tag = False
         after_equals = False

-        for stoken in super(BleachHTMLSerializer, self).serialize(treewalker, encoding):
+        for stoken in super().serialize(treewalker, encoding):
             if in_tag:
                 if stoken == ">":
                     in_tag = False

                 elif after_equals:
                     if stoken != '"':
-                        for part in self.escape_base_amp(stoken):
-                            yield part
+                        yield from self.escape_base_amp(stoken)

                     after_equals = False

                 continue
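
Beyond the CHARACTERS_TYPE/PARSEERROR_TYPE renames and the super() cleanups, this file carries a behavior change: with strip=True, a stripped block-level start tag is now replaced by a newline instead of the empty string. A hedged sketch of what the diff implies (expected output, not verified against this tree):

    import bleach

    # <p> is in HTML_TAGS_BLOCK_LEVEL and not in the allowed tags, so stripping
    # it should leave "\n" where the start tag was, mimicking browser rendering.
    bleach.clean("first<p>second</p>", tags=[], strip=True)
    # expected: "first\nsecond"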


@@ -2,7 +2,6 @@ import re

 from bleach import callbacks as linkify_callbacks
 from bleach import html5lib_shim
-from bleach.utils import alphabetize_attributes


 #: List of default callbacks
@@ -155,7 +154,7 @@ class Linker:
             omit_optional_tags=False,
             # linkify does not sanitize
             sanitize=False,
-            # linkify alphabetizes
+            # linkify preserves attr order
             alphabetical_attributes=False,
         )
@@ -228,7 +227,7 @@ class LinkifyFilter(html5lib_shim.Filter):
         :arg re email_re: email matching regex

         """
-        super(LinkifyFilter, self).__init__(source)
+        super().__init__(source)

         self.callbacks = callbacks or []
         self.skip_tags = skip_tags or []
@@ -316,7 +315,6 @@ class LinkifyFilter(html5lib_shim.Filter):
                 else:
                     # Add an "a" tag for the new link
                     _text = attrs.pop("_text", "")
-                    attrs = alphabetize_attributes(attrs)
                     new_tokens.extend(
                         [
                             {"type": "StartTag", "name": "a", "data": attrs},
@@ -332,8 +330,7 @@ class LinkifyFilter(html5lib_shim.Filter):
             if end < len(text):
                 new_tokens.append({"type": "Characters", "data": text[end:]})

-            for new_token in new_tokens:
-                yield new_token
+            yield from new_tokens

             continue
@@ -439,8 +436,6 @@ class LinkifyFilter(html5lib_shim.Filter):
                     new_tokens.append({"type": "Characters", "data": prefix})

                 _text = attrs.pop("_text", "")
-                attrs = alphabetize_attributes(attrs)
-
                 new_tokens.extend(
                     [
                         {"type": "StartTag", "name": "a", "data": attrs},
@@ -460,8 +455,7 @@ class LinkifyFilter(html5lib_shim.Filter):
             if end < len(text):
                 new_tokens.append({"type": "Characters", "data": text[end:]})

-            for new_token in new_tokens:
-                yield new_token
+            yield from new_tokens

             continue
@@ -493,14 +487,13 @@ class LinkifyFilter(html5lib_shim.Filter):
         else:
             new_text = attrs.pop("_text", "")
-            a_token["data"] = alphabetize_attributes(attrs)
+            a_token["data"] = attrs

             if text == new_text:
                 # The callbacks didn't change the text, so we yield the new "a"
                 # token, then whatever else was there, then the end "a" token
                 yield a_token
-                for mem in token_buffer[1:]:
-                    yield mem
+                yield from token_buffer[1:]

             else:
                 # If the callbacks changed the text, then we're going to drop
@@ -516,7 +509,7 @@ class LinkifyFilter(html5lib_shim.Filter):
         token_buffer = []

-        for token in super(LinkifyFilter, self).__iter__():
+        for token in super().__iter__():
             if in_a:
                 # Handle the case where we're in an "a" tag--we want to buffer tokens
                 # until we hit an end "a" tag.
@@ -524,8 +517,7 @@ class LinkifyFilter(html5lib_shim.Filter):
                     # Add the end tag to the token buffer and then handle them
                     # and yield anything returned
                     token_buffer.append(token)
-                    for new_token in self.handle_a_tag(token_buffer):
-                        yield new_token
+                    yield from self.handle_a_tag(token_buffer)

                     # Clear "a" related state and continue since we've yielded all
                     # the tokens we're going to yield


@@ -6,7 +6,6 @@ from bleach._vendor.parse import urlparse
 from xml.sax.saxutils import unescape

 from bleach import html5lib_shim
-from bleach.utils import alphabetize_attributes


 #: List of allowed tags
@@ -33,9 +32,6 @@ ALLOWED_ATTRIBUTES = {
     "acronym": ["title"],
 }

-#: List of allowed styles
-ALLOWED_STYLES = []
-
 #: List of allowed protocols
 ALLOWED_PROTOCOLS = ["http", "https", "mailto"]
@@ -85,11 +81,11 @@ class Cleaner:
         self,
         tags=ALLOWED_TAGS,
         attributes=ALLOWED_ATTRIBUTES,
-        styles=ALLOWED_STYLES,
         protocols=ALLOWED_PROTOCOLS,
         strip=False,
         strip_comments=True,
         filters=None,
+        css_sanitizer=None,
     ):
         """Initializes a Cleaner

@@ -99,9 +95,6 @@ class Cleaner:
         :arg dict attributes: allowed attributes; can be a callable, list or dict;
             defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``

-        :arg list styles: allowed list of css styles; defaults to
-            ``bleach.sanitizer.ALLOWED_STYLES``
-
         :arg list protocols: allowed list of protocols for links; defaults
             to ``bleach.sanitizer.ALLOWED_PROTOCOLS``
@@ -118,14 +111,17 @@ class Cleaner:
             Using filters changes the output of ``bleach.Cleaner.clean``.
             Make sure the way the filters change the output are secure.

+        :arg CSSSanitizer css_sanitizer: instance with a "sanitize_css" method for
+            sanitizing style attribute values and style text; defaults to None
+
         """
         self.tags = tags
         self.attributes = attributes
-        self.styles = styles
         self.protocols = protocols
         self.strip = strip
         self.strip_comments = strip_comments
         self.filters = filters or []
+        self.css_sanitizer = css_sanitizer

         self.parser = html5lib_shim.BleachHTMLParser(
             tags=self.tags,
@@ -143,7 +139,7 @@ class Cleaner:
             resolve_entities=False,
             # Bleach has its own sanitizer, so don't use the html5lib one
             sanitize=False,
-            # Bleach sanitizer alphabetizes already, so don't use the html5lib one
+            # clean preserves attr order
             alphabetical_attributes=False,
         )
@@ -175,11 +171,10 @@ class Cleaner:
             attributes=self.attributes,
             strip_disallowed_elements=self.strip,
             strip_html_comments=self.strip_comments,
+            css_sanitizer=self.css_sanitizer,
             # html5lib-sanitizer things
             allowed_elements=self.tags,
-            allowed_css_properties=self.styles,
             allowed_protocols=self.protocols,
-            allowed_svg_properties=[],
         )

         # Apply any filters after the BleachSanitizerFilter
@@ -242,25 +237,25 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
     def __init__(
         self,
         source,
+        allowed_elements=ALLOWED_TAGS,
         attributes=ALLOWED_ATTRIBUTES,
+        allowed_protocols=ALLOWED_PROTOCOLS,
         strip_disallowed_elements=False,
         strip_html_comments=True,
+        css_sanitizer=None,
         **kwargs,
     ):
         """Creates a BleachSanitizerFilter instance

         :arg Treewalker source: stream

-        :arg list tags: allowed list of tags; defaults to
+        :arg list allowed_elements: allowed list of tags; defaults to
             ``bleach.sanitizer.ALLOWED_TAGS``

         :arg dict attributes: allowed attributes; can be a callable, list or dict;
             defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``

-        :arg list styles: allowed list of css styles; defaults to
-            ``bleach.sanitizer.ALLOWED_STYLES``
-
-        :arg list protocols: allowed list of protocols for links; defaults
+        :arg list allowed_protocols: allowed list of protocols for links; defaults
             to ``bleach.sanitizer.ALLOWED_PROTOCOLS``

         :arg bool strip_disallowed_elements: whether or not to strip disallowed
@@ -268,10 +263,14 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):

         :arg bool strip_html_comments: whether or not to strip HTML comments

+        :arg CSSSanitizer css_sanitizer: instance with a "sanitize_css" method for
+            sanitizing style attribute values and style text; defaults to None
+
         """
         self.attr_filter = attribute_filter_factory(attributes)
         self.strip_disallowed_elements = strip_disallowed_elements
         self.strip_html_comments = strip_html_comments
+        self.css_sanitizer = css_sanitizer

         # filter out html5lib deprecation warnings to use bleach from BleachSanitizerFilter init
         warnings.filterwarnings(
@@ -280,7 +279,12 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
             category=DeprecationWarning,
             module="bleach._vendor.html5lib",
         )
-        return super(BleachSanitizerFilter, self).__init__(source, **kwargs)
+        return super().__init__(
+            source,
+            allowed_elements=allowed_elements,
+            allowed_protocols=allowed_protocols,
+            **kwargs,
+        )

     def sanitize_stream(self, token_iterator):
         for token in token_iterator:
@@ -290,8 +294,7 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
                 continue

             if isinstance(ret, list):
-                for subtoken in ret:
-                    yield subtoken
+                yield from ret
             else:
                 yield ret
@@ -358,10 +361,6 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
                 return None

             else:
-                if "data" in token:
-                    # Alphabetize the attributes before calling .disallowed_token()
-                    # so that the resulting string is stable
-                    token["data"] = alphabetize_attributes(token["data"])
                 return self.disallowed_token(token)

         elif token_type == "Comment":
@@ -547,12 +546,21 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):

             # If it's a style attribute, sanitize it
             if namespaced_name == (None, "style"):
-                val = self.sanitize_css(val)
+                if self.css_sanitizer:
+                    val = self.css_sanitizer.sanitize_css(val)
+                else:
+                    # FIXME(willkg): if style is allowed, but no
+                    # css_sanitizer was set up, then this is probably a
+                    # mistake and we should raise an error here
+                    #
+                    # For now, we're going to set the value to "" because
+                    # there was no sanitizer set
+                    val = ""

             # At this point, we want to keep the attribute, so add it in
             attrs[namespaced_name] = val

-        token["data"] = alphabetize_attributes(attrs)
+        token["data"] = attrs

         return token
@@ -575,7 +583,7 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
                 if ns is None or ns not in html5lib_shim.prefixes:
                     namespaced_name = name
                 else:
-                    namespaced_name = "%s:%s" % (html5lib_shim.prefixes[ns], name)
+                    namespaced_name = "{}:{}".format(html5lib_shim.prefixes[ns], name)

                 attrs.append(
                     ' %s="%s"'
@@ -587,7 +595,7 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
                         v,
                     )
                 )
-            token["data"] = "<%s%s>" % (token["name"], "".join(attrs))
+            token["data"] = "<{}{}>".format(token["name"], "".join(attrs))

         else:
             token["data"] = "<%s>" % token["name"]
@@ -599,47 +607,3 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
             del token["name"]

         return token
-
-    def sanitize_css(self, style):
-        """Sanitizes css in style tags"""
-        # Convert entities in the style so that it can be parsed as CSS
-        style = html5lib_shim.convert_entities(style)
-
-        # Drop any url values before we do anything else
-        style = re.compile(r"url\s*\(\s*[^\s)]+?\s*\)\s*").sub(" ", style)
-
-        # The gauntlet of sanitization
-
-        # Validate the css in the style tag and if it's not valid, then drop
-        # the whole thing.
-        parts = style.split(";")
-        gauntlet = re.compile(
-            r"""^(  # consider a style attribute value as composed of:
-            [/:,#%!.\s\w]  # a non-newline character
-            |\w-\w  # 3 characters in the form \w-\w
-            |'[\s\w]+'\s*  # a single quoted string of [\s\w]+ with trailing space
-            |"[\s\w]+"  # a double quoted string of [\s\w]+
-            |\([\d,%\.\s]+\)  # a parenthesized string of one or more digits, commas, periods, ...
-            )*$""",  # ... percent signs, or whitespace e.g. from 'color: hsl(30,100%,50%)'
-            flags=re.U | re.VERBOSE,
-        )
-
-        for part in parts:
-            if not gauntlet.match(part):
-                return ""
-
-        if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
-            return ""
-
-        clean = []
-        for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style):
-            if not value:
-                continue
-            if prop.lower() in self.allowed_css_properties:
-                clean.append(prop + ": " + value + ";")
-            elif prop.lower() in self.allowed_svg_properties:
-                clean.append(prop + ": " + value + ";")
-
-        return " ".join(clean)


@@ -1,21 +0,0 @@
-from collections import OrderedDict
-
-
-def _attr_key(attr):
-    """Returns appropriate key for sorting attribute names
-
-    Attribute names are a tuple of ``(namespace, name)`` where namespace can be
-    ``None`` or a string. These can't be compared in Python 3, so we conver the
-    ``None`` to an empty string.
-
-    """
-    key = (attr[0][0] or ""), attr[0][1]
-    return key
-
-
-def alphabetize_attributes(attrs):
-    """Takes a dict of attributes (or None) and returns them alphabetized"""
-    if not attrs:
-        return attrs
-
-    return OrderedDict([(k, v) for k, v in sorted(attrs.items(), key=_attr_key)])
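
With alphabetize_attributes gone (and html5lib's alphabetical_attributes already disabled), clean() and linkify() now emit attributes in source order rather than sorted order. A sketch of the observable difference (expected, not run against this tree):

    import bleach

    bleach.clean('<img src="x.png" alt="x">', tags=["img"], attributes={"img": ["src", "alt"]})
    # 4.1.0: '<img alt="x" src="x.png">' (alphabetized)
    # 5.0.0 expected: '<img src="x.png" alt="x">' (source order preserved)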


@@ -5,7 +5,7 @@ backports.csv==1.0.7
 backports.functools-lru-cache==1.6.4
 backports.zoneinfo==0.2.1
 beautifulsoup4==4.10.0
-bleach==4.1.0
+bleach==5.0.0
 certifi==2021.10.8
 cheroot==8.6.0
 cherrypy==18.6.1