Mirror of https://github.com/Tautulli/Tautulli.git (synced 2025-07-07)
Bump bleach from 4.1.0 to 5.0.0 (#1708)
* Bump bleach from 4.1.0 to 5.0.0

Bumps [bleach](https://github.com/mozilla/bleach) from 4.1.0 to 5.0.0.
- [Release notes](https://github.com/mozilla/bleach/releases)
- [Changelog](https://github.com/mozilla/bleach/blob/main/CHANGES)
- [Commits](https://github.com/mozilla/bleach/compare/v4.1.0...v5.0.0)

---
updated-dependencies:
- dependency-name: bleach
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>

* Update bleach==5.0.0

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: JonnyWong16 <9099342+JonnyWong16@users.noreply.github.com>
[skip ci]
parent d510e0f600
commit a1fe0b04d7
10 changed files with 264 additions and 151 deletions
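The substance of this bump is bleach's breaking 5.0.0 API change, visible throughout the diff below: the `styles` argument and `ALLOWED_STYLES` are gone, and CSS sanitizing now goes through an explicit `CSSSanitizer` backed by tinycss2. A minimal before/after sketch of the migration (assuming tinycss2, bleach's `css` extra, is importable; the HTML snippet is just an illustration):

import bleach
from bleach.css_sanitizer import CSSSanitizer

# bleach 4.1.0 (keyword removed in 5.0.0):
#   bleach.clean(html, tags=["p"], attributes={"p": ["style"]}, styles=["color"])

# bleach 5.0.0:
cleaned = bleach.clean(
    '<p style="color: red; position: fixed">hi</p>',
    tags=["p"],
    attributes={"p": ["style"]},
    css_sanitizer=CSSSanitizer(allowed_css_properties=["color"]),
)
# "color" is in the allow-list and survives; "position" is dropped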
lib/bleach/__init__.py
@@ -1,7 +1,3 @@
-# -*- coding: utf-8 -*-
-
-import packaging.version
-
 from bleach.linkifier import (
     DEFAULT_CALLBACKS,
     Linker,
@@ -9,17 +5,15 @@ from bleach.linkifier import (
 from bleach.sanitizer import (
     ALLOWED_ATTRIBUTES,
     ALLOWED_PROTOCOLS,
-    ALLOWED_STYLES,
     ALLOWED_TAGS,
     Cleaner,
 )
 
 
 # yyyymmdd
-__releasedate__ = "20210825"
+__releasedate__ = "20220407"
 # x.y.z or x.y.z.dev0 -- semver
-__version__ = "4.1.0"
-VERSION = packaging.version.Version(__version__)
+__version__ = "5.0.0"
 
 
 __all__ = ["clean", "linkify"]
@@ -29,10 +23,10 @@ def clean(
     text,
     tags=ALLOWED_TAGS,
     attributes=ALLOWED_ATTRIBUTES,
-    styles=ALLOWED_STYLES,
     protocols=ALLOWED_PROTOCOLS,
     strip=False,
     strip_comments=True,
+    css_sanitizer=None,
 ):
     """Clean an HTML fragment of malicious content and return it
 
@@ -64,9 +58,6 @@ def clean(
     :arg dict attributes: allowed attributes; can be a callable, list or dict;
         defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``
 
-    :arg list styles: allowed list of css styles; defaults to
-        ``bleach.sanitizer.ALLOWED_STYLES``
-
     :arg list protocols: allowed list of protocols for links; defaults
         to ``bleach.sanitizer.ALLOWED_PROTOCOLS``
 
@@ -74,16 +65,19 @@ def clean(
 
     :arg bool strip_comments: whether or not to strip HTML comments
 
+    :arg CSSSanitizer css_sanitizer: instance with a "sanitize_css" method for
+        sanitizing style attribute values and style text; defaults to None
+
     :returns: cleaned text as unicode
 
     """
     cleaner = Cleaner(
         tags=tags,
         attributes=attributes,
-        styles=styles,
         protocols=protocols,
         strip=strip,
         strip_comments=strip_comments,
+        css_sanitizer=css_sanitizer,
     )
     return cleaner.clean(text)
 
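Note that `bleach.VERSION`, a `packaging.version.Version`, disappears along with the `packaging.version` import; only the `__version__` string remains. A sketch of what callers that compared against `bleach.VERSION` have to do themselves now:

import bleach
import packaging.version

version = packaging.version.Version(bleach.__version__)
assert version >= packaging.version.Version("5.0.0")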
lib/bleach/_vendor/html5lib-1.1.dist-info/LICENSE (new file, 20 lines)
@@ -0,0 +1,20 @@
+Copyright (c) 2006-2013 James Graham and other contributors
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
lib/bleach/_vendor/html5lib-1.1.dist-info/REQUESTED (new file, empty)
lib/bleach/_vendor/parse.py.SHA256SUM (new file, 1 line)
@@ -0,0 +1 @@
+46af966e33b6247ae1d57d9459115a3eb46cda9f809c9f14e052abc2fe8dacb2 parse.py
lib/bleach/css_sanitizer.py (new file, 104 lines)
@@ -0,0 +1,104 @@
+import tinycss2
+
+
+ALLOWED_CSS_PROPERTIES = frozenset(
+    (
+        "azimuth",
+        "background-color",
+        "border-bottom-color",
+        "border-collapse",
+        "border-color",
+        "border-left-color",
+        "border-right-color",
+        "border-top-color",
+        "clear",
+        "color",
+        "cursor",
+        "direction",
+        "display",
+        "elevation",
+        "float",
+        "font",
+        "font-family",
+        "font-size",
+        "font-style",
+        "font-variant",
+        "font-weight",
+        "height",
+        "letter-spacing",
+        "line-height",
+        "overflow",
+        "pause",
+        "pause-after",
+        "pause-before",
+        "pitch",
+        "pitch-range",
+        "richness",
+        "speak",
+        "speak-header",
+        "speak-numeral",
+        "speak-punctuation",
+        "speech-rate",
+        "stress",
+        "text-align",
+        "text-decoration",
+        "text-indent",
+        "unicode-bidi",
+        "vertical-align",
+        "voice-family",
+        "volume",
+        "white-space",
+        "width",
+    )
+)
+
+
+ALLOWED_SVG_PROPERTIES = frozenset(
+    (
+        "fill",
+        "fill-opacity",
+        "fill-rule",
+        "stroke",
+        "stroke-width",
+        "stroke-linecap",
+        "stroke-linejoin",
+        "stroke-opacity",
+    )
+)
+
+
+class CSSSanitizer:
+    def __init__(
+        self,
+        allowed_css_properties=ALLOWED_CSS_PROPERTIES,
+        allowed_svg_properties=ALLOWED_SVG_PROPERTIES,
+    ):
+        self.allowed_css_properties = allowed_css_properties
+        self.allowed_svg_properties = allowed_svg_properties
+
+    def sanitize_css(self, style):
+        """Sanitizes css in style tags"""
+        parsed = tinycss2.parse_declaration_list(style)
+
+        if not parsed:
+            return ""
+
+        new_tokens = []
+        for token in parsed:
+            if token.type == "declaration":
+                if (
+                    token.lower_name in self.allowed_css_properties
+                    or token.lower_name in self.allowed_svg_properties
+                ):
+                    new_tokens.append(token)
+            elif token.type in ("comment", "whitespace"):
+                if new_tokens and new_tokens[-1].type != token.type:
+                    new_tokens.append(token)
+
+            # NOTE(willkg): We currently don't handle AtRule or ParseError and
+            # so both get silently thrown out
+
+        if not new_tokens:
+            return ""
+
+        return tinycss2.serialize(new_tokens).strip()
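The new module can be exercised on its own; a small usage sketch (tinycss2 must be importable, since the module imports it at the top):

from bleach.css_sanitizer import CSSSanitizer

sanitizer = CSSSanitizer()  # defaults to the ALLOWED_CSS_PROPERTIES / ALLOWED_SVG_PROPERTIES above
print(sanitizer.sanitize_css("color: red; behavior: url(evil.htc)"))
# keeps the "color" declaration; drops "behavior", which is in neither allow-list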
lib/bleach/html5lib_shim.py
@@ -36,6 +36,8 @@ from bleach._vendor.html5lib.filters.base import (
 )  # noqa: E402 module level import not at top of file
 from bleach._vendor.html5lib.filters.sanitizer import (
     allowed_protocols,
+    allowed_css_properties,
+    allowed_svg_properties,
 )  # noqa: E402 module level import not at top of file
 from bleach._vendor.html5lib.filters.sanitizer import (
     Filter as SanitizerFilter,
@@ -68,8 +70,10 @@ TAG_TOKEN_TYPES = {
     constants.tokenTypes["EndTag"],
     constants.tokenTypes["EmptyTag"],
 }
-CHARACTERS_TYPE = constants.tokenTypes["Characters"]
-PARSEERROR_TYPE = constants.tokenTypes["ParseError"]
+TAG_TOKEN_TYPE_START = constants.tokenTypes["StartTag"]
+TAG_TOKEN_TYPE_END = constants.tokenTypes["EndTag"]
+TAG_TOKEN_TYPE_CHARACTERS = constants.tokenTypes["Characters"]
+TAG_TOKEN_TYPE_PARSEERROR = constants.tokenTypes["ParseError"]
 
 
 #: List of valid HTML tags, from WHATWG HTML Living Standard as of 2018-10-17
@@ -190,6 +194,48 @@ HTML_TAGS = [
 ]
 
 
+#: List of block level HTML tags, as per https://github.com/mozilla/bleach/issues/369
+#: from mozilla on 2019.07.11
+#: https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements#Elements
+HTML_TAGS_BLOCK_LEVEL = frozenset(
+    [
+        "address",
+        "article",
+        "aside",
+        "blockquote",
+        "details",
+        "dialog",
+        "dd",
+        "div",
+        "dl",
+        "dt",
+        "fieldset",
+        "figcaption",
+        "figure",
+        "footer",
+        "form",
+        "h1",
+        "h2",
+        "h3",
+        "h4",
+        "h5",
+        "h6",
+        "header",
+        "hgroup",
+        "hr",
+        "li",
+        "main",
+        "nav",
+        "ol",
+        "p",
+        "pre",
+        "section",
+        "table",
+        "ul",
+    ]
+)
+
+
 class InputStreamWithMemory:
     """Wraps an HTMLInputStream to remember characters since last <
 
@@ -257,17 +303,20 @@ class BleachHTMLTokenizer(HTMLTokenizer):
     """Tokenizer that doesn't consume character entities"""
 
     def __init__(self, consume_entities=False, **kwargs):
-        super(BleachHTMLTokenizer, self).__init__(**kwargs)
+        super().__init__(**kwargs)
 
         self.consume_entities = consume_entities
 
         # Wrap the stream with one that remembers the history
         self.stream = InputStreamWithMemory(self.stream)
 
+        # Remember the last token emitted; needed for block element spacing
+        self.emitted_last_token = None
+
     def __iter__(self):
         last_error_token = None
 
-        for token in super(BleachHTMLTokenizer, self).__iter__():
+        for token in super().__iter__():
             if last_error_token is not None:
                 if (
                     last_error_token["data"] == "invalid-character-in-attribute-name"
@@ -309,12 +358,12 @@ class BleachHTMLTokenizer(HTMLTokenizer):
                     # If this is not an allowed tag, then we convert it to
                     # characters and it'll get escaped in the sanitizer.
                     token["data"] = self.stream.get_tag()
-                    token["type"] = CHARACTERS_TYPE
+                    token["type"] = TAG_TOKEN_TYPE_CHARACTERS
 
                 last_error_token = None
                 yield token
 
-            elif token["type"] == PARSEERROR_TYPE:
+            elif token["type"] == TAG_TOKEN_TYPE_PARSEERROR:
                 # If the token is a parse error, then let the last_error_token
                 # go, and make token the new last_error_token
                 yield last_error_token
@@ -329,7 +378,7 @@ class BleachHTMLTokenizer(HTMLTokenizer):
 
             # If the token is a ParseError, we hold on to it so we can get the
             # next token and potentially fix it.
-            if token["type"] == PARSEERROR_TYPE:
+            if token["type"] == TAG_TOKEN_TYPE_PARSEERROR:
                 last_error_token = token
                 continue
 
@@ -342,9 +391,7 @@ class BleachHTMLTokenizer(HTMLTokenizer):
         # If this tokenizer is set to consume entities, then we can let the
         # superclass do its thing.
         if self.consume_entities:
-            return super(BleachHTMLTokenizer, self).consumeEntity(
-                allowedChar, fromAttribute
-            )
+            return super().consumeEntity(allowedChar, fromAttribute)
 
         # If this tokenizer is set to not consume entities, then we don't want
         # to consume and convert them, so this overrides the html5lib tokenizer's
@@ -356,7 +403,7 @@ class BleachHTMLTokenizer(HTMLTokenizer):
             self.currentToken["data"][-1][1] += "&"
 
         else:
-            self.tokenQueue.append({"type": CHARACTERS_TYPE, "data": "&"})
+            self.tokenQueue.append({"type": TAG_TOKEN_TYPE_CHARACTERS, "data": "&"})
 
     def tagOpenState(self):
         # This state marks a < that is either a StartTag, EndTag, EmptyTag,
@@ -364,7 +411,7 @@ class BleachHTMLTokenizer(HTMLTokenizer):
         # we've collected so far and we do that by calling start_tag() on
         # the input stream wrapper.
         self.stream.start_tag()
-        return super(BleachHTMLTokenizer, self).tagOpenState()
+        return super().tagOpenState()
 
     def emitCurrentToken(self):
         token = self.currentToken
@@ -378,9 +425,19 @@ class BleachHTMLTokenizer(HTMLTokenizer):
             # allowed list, then it gets stripped or escaped. In both of these
             # cases it gets converted to a Characters token.
             if self.parser.strip:
-                # If we're stripping the token, we just throw in an empty
-                # string token.
-                new_data = ""
+                if (
+                    self.emitted_last_token
+                    and token["type"] == TAG_TOKEN_TYPE_START
+                    and token["name"].lower() in HTML_TAGS_BLOCK_LEVEL
+                ):
+                    # If this is a block level tag we're stripping, we drop it
+                    # for a newline because that's what a browser would parse
+                    # it as
+                    new_data = "\n"
+                else:
+                    # For all other things being stripped, we throw in an empty
+                    # string token
+                    new_data = ""
 
             else:
                 # If we're escaping the token, we want to escape the exact
@@ -390,14 +447,15 @@ class BleachHTMLTokenizer(HTMLTokenizer):
                 # string and use that.
                 new_data = self.stream.get_tag()
 
-            new_token = {"type": CHARACTERS_TYPE, "data": new_data}
+            new_token = {"type": TAG_TOKEN_TYPE_CHARACTERS, "data": new_data}
 
-            self.currentToken = new_token
+            self.currentToken = self.emitted_last_token = new_token
             self.tokenQueue.append(new_token)
             self.state = self.dataState
             return
 
-        super(BleachHTMLTokenizer, self).emitCurrentToken()
+        self.emitted_last_token = self.currentToken
+        super().emitCurrentToken()
 
 
 class BleachHTMLParser(HTMLParser):
@@ -416,7 +474,7 @@ class BleachHTMLParser(HTMLParser):
         self.tags = [tag.lower() for tag in tags] if tags is not None else None
         self.strip = strip
         self.consume_entities = consume_entities
-        super(BleachHTMLParser, self).__init__(**kwargs)
+        super().__init__(**kwargs)
 
     def _parse(
         self, stream, innerHTML=False, container="div", scripting=True, **kwargs
@@ -514,13 +572,13 @@ def convert_entities(text):
 def match_entity(stream):
     """Returns first entity in stream or None if no entity exists
 
-    Note: For Bleach purposes, entities must start with a "&" and end with
-    a ";". This ignoresambiguous character entities that have no ";" at the
-    end.
+    Note: For Bleach purposes, entities must start with a "&" and end with a
+    ";". This ignores ambiguous character entities that have no ";" at the end.
 
     :arg stream: the character stream
 
-    :returns: ``None`` or the entity string without "&" or ";"
+    :returns: the entity string without "&" or ";" if it's a valid character
+        entity; ``None`` otherwise
 
     """
     # Nix the & at the beginning
@@ -559,9 +617,11 @@ def match_entity(stream):
     # Handle character entities
     while stream and stream[0] not in end_characters:
         c = stream.pop(0)
-        if not ENTITIES_TRIE.has_keys_with_prefix(possible_entity):
-            break
         possible_entity += c
+        if not ENTITIES_TRIE.has_keys_with_prefix(possible_entity):
+            # If it's not a prefix, then it's not an entity and we're
+            # out
+            return None
 
     if possible_entity and stream and stream[0] == ";":
         return possible_entity
@@ -642,15 +702,14 @@ class BleachHTMLSerializer(HTMLSerializer):
         in_tag = False
         after_equals = False
 
-        for stoken in super(BleachHTMLSerializer, self).serialize(treewalker, encoding):
+        for stoken in super().serialize(treewalker, encoding):
             if in_tag:
                 if stoken == ">":
                     in_tag = False
 
                 elif after_equals:
                     if stoken != '"':
-                        for part in self.escape_base_amp(stoken):
-                            yield part
+                        yield from self.escape_base_amp(stoken)
 
                     after_equals = False
                     continue
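The `HTML_TAGS_BLOCK_LEVEL` / `emitted_last_token` machinery above changes stripping behavior: a stripped block-level start tag that follows an already-emitted token now becomes a newline instead of vanishing, roughly matching how a browser would break the text. A sketch of the expected difference:

import bleach

bleach.clean("<p>Te<b>st</b>!</p><p>Hello</p>", tags=[], strip=True)
# bleach 4.1.0: "Test!Hello"
# bleach 5.0.0: "Test!\nHello"  (the second stripped <p> turns into a newline)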
lib/bleach/linkifier.py
@@ -2,7 +2,6 @@ import re
 
 from bleach import callbacks as linkify_callbacks
 from bleach import html5lib_shim
-from bleach.utils import alphabetize_attributes
 
 
 #: List of default callbacks
@@ -155,7 +154,7 @@ class Linker:
             omit_optional_tags=False,
             # linkify does not sanitize
             sanitize=False,
-            # linkify alphabetizes
+            # linkify preserves attr order
             alphabetical_attributes=False,
         )
 
@@ -228,7 +227,7 @@ class LinkifyFilter(html5lib_shim.Filter):
         :arg re email_re: email matching regex
 
         """
-        super(LinkifyFilter, self).__init__(source)
+        super().__init__(source)
 
         self.callbacks = callbacks or []
         self.skip_tags = skip_tags or []
@@ -316,7 +315,6 @@ class LinkifyFilter(html5lib_shim.Filter):
             else:
                 # Add an "a" tag for the new link
                 _text = attrs.pop("_text", "")
-                attrs = alphabetize_attributes(attrs)
                 new_tokens.extend(
                     [
                         {"type": "StartTag", "name": "a", "data": attrs},
@@ -332,8 +330,7 @@ class LinkifyFilter(html5lib_shim.Filter):
             if end < len(text):
                 new_tokens.append({"type": "Characters", "data": text[end:]})
 
-            for new_token in new_tokens:
-                yield new_token
+            yield from new_tokens
 
             continue
 
@@ -439,8 +436,6 @@ class LinkifyFilter(html5lib_shim.Filter):
                 new_tokens.append({"type": "Characters", "data": prefix})
 
             _text = attrs.pop("_text", "")
-            attrs = alphabetize_attributes(attrs)
-
             new_tokens.extend(
                 [
                     {"type": "StartTag", "name": "a", "data": attrs},
@@ -460,8 +455,7 @@ class LinkifyFilter(html5lib_shim.Filter):
             if end < len(text):
                 new_tokens.append({"type": "Characters", "data": text[end:]})
 
-            for new_token in new_tokens:
-                yield new_token
+            yield from new_tokens
 
             continue
 
@@ -493,14 +487,13 @@ class LinkifyFilter(html5lib_shim.Filter):
 
         else:
             new_text = attrs.pop("_text", "")
-            a_token["data"] = alphabetize_attributes(attrs)
+            a_token["data"] = attrs
 
             if text == new_text:
                 # The callbacks didn't change the text, so we yield the new "a"
                 # token, then whatever else was there, then the end "a" token
                 yield a_token
-                for mem in token_buffer[1:]:
-                    yield mem
+                yield from token_buffer[1:]
 
             else:
                 # If the callbacks changed the text, then we're going to drop
@@ -516,7 +509,7 @@ class LinkifyFilter(html5lib_shim.Filter):
 
         token_buffer = []
 
-        for token in super(LinkifyFilter, self).__iter__():
+        for token in super().__iter__():
             if in_a:
                 # Handle the case where we're in an "a" tag--we want to buffer tokens
                 # until we hit an end "a" tag.
@@ -524,8 +517,7 @@ class LinkifyFilter(html5lib_shim.Filter):
                 # Add the end tag to the token buffer and then handle them
                 # and yield anything returned
                 token_buffer.append(token)
-                for new_token in self.handle_a_tag(token_buffer):
-                    yield new_token
+                yield from self.handle_a_tag(token_buffer)
 
                 # Clear "a" related state and continue since we've yielded all
                 # the tokens we're going to yield
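On the linkifier side, dropping `alphabetize_attributes` means attributes on `<a>` tags flow through in source order, with callback-added attributes (like `rel="nofollow"`) appended, instead of being re-sorted. A sketch of the expected difference:

import bleach

bleach.linkify('<a title="docs" href="https://example.com">docs</a>')
# bleach 4.1.0 re-sorts:    '<a href="https://example.com" rel="nofollow" title="docs">docs</a>'
# bleach 5.0.0 keeps order: '<a title="docs" href="https://example.com" rel="nofollow">docs</a>'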
lib/bleach/sanitizer.py
@@ -6,7 +6,6 @@ from bleach._vendor.parse import urlparse
 from xml.sax.saxutils import unescape
 
 from bleach import html5lib_shim
-from bleach.utils import alphabetize_attributes
 
 
 #: List of allowed tags
@@ -33,9 +32,6 @@ ALLOWED_ATTRIBUTES = {
     "acronym": ["title"],
 }
 
-#: List of allowed styles
-ALLOWED_STYLES = []
-
 #: List of allowed protocols
 ALLOWED_PROTOCOLS = ["http", "https", "mailto"]
 
@@ -85,11 +81,11 @@ class Cleaner:
         self,
         tags=ALLOWED_TAGS,
         attributes=ALLOWED_ATTRIBUTES,
-        styles=ALLOWED_STYLES,
         protocols=ALLOWED_PROTOCOLS,
         strip=False,
         strip_comments=True,
         filters=None,
+        css_sanitizer=None,
     ):
         """Initializes a Cleaner
 
@@ -99,9 +95,6 @@ class Cleaner:
         :arg dict attributes: allowed attributes; can be a callable, list or dict;
             defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``
 
-        :arg list styles: allowed list of css styles; defaults to
-            ``bleach.sanitizer.ALLOWED_STYLES``
-
         :arg list protocols: allowed list of protocols for links; defaults
             to ``bleach.sanitizer.ALLOWED_PROTOCOLS``
 
@@ -118,14 +111,17 @@ class Cleaner:
             Using filters changes the output of ``bleach.Cleaner.clean``.
             Make sure the way the filters change the output are secure.
 
+        :arg CSSSanitizer css_sanitizer: instance with a "sanitize_css" method for
+            sanitizing style attribute values and style text; defaults to None
+
         """
         self.tags = tags
         self.attributes = attributes
-        self.styles = styles
         self.protocols = protocols
         self.strip = strip
         self.strip_comments = strip_comments
         self.filters = filters or []
+        self.css_sanitizer = css_sanitizer
 
         self.parser = html5lib_shim.BleachHTMLParser(
             tags=self.tags,
@@ -143,7 +139,7 @@ class Cleaner:
             resolve_entities=False,
             # Bleach has its own sanitizer, so don't use the html5lib one
             sanitize=False,
-            # Bleach sanitizer alphabetizes already, so don't use the html5lib one
+            # clean preserves attr order
             alphabetical_attributes=False,
         )
 
@@ -175,11 +171,10 @@ class Cleaner:
             attributes=self.attributes,
             strip_disallowed_elements=self.strip,
             strip_html_comments=self.strip_comments,
+            css_sanitizer=self.css_sanitizer,
             # html5lib-sanitizer things
             allowed_elements=self.tags,
-            allowed_css_properties=self.styles,
             allowed_protocols=self.protocols,
-            allowed_svg_properties=[],
         )
 
         # Apply any filters after the BleachSanitizerFilter
@@ -242,25 +237,25 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
     def __init__(
         self,
         source,
+        allowed_elements=ALLOWED_TAGS,
         attributes=ALLOWED_ATTRIBUTES,
+        allowed_protocols=ALLOWED_PROTOCOLS,
         strip_disallowed_elements=False,
         strip_html_comments=True,
+        css_sanitizer=None,
         **kwargs,
     ):
         """Creates a BleachSanitizerFilter instance
 
         :arg Treewalker source: stream
 
-        :arg list tags: allowed list of tags; defaults to
+        :arg list allowed_elements: allowed list of tags; defaults to
             ``bleach.sanitizer.ALLOWED_TAGS``
 
         :arg dict attributes: allowed attributes; can be a callable, list or dict;
             defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``
 
-        :arg list styles: allowed list of css styles; defaults to
-            ``bleach.sanitizer.ALLOWED_STYLES``
-
-        :arg list protocols: allowed list of protocols for links; defaults
+        :arg list allowed_protocols: allowed list of protocols for links; defaults
             to ``bleach.sanitizer.ALLOWED_PROTOCOLS``
 
         :arg bool strip_disallowed_elements: whether or not to strip disallowed
@@ -268,10 +263,14 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
 
         :arg bool strip_html_comments: whether or not to strip HTML comments
 
+        :arg CSSSanitizer css_sanitizer: instance with a "sanitize_css" method for
+            sanitizing style attribute values and style text; defaults to None
+
         """
         self.attr_filter = attribute_filter_factory(attributes)
         self.strip_disallowed_elements = strip_disallowed_elements
         self.strip_html_comments = strip_html_comments
+        self.css_sanitizer = css_sanitizer
 
         # filter out html5lib deprecation warnings to use bleach from BleachSanitizerFilter init
         warnings.filterwarnings(
@@ -280,7 +279,12 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
             category=DeprecationWarning,
             module="bleach._vendor.html5lib",
         )
-        return super(BleachSanitizerFilter, self).__init__(source, **kwargs)
+        return super().__init__(
+            source,
+            allowed_elements=allowed_elements,
+            allowed_protocols=allowed_protocols,
+            **kwargs,
+        )
 
     def sanitize_stream(self, token_iterator):
         for token in token_iterator:
@@ -290,8 +294,7 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
                 continue
 
             if isinstance(ret, list):
-                for subtoken in ret:
-                    yield subtoken
+                yield from ret
             else:
                 yield ret
 
@@ -358,10 +361,6 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
                 return None
 
             else:
-                if "data" in token:
-                    # Alphabetize the attributes before calling .disallowed_token()
-                    # so that the resulting string is stable
-                    token["data"] = alphabetize_attributes(token["data"])
                 return self.disallowed_token(token)
 
         elif token_type == "Comment":
@@ -547,12 +546,21 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
 
             # If it's a style attribute, sanitize it
            if namespaced_name == (None, "style"):
-                val = self.sanitize_css(val)
+                if self.css_sanitizer:
+                    val = self.css_sanitizer.sanitize_css(val)
+                else:
+                    # FIXME(willkg): if style is allowed, but no
+                    # css_sanitizer was set up, then this is probably a
+                    # mistake and we should raise an error here
+                    #
+                    # For now, we're going to set the value to "" because
+                    # there was no sanitizer set
+                    val = ""
 
             # At this point, we want to keep the attribute, so add it in
             attrs[namespaced_name] = val
 
-        token["data"] = alphabetize_attributes(attrs)
+        token["data"] = attrs
 
         return token
 
@@ -575,7 +583,7 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
             if ns is None or ns not in html5lib_shim.prefixes:
                 namespaced_name = name
             else:
-                namespaced_name = "%s:%s" % (html5lib_shim.prefixes[ns], name)
+                namespaced_name = "{}:{}".format(html5lib_shim.prefixes[ns], name)
 
             attrs.append(
                 ' %s="%s"'
@@ -587,7 +595,7 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
                     v,
                 )
             )
-        token["data"] = "<%s%s>" % (token["name"], "".join(attrs))
+        token["data"] = "<{}{}>".format(token["name"], "".join(attrs))
 
     else:
         token["data"] = "<%s>" % token["name"]
@@ -599,47 +607,3 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
 
         del token["name"]
         return token
-
-    def sanitize_css(self, style):
-        """Sanitizes css in style tags"""
-        # Convert entities in the style so that it can be parsed as CSS
-        style = html5lib_shim.convert_entities(style)
-
-        # Drop any url values before we do anything else
-        style = re.compile(r"url\s*\(\s*[^\s)]+?\s*\)\s*").sub(" ", style)
-
-        # The gauntlet of sanitization
-
-        # Validate the css in the style tag and if it's not valid, then drop
-        # the whole thing.
-        parts = style.split(";")
-        gauntlet = re.compile(
-            r"""^(  # consider a style attribute value as composed of:
-[/:,#%!.\s\w]       # a non-newline character
-|\w-\w              # 3 characters in the form \w-\w
-|'[\s\w]+'\s*       # a single quoted string of [\s\w]+ with trailing space
-|"[\s\w]+"          # a double quoted string of [\s\w]+
-|\([\d,%\.\s]+\)    # a parenthesized string of one or more digits, commas, periods, ...
-)*$""",  # ... percent signs, or whitespace e.g. from 'color: hsl(30,100%,50%)'
-            flags=re.U | re.VERBOSE,
-        )
-
-        for part in parts:
-            if not gauntlet.match(part):
-                return ""
-
-        if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
-            return ""
-
-        clean = []
-        for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style):
-            if not value:
-                continue
-
-            if prop.lower() in self.allowed_css_properties:
-                clean.append(prop + ": " + value + ";")
-
-            elif prop.lower() in self.allowed_svg_properties:
-                clean.append(prop + ": " + value + ";")
-
-        return " ".join(clean)
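One edge case worth noting from the style-attribute hunk above: if `style` is allowed but no `css_sanitizer` was configured, the attribute survives with its value blanked (see the FIXME) rather than being run through the old regex gauntlet. A sketch:

import bleach
from bleach.css_sanitizer import CSSSanitizer

bleach.clean('<p style="color: red">x</p>', tags=["p"], attributes={"p": ["style"]})
# no css_sanitizer configured -> '<p style="">x</p>'

bleach.clean(
    '<p style="color: red">x</p>',
    tags=["p"],
    attributes={"p": ["style"]},
    css_sanitizer=CSSSanitizer(allowed_css_properties=["color"]),
)
# with a sanitizer, the allowed "color" declaration is kept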
lib/bleach/utils.py (deleted, 21 lines)
@@ -1,21 +0,0 @@
-from collections import OrderedDict
-
-
-def _attr_key(attr):
-    """Returns appropriate key for sorting attribute names
-
-    Attribute names are a tuple of ``(namespace, name)`` where namespace can be
-    ``None`` or a string. These can't be compared in Python 3, so we conver the
-    ``None`` to an empty string.
-
-    """
-    key = (attr[0][0] or ""), attr[0][1]
-    return key
-
-
-def alphabetize_attributes(attrs):
-    """Takes a dict of attributes (or None) and returns them alphabetized"""
-    if not attrs:
-        return attrs
-
-    return OrderedDict([(k, v) for k, v in sorted(attrs.items(), key=_attr_key)])
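With `bleach/utils.py` deleted, nothing alphabetizes attributes anymore (matching the "clean preserves attr order" comments earlier), so `clean` keeps source attribute order too. A sketch using the default allow-list, where `<a>` permits `href` and `title`:

import bleach

bleach.clean('<a title="t" href="https://example.com">x</a>')
# bleach 4.1.0 alphabetized:        '<a href="https://example.com" title="t">x</a>'
# bleach 5.0.0 preserves the order: '<a title="t" href="https://example.com">x</a>'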
requirements.txt
@@ -5,7 +5,7 @@ backports.csv==1.0.7
 backports.functools-lru-cache==1.6.4
 backports.zoneinfo==0.2.1
 beautifulsoup4==4.10.0
-bleach==4.1.0
+bleach==5.0.0
 certifi==2021.10.8
 cheroot==8.6.0
 cherrypy==18.6.1