mirror of
https://github.com/Tautulli/Tautulli.git
synced 2025-07-06 05:01:14 -07:00
Bump bleach from 5.0.1 to 6.0.0 (#1979)
* Bump bleach from 5.0.1 to 6.0.0 Bumps [bleach](https://github.com/mozilla/bleach) from 5.0.1 to 6.0.0. - [Release notes](https://github.com/mozilla/bleach/releases) - [Changelog](https://github.com/mozilla/bleach/blob/main/CHANGES) - [Commits](https://github.com/mozilla/bleach/compare/v5.0.1...v6.0.0) --- updated-dependencies: - dependency-name: bleach dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] <support@github.com> * Update bleach==6.0.0 --------- Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: JonnyWong16 <9099342+JonnyWong16@users.noreply.github.com> [skip ci]
This commit is contained in:
parent
6b1b6d0f32
commit
1466a391d1
5 changed files with 291 additions and 198 deletions
|
@ -11,9 +11,9 @@ from bleach.sanitizer import (
|
||||||
|
|
||||||
|
|
||||||
# yyyymmdd
|
# yyyymmdd
|
||||||
__releasedate__ = "20220627"
|
__releasedate__ = "20230123"
|
||||||
# x.y.z or x.y.z.dev0 -- semver
|
# x.y.z or x.y.z.dev0 -- semver
|
||||||
__version__ = "5.0.1"
|
__version__ = "6.0.0"
|
||||||
|
|
||||||
|
|
||||||
__all__ = ["clean", "linkify"]
|
__all__ = ["clean", "linkify"]
|
||||||
|
@ -52,7 +52,7 @@ def clean(
|
||||||
|
|
||||||
:arg str text: the text to clean
|
:arg str text: the text to clean
|
||||||
|
|
||||||
:arg list tags: allowed list of tags; defaults to
|
:arg set tags: set of allowed tags; defaults to
|
||||||
``bleach.sanitizer.ALLOWED_TAGS``
|
``bleach.sanitizer.ALLOWED_TAGS``
|
||||||
|
|
||||||
:arg dict attributes: allowed attributes; can be a callable, list or dict;
|
:arg dict attributes: allowed attributes; can be a callable, list or dict;
|
||||||
|
|
|
@ -38,6 +38,9 @@ from bleach._vendor.html5lib.filters.sanitizer import (
|
||||||
allowed_protocols,
|
allowed_protocols,
|
||||||
allowed_css_properties,
|
allowed_css_properties,
|
||||||
allowed_svg_properties,
|
allowed_svg_properties,
|
||||||
|
attr_val_is_uri,
|
||||||
|
svg_attr_val_allows_ref,
|
||||||
|
svg_allow_local_href,
|
||||||
) # noqa: E402 module level import not at top of file
|
) # noqa: E402 module level import not at top of file
|
||||||
from bleach._vendor.html5lib.filters.sanitizer import (
|
from bleach._vendor.html5lib.filters.sanitizer import (
|
||||||
Filter as SanitizerFilter,
|
Filter as SanitizerFilter,
|
||||||
|
@ -78,127 +81,129 @@ TAG_TOKEN_TYPE_PARSEERROR = constants.tokenTypes["ParseError"]
|
||||||
|
|
||||||
#: List of valid HTML tags, from WHATWG HTML Living Standard as of 2018-10-17
|
#: List of valid HTML tags, from WHATWG HTML Living Standard as of 2018-10-17
|
||||||
#: https://html.spec.whatwg.org/multipage/indices.html#elements-3
|
#: https://html.spec.whatwg.org/multipage/indices.html#elements-3
|
||||||
HTML_TAGS = [
|
HTML_TAGS = frozenset(
|
||||||
"a",
|
(
|
||||||
"abbr",
|
"a",
|
||||||
"address",
|
"abbr",
|
||||||
"area",
|
"address",
|
||||||
"article",
|
"area",
|
||||||
"aside",
|
"article",
|
||||||
"audio",
|
"aside",
|
||||||
"b",
|
"audio",
|
||||||
"base",
|
"b",
|
||||||
"bdi",
|
"base",
|
||||||
"bdo",
|
"bdi",
|
||||||
"blockquote",
|
"bdo",
|
||||||
"body",
|
"blockquote",
|
||||||
"br",
|
"body",
|
||||||
"button",
|
"br",
|
||||||
"canvas",
|
"button",
|
||||||
"caption",
|
"canvas",
|
||||||
"cite",
|
"caption",
|
||||||
"code",
|
"cite",
|
||||||
"col",
|
"code",
|
||||||
"colgroup",
|
"col",
|
||||||
"data",
|
"colgroup",
|
||||||
"datalist",
|
"data",
|
||||||
"dd",
|
"datalist",
|
||||||
"del",
|
"dd",
|
||||||
"details",
|
"del",
|
||||||
"dfn",
|
"details",
|
||||||
"dialog",
|
"dfn",
|
||||||
"div",
|
"dialog",
|
||||||
"dl",
|
"div",
|
||||||
"dt",
|
"dl",
|
||||||
"em",
|
"dt",
|
||||||
"embed",
|
"em",
|
||||||
"fieldset",
|
"embed",
|
||||||
"figcaption",
|
"fieldset",
|
||||||
"figure",
|
"figcaption",
|
||||||
"footer",
|
"figure",
|
||||||
"form",
|
"footer",
|
||||||
"h1",
|
"form",
|
||||||
"h2",
|
"h1",
|
||||||
"h3",
|
"h2",
|
||||||
"h4",
|
"h3",
|
||||||
"h5",
|
"h4",
|
||||||
"h6",
|
"h5",
|
||||||
"head",
|
"h6",
|
||||||
"header",
|
"head",
|
||||||
"hgroup",
|
"header",
|
||||||
"hr",
|
"hgroup",
|
||||||
"html",
|
"hr",
|
||||||
"i",
|
"html",
|
||||||
"iframe",
|
"i",
|
||||||
"img",
|
"iframe",
|
||||||
"input",
|
"img",
|
||||||
"ins",
|
"input",
|
||||||
"kbd",
|
"ins",
|
||||||
"keygen",
|
"kbd",
|
||||||
"label",
|
"keygen",
|
||||||
"legend",
|
"label",
|
||||||
"li",
|
"legend",
|
||||||
"link",
|
"li",
|
||||||
"map",
|
"link",
|
||||||
"mark",
|
"map",
|
||||||
"menu",
|
"mark",
|
||||||
"meta",
|
"menu",
|
||||||
"meter",
|
"meta",
|
||||||
"nav",
|
"meter",
|
||||||
"noscript",
|
"nav",
|
||||||
"object",
|
"noscript",
|
||||||
"ol",
|
"object",
|
||||||
"optgroup",
|
"ol",
|
||||||
"option",
|
"optgroup",
|
||||||
"output",
|
"option",
|
||||||
"p",
|
"output",
|
||||||
"param",
|
"p",
|
||||||
"picture",
|
"param",
|
||||||
"pre",
|
"picture",
|
||||||
"progress",
|
"pre",
|
||||||
"q",
|
"progress",
|
||||||
"rp",
|
"q",
|
||||||
"rt",
|
"rp",
|
||||||
"ruby",
|
"rt",
|
||||||
"s",
|
"ruby",
|
||||||
"samp",
|
"s",
|
||||||
"script",
|
"samp",
|
||||||
"section",
|
"script",
|
||||||
"select",
|
"section",
|
||||||
"slot",
|
"select",
|
||||||
"small",
|
"slot",
|
||||||
"source",
|
"small",
|
||||||
"span",
|
"source",
|
||||||
"strong",
|
"span",
|
||||||
"style",
|
"strong",
|
||||||
"sub",
|
"style",
|
||||||
"summary",
|
"sub",
|
||||||
"sup",
|
"summary",
|
||||||
"table",
|
"sup",
|
||||||
"tbody",
|
"table",
|
||||||
"td",
|
"tbody",
|
||||||
"template",
|
"td",
|
||||||
"textarea",
|
"template",
|
||||||
"tfoot",
|
"textarea",
|
||||||
"th",
|
"tfoot",
|
||||||
"thead",
|
"th",
|
||||||
"time",
|
"thead",
|
||||||
"title",
|
"time",
|
||||||
"tr",
|
"title",
|
||||||
"track",
|
"tr",
|
||||||
"u",
|
"track",
|
||||||
"ul",
|
"u",
|
||||||
"var",
|
"ul",
|
||||||
"video",
|
"var",
|
||||||
"wbr",
|
"video",
|
||||||
]
|
"wbr",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
#: List of block level HTML tags, as per https://github.com/mozilla/bleach/issues/369
|
#: List of block level HTML tags, as per https://github.com/mozilla/bleach/issues/369
|
||||||
#: from mozilla on 2019.07.11
|
#: from mozilla on 2019.07.11
|
||||||
#: https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements#Elements
|
#: https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements#Elements
|
||||||
HTML_TAGS_BLOCK_LEVEL = frozenset(
|
HTML_TAGS_BLOCK_LEVEL = frozenset(
|
||||||
[
|
(
|
||||||
"address",
|
"address",
|
||||||
"article",
|
"article",
|
||||||
"aside",
|
"aside",
|
||||||
|
@ -232,7 +237,7 @@ HTML_TAGS_BLOCK_LEVEL = frozenset(
|
||||||
"section",
|
"section",
|
||||||
"table",
|
"table",
|
||||||
"ul",
|
"ul",
|
||||||
]
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -473,7 +478,7 @@ class BleachHTMLParser(HTMLParser):
|
||||||
|
|
||||||
def __init__(self, tags, strip, consume_entities, **kwargs):
|
def __init__(self, tags, strip, consume_entities, **kwargs):
|
||||||
"""
|
"""
|
||||||
:arg tags: list of allowed tags--everything else is either stripped or
|
:arg tags: set of allowed tags--everything else is either stripped or
|
||||||
escaped; if None, then this doesn't look at tags at all
|
escaped; if None, then this doesn't look at tags at all
|
||||||
:arg strip: whether to strip disallowed tags (True) or escape them (False);
|
:arg strip: whether to strip disallowed tags (True) or escape them (False);
|
||||||
if tags=None, then this doesn't have any effect
|
if tags=None, then this doesn't have any effect
|
||||||
|
@ -481,7 +486,9 @@ class BleachHTMLParser(HTMLParser):
|
||||||
leave them as is when tokenizing (BleachHTMLTokenizer-added behavior)
|
leave them as is when tokenizing (BleachHTMLTokenizer-added behavior)
|
||||||
|
|
||||||
"""
|
"""
|
||||||
self.tags = [tag.lower() for tag in tags] if tags is not None else None
|
self.tags = (
|
||||||
|
frozenset((tag.lower() for tag in tags)) if tags is not None else None
|
||||||
|
)
|
||||||
self.strip = strip
|
self.strip = strip
|
||||||
self.consume_entities = consume_entities
|
self.consume_entities = consume_entities
|
||||||
super().__init__(**kwargs)
|
super().__init__(**kwargs)
|
||||||
|
@ -691,7 +698,7 @@ class BleachHTMLSerializer(HTMLSerializer):
|
||||||
# Only leave entities in that are not ambiguous. If they're
|
# Only leave entities in that are not ambiguous. If they're
|
||||||
# ambiguous, then we escape the ampersand.
|
# ambiguous, then we escape the ampersand.
|
||||||
if entity is not None and convert_entity(entity) is not None:
|
if entity is not None and convert_entity(entity) is not None:
|
||||||
yield "&" + entity + ";"
|
yield f"&{entity};"
|
||||||
|
|
||||||
# Length of the entity plus 2--one for & at the beginning
|
# Length of the entity plus 2--one for & at the beginning
|
||||||
# and one for ; at the end
|
# and one for ; at the end
|
||||||
|
|
|
@ -120,9 +120,10 @@ class Linker:
|
||||||
:arg list callbacks: list of callbacks to run when adjusting tag attributes;
|
:arg list callbacks: list of callbacks to run when adjusting tag attributes;
|
||||||
defaults to ``bleach.linkifier.DEFAULT_CALLBACKS``
|
defaults to ``bleach.linkifier.DEFAULT_CALLBACKS``
|
||||||
|
|
||||||
:arg list skip_tags: list of tags that you don't want to linkify the
|
:arg set skip_tags: set of tags that you don't want to linkify the
|
||||||
contents of; for example, you could set this to ``['pre']`` to skip
|
contents of; for example, you could set this to ``{'pre'}`` to skip
|
||||||
linkifying contents of ``pre`` tags
|
linkifying contents of ``pre`` tags; ``None`` means you don't
|
||||||
|
want linkify to skip any tags
|
||||||
|
|
||||||
:arg bool parse_email: whether or not to linkify email addresses
|
:arg bool parse_email: whether or not to linkify email addresses
|
||||||
|
|
||||||
|
@ -130,7 +131,7 @@ class Linker:
|
||||||
|
|
||||||
:arg email_re: email matching regex
|
:arg email_re: email matching regex
|
||||||
|
|
||||||
:arg list recognized_tags: the list of tags that linkify knows about;
|
:arg set recognized_tags: the set of tags that linkify knows about;
|
||||||
everything else gets escaped
|
everything else gets escaped
|
||||||
|
|
||||||
:returns: linkified text as unicode
|
:returns: linkified text as unicode
|
||||||
|
@ -145,15 +146,18 @@ class Linker:
|
||||||
# Create a parser/tokenizer that allows all HTML tags and escapes
|
# Create a parser/tokenizer that allows all HTML tags and escapes
|
||||||
# anything not in that list.
|
# anything not in that list.
|
||||||
self.parser = html5lib_shim.BleachHTMLParser(
|
self.parser = html5lib_shim.BleachHTMLParser(
|
||||||
tags=recognized_tags,
|
tags=frozenset(recognized_tags),
|
||||||
strip=False,
|
strip=False,
|
||||||
consume_entities=True,
|
consume_entities=False,
|
||||||
namespaceHTMLElements=False,
|
namespaceHTMLElements=False,
|
||||||
)
|
)
|
||||||
self.walker = html5lib_shim.getTreeWalker("etree")
|
self.walker = html5lib_shim.getTreeWalker("etree")
|
||||||
self.serializer = html5lib_shim.BleachHTMLSerializer(
|
self.serializer = html5lib_shim.BleachHTMLSerializer(
|
||||||
quote_attr_values="always",
|
quote_attr_values="always",
|
||||||
omit_optional_tags=False,
|
omit_optional_tags=False,
|
||||||
|
# We want to leave entities as they are without escaping or
|
||||||
|
# resolving or expanding
|
||||||
|
resolve_entities=False,
|
||||||
# linkify does not sanitize
|
# linkify does not sanitize
|
||||||
sanitize=False,
|
sanitize=False,
|
||||||
# linkify preserves attr order
|
# linkify preserves attr order
|
||||||
|
@ -218,8 +222,8 @@ class LinkifyFilter(html5lib_shim.Filter):
|
||||||
:arg list callbacks: list of callbacks to run when adjusting tag attributes;
|
:arg list callbacks: list of callbacks to run when adjusting tag attributes;
|
||||||
defaults to ``bleach.linkifier.DEFAULT_CALLBACKS``
|
defaults to ``bleach.linkifier.DEFAULT_CALLBACKS``
|
||||||
|
|
||||||
:arg list skip_tags: list of tags that you don't want to linkify the
|
:arg set skip_tags: set of tags that you don't want to linkify the
|
||||||
contents of; for example, you could set this to ``['pre']`` to skip
|
contents of; for example, you could set this to ``{'pre'}`` to skip
|
||||||
linkifying contents of ``pre`` tags
|
linkifying contents of ``pre`` tags
|
||||||
|
|
||||||
:arg bool parse_email: whether or not to linkify email addresses
|
:arg bool parse_email: whether or not to linkify email addresses
|
||||||
|
@ -232,7 +236,7 @@ class LinkifyFilter(html5lib_shim.Filter):
|
||||||
super().__init__(source)
|
super().__init__(source)
|
||||||
|
|
||||||
self.callbacks = callbacks or []
|
self.callbacks = callbacks or []
|
||||||
self.skip_tags = skip_tags or []
|
self.skip_tags = skip_tags or {}
|
||||||
self.parse_email = parse_email
|
self.parse_email = parse_email
|
||||||
|
|
||||||
self.url_re = url_re
|
self.url_re = url_re
|
||||||
|
@ -510,6 +514,62 @@ class LinkifyFilter(html5lib_shim.Filter):
|
||||||
yield {"type": "Characters", "data": str(new_text)}
|
yield {"type": "Characters", "data": str(new_text)}
|
||||||
yield token_buffer[-1]
|
yield token_buffer[-1]
|
||||||
|
|
||||||
|
def extract_entities(self, token):
|
||||||
|
"""Handles Characters tokens with entities
|
||||||
|
|
||||||
|
Our overridden tokenizer doesn't do anything with entities. However,
|
||||||
|
that means that the serializer will convert all ``&`` in Characters
|
||||||
|
tokens to ``&amp;``.
|
||||||
|
|
||||||
|
Since we don't want that, we extract entities here and convert them to
|
||||||
|
Entity tokens so the serializer will let them be.
|
||||||
|
|
||||||
|
:arg token: the Characters token to work on
|
||||||
|
|
||||||
|
:returns: generator of tokens
|
||||||
|
|
||||||
|
"""
|
||||||
|
data = token.get("data", "")
|
||||||
|
|
||||||
|
# If there isn't a & in the data, we can return now
|
||||||
|
if "&" not in data:
|
||||||
|
yield token
|
||||||
|
return
|
||||||
|
|
||||||
|
new_tokens = []
|
||||||
|
|
||||||
|
# For each possible entity that starts with a "&", we try to extract an
|
||||||
|
# actual entity and re-tokenize accordingly
|
||||||
|
for part in html5lib_shim.next_possible_entity(data):
|
||||||
|
if not part:
|
||||||
|
continue
|
||||||
|
|
||||||
|
if part.startswith("&"):
|
||||||
|
entity = html5lib_shim.match_entity(part)
|
||||||
|
if entity is not None:
|
||||||
|
if entity == "amp":
|
||||||
|
# LinkifyFilter can't match urls across token boundaries
|
||||||
|
# which is problematic with &amp; since that shows up in
|
||||||
|
# querystrings all the time. This special-cases &amp;
|
||||||
|
# and converts it to a & and sticks it in as a
|
||||||
|
# Characters token. It'll get merged with surrounding
|
||||||
|
# tokens in the BleachSanitizerFilter.__iter__ and
|
||||||
|
# escaped in the serializer.
|
||||||
|
new_tokens.append({"type": "Characters", "data": "&"})
|
||||||
|
else:
|
||||||
|
new_tokens.append({"type": "Entity", "name": entity})
|
||||||
|
|
||||||
|
# Length of the entity plus 2--one for & at the beginning
|
||||||
|
# and one for ; at the end
|
||||||
|
remainder = part[len(entity) + 2 :]
|
||||||
|
if remainder:
|
||||||
|
new_tokens.append({"type": "Characters", "data": remainder})
|
||||||
|
continue
|
||||||
|
|
||||||
|
new_tokens.append({"type": "Characters", "data": part})
|
||||||
|
|
||||||
|
yield from new_tokens
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
in_a = False
|
in_a = False
|
||||||
in_skip_tag = None
|
in_skip_tag = None
|
||||||
|
@ -564,8 +624,8 @@ class LinkifyFilter(html5lib_shim.Filter):
|
||||||
|
|
||||||
new_stream = self.handle_links(new_stream)
|
new_stream = self.handle_links(new_stream)
|
||||||
|
|
||||||
for token in new_stream:
|
for new_token in new_stream:
|
||||||
yield token
|
yield from self.extract_entities(new_token)
|
||||||
|
|
||||||
# We've already yielded this token, so continue
|
# We've already yielded this token, so continue
|
||||||
continue
|
continue
|
||||||
|
|
|
@ -8,21 +8,23 @@ from bleach import html5lib_shim
|
||||||
from bleach import parse_shim
|
from bleach import parse_shim
|
||||||
|
|
||||||
|
|
||||||
#: List of allowed tags
|
#: Set of allowed tags
|
||||||
ALLOWED_TAGS = [
|
ALLOWED_TAGS = frozenset(
|
||||||
"a",
|
(
|
||||||
"abbr",
|
"a",
|
||||||
"acronym",
|
"abbr",
|
||||||
"b",
|
"acronym",
|
||||||
"blockquote",
|
"b",
|
||||||
"code",
|
"blockquote",
|
||||||
"em",
|
"code",
|
||||||
"i",
|
"em",
|
||||||
"li",
|
"i",
|
||||||
"ol",
|
"li",
|
||||||
"strong",
|
"ol",
|
||||||
"ul",
|
"strong",
|
||||||
]
|
"ul",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
#: Map of allowed attributes by tag
|
#: Map of allowed attributes by tag
|
||||||
|
@ -33,7 +35,7 @@ ALLOWED_ATTRIBUTES = {
|
||||||
}
|
}
|
||||||
|
|
||||||
#: List of allowed protocols
|
#: List of allowed protocols
|
||||||
ALLOWED_PROTOCOLS = ["http", "https", "mailto"]
|
ALLOWED_PROTOCOLS = frozenset(("http", "https", "mailto"))
|
||||||
|
|
||||||
#: Invisible characters--0 to and including 31 except 9 (tab), 10 (lf), and 13 (cr)
|
#: Invisible characters--0 to and including 31 except 9 (tab), 10 (lf), and 13 (cr)
|
||||||
INVISIBLE_CHARACTERS = "".join(
|
INVISIBLE_CHARACTERS = "".join(
|
||||||
|
@ -48,6 +50,10 @@ INVISIBLE_CHARACTERS_RE = re.compile("[" + INVISIBLE_CHARACTERS + "]", re.UNICOD
|
||||||
INVISIBLE_REPLACEMENT_CHAR = "?"
|
INVISIBLE_REPLACEMENT_CHAR = "?"
|
||||||
|
|
||||||
|
|
||||||
|
class NoCssSanitizerWarning(UserWarning):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
class Cleaner:
|
class Cleaner:
|
||||||
"""Cleaner for cleaning HTML fragments of malicious content
|
"""Cleaner for cleaning HTML fragments of malicious content
|
||||||
|
|
||||||
|
@ -89,7 +95,7 @@ class Cleaner:
|
||||||
):
|
):
|
||||||
"""Initializes a Cleaner
|
"""Initializes a Cleaner
|
||||||
|
|
||||||
:arg list tags: allowed list of tags; defaults to
|
:arg set tags: set of allowed tags; defaults to
|
||||||
``bleach.sanitizer.ALLOWED_TAGS``
|
``bleach.sanitizer.ALLOWED_TAGS``
|
||||||
|
|
||||||
:arg dict attributes: allowed attributes; can be a callable, list or dict;
|
:arg dict attributes: allowed attributes; can be a callable, list or dict;
|
||||||
|
@ -143,6 +149,25 @@ class Cleaner:
|
||||||
alphabetical_attributes=False,
|
alphabetical_attributes=False,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if css_sanitizer is None:
|
||||||
|
# FIXME(willkg): this doesn't handle when attributes or an
|
||||||
|
# attributes value is a callable
|
||||||
|
attributes_values = []
|
||||||
|
if isinstance(attributes, list):
|
||||||
|
attributes_values = attributes
|
||||||
|
|
||||||
|
elif isinstance(attributes, dict):
|
||||||
|
attributes_values = []
|
||||||
|
for values in attributes.values():
|
||||||
|
if isinstance(values, (list, tuple)):
|
||||||
|
attributes_values.extend(values)
|
||||||
|
|
||||||
|
if "style" in attributes_values:
|
||||||
|
warnings.warn(
|
||||||
|
"'style' attribute specified, but css_sanitizer not set.",
|
||||||
|
category=NoCssSanitizerWarning,
|
||||||
|
)
|
||||||
|
|
||||||
def clean(self, text):
|
def clean(self, text):
|
||||||
"""Cleans text and returns sanitized result as unicode
|
"""Cleans text and returns sanitized result as unicode
|
||||||
|
|
||||||
|
@ -155,9 +180,8 @@ class Cleaner:
|
||||||
"""
|
"""
|
||||||
if not isinstance(text, str):
|
if not isinstance(text, str):
|
||||||
message = (
|
message = (
|
||||||
"argument cannot be of '{name}' type, must be of text type".format(
|
f"argument cannot be of {text.__class__.__name__!r} type, "
|
||||||
name=text.__class__.__name__
|
+ "must be of text type"
|
||||||
)
|
|
||||||
)
|
)
|
||||||
raise TypeError(message)
|
raise TypeError(message)
|
||||||
|
|
||||||
|
@ -167,13 +191,11 @@ class Cleaner:
|
||||||
dom = self.parser.parseFragment(text)
|
dom = self.parser.parseFragment(text)
|
||||||
filtered = BleachSanitizerFilter(
|
filtered = BleachSanitizerFilter(
|
||||||
source=self.walker(dom),
|
source=self.walker(dom),
|
||||||
# Bleach-sanitizer-specific things
|
allowed_tags=self.tags,
|
||||||
attributes=self.attributes,
|
attributes=self.attributes,
|
||||||
strip_disallowed_elements=self.strip,
|
strip_disallowed_tags=self.strip,
|
||||||
strip_html_comments=self.strip_comments,
|
strip_html_comments=self.strip_comments,
|
||||||
css_sanitizer=self.css_sanitizer,
|
css_sanitizer=self.css_sanitizer,
|
||||||
# html5lib-sanitizer things
|
|
||||||
allowed_elements=self.tags,
|
|
||||||
allowed_protocols=self.protocols,
|
allowed_protocols=self.protocols,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -237,19 +259,21 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
source,
|
source,
|
||||||
allowed_elements=ALLOWED_TAGS,
|
allowed_tags=ALLOWED_TAGS,
|
||||||
attributes=ALLOWED_ATTRIBUTES,
|
attributes=ALLOWED_ATTRIBUTES,
|
||||||
allowed_protocols=ALLOWED_PROTOCOLS,
|
allowed_protocols=ALLOWED_PROTOCOLS,
|
||||||
strip_disallowed_elements=False,
|
attr_val_is_uri=html5lib_shim.attr_val_is_uri,
|
||||||
|
svg_attr_val_allows_ref=html5lib_shim.svg_attr_val_allows_ref,
|
||||||
|
svg_allow_local_href=html5lib_shim.svg_allow_local_href,
|
||||||
|
strip_disallowed_tags=False,
|
||||||
strip_html_comments=True,
|
strip_html_comments=True,
|
||||||
css_sanitizer=None,
|
css_sanitizer=None,
|
||||||
**kwargs,
|
|
||||||
):
|
):
|
||||||
"""Creates a BleachSanitizerFilter instance
|
"""Creates a BleachSanitizerFilter instance
|
||||||
|
|
||||||
:arg source: html5lib TreeWalker stream as an html5lib TreeWalker
|
:arg source: html5lib TreeWalker stream as an html5lib TreeWalker
|
||||||
|
|
||||||
:arg list allowed_elements: allowed list of tags; defaults to
|
:arg set allowed_tags: set of allowed tags; defaults to
|
||||||
``bleach.sanitizer.ALLOWED_TAGS``
|
``bleach.sanitizer.ALLOWED_TAGS``
|
||||||
|
|
||||||
:arg dict attributes: allowed attributes; can be a callable, list or dict;
|
:arg dict attributes: allowed attributes; can be a callable, list or dict;
|
||||||
|
@ -258,8 +282,16 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
|
||||||
:arg list allowed_protocols: allowed list of protocols for links; defaults
|
:arg list allowed_protocols: allowed list of protocols for links; defaults
|
||||||
to ``bleach.sanitizer.ALLOWED_PROTOCOLS``
|
to ``bleach.sanitizer.ALLOWED_PROTOCOLS``
|
||||||
|
|
||||||
:arg bool strip_disallowed_elements: whether or not to strip disallowed
|
:arg attr_val_is_uri: set of attributes that have URI values
|
||||||
elements
|
|
||||||
|
:arg svg_attr_val_allows_ref: set of SVG attributes that can have
|
||||||
|
references
|
||||||
|
|
||||||
|
:arg svg_allow_local_href: set of SVG elements that can have local
|
||||||
|
hrefs
|
||||||
|
|
||||||
|
:arg bool strip_disallowed_tags: whether or not to strip disallowed
|
||||||
|
tags
|
||||||
|
|
||||||
:arg bool strip_html_comments: whether or not to strip HTML comments
|
:arg bool strip_html_comments: whether or not to strip HTML comments
|
||||||
|
|
||||||
|
@ -267,24 +299,24 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
|
||||||
sanitizing style attribute values and style text; defaults to None
|
sanitizing style attribute values and style text; defaults to None
|
||||||
|
|
||||||
"""
|
"""
|
||||||
self.attr_filter = attribute_filter_factory(attributes)
|
# NOTE(willkg): This is the superclass of
|
||||||
self.strip_disallowed_elements = strip_disallowed_elements
|
# html5lib.filters.sanitizer.Filter. We call this directly skipping the
|
||||||
self.strip_html_comments = strip_html_comments
|
# __init__ for html5lib.filters.sanitizer.Filter because that does
|
||||||
self.css_sanitizer = css_sanitizer
|
# things we don't need to do and kicks up the deprecation warning for
|
||||||
|
# using Sanitizer.
|
||||||
|
html5lib_shim.Filter.__init__(self, source)
|
||||||
|
|
||||||
# filter out html5lib deprecation warnings to use bleach from BleachSanitizerFilter init
|
self.allowed_tags = frozenset(allowed_tags)
|
||||||
warnings.filterwarnings(
|
self.allowed_protocols = frozenset(allowed_protocols)
|
||||||
"ignore",
|
|
||||||
message="html5lib's sanitizer is deprecated",
|
self.attr_filter = attribute_filter_factory(attributes)
|
||||||
category=DeprecationWarning,
|
self.strip_disallowed_tags = strip_disallowed_tags
|
||||||
module="bleach._vendor.html5lib",
|
self.strip_html_comments = strip_html_comments
|
||||||
)
|
|
||||||
return super().__init__(
|
self.attr_val_is_uri = attr_val_is_uri
|
||||||
source,
|
self.svg_attr_val_allows_ref = svg_attr_val_allows_ref
|
||||||
allowed_elements=allowed_elements,
|
self.css_sanitizer = css_sanitizer
|
||||||
allowed_protocols=allowed_protocols,
|
self.svg_allow_local_href = svg_allow_local_href
|
||||||
**kwargs,
|
|
||||||
)
|
|
||||||
|
|
||||||
def sanitize_stream(self, token_iterator):
|
def sanitize_stream(self, token_iterator):
|
||||||
for token in token_iterator:
|
for token in token_iterator:
|
||||||
|
@ -354,10 +386,10 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
|
||||||
"""
|
"""
|
||||||
token_type = token["type"]
|
token_type = token["type"]
|
||||||
if token_type in ["StartTag", "EndTag", "EmptyTag"]:
|
if token_type in ["StartTag", "EndTag", "EmptyTag"]:
|
||||||
if token["name"] in self.allowed_elements:
|
if token["name"] in self.allowed_tags:
|
||||||
return self.allow_token(token)
|
return self.allow_token(token)
|
||||||
|
|
||||||
elif self.strip_disallowed_elements:
|
elif self.strip_disallowed_tags:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
@ -570,7 +602,7 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
|
||||||
def disallowed_token(self, token):
|
def disallowed_token(self, token):
|
||||||
token_type = token["type"]
|
token_type = token["type"]
|
||||||
if token_type == "EndTag":
|
if token_type == "EndTag":
|
||||||
token["data"] = "</%s>" % token["name"]
|
token["data"] = f"</{token['name']}>"
|
||||||
|
|
||||||
elif token["data"]:
|
elif token["data"]:
|
||||||
assert token_type in ("StartTag", "EmptyTag")
|
assert token_type in ("StartTag", "EmptyTag")
|
||||||
|
@ -586,25 +618,19 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
|
||||||
if ns is None or ns not in html5lib_shim.prefixes:
|
if ns is None or ns not in html5lib_shim.prefixes:
|
||||||
namespaced_name = name
|
namespaced_name = name
|
||||||
else:
|
else:
|
||||||
namespaced_name = "{}:{}".format(html5lib_shim.prefixes[ns], name)
|
namespaced_name = f"{html5lib_shim.prefixes[ns]}:{name}"
|
||||||
|
|
||||||
attrs.append(
|
# NOTE(willkg): HTMLSerializer escapes attribute values
|
||||||
' %s="%s"'
|
# already, so if we do it here (like HTMLSerializer does),
|
||||||
% (
|
# then we end up double-escaping.
|
||||||
namespaced_name,
|
attrs.append(f' {namespaced_name}="{v}"')
|
||||||
# NOTE(willkg): HTMLSerializer escapes attribute values
|
token["data"] = f"<{token['name']}{''.join(attrs)}>"
|
||||||
# already, so if we do it here (like HTMLSerializer does),
|
|
||||||
# then we end up double-escaping.
|
|
||||||
v,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
token["data"] = "<{}{}>".format(token["name"], "".join(attrs))
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
token["data"] = "<%s>" % token["name"]
|
token["data"] = f"<{token['name']}>"
|
||||||
|
|
||||||
if token.get("selfClosing"):
|
if token.get("selfClosing"):
|
||||||
token["data"] = token["data"][:-1] + "/>"
|
token["data"] = f"{token['data'][:-1]}/>"
|
||||||
|
|
||||||
token["type"] = "Characters"
|
token["type"] = "Characters"
|
||||||
|
|
||||||
|
|
|
@ -5,7 +5,7 @@ backports.csv==1.0.7
|
||||||
backports.functools-lru-cache==1.6.4
|
backports.functools-lru-cache==1.6.4
|
||||||
backports.zoneinfo==0.2.1;python_version<"3.9"
|
backports.zoneinfo==0.2.1;python_version<"3.9"
|
||||||
beautifulsoup4==4.11.1
|
beautifulsoup4==4.11.1
|
||||||
bleach==5.0.1
|
bleach==6.0.0
|
||||||
certifi==2022.12.7
|
certifi==2022.12.7
|
||||||
cheroot==9.0.0
|
cheroot==9.0.0
|
||||||
cherrypy==18.8.0
|
cherrypy==18.8.0
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue