Bump bleach from 4.1.0 to 5.0.0 (#1708)

* Bump bleach from 4.1.0 to 5.0.0

Bumps [bleach](https://github.com/mozilla/bleach) from 4.1.0 to 5.0.0.
- [Release notes](https://github.com/mozilla/bleach/releases)
- [Changelog](https://github.com/mozilla/bleach/blob/main/CHANGES)
- [Commits](https://github.com/mozilla/bleach/compare/v4.1.0...v5.0.0)

---
updated-dependencies:
- dependency-name: bleach
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>

* Update bleach==5.0.0

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: JonnyWong16 <9099342+JonnyWong16@users.noreply.github.com>

[skip ci]
This commit is contained in:
dependabot[bot] 2022-05-16 20:41:47 -07:00 committed by GitHub
commit a1fe0b04d7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 264 additions and 151 deletions

View file

@ -6,7 +6,6 @@ from bleach._vendor.parse import urlparse
from xml.sax.saxutils import unescape
from bleach import html5lib_shim
from bleach.utils import alphabetize_attributes
#: List of allowed tags
@ -33,9 +32,6 @@ ALLOWED_ATTRIBUTES = {
"acronym": ["title"],
}
#: List of allowed styles
ALLOWED_STYLES = []
#: List of allowed protocols
ALLOWED_PROTOCOLS = ["http", "https", "mailto"]
@ -85,11 +81,11 @@ class Cleaner:
self,
tags=ALLOWED_TAGS,
attributes=ALLOWED_ATTRIBUTES,
styles=ALLOWED_STYLES,
protocols=ALLOWED_PROTOCOLS,
strip=False,
strip_comments=True,
filters=None,
css_sanitizer=None,
):
"""Initializes a Cleaner
@ -99,9 +95,6 @@ class Cleaner:
:arg dict attributes: allowed attributes; can be a callable, list or dict;
defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``
:arg list styles: allowed list of css styles; defaults to
``bleach.sanitizer.ALLOWED_STYLES``
:arg list protocols: allowed list of protocols for links; defaults
to ``bleach.sanitizer.ALLOWED_PROTOCOLS``
@ -118,14 +111,17 @@ class Cleaner:
Using filters changes the output of ``bleach.Cleaner.clean``.
Make sure the way the filters change the output are secure.
:arg CSSSanitizer css_sanitizer: instance with a "sanitize_css" method for
sanitizing style attribute values and style text; defaults to None
"""
self.tags = tags
self.attributes = attributes
self.styles = styles
self.protocols = protocols
self.strip = strip
self.strip_comments = strip_comments
self.filters = filters or []
self.css_sanitizer = css_sanitizer
self.parser = html5lib_shim.BleachHTMLParser(
tags=self.tags,
@ -143,7 +139,7 @@ class Cleaner:
resolve_entities=False,
# Bleach has its own sanitizer, so don't use the html5lib one
sanitize=False,
# Bleach sanitizer alphabetizes already, so don't use the html5lib one
# clean preserves attr order
alphabetical_attributes=False,
)
@ -175,11 +171,10 @@ class Cleaner:
attributes=self.attributes,
strip_disallowed_elements=self.strip,
strip_html_comments=self.strip_comments,
css_sanitizer=self.css_sanitizer,
# html5lib-sanitizer things
allowed_elements=self.tags,
allowed_css_properties=self.styles,
allowed_protocols=self.protocols,
allowed_svg_properties=[],
)
# Apply any filters after the BleachSanitizerFilter
@ -242,25 +237,25 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
def __init__(
self,
source,
allowed_elements=ALLOWED_TAGS,
attributes=ALLOWED_ATTRIBUTES,
allowed_protocols=ALLOWED_PROTOCOLS,
strip_disallowed_elements=False,
strip_html_comments=True,
css_sanitizer=None,
**kwargs,
):
"""Creates a BleachSanitizerFilter instance
:arg Treewalker source: stream
:arg list tags: allowed list of tags; defaults to
:arg list allowed_elements: allowed list of tags; defaults to
``bleach.sanitizer.ALLOWED_TAGS``
:arg dict attributes: allowed attributes; can be a callable, list or dict;
defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``
:arg list styles: allowed list of css styles; defaults to
``bleach.sanitizer.ALLOWED_STYLES``
:arg list protocols: allowed list of protocols for links; defaults
:arg list allowed_protocols: allowed list of protocols for links; defaults
to ``bleach.sanitizer.ALLOWED_PROTOCOLS``
:arg bool strip_disallowed_elements: whether or not to strip disallowed
@ -268,10 +263,14 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
:arg bool strip_html_comments: whether or not to strip HTML comments
:arg CSSSanitizer css_sanitizer: instance with a "sanitize_css" method for
sanitizing style attribute values and style text; defaults to None
"""
self.attr_filter = attribute_filter_factory(attributes)
self.strip_disallowed_elements = strip_disallowed_elements
self.strip_html_comments = strip_html_comments
self.css_sanitizer = css_sanitizer
# filter out html5lib deprecation warnings to use bleach from BleachSanitizerFilter init
warnings.filterwarnings(
@ -280,7 +279,12 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
category=DeprecationWarning,
module="bleach._vendor.html5lib",
)
return super(BleachSanitizerFilter, self).__init__(source, **kwargs)
return super().__init__(
source,
allowed_elements=allowed_elements,
allowed_protocols=allowed_protocols,
**kwargs,
)
def sanitize_stream(self, token_iterator):
for token in token_iterator:
@ -290,8 +294,7 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
continue
if isinstance(ret, list):
for subtoken in ret:
yield subtoken
yield from ret
else:
yield ret
@ -358,10 +361,6 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
return None
else:
if "data" in token:
# Alphabetize the attributes before calling .disallowed_token()
# so that the resulting string is stable
token["data"] = alphabetize_attributes(token["data"])
return self.disallowed_token(token)
elif token_type == "Comment":
@ -547,12 +546,21 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
# If it's a style attribute, sanitize it
if namespaced_name == (None, "style"):
val = self.sanitize_css(val)
if self.css_sanitizer:
val = self.css_sanitizer.sanitize_css(val)
else:
# FIXME(willkg): if style is allowed, but no
# css_sanitizer was set up, then this is probably a
# mistake and we should raise an error here
#
# For now, we're going to set the value to "" because
# there was no sanitizer set
val = ""
# At this point, we want to keep the attribute, so add it in
attrs[namespaced_name] = val
token["data"] = alphabetize_attributes(attrs)
token["data"] = attrs
return token
@ -575,7 +583,7 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
if ns is None or ns not in html5lib_shim.prefixes:
namespaced_name = name
else:
namespaced_name = "%s:%s" % (html5lib_shim.prefixes[ns], name)
namespaced_name = "{}:{}".format(html5lib_shim.prefixes[ns], name)
attrs.append(
' %s="%s"'
@ -587,7 +595,7 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
v,
)
)
token["data"] = "<%s%s>" % (token["name"], "".join(attrs))
token["data"] = "<{}{}>".format(token["name"], "".join(attrs))
else:
token["data"] = "<%s>" % token["name"]
@ -599,47 +607,3 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
del token["name"]
return token
def sanitize_css(self, style):
"""Sanitizes css in style tags"""
# Convert entities in the style so that it can be parsed as CSS
style = html5lib_shim.convert_entities(style)
# Drop any url values before we do anything else
style = re.compile(r"url\s*\(\s*[^\s)]+?\s*\)\s*").sub(" ", style)
# The gauntlet of sanitization
# Validate the css in the style tag and if it's not valid, then drop
# the whole thing.
parts = style.split(";")
gauntlet = re.compile(
r"""^( # consider a style attribute value as composed of:
[/:,#%!.\s\w] # a non-newline character
|\w-\w # 3 characters in the form \w-\w
|'[\s\w]+'\s* # a single quoted string of [\s\w]+ with trailing space
|"[\s\w]+" # a double quoted string of [\s\w]+
|\([\d,%\.\s]+\) # a parenthesized string of one or more digits, commas, periods, ...
)*$""", # ... percent signs, or whitespace e.g. from 'color: hsl(30,100%,50%)'
flags=re.U | re.VERBOSE,
)
for part in parts:
if not gauntlet.match(part):
return ""
if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
return ""
clean = []
for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style):
if not value:
continue
if prop.lower() in self.allowed_css_properties:
clean.append(prop + ": " + value + ";")
elif prop.lower() in self.allowed_svg_properties:
clean.append(prop + ": " + value + ";")
return " ".join(clean)