mirror of
https://github.com/Tautulli/Tautulli.git
synced 2025-07-06 05:01:14 -07:00
Update html5lib-1.1
This commit is contained in:
parent
3a116486e7
commit
586fd15464
142 changed files with 90234 additions and 2393 deletions
|
@ -1,20 +1,29 @@
|
|||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from . import _base
|
||||
from . import base
|
||||
|
||||
try:
|
||||
from collections import OrderedDict
|
||||
except ImportError:
|
||||
from ordereddict import OrderedDict
|
||||
from collections import OrderedDict
|
||||
|
||||
|
||||
class Filter(_base.Filter):
|
||||
def _attr_key(attr):
|
||||
"""Return an appropriate key for an attribute for sorting
|
||||
|
||||
Attributes have a namespace that can be either ``None`` or a string. We
|
||||
can't compare the two because they're different types, so we convert
|
||||
``None`` to an empty string first.
|
||||
|
||||
"""
|
||||
return (attr[0][0] or ''), attr[0][1]
|
||||
|
||||
|
||||
class Filter(base.Filter):
|
||||
"""Alphabetizes attributes for elements"""
|
||||
def __iter__(self):
|
||||
for token in _base.Filter.__iter__(self):
|
||||
for token in base.Filter.__iter__(self):
|
||||
if token["type"] in ("StartTag", "EmptyTag"):
|
||||
attrs = OrderedDict()
|
||||
for name, value in sorted(token["data"].items(),
|
||||
key=lambda x: x[0]):
|
||||
key=_attr_key):
|
||||
attrs[name] = value
|
||||
token["data"] = attrs
|
||||
yield token
|
||||
|
|
|
@ -1,11 +1,19 @@
|
|||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from . import _base
|
||||
from . import base
|
||||
|
||||
|
||||
class Filter(_base.Filter):
|
||||
class Filter(base.Filter):
|
||||
"""Injects ``<meta charset=ENCODING>`` tag into head of document"""
|
||||
def __init__(self, source, encoding):
|
||||
_base.Filter.__init__(self, source)
|
||||
"""Creates a Filter
|
||||
|
||||
:arg source: the source token stream
|
||||
|
||||
:arg encoding: the encoding to set
|
||||
|
||||
"""
|
||||
base.Filter.__init__(self, source)
|
||||
self.encoding = encoding
|
||||
|
||||
def __iter__(self):
|
||||
|
@ -13,7 +21,7 @@ class Filter(_base.Filter):
|
|||
meta_found = (self.encoding is None)
|
||||
pending = []
|
||||
|
||||
for token in _base.Filter.__iter__(self):
|
||||
for token in base.Filter.__iter__(self):
|
||||
type = token["type"]
|
||||
if type == "StartTag":
|
||||
if token["name"].lower() == "head":
|
||||
|
|
|
@ -1,93 +1,93 @@
|
|||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from gettext import gettext
|
||||
_ = gettext
|
||||
from six import text_type
|
||||
|
||||
from . import _base
|
||||
from ..constants import cdataElements, rcdataElements, voidElements
|
||||
from . import base
|
||||
from ..constants import namespaces, voidElements
|
||||
|
||||
from ..constants import spaceCharacters
|
||||
spaceCharacters = "".join(spaceCharacters)
|
||||
|
||||
|
||||
class LintError(Exception):
|
||||
pass
|
||||
class Filter(base.Filter):
|
||||
"""Lints the token stream for errors
|
||||
|
||||
If it finds any errors, it'll raise an ``AssertionError``.
|
||||
|
||||
"""
|
||||
def __init__(self, source, require_matching_tags=True):
|
||||
"""Creates a Filter
|
||||
|
||||
:arg source: the source token stream
|
||||
|
||||
:arg require_matching_tags: whether or not to require matching tags
|
||||
|
||||
"""
|
||||
super(Filter, self).__init__(source)
|
||||
self.require_matching_tags = require_matching_tags
|
||||
|
||||
class Filter(_base.Filter):
|
||||
def __iter__(self):
|
||||
open_elements = []
|
||||
contentModelFlag = "PCDATA"
|
||||
for token in _base.Filter.__iter__(self):
|
||||
for token in base.Filter.__iter__(self):
|
||||
type = token["type"]
|
||||
if type in ("StartTag", "EmptyTag"):
|
||||
namespace = token["namespace"]
|
||||
name = token["name"]
|
||||
if contentModelFlag != "PCDATA":
|
||||
raise LintError(_("StartTag not in PCDATA content model flag: %(tag)s") % {"tag": name})
|
||||
if not isinstance(name, str):
|
||||
raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name})
|
||||
if not name:
|
||||
raise LintError(_("Empty tag name"))
|
||||
if type == "StartTag" and name in voidElements:
|
||||
raise LintError(_("Void element reported as StartTag token: %(tag)s") % {"tag": name})
|
||||
elif type == "EmptyTag" and name not in voidElements:
|
||||
raise LintError(_("Non-void element reported as EmptyTag token: %(tag)s") % {"tag": token["name"]})
|
||||
if type == "StartTag":
|
||||
open_elements.append(name)
|
||||
for name, value in token["data"]:
|
||||
if not isinstance(name, str):
|
||||
raise LintError(_("Attribute name is not a string: %(name)r") % {"name": name})
|
||||
if not name:
|
||||
raise LintError(_("Empty attribute name"))
|
||||
if not isinstance(value, str):
|
||||
raise LintError(_("Attribute value is not a string: %(value)r") % {"value": value})
|
||||
if name in cdataElements:
|
||||
contentModelFlag = "CDATA"
|
||||
elif name in rcdataElements:
|
||||
contentModelFlag = "RCDATA"
|
||||
elif name == "plaintext":
|
||||
contentModelFlag = "PLAINTEXT"
|
||||
assert namespace is None or isinstance(namespace, text_type)
|
||||
assert namespace != ""
|
||||
assert isinstance(name, text_type)
|
||||
assert name != ""
|
||||
assert isinstance(token["data"], dict)
|
||||
if (not namespace or namespace == namespaces["html"]) and name in voidElements:
|
||||
assert type == "EmptyTag"
|
||||
else:
|
||||
assert type == "StartTag"
|
||||
if type == "StartTag" and self.require_matching_tags:
|
||||
open_elements.append((namespace, name))
|
||||
for (namespace, name), value in token["data"].items():
|
||||
assert namespace is None or isinstance(namespace, text_type)
|
||||
assert namespace != ""
|
||||
assert isinstance(name, text_type)
|
||||
assert name != ""
|
||||
assert isinstance(value, text_type)
|
||||
|
||||
elif type == "EndTag":
|
||||
namespace = token["namespace"]
|
||||
name = token["name"]
|
||||
if not isinstance(name, str):
|
||||
raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name})
|
||||
if not name:
|
||||
raise LintError(_("Empty tag name"))
|
||||
if name in voidElements:
|
||||
raise LintError(_("Void element reported as EndTag token: %(tag)s") % {"tag": name})
|
||||
start_name = open_elements.pop()
|
||||
if start_name != name:
|
||||
raise LintError(_("EndTag (%(end)s) does not match StartTag (%(start)s)") % {"end": name, "start": start_name})
|
||||
contentModelFlag = "PCDATA"
|
||||
assert namespace is None or isinstance(namespace, text_type)
|
||||
assert namespace != ""
|
||||
assert isinstance(name, text_type)
|
||||
assert name != ""
|
||||
if (not namespace or namespace == namespaces["html"]) and name in voidElements:
|
||||
assert False, "Void element reported as EndTag token: %(tag)s" % {"tag": name}
|
||||
elif self.require_matching_tags:
|
||||
start = open_elements.pop()
|
||||
assert start == (namespace, name)
|
||||
|
||||
elif type == "Comment":
|
||||
if contentModelFlag != "PCDATA":
|
||||
raise LintError(_("Comment not in PCDATA content model flag"))
|
||||
data = token["data"]
|
||||
assert isinstance(data, text_type)
|
||||
|
||||
elif type in ("Characters", "SpaceCharacters"):
|
||||
data = token["data"]
|
||||
if not isinstance(data, str):
|
||||
raise LintError(_("Attribute name is not a string: %(name)r") % {"name": data})
|
||||
if not data:
|
||||
raise LintError(_("%(type)s token with empty data") % {"type": type})
|
||||
assert isinstance(data, text_type)
|
||||
assert data != ""
|
||||
if type == "SpaceCharacters":
|
||||
data = data.strip(spaceCharacters)
|
||||
if data:
|
||||
raise LintError(_("Non-space character(s) found in SpaceCharacters token: %(token)r") % {"token": data})
|
||||
assert data.strip(spaceCharacters) == ""
|
||||
|
||||
elif type == "Doctype":
|
||||
name = token["name"]
|
||||
if contentModelFlag != "PCDATA":
|
||||
raise LintError(_("Doctype not in PCDATA content model flag: %(name)s") % {"name": name})
|
||||
if not isinstance(name, str):
|
||||
raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name})
|
||||
# XXX: what to do with token["data"] ?
|
||||
assert name is None or isinstance(name, text_type)
|
||||
assert token["publicId"] is None or isinstance(name, text_type)
|
||||
assert token["systemId"] is None or isinstance(name, text_type)
|
||||
|
||||
elif type in ("ParseError", "SerializeError"):
|
||||
pass
|
||||
elif type == "Entity":
|
||||
assert isinstance(token["name"], text_type)
|
||||
|
||||
elif type == "SerializerError":
|
||||
assert isinstance(token["data"], text_type)
|
||||
|
||||
else:
|
||||
raise LintError(_("Unknown token type: %(type)s") % {"type": type})
|
||||
assert False, "Unknown token type: %(type)s" % {"type": type}
|
||||
|
||||
yield token
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from . import _base
|
||||
from . import base
|
||||
|
||||
|
||||
class Filter(_base.Filter):
|
||||
class Filter(base.Filter):
|
||||
"""Removes optional tags from the token stream"""
|
||||
def slider(self):
|
||||
previous1 = previous2 = None
|
||||
for token in self.source:
|
||||
|
@ -11,7 +12,8 @@ class Filter(_base.Filter):
|
|||
yield previous2, previous1, token
|
||||
previous2 = previous1
|
||||
previous1 = token
|
||||
yield previous2, previous1, None
|
||||
if previous1 is not None:
|
||||
yield previous2, previous1, None
|
||||
|
||||
def __iter__(self):
|
||||
for previous, token, next in self.slider():
|
||||
|
@ -58,7 +60,7 @@ class Filter(_base.Filter):
|
|||
elif tagname == 'colgroup':
|
||||
# A colgroup element's start tag may be omitted if the first thing
|
||||
# inside the colgroup element is a col element, and if the element
|
||||
# is not immediately preceeded by another colgroup element whose
|
||||
# is not immediately preceded by another colgroup element whose
|
||||
# end tag has been omitted.
|
||||
if type in ("StartTag", "EmptyTag"):
|
||||
# XXX: we do not look at the preceding event, so instead we never
|
||||
|
@ -70,7 +72,7 @@ class Filter(_base.Filter):
|
|||
elif tagname == 'tbody':
|
||||
# A tbody element's start tag may be omitted if the first thing
|
||||
# inside the tbody element is a tr element, and if the element is
|
||||
# not immediately preceeded by a tbody, thead, or tfoot element
|
||||
# not immediately preceded by a tbody, thead, or tfoot element
|
||||
# whose end tag has been omitted.
|
||||
if type == "StartTag":
|
||||
# omit the thead and tfoot elements' end tag when they are
|
||||
|
|
|
@ -1,12 +1,916 @@
|
|||
"""Deprecated from html5lib 1.1.
|
||||
|
||||
See `here <https://github.com/html5lib/html5lib-python/issues/443>`_ for
|
||||
information about its deprecation; `Bleach <https://github.com/mozilla/bleach>`_
|
||||
is recommended as a replacement. Please let us know in the aforementioned issue
|
||||
if Bleach is unsuitable for your needs.
|
||||
|
||||
"""
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from . import _base
|
||||
from ..sanitizer import HTMLSanitizerMixin
|
||||
import re
|
||||
import warnings
|
||||
from xml.sax.saxutils import escape, unescape
|
||||
|
||||
from six.moves import urllib_parse as urlparse
|
||||
|
||||
from . import base
|
||||
from ..constants import namespaces, prefixes
|
||||
|
||||
__all__ = ["Filter"]
|
||||
|
||||
|
||||
class Filter(_base.Filter, HTMLSanitizerMixin):
|
||||
_deprecation_msg = (
|
||||
"html5lib's sanitizer is deprecated; see " +
|
||||
"https://github.com/html5lib/html5lib-python/issues/443 and please let " +
|
||||
"us know if Bleach is unsuitable for your needs"
|
||||
)
|
||||
|
||||
warnings.warn(_deprecation_msg, DeprecationWarning)
|
||||
|
||||
allowed_elements = frozenset((
|
||||
(namespaces['html'], 'a'),
|
||||
(namespaces['html'], 'abbr'),
|
||||
(namespaces['html'], 'acronym'),
|
||||
(namespaces['html'], 'address'),
|
||||
(namespaces['html'], 'area'),
|
||||
(namespaces['html'], 'article'),
|
||||
(namespaces['html'], 'aside'),
|
||||
(namespaces['html'], 'audio'),
|
||||
(namespaces['html'], 'b'),
|
||||
(namespaces['html'], 'big'),
|
||||
(namespaces['html'], 'blockquote'),
|
||||
(namespaces['html'], 'br'),
|
||||
(namespaces['html'], 'button'),
|
||||
(namespaces['html'], 'canvas'),
|
||||
(namespaces['html'], 'caption'),
|
||||
(namespaces['html'], 'center'),
|
||||
(namespaces['html'], 'cite'),
|
||||
(namespaces['html'], 'code'),
|
||||
(namespaces['html'], 'col'),
|
||||
(namespaces['html'], 'colgroup'),
|
||||
(namespaces['html'], 'command'),
|
||||
(namespaces['html'], 'datagrid'),
|
||||
(namespaces['html'], 'datalist'),
|
||||
(namespaces['html'], 'dd'),
|
||||
(namespaces['html'], 'del'),
|
||||
(namespaces['html'], 'details'),
|
||||
(namespaces['html'], 'dfn'),
|
||||
(namespaces['html'], 'dialog'),
|
||||
(namespaces['html'], 'dir'),
|
||||
(namespaces['html'], 'div'),
|
||||
(namespaces['html'], 'dl'),
|
||||
(namespaces['html'], 'dt'),
|
||||
(namespaces['html'], 'em'),
|
||||
(namespaces['html'], 'event-source'),
|
||||
(namespaces['html'], 'fieldset'),
|
||||
(namespaces['html'], 'figcaption'),
|
||||
(namespaces['html'], 'figure'),
|
||||
(namespaces['html'], 'footer'),
|
||||
(namespaces['html'], 'font'),
|
||||
(namespaces['html'], 'form'),
|
||||
(namespaces['html'], 'header'),
|
||||
(namespaces['html'], 'h1'),
|
||||
(namespaces['html'], 'h2'),
|
||||
(namespaces['html'], 'h3'),
|
||||
(namespaces['html'], 'h4'),
|
||||
(namespaces['html'], 'h5'),
|
||||
(namespaces['html'], 'h6'),
|
||||
(namespaces['html'], 'hr'),
|
||||
(namespaces['html'], 'i'),
|
||||
(namespaces['html'], 'img'),
|
||||
(namespaces['html'], 'input'),
|
||||
(namespaces['html'], 'ins'),
|
||||
(namespaces['html'], 'keygen'),
|
||||
(namespaces['html'], 'kbd'),
|
||||
(namespaces['html'], 'label'),
|
||||
(namespaces['html'], 'legend'),
|
||||
(namespaces['html'], 'li'),
|
||||
(namespaces['html'], 'm'),
|
||||
(namespaces['html'], 'map'),
|
||||
(namespaces['html'], 'menu'),
|
||||
(namespaces['html'], 'meter'),
|
||||
(namespaces['html'], 'multicol'),
|
||||
(namespaces['html'], 'nav'),
|
||||
(namespaces['html'], 'nextid'),
|
||||
(namespaces['html'], 'ol'),
|
||||
(namespaces['html'], 'output'),
|
||||
(namespaces['html'], 'optgroup'),
|
||||
(namespaces['html'], 'option'),
|
||||
(namespaces['html'], 'p'),
|
||||
(namespaces['html'], 'pre'),
|
||||
(namespaces['html'], 'progress'),
|
||||
(namespaces['html'], 'q'),
|
||||
(namespaces['html'], 's'),
|
||||
(namespaces['html'], 'samp'),
|
||||
(namespaces['html'], 'section'),
|
||||
(namespaces['html'], 'select'),
|
||||
(namespaces['html'], 'small'),
|
||||
(namespaces['html'], 'sound'),
|
||||
(namespaces['html'], 'source'),
|
||||
(namespaces['html'], 'spacer'),
|
||||
(namespaces['html'], 'span'),
|
||||
(namespaces['html'], 'strike'),
|
||||
(namespaces['html'], 'strong'),
|
||||
(namespaces['html'], 'sub'),
|
||||
(namespaces['html'], 'sup'),
|
||||
(namespaces['html'], 'table'),
|
||||
(namespaces['html'], 'tbody'),
|
||||
(namespaces['html'], 'td'),
|
||||
(namespaces['html'], 'textarea'),
|
||||
(namespaces['html'], 'time'),
|
||||
(namespaces['html'], 'tfoot'),
|
||||
(namespaces['html'], 'th'),
|
||||
(namespaces['html'], 'thead'),
|
||||
(namespaces['html'], 'tr'),
|
||||
(namespaces['html'], 'tt'),
|
||||
(namespaces['html'], 'u'),
|
||||
(namespaces['html'], 'ul'),
|
||||
(namespaces['html'], 'var'),
|
||||
(namespaces['html'], 'video'),
|
||||
(namespaces['mathml'], 'maction'),
|
||||
(namespaces['mathml'], 'math'),
|
||||
(namespaces['mathml'], 'merror'),
|
||||
(namespaces['mathml'], 'mfrac'),
|
||||
(namespaces['mathml'], 'mi'),
|
||||
(namespaces['mathml'], 'mmultiscripts'),
|
||||
(namespaces['mathml'], 'mn'),
|
||||
(namespaces['mathml'], 'mo'),
|
||||
(namespaces['mathml'], 'mover'),
|
||||
(namespaces['mathml'], 'mpadded'),
|
||||
(namespaces['mathml'], 'mphantom'),
|
||||
(namespaces['mathml'], 'mprescripts'),
|
||||
(namespaces['mathml'], 'mroot'),
|
||||
(namespaces['mathml'], 'mrow'),
|
||||
(namespaces['mathml'], 'mspace'),
|
||||
(namespaces['mathml'], 'msqrt'),
|
||||
(namespaces['mathml'], 'mstyle'),
|
||||
(namespaces['mathml'], 'msub'),
|
||||
(namespaces['mathml'], 'msubsup'),
|
||||
(namespaces['mathml'], 'msup'),
|
||||
(namespaces['mathml'], 'mtable'),
|
||||
(namespaces['mathml'], 'mtd'),
|
||||
(namespaces['mathml'], 'mtext'),
|
||||
(namespaces['mathml'], 'mtr'),
|
||||
(namespaces['mathml'], 'munder'),
|
||||
(namespaces['mathml'], 'munderover'),
|
||||
(namespaces['mathml'], 'none'),
|
||||
(namespaces['svg'], 'a'),
|
||||
(namespaces['svg'], 'animate'),
|
||||
(namespaces['svg'], 'animateColor'),
|
||||
(namespaces['svg'], 'animateMotion'),
|
||||
(namespaces['svg'], 'animateTransform'),
|
||||
(namespaces['svg'], 'clipPath'),
|
||||
(namespaces['svg'], 'circle'),
|
||||
(namespaces['svg'], 'defs'),
|
||||
(namespaces['svg'], 'desc'),
|
||||
(namespaces['svg'], 'ellipse'),
|
||||
(namespaces['svg'], 'font-face'),
|
||||
(namespaces['svg'], 'font-face-name'),
|
||||
(namespaces['svg'], 'font-face-src'),
|
||||
(namespaces['svg'], 'g'),
|
||||
(namespaces['svg'], 'glyph'),
|
||||
(namespaces['svg'], 'hkern'),
|
||||
(namespaces['svg'], 'linearGradient'),
|
||||
(namespaces['svg'], 'line'),
|
||||
(namespaces['svg'], 'marker'),
|
||||
(namespaces['svg'], 'metadata'),
|
||||
(namespaces['svg'], 'missing-glyph'),
|
||||
(namespaces['svg'], 'mpath'),
|
||||
(namespaces['svg'], 'path'),
|
||||
(namespaces['svg'], 'polygon'),
|
||||
(namespaces['svg'], 'polyline'),
|
||||
(namespaces['svg'], 'radialGradient'),
|
||||
(namespaces['svg'], 'rect'),
|
||||
(namespaces['svg'], 'set'),
|
||||
(namespaces['svg'], 'stop'),
|
||||
(namespaces['svg'], 'svg'),
|
||||
(namespaces['svg'], 'switch'),
|
||||
(namespaces['svg'], 'text'),
|
||||
(namespaces['svg'], 'title'),
|
||||
(namespaces['svg'], 'tspan'),
|
||||
(namespaces['svg'], 'use'),
|
||||
))
|
||||
|
||||
allowed_attributes = frozenset((
|
||||
# HTML attributes
|
||||
(None, 'abbr'),
|
||||
(None, 'accept'),
|
||||
(None, 'accept-charset'),
|
||||
(None, 'accesskey'),
|
||||
(None, 'action'),
|
||||
(None, 'align'),
|
||||
(None, 'alt'),
|
||||
(None, 'autocomplete'),
|
||||
(None, 'autofocus'),
|
||||
(None, 'axis'),
|
||||
(None, 'background'),
|
||||
(None, 'balance'),
|
||||
(None, 'bgcolor'),
|
||||
(None, 'bgproperties'),
|
||||
(None, 'border'),
|
||||
(None, 'bordercolor'),
|
||||
(None, 'bordercolordark'),
|
||||
(None, 'bordercolorlight'),
|
||||
(None, 'bottompadding'),
|
||||
(None, 'cellpadding'),
|
||||
(None, 'cellspacing'),
|
||||
(None, 'ch'),
|
||||
(None, 'challenge'),
|
||||
(None, 'char'),
|
||||
(None, 'charoff'),
|
||||
(None, 'choff'),
|
||||
(None, 'charset'),
|
||||
(None, 'checked'),
|
||||
(None, 'cite'),
|
||||
(None, 'class'),
|
||||
(None, 'clear'),
|
||||
(None, 'color'),
|
||||
(None, 'cols'),
|
||||
(None, 'colspan'),
|
||||
(None, 'compact'),
|
||||
(None, 'contenteditable'),
|
||||
(None, 'controls'),
|
||||
(None, 'coords'),
|
||||
(None, 'data'),
|
||||
(None, 'datafld'),
|
||||
(None, 'datapagesize'),
|
||||
(None, 'datasrc'),
|
||||
(None, 'datetime'),
|
||||
(None, 'default'),
|
||||
(None, 'delay'),
|
||||
(None, 'dir'),
|
||||
(None, 'disabled'),
|
||||
(None, 'draggable'),
|
||||
(None, 'dynsrc'),
|
||||
(None, 'enctype'),
|
||||
(None, 'end'),
|
||||
(None, 'face'),
|
||||
(None, 'for'),
|
||||
(None, 'form'),
|
||||
(None, 'frame'),
|
||||
(None, 'galleryimg'),
|
||||
(None, 'gutter'),
|
||||
(None, 'headers'),
|
||||
(None, 'height'),
|
||||
(None, 'hidefocus'),
|
||||
(None, 'hidden'),
|
||||
(None, 'high'),
|
||||
(None, 'href'),
|
||||
(None, 'hreflang'),
|
||||
(None, 'hspace'),
|
||||
(None, 'icon'),
|
||||
(None, 'id'),
|
||||
(None, 'inputmode'),
|
||||
(None, 'ismap'),
|
||||
(None, 'keytype'),
|
||||
(None, 'label'),
|
||||
(None, 'leftspacing'),
|
||||
(None, 'lang'),
|
||||
(None, 'list'),
|
||||
(None, 'longdesc'),
|
||||
(None, 'loop'),
|
||||
(None, 'loopcount'),
|
||||
(None, 'loopend'),
|
||||
(None, 'loopstart'),
|
||||
(None, 'low'),
|
||||
(None, 'lowsrc'),
|
||||
(None, 'max'),
|
||||
(None, 'maxlength'),
|
||||
(None, 'media'),
|
||||
(None, 'method'),
|
||||
(None, 'min'),
|
||||
(None, 'multiple'),
|
||||
(None, 'name'),
|
||||
(None, 'nohref'),
|
||||
(None, 'noshade'),
|
||||
(None, 'nowrap'),
|
||||
(None, 'open'),
|
||||
(None, 'optimum'),
|
||||
(None, 'pattern'),
|
||||
(None, 'ping'),
|
||||
(None, 'point-size'),
|
||||
(None, 'poster'),
|
||||
(None, 'pqg'),
|
||||
(None, 'preload'),
|
||||
(None, 'prompt'),
|
||||
(None, 'radiogroup'),
|
||||
(None, 'readonly'),
|
||||
(None, 'rel'),
|
||||
(None, 'repeat-max'),
|
||||
(None, 'repeat-min'),
|
||||
(None, 'replace'),
|
||||
(None, 'required'),
|
||||
(None, 'rev'),
|
||||
(None, 'rightspacing'),
|
||||
(None, 'rows'),
|
||||
(None, 'rowspan'),
|
||||
(None, 'rules'),
|
||||
(None, 'scope'),
|
||||
(None, 'selected'),
|
||||
(None, 'shape'),
|
||||
(None, 'size'),
|
||||
(None, 'span'),
|
||||
(None, 'src'),
|
||||
(None, 'start'),
|
||||
(None, 'step'),
|
||||
(None, 'style'),
|
||||
(None, 'summary'),
|
||||
(None, 'suppress'),
|
||||
(None, 'tabindex'),
|
||||
(None, 'target'),
|
||||
(None, 'template'),
|
||||
(None, 'title'),
|
||||
(None, 'toppadding'),
|
||||
(None, 'type'),
|
||||
(None, 'unselectable'),
|
||||
(None, 'usemap'),
|
||||
(None, 'urn'),
|
||||
(None, 'valign'),
|
||||
(None, 'value'),
|
||||
(None, 'variable'),
|
||||
(None, 'volume'),
|
||||
(None, 'vspace'),
|
||||
(None, 'vrml'),
|
||||
(None, 'width'),
|
||||
(None, 'wrap'),
|
||||
(namespaces['xml'], 'lang'),
|
||||
# MathML attributes
|
||||
(None, 'actiontype'),
|
||||
(None, 'align'),
|
||||
(None, 'columnalign'),
|
||||
(None, 'columnalign'),
|
||||
(None, 'columnalign'),
|
||||
(None, 'columnlines'),
|
||||
(None, 'columnspacing'),
|
||||
(None, 'columnspan'),
|
||||
(None, 'depth'),
|
||||
(None, 'display'),
|
||||
(None, 'displaystyle'),
|
||||
(None, 'equalcolumns'),
|
||||
(None, 'equalrows'),
|
||||
(None, 'fence'),
|
||||
(None, 'fontstyle'),
|
||||
(None, 'fontweight'),
|
||||
(None, 'frame'),
|
||||
(None, 'height'),
|
||||
(None, 'linethickness'),
|
||||
(None, 'lspace'),
|
||||
(None, 'mathbackground'),
|
||||
(None, 'mathcolor'),
|
||||
(None, 'mathvariant'),
|
||||
(None, 'mathvariant'),
|
||||
(None, 'maxsize'),
|
||||
(None, 'minsize'),
|
||||
(None, 'other'),
|
||||
(None, 'rowalign'),
|
||||
(None, 'rowalign'),
|
||||
(None, 'rowalign'),
|
||||
(None, 'rowlines'),
|
||||
(None, 'rowspacing'),
|
||||
(None, 'rowspan'),
|
||||
(None, 'rspace'),
|
||||
(None, 'scriptlevel'),
|
||||
(None, 'selection'),
|
||||
(None, 'separator'),
|
||||
(None, 'stretchy'),
|
||||
(None, 'width'),
|
||||
(None, 'width'),
|
||||
(namespaces['xlink'], 'href'),
|
||||
(namespaces['xlink'], 'show'),
|
||||
(namespaces['xlink'], 'type'),
|
||||
# SVG attributes
|
||||
(None, 'accent-height'),
|
||||
(None, 'accumulate'),
|
||||
(None, 'additive'),
|
||||
(None, 'alphabetic'),
|
||||
(None, 'arabic-form'),
|
||||
(None, 'ascent'),
|
||||
(None, 'attributeName'),
|
||||
(None, 'attributeType'),
|
||||
(None, 'baseProfile'),
|
||||
(None, 'bbox'),
|
||||
(None, 'begin'),
|
||||
(None, 'by'),
|
||||
(None, 'calcMode'),
|
||||
(None, 'cap-height'),
|
||||
(None, 'class'),
|
||||
(None, 'clip-path'),
|
||||
(None, 'color'),
|
||||
(None, 'color-rendering'),
|
||||
(None, 'content'),
|
||||
(None, 'cx'),
|
||||
(None, 'cy'),
|
||||
(None, 'd'),
|
||||
(None, 'dx'),
|
||||
(None, 'dy'),
|
||||
(None, 'descent'),
|
||||
(None, 'display'),
|
||||
(None, 'dur'),
|
||||
(None, 'end'),
|
||||
(None, 'fill'),
|
||||
(None, 'fill-opacity'),
|
||||
(None, 'fill-rule'),
|
||||
(None, 'font-family'),
|
||||
(None, 'font-size'),
|
||||
(None, 'font-stretch'),
|
||||
(None, 'font-style'),
|
||||
(None, 'font-variant'),
|
||||
(None, 'font-weight'),
|
||||
(None, 'from'),
|
||||
(None, 'fx'),
|
||||
(None, 'fy'),
|
||||
(None, 'g1'),
|
||||
(None, 'g2'),
|
||||
(None, 'glyph-name'),
|
||||
(None, 'gradientUnits'),
|
||||
(None, 'hanging'),
|
||||
(None, 'height'),
|
||||
(None, 'horiz-adv-x'),
|
||||
(None, 'horiz-origin-x'),
|
||||
(None, 'id'),
|
||||
(None, 'ideographic'),
|
||||
(None, 'k'),
|
||||
(None, 'keyPoints'),
|
||||
(None, 'keySplines'),
|
||||
(None, 'keyTimes'),
|
||||
(None, 'lang'),
|
||||
(None, 'marker-end'),
|
||||
(None, 'marker-mid'),
|
||||
(None, 'marker-start'),
|
||||
(None, 'markerHeight'),
|
||||
(None, 'markerUnits'),
|
||||
(None, 'markerWidth'),
|
||||
(None, 'mathematical'),
|
||||
(None, 'max'),
|
||||
(None, 'min'),
|
||||
(None, 'name'),
|
||||
(None, 'offset'),
|
||||
(None, 'opacity'),
|
||||
(None, 'orient'),
|
||||
(None, 'origin'),
|
||||
(None, 'overline-position'),
|
||||
(None, 'overline-thickness'),
|
||||
(None, 'panose-1'),
|
||||
(None, 'path'),
|
||||
(None, 'pathLength'),
|
||||
(None, 'points'),
|
||||
(None, 'preserveAspectRatio'),
|
||||
(None, 'r'),
|
||||
(None, 'refX'),
|
||||
(None, 'refY'),
|
||||
(None, 'repeatCount'),
|
||||
(None, 'repeatDur'),
|
||||
(None, 'requiredExtensions'),
|
||||
(None, 'requiredFeatures'),
|
||||
(None, 'restart'),
|
||||
(None, 'rotate'),
|
||||
(None, 'rx'),
|
||||
(None, 'ry'),
|
||||
(None, 'slope'),
|
||||
(None, 'stemh'),
|
||||
(None, 'stemv'),
|
||||
(None, 'stop-color'),
|
||||
(None, 'stop-opacity'),
|
||||
(None, 'strikethrough-position'),
|
||||
(None, 'strikethrough-thickness'),
|
||||
(None, 'stroke'),
|
||||
(None, 'stroke-dasharray'),
|
||||
(None, 'stroke-dashoffset'),
|
||||
(None, 'stroke-linecap'),
|
||||
(None, 'stroke-linejoin'),
|
||||
(None, 'stroke-miterlimit'),
|
||||
(None, 'stroke-opacity'),
|
||||
(None, 'stroke-width'),
|
||||
(None, 'systemLanguage'),
|
||||
(None, 'target'),
|
||||
(None, 'text-anchor'),
|
||||
(None, 'to'),
|
||||
(None, 'transform'),
|
||||
(None, 'type'),
|
||||
(None, 'u1'),
|
||||
(None, 'u2'),
|
||||
(None, 'underline-position'),
|
||||
(None, 'underline-thickness'),
|
||||
(None, 'unicode'),
|
||||
(None, 'unicode-range'),
|
||||
(None, 'units-per-em'),
|
||||
(None, 'values'),
|
||||
(None, 'version'),
|
||||
(None, 'viewBox'),
|
||||
(None, 'visibility'),
|
||||
(None, 'width'),
|
||||
(None, 'widths'),
|
||||
(None, 'x'),
|
||||
(None, 'x-height'),
|
||||
(None, 'x1'),
|
||||
(None, 'x2'),
|
||||
(namespaces['xlink'], 'actuate'),
|
||||
(namespaces['xlink'], 'arcrole'),
|
||||
(namespaces['xlink'], 'href'),
|
||||
(namespaces['xlink'], 'role'),
|
||||
(namespaces['xlink'], 'show'),
|
||||
(namespaces['xlink'], 'title'),
|
||||
(namespaces['xlink'], 'type'),
|
||||
(namespaces['xml'], 'base'),
|
||||
(namespaces['xml'], 'lang'),
|
||||
(namespaces['xml'], 'space'),
|
||||
(None, 'y'),
|
||||
(None, 'y1'),
|
||||
(None, 'y2'),
|
||||
(None, 'zoomAndPan'),
|
||||
))
|
||||
|
||||
attr_val_is_uri = frozenset((
|
||||
(None, 'href'),
|
||||
(None, 'src'),
|
||||
(None, 'cite'),
|
||||
(None, 'action'),
|
||||
(None, 'longdesc'),
|
||||
(None, 'poster'),
|
||||
(None, 'background'),
|
||||
(None, 'datasrc'),
|
||||
(None, 'dynsrc'),
|
||||
(None, 'lowsrc'),
|
||||
(None, 'ping'),
|
||||
(namespaces['xlink'], 'href'),
|
||||
(namespaces['xml'], 'base'),
|
||||
))
|
||||
|
||||
svg_attr_val_allows_ref = frozenset((
|
||||
(None, 'clip-path'),
|
||||
(None, 'color-profile'),
|
||||
(None, 'cursor'),
|
||||
(None, 'fill'),
|
||||
(None, 'filter'),
|
||||
(None, 'marker'),
|
||||
(None, 'marker-start'),
|
||||
(None, 'marker-mid'),
|
||||
(None, 'marker-end'),
|
||||
(None, 'mask'),
|
||||
(None, 'stroke'),
|
||||
))
|
||||
|
||||
svg_allow_local_href = frozenset((
|
||||
(None, 'altGlyph'),
|
||||
(None, 'animate'),
|
||||
(None, 'animateColor'),
|
||||
(None, 'animateMotion'),
|
||||
(None, 'animateTransform'),
|
||||
(None, 'cursor'),
|
||||
(None, 'feImage'),
|
||||
(None, 'filter'),
|
||||
(None, 'linearGradient'),
|
||||
(None, 'pattern'),
|
||||
(None, 'radialGradient'),
|
||||
(None, 'textpath'),
|
||||
(None, 'tref'),
|
||||
(None, 'set'),
|
||||
(None, 'use')
|
||||
))
|
||||
|
||||
allowed_css_properties = frozenset((
|
||||
'azimuth',
|
||||
'background-color',
|
||||
'border-bottom-color',
|
||||
'border-collapse',
|
||||
'border-color',
|
||||
'border-left-color',
|
||||
'border-right-color',
|
||||
'border-top-color',
|
||||
'clear',
|
||||
'color',
|
||||
'cursor',
|
||||
'direction',
|
||||
'display',
|
||||
'elevation',
|
||||
'float',
|
||||
'font',
|
||||
'font-family',
|
||||
'font-size',
|
||||
'font-style',
|
||||
'font-variant',
|
||||
'font-weight',
|
||||
'height',
|
||||
'letter-spacing',
|
||||
'line-height',
|
||||
'overflow',
|
||||
'pause',
|
||||
'pause-after',
|
||||
'pause-before',
|
||||
'pitch',
|
||||
'pitch-range',
|
||||
'richness',
|
||||
'speak',
|
||||
'speak-header',
|
||||
'speak-numeral',
|
||||
'speak-punctuation',
|
||||
'speech-rate',
|
||||
'stress',
|
||||
'text-align',
|
||||
'text-decoration',
|
||||
'text-indent',
|
||||
'unicode-bidi',
|
||||
'vertical-align',
|
||||
'voice-family',
|
||||
'volume',
|
||||
'white-space',
|
||||
'width',
|
||||
))
|
||||
|
||||
allowed_css_keywords = frozenset((
|
||||
'auto',
|
||||
'aqua',
|
||||
'black',
|
||||
'block',
|
||||
'blue',
|
||||
'bold',
|
||||
'both',
|
||||
'bottom',
|
||||
'brown',
|
||||
'center',
|
||||
'collapse',
|
||||
'dashed',
|
||||
'dotted',
|
||||
'fuchsia',
|
||||
'gray',
|
||||
'green',
|
||||
'!important',
|
||||
'italic',
|
||||
'left',
|
||||
'lime',
|
||||
'maroon',
|
||||
'medium',
|
||||
'none',
|
||||
'navy',
|
||||
'normal',
|
||||
'nowrap',
|
||||
'olive',
|
||||
'pointer',
|
||||
'purple',
|
||||
'red',
|
||||
'right',
|
||||
'solid',
|
||||
'silver',
|
||||
'teal',
|
||||
'top',
|
||||
'transparent',
|
||||
'underline',
|
||||
'white',
|
||||
'yellow',
|
||||
))
|
||||
|
||||
allowed_svg_properties = frozenset((
|
||||
'fill',
|
||||
'fill-opacity',
|
||||
'fill-rule',
|
||||
'stroke',
|
||||
'stroke-width',
|
||||
'stroke-linecap',
|
||||
'stroke-linejoin',
|
||||
'stroke-opacity',
|
||||
))
|
||||
|
||||
allowed_protocols = frozenset((
|
||||
'ed2k',
|
||||
'ftp',
|
||||
'http',
|
||||
'https',
|
||||
'irc',
|
||||
'mailto',
|
||||
'news',
|
||||
'gopher',
|
||||
'nntp',
|
||||
'telnet',
|
||||
'webcal',
|
||||
'xmpp',
|
||||
'callto',
|
||||
'feed',
|
||||
'urn',
|
||||
'aim',
|
||||
'rsync',
|
||||
'tag',
|
||||
'ssh',
|
||||
'sftp',
|
||||
'rtsp',
|
||||
'afs',
|
||||
'data',
|
||||
))
|
||||
|
||||
allowed_content_types = frozenset((
|
||||
'image/png',
|
||||
'image/jpeg',
|
||||
'image/gif',
|
||||
'image/webp',
|
||||
'image/bmp',
|
||||
'text/plain',
|
||||
))
|
||||
|
||||
|
||||
data_content_type = re.compile(r'''
|
||||
^
|
||||
# Match a content type <application>/<type>
|
||||
(?P<content_type>[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+)
|
||||
# Match any character set and encoding
|
||||
(?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?)
|
||||
|(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?)
|
||||
# Assume the rest is data
|
||||
,.*
|
||||
$
|
||||
''',
|
||||
re.VERBOSE)
|
||||
|
||||
|
||||
class Filter(base.Filter):
|
||||
"""Sanitizes token stream of XHTML+MathML+SVG and of inline style attributes"""
|
||||
def __init__(self,
|
||||
source,
|
||||
allowed_elements=allowed_elements,
|
||||
allowed_attributes=allowed_attributes,
|
||||
allowed_css_properties=allowed_css_properties,
|
||||
allowed_css_keywords=allowed_css_keywords,
|
||||
allowed_svg_properties=allowed_svg_properties,
|
||||
allowed_protocols=allowed_protocols,
|
||||
allowed_content_types=allowed_content_types,
|
||||
attr_val_is_uri=attr_val_is_uri,
|
||||
svg_attr_val_allows_ref=svg_attr_val_allows_ref,
|
||||
svg_allow_local_href=svg_allow_local_href):
|
||||
"""Creates a Filter
|
||||
|
||||
:arg allowed_elements: set of elements to allow--everything else will
|
||||
be escaped
|
||||
|
||||
:arg allowed_attributes: set of attributes to allow in
|
||||
elements--everything else will be stripped
|
||||
|
||||
:arg allowed_css_properties: set of CSS properties to allow--everything
|
||||
else will be stripped
|
||||
|
||||
:arg allowed_css_keywords: set of CSS keywords to allow--everything
|
||||
else will be stripped
|
||||
|
||||
:arg allowed_svg_properties: set of SVG properties to allow--everything
|
||||
else will be removed
|
||||
|
||||
:arg allowed_protocols: set of allowed protocols for URIs
|
||||
|
||||
:arg allowed_content_types: set of allowed content types for ``data`` URIs.
|
||||
|
||||
:arg attr_val_is_uri: set of attributes that have URI values--values
|
||||
that have a scheme not listed in ``allowed_protocols`` are removed
|
||||
|
||||
:arg svg_attr_val_allows_ref: set of SVG attributes that can have
|
||||
references
|
||||
|
||||
:arg svg_allow_local_href: set of SVG elements that can have local
|
||||
hrefs--these are removed
|
||||
|
||||
"""
|
||||
super(Filter, self).__init__(source)
|
||||
|
||||
warnings.warn(_deprecation_msg, DeprecationWarning)
|
||||
|
||||
self.allowed_elements = allowed_elements
|
||||
self.allowed_attributes = allowed_attributes
|
||||
self.allowed_css_properties = allowed_css_properties
|
||||
self.allowed_css_keywords = allowed_css_keywords
|
||||
self.allowed_svg_properties = allowed_svg_properties
|
||||
self.allowed_protocols = allowed_protocols
|
||||
self.allowed_content_types = allowed_content_types
|
||||
self.attr_val_is_uri = attr_val_is_uri
|
||||
self.svg_attr_val_allows_ref = svg_attr_val_allows_ref
|
||||
self.svg_allow_local_href = svg_allow_local_href
|
||||
|
||||
def __iter__(self):
|
||||
for token in _base.Filter.__iter__(self):
|
||||
for token in base.Filter.__iter__(self):
|
||||
token = self.sanitize_token(token)
|
||||
if token:
|
||||
yield token
|
||||
|
||||
# Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and
|
||||
# stripping out all attributes not in ALLOWED_ATTRIBUTES. Style attributes
|
||||
# are parsed, and a restricted set, specified by ALLOWED_CSS_PROPERTIES and
|
||||
# ALLOWED_CSS_KEYWORDS, are allowed through. attributes in ATTR_VAL_IS_URI
|
||||
# are scanned, and only URI schemes specified in ALLOWED_PROTOCOLS are
|
||||
# allowed.
|
||||
#
|
||||
# sanitize_html('<script> do_nasty_stuff() </script>')
|
||||
# => <script> do_nasty_stuff() </script>
|
||||
# sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
|
||||
# => <a>Click here for $100</a>
|
||||
def sanitize_token(self, token):
|
||||
|
||||
# accommodate filters which use token_type differently
|
||||
token_type = token["type"]
|
||||
if token_type in ("StartTag", "EndTag", "EmptyTag"):
|
||||
name = token["name"]
|
||||
namespace = token["namespace"]
|
||||
if ((namespace, name) in self.allowed_elements or
|
||||
(namespace is None and
|
||||
(namespaces["html"], name) in self.allowed_elements)):
|
||||
return self.allowed_token(token)
|
||||
else:
|
||||
return self.disallowed_token(token)
|
||||
elif token_type == "Comment":
|
||||
pass
|
||||
else:
|
||||
return token
|
||||
|
||||
def allowed_token(self, token):
|
||||
if "data" in token:
|
||||
attrs = token["data"]
|
||||
attr_names = set(attrs.keys())
|
||||
|
||||
# Remove forbidden attributes
|
||||
for to_remove in (attr_names - self.allowed_attributes):
|
||||
del token["data"][to_remove]
|
||||
attr_names.remove(to_remove)
|
||||
|
||||
# Remove attributes with disallowed URL values
|
||||
for attr in (attr_names & self.attr_val_is_uri):
|
||||
assert attr in attrs
|
||||
# I don't have a clue where this regexp comes from or why it matches those
|
||||
# characters, nor why we call unescape. I just know it's always been here.
|
||||
# Should you be worried by this comment in a sanitizer? Yes. On the other hand, all
|
||||
# this will do is remove *more* than it otherwise would.
|
||||
val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\\s]+", '',
|
||||
unescape(attrs[attr])).lower()
|
||||
# remove replacement characters from unescaped characters
|
||||
val_unescaped = val_unescaped.replace("\ufffd", "")
|
||||
try:
|
||||
uri = urlparse.urlparse(val_unescaped)
|
||||
except ValueError:
|
||||
uri = None
|
||||
del attrs[attr]
|
||||
if uri and uri.scheme:
|
||||
if uri.scheme not in self.allowed_protocols:
|
||||
del attrs[attr]
|
||||
if uri.scheme == 'data':
|
||||
m = data_content_type.match(uri.path)
|
||||
if not m:
|
||||
del attrs[attr]
|
||||
elif m.group('content_type') not in self.allowed_content_types:
|
||||
del attrs[attr]
|
||||
|
||||
for attr in self.svg_attr_val_allows_ref:
|
||||
if attr in attrs:
|
||||
attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
|
||||
' ',
|
||||
unescape(attrs[attr]))
|
||||
if (token["name"] in self.svg_allow_local_href and
|
||||
(namespaces['xlink'], 'href') in attrs and re.search(r'^\s*[^#\s].*',
|
||||
attrs[(namespaces['xlink'], 'href')])):
|
||||
del attrs[(namespaces['xlink'], 'href')]
|
||||
if (None, 'style') in attrs:
|
||||
attrs[(None, 'style')] = self.sanitize_css(attrs[(None, 'style')])
|
||||
token["data"] = attrs
|
||||
return token
|
||||
|
||||
def disallowed_token(self, token):
|
||||
token_type = token["type"]
|
||||
if token_type == "EndTag":
|
||||
token["data"] = "</%s>" % token["name"]
|
||||
elif token["data"]:
|
||||
assert token_type in ("StartTag", "EmptyTag")
|
||||
attrs = []
|
||||
for (ns, name), v in token["data"].items():
|
||||
attrs.append(' %s="%s"' % (name if ns is None else "%s:%s" % (prefixes[ns], name), escape(v)))
|
||||
token["data"] = "<%s%s>" % (token["name"], ''.join(attrs))
|
||||
else:
|
||||
token["data"] = "<%s>" % token["name"]
|
||||
if token.get("selfClosing"):
|
||||
token["data"] = token["data"][:-1] + "/>"
|
||||
|
||||
token["type"] = "Characters"
|
||||
|
||||
del token["name"]
|
||||
return token
|
||||
|
||||
def sanitize_css(self, style):
|
||||
# disallow urls
|
||||
style = re.compile(r'url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)
|
||||
|
||||
# gauntlet
|
||||
if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
|
||||
return ''
|
||||
if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
|
||||
return ''
|
||||
|
||||
clean = []
|
||||
for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style):
|
||||
if not value:
|
||||
continue
|
||||
if prop.lower() in self.allowed_css_properties:
|
||||
clean.append(prop + ': ' + value + ';')
|
||||
elif prop.split('-')[0].lower() in ['background', 'border', 'margin',
|
||||
'padding']:
|
||||
for keyword in value.split():
|
||||
if keyword not in self.allowed_css_keywords and \
|
||||
not re.match(r"^(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword): # noqa
|
||||
break
|
||||
else:
|
||||
clean.append(prop + ': ' + value + ';')
|
||||
elif prop.lower() in self.allowed_svg_properties:
|
||||
clean.append(prop + ': ' + value + ';')
|
||||
|
||||
return ' '.join(clean)
|
||||
|
|
|
@ -2,20 +2,20 @@ from __future__ import absolute_import, division, unicode_literals
|
|||
|
||||
import re
|
||||
|
||||
from . import _base
|
||||
from . import base
|
||||
from ..constants import rcdataElements, spaceCharacters
|
||||
spaceCharacters = "".join(spaceCharacters)
|
||||
|
||||
SPACES_REGEX = re.compile("[%s]+" % spaceCharacters)
|
||||
|
||||
|
||||
class Filter(_base.Filter):
|
||||
|
||||
class Filter(base.Filter):
|
||||
"""Collapses whitespace except in pre, textarea, and script elements"""
|
||||
spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements))
|
||||
|
||||
def __iter__(self):
|
||||
preserve = 0
|
||||
for token in _base.Filter.__iter__(self):
|
||||
for token in base.Filter.__iter__(self):
|
||||
type = token["type"]
|
||||
if type == "StartTag" \
|
||||
and (preserve or token["name"] in self.spacePreserveElements):
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue