mirror of
https://github.com/Tautulli/Tautulli.git
synced 2025-07-07 21:51:14 -07:00
Update html5lib-1.1
This commit is contained in:
parent
3a116486e7
commit
586fd15464
142 changed files with 90234 additions and 2393 deletions
|
@ -2,17 +2,18 @@
|
|||
tree, generating tokens identical to those produced by the tokenizer
|
||||
module.
|
||||
|
||||
To create a tree walker for a new type of tree, you need to do
|
||||
To create a tree walker for a new type of tree, you need to
|
||||
implement a tree walker object (called TreeWalker by convention) that
|
||||
implements a 'serialize' method taking a tree as sole argument and
|
||||
returning an iterator generating tokens.
|
||||
implements a 'serialize' method which takes a tree as sole argument and
|
||||
returns an iterator which generates tokens.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
import sys
|
||||
from .. import constants
|
||||
from .._utils import default_etree
|
||||
|
||||
from ..utils import default_etree
|
||||
__all__ = ["getTreeWalker", "pprint"]
|
||||
|
||||
treeWalkerCache = {}
|
||||
|
||||
|
@ -20,34 +21,38 @@ treeWalkerCache = {}
|
|||
def getTreeWalker(treeType, implementation=None, **kwargs):
|
||||
"""Get a TreeWalker class for various types of tree with built-in support
|
||||
|
||||
treeType - the name of the tree type required (case-insensitive). Supported
|
||||
values are:
|
||||
:arg str treeType: the name of the tree type required (case-insensitive).
|
||||
Supported values are:
|
||||
|
||||
"dom" - The xml.dom.minidom DOM implementation
|
||||
"pulldom" - The xml.dom.pulldom event stream
|
||||
"etree" - A generic walker for tree implementations exposing an
|
||||
elementtree-like interface (known to work with
|
||||
ElementTree, cElementTree and lxml.etree).
|
||||
"lxml" - Optimized walker for lxml.etree
|
||||
"genshi" - a Genshi stream
|
||||
* "dom": The xml.dom.minidom DOM implementation
|
||||
* "etree": A generic walker for tree implementations exposing an
|
||||
elementtree-like interface (known to work with ElementTree,
|
||||
cElementTree and lxml.etree).
|
||||
* "lxml": Optimized walker for lxml.etree
|
||||
* "genshi": a Genshi stream
|
||||
|
||||
implementation - (Currently applies to the "etree" tree type only). A module
|
||||
implementing the tree type e.g. xml.etree.ElementTree or
|
||||
cElementTree."""
|
||||
:arg implementation: A module implementing the tree type e.g.
|
||||
xml.etree.ElementTree or cElementTree (Currently applies to the "etree"
|
||||
tree type only).
|
||||
|
||||
:arg kwargs: keyword arguments passed to the etree walker--for other
|
||||
walkers, this has no effect
|
||||
|
||||
:returns: a TreeWalker class
|
||||
|
||||
"""
|
||||
|
||||
treeType = treeType.lower()
|
||||
if treeType not in treeWalkerCache:
|
||||
if treeType in ("dom", "pulldom"):
|
||||
name = "%s.%s" % (__name__, treeType)
|
||||
__import__(name)
|
||||
mod = sys.modules[name]
|
||||
treeWalkerCache[treeType] = mod.TreeWalker
|
||||
if treeType == "dom":
|
||||
from . import dom
|
||||
treeWalkerCache[treeType] = dom.TreeWalker
|
||||
elif treeType == "genshi":
|
||||
from . import genshistream
|
||||
treeWalkerCache[treeType] = genshistream.TreeWalker
|
||||
from . import genshi
|
||||
treeWalkerCache[treeType] = genshi.TreeWalker
|
||||
elif treeType == "lxml":
|
||||
from . import lxmletree
|
||||
treeWalkerCache[treeType] = lxmletree.TreeWalker
|
||||
from . import etree_lxml
|
||||
treeWalkerCache[treeType] = etree_lxml.TreeWalker
|
||||
elif treeType == "etree":
|
||||
from . import etree
|
||||
if implementation is None:
|
||||
|
@ -55,3 +60,95 @@ def getTreeWalker(treeType, implementation=None, **kwargs):
|
|||
# XXX: NEVER cache here, caching is done in the etree submodule
|
||||
return etree.getETreeModule(implementation, **kwargs).TreeWalker
|
||||
return treeWalkerCache.get(treeType)
|
||||
|
||||
|
||||
def concatenateCharacterTokens(tokens):
|
||||
pendingCharacters = []
|
||||
for token in tokens:
|
||||
type = token["type"]
|
||||
if type in ("Characters", "SpaceCharacters"):
|
||||
pendingCharacters.append(token["data"])
|
||||
else:
|
||||
if pendingCharacters:
|
||||
yield {"type": "Characters", "data": "".join(pendingCharacters)}
|
||||
pendingCharacters = []
|
||||
yield token
|
||||
if pendingCharacters:
|
||||
yield {"type": "Characters", "data": "".join(pendingCharacters)}
|
||||
|
||||
|
||||
def pprint(walker):
|
||||
"""Pretty printer for tree walkers
|
||||
|
||||
Takes a TreeWalker instance and pretty prints the output of walking the tree.
|
||||
|
||||
:arg walker: a TreeWalker instance
|
||||
|
||||
"""
|
||||
output = []
|
||||
indent = 0
|
||||
for token in concatenateCharacterTokens(walker):
|
||||
type = token["type"]
|
||||
if type in ("StartTag", "EmptyTag"):
|
||||
# tag name
|
||||
if token["namespace"] and token["namespace"] != constants.namespaces["html"]:
|
||||
if token["namespace"] in constants.prefixes:
|
||||
ns = constants.prefixes[token["namespace"]]
|
||||
else:
|
||||
ns = token["namespace"]
|
||||
name = "%s %s" % (ns, token["name"])
|
||||
else:
|
||||
name = token["name"]
|
||||
output.append("%s<%s>" % (" " * indent, name))
|
||||
indent += 2
|
||||
# attributes (sorted for consistent ordering)
|
||||
attrs = token["data"]
|
||||
for (namespace, localname), value in sorted(attrs.items()):
|
||||
if namespace:
|
||||
if namespace in constants.prefixes:
|
||||
ns = constants.prefixes[namespace]
|
||||
else:
|
||||
ns = namespace
|
||||
name = "%s %s" % (ns, localname)
|
||||
else:
|
||||
name = localname
|
||||
output.append("%s%s=\"%s\"" % (" " * indent, name, value))
|
||||
# self-closing
|
||||
if type == "EmptyTag":
|
||||
indent -= 2
|
||||
|
||||
elif type == "EndTag":
|
||||
indent -= 2
|
||||
|
||||
elif type == "Comment":
|
||||
output.append("%s<!-- %s -->" % (" " * indent, token["data"]))
|
||||
|
||||
elif type == "Doctype":
|
||||
if token["name"]:
|
||||
if token["publicId"]:
|
||||
output.append("""%s<!DOCTYPE %s "%s" "%s">""" %
|
||||
(" " * indent,
|
||||
token["name"],
|
||||
token["publicId"],
|
||||
token["systemId"] if token["systemId"] else ""))
|
||||
elif token["systemId"]:
|
||||
output.append("""%s<!DOCTYPE %s "" "%s">""" %
|
||||
(" " * indent,
|
||||
token["name"],
|
||||
token["systemId"]))
|
||||
else:
|
||||
output.append("%s<!DOCTYPE %s>" % (" " * indent,
|
||||
token["name"]))
|
||||
else:
|
||||
output.append("%s<!DOCTYPE >" % (" " * indent,))
|
||||
|
||||
elif type == "Characters":
|
||||
output.append("%s\"%s\"" % (" " * indent, token["data"]))
|
||||
|
||||
elif type == "SpaceCharacters":
|
||||
assert False, "concatenateCharacterTokens should have got rid of all Space tokens"
|
||||
|
||||
else:
|
||||
raise ValueError("Unknown token type, %s" % type)
|
||||
|
||||
return "\n".join(output)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue