mirror of
https://github.com/Tautulli/Tautulli.git
synced 2025-07-06 13:11:15 -07:00
Initial Commit
This commit is contained in:
commit
88daa3fb91
1311 changed files with 256240 additions and 0 deletions
76
lib/html5lib/treebuilders/__init__.py
Normal file
76
lib/html5lib/treebuilders/__init__.py
Normal file
|
@ -0,0 +1,76 @@
|
|||
"""A collection of modules for building different kinds of tree from
|
||||
HTML documents.
|
||||
|
||||
To create a treebuilder for a new type of tree, you need to
|
||||
implement several things:
|
||||
|
||||
1) A set of classes for various types of elements: Document, Doctype,
|
||||
Comment, Element. These must implement the interface of
|
||||
treebuilders._base.Node (although comment nodes have a different
|
||||
signature for their constructor, see treebuilders.etree.Comment)
|
||||
Textual content may also be implemented as another node type, or not, as
|
||||
your tree implementation requires.
|
||||
|
||||
2) A treebuilder object (called TreeBuilder by convention) that
|
||||
inherits from treebuilders._base.TreeBuilder. This has 4 required attributes:
|
||||
documentClass - the class to use for the bottommost node of a document
|
||||
elementClass - the class to use for HTML Elements
|
||||
commentClass - the class to use for comments
|
||||
doctypeClass - the class to use for doctypes
|
||||
It also has one required method:
|
||||
getDocument - Returns the root node of the complete document tree
|
||||
|
||||
3) If you wish to run the unit tests, you must also create a
|
||||
testSerializer method on your treebuilder which accepts a node and
|
||||
returns a string containing Node and its children serialized according
|
||||
to the format used in the unittests
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from ..utils import default_etree
|
||||
|
||||
# TreeBuilder classes keyed by lower-cased tree type. Only builders that are
# not produced by a per-implementation module factory (currently "lxml") are
# stored here; "dom" and "etree" are cached by their own submodules.
treeBuilderCache = {}


def getTreeBuilder(treeType, implementation=None, **kwargs):
    """Get a TreeBuilder class for various types of tree with built-in support

    treeType - the name of the tree type required (case-insensitive). Supported
               values are:

               "dom" - A generic builder for DOM implementations, defaulting to
                       a xml.dom.minidom based implementation.
               "etree" - A generic builder for tree implementations exposing an
                         ElementTree-like interface, defaulting to
                         xml.etree.cElementTree if available and
                         xml.etree.ElementTree if not.
               "lxml" - A etree-based builder for lxml.etree, handling
                        limitations of lxml's implementation.

    implementation - (Currently applies to the "etree" and "dom" tree types). A
                     module implementing the tree type e.g.
                     xml.etree.ElementTree or xml.etree.cElementTree.

    Any further keyword arguments are forwarded to the "dom" / "etree" module
    factory. Raises ValueError when treeType is not one of the names above.
    """
    kind = treeType.lower()

    # Fast path: a builder for this type has already been cached.
    if kind in treeBuilderCache:
        return treeBuilderCache.get(kind)

    if kind == "dom":
        from . import dom
        # Come up with a sane default (pref. from the stdlib)
        if implementation is None:
            from xml.dom import minidom
            implementation = minidom
        # NEVER cache here, caching is done in the dom submodule
        return dom.getDomModule(implementation, **kwargs).TreeBuilder

    if kind == "etree":
        from . import etree
        if implementation is None:
            implementation = default_etree
        # NEVER cache here, caching is done in the etree submodule
        return etree.getETreeModule(implementation, **kwargs).TreeBuilder

    if kind != "lxml":
        raise ValueError("""Unrecognised treebuilder "%s" """ % kind)

    from . import etree_lxml
    treeBuilderCache[kind] = etree_lxml.TreeBuilder
    return treeBuilderCache.get(kind)
|
377
lib/html5lib/treebuilders/_base.py
Normal file
377
lib/html5lib/treebuilders/_base.py
Normal file
|
@ -0,0 +1,377 @@
|
|||
from __future__ import absolute_import, division, unicode_literals
|
||||
from six import text_type
|
||||
|
||||
from ..constants import scopingElements, tableInsertModeElements, namespaces
|
||||
|
||||
# Scope markers are inserted when entering object elements, marquees, table
# cells, and table captions; they keep formatting from "leaking" into tables,
# object elements, and marquees.
Marker = None

# Maps the ``variant`` argument of TreeBuilder.elementInScope() to a pair
# (names, invert). ``names`` is a frozenset of (namespace, tag name) tuples.
# With invert False, meeting one of those names ends the search; with invert
# True, meeting any name NOT in the set ends it.
listElementsMap = {
    None: (frozenset(scopingElements), False),
    "button": (
        frozenset(scopingElements.union([(namespaces["html"], "button")])),
        False,
    ),
    "list": (
        frozenset(scopingElements.union([(namespaces["html"], "ol"),
                                         (namespaces["html"], "ul")])),
        False,
    ),
    "table": (
        frozenset([(namespaces["html"], "html"),
                   (namespaces["html"], "table")]),
        False,
    ),
    "select": (
        frozenset([(namespaces["html"], "optgroup"),
                   (namespaces["html"], "option")]),
        True,
    ),
}
|
||||
|
||||
|
||||
class Node(object):
    """Abstract interface that every tree node wrapper must implement.

    Only construction, string rendering and reparentChildren() have working
    default implementations; every other method raises NotImplementedError
    and must be provided by the concrete treebuilder.
    """

    def __init__(self, name):
        """Node representing an item in the tree.

        name - The tag name associated with the node
        parent - The parent of the current node (or None for the document node)
        value - The value of the current node (applies to text nodes and
                comments)
        attributes - a dict holding name, value pairs for attributes of the node
        childNodes - a list of child nodes of the current node. This must
                     include all elements but not necessarily other node types
        _flags - A list of miscellaneous flags that can be set on the node
        """
        self.name = name
        self.parent = None
        self.value = None
        self.attributes = {}
        self.childNodes = []
        self._flags = []

    def __str__(self):
        # Render as an opening tag, with attributes when there are any.
        rendered = " ".join(["%s=\"%s\"" % (key, val)
                             for key, val in self.attributes.items()])
        if not rendered:
            return "<%s>" % (self.name)
        return "<%s %s>" % (self.name, rendered)

    def __repr__(self):
        return "<%s>" % (self.name)

    def appendChild(self, node):
        """Add node as the last child of this node."""
        raise NotImplementedError

    def insertText(self, data, insertBefore=None):
        """Insert data as text in this node.

        The text goes immediately before the child node insertBefore, or at
        the end of this node's text when insertBefore is None.
        """
        raise NotImplementedError

    def insertBefore(self, node, refNode):
        """Insert node as a child of this node, just before refNode.

        Raises ValueError if refNode is not a child of this node.
        """
        raise NotImplementedError

    def removeChild(self, node):
        """Detach node from this node's children."""
        raise NotImplementedError

    def reparentChildren(self, newParent):
        """Hand every child of this node over to newParent.

        This exists as a method so that trees which do not store text as
        nodes can override it and move the text correctly as well.
        """
        # XXX - should this method be made more general?
        for child in self.childNodes:
            newParent.appendChild(child)
        self.childNodes = []

    def cloneNode(self):
        """Return a shallow copy of this node.

        The copy has the same name and attributes but no parent and no
        child nodes.
        """
        raise NotImplementedError

    def hasContent(self):
        """Return true if the node has children or text, false otherwise."""
        raise NotImplementedError
|
||||
|
||||
|
||||
class ActiveFormattingElements(list):
    """List of active formatting elements with a cap on duplicates.

    append() keeps at most three equal entries (same nameTuple and
    attributes) after the most recent Marker.
    """

    def append(self, node):
        """Append node, first dropping an older duplicate if needed.

        Markers are appended unconditionally. For any other node the list is
        scanned from the newest entry back to the last Marker; when a third
        entry equal to node is found, that entry is removed before node is
        appended.
        """
        if node != Marker:
            duplicates = 0
            # Iterate over a reversed copy so removing from self is safe.
            for existing in self[::-1]:
                if existing == Marker:
                    break
                if not self.nodesEqual(existing, node):
                    continue
                duplicates += 1
                if duplicates == 3:
                    self.remove(existing)
                    break
        list.append(self, node)

    def nodesEqual(self, node1, node2):
        """Return True when both nodes share nameTuple and attributes."""
        sameName = node1.nameTuple == node2.nameTuple
        # Attributes are only compared when the names already match.
        return bool(sameName and node1.attributes == node2.attributes)
|
||||
|
||||
|
||||
class TreeBuilder(object):
    """Base treebuilder implementation

    Concrete treebuilders subclass this and supply the node factories:

    documentClass - the class to use for the bottommost node of a document
    elementClass - the class to use for HTML Elements
    commentClass - the class to use for comments
    doctypeClass - the class to use for doctypes
    fragmentClass - the class getFragment() uses for document fragments
    """

    # Document class
    documentClass = None

    # The class to use for creating a node
    elementClass = None

    # The class to use for creating comments
    commentClass = None

    # The class to use for creating doctypes
    doctypeClass = None

    # Fragment class
    fragmentClass = None

    # Names of the elements generateImpliedEndTags() is allowed to pop.
    # Built once here rather than on every call.
    # XXX td, th and tr are not actually needed
    _impliedEndTagNames = frozenset(("dd", "dt", "li", "option", "optgroup",
                                     "p", "rp", "rt"))

    def __init__(self, namespaceHTMLElements):
        """Create a builder and reset it to an empty document.

        namespaceHTMLElements - when true, elements whose token carries no
            namespace are created in the XHTML namespace; otherwise they are
            created with namespace None.
        """
        if namespaceHTMLElements:
            self.defaultNamespace = "http://www.w3.org/1999/xhtml"
        else:
            self.defaultNamespace = None
        self.reset()

    def reset(self):
        """Discard all parse state and start a fresh document."""
        self.openElements = []
        self.activeFormattingElements = ActiveFormattingElements()

        # XXX - rename these to headElement, formElement
        self.headPointer = None
        self.formPointer = None

        # Assigning through the property also selects insertElement.
        self.insertFromTable = False

        self.document = self.documentClass()

    def elementInScope(self, target, variant=None):
        """Return True if target is in scope on the stack of open elements.

        target - a node (anything with a nameTuple attribute), matched by
            equality, or a tag name string, matched against node.name
        variant - key into listElementsMap selecting which elements end the
            search

        The stack is walked from the most recently opened element. The result
        is False as soon as a scope-ending element is met before target.
        """
        # If we pass a node in we match that. if we pass a string
        # match any node with that name
        exactNode = hasattr(target, "nameTuple")

        listElements, invert = listElementsMap[variant]

        for node in reversed(self.openElements):
            if (node.name == target and not exactNode or
                    node == target and exactNode):
                return True
            elif (invert ^ (node.nameTuple in listElements)):
                return False

        assert False  # We should never reach this point

    def reconstructActiveFormattingElements(self):
        """Re-open formatting elements that are no longer on the open stack.

        Every entry after the last Marker (or last still-open entry) in the
        list of active formatting elements is cloned, inserted via
        insertElement, and replaced in the list by the new element.
        """
        # Within this algorithm the order of steps described in the
        # specification is not quite the same as the order of steps in the
        # code. It should still do the same though.

        # Step 1: stop the algorithm when there's nothing to do.
        if not self.activeFormattingElements:
            return

        # Step 2 and step 3: we start with the last element. So i is -1.
        i = len(self.activeFormattingElements) - 1
        entry = self.activeFormattingElements[i]
        if entry == Marker or entry in self.openElements:
            return

        # Step 6
        while entry != Marker and entry not in self.openElements:
            if i == 0:
                # This will be reset to 0 below
                i = -1
                break
            i -= 1
            # Step 5: let entry be one earlier in the list.
            entry = self.activeFormattingElements[i]

        while True:
            # Step 7
            i += 1

            # Step 8
            entry = self.activeFormattingElements[i]
            clone = entry.cloneNode()  # Mainly to get a new copy of the attributes

            # Step 9
            element = self.insertElement({"type": "StartTag",
                                          "name": clone.name,
                                          "namespace": clone.namespace,
                                          "data": clone.attributes})

            # Step 10
            self.activeFormattingElements[i] = element

            # Step 11
            if element == self.activeFormattingElements[-1]:
                break

    def clearActiveFormattingElements(self):
        """Pop active formatting entries up to and including the last Marker.

        If there is no Marker the whole list is emptied.
        """
        entry = self.activeFormattingElements.pop()
        while self.activeFormattingElements and entry != Marker:
            entry = self.activeFormattingElements.pop()

    def elementInActiveFormattingElements(self, name):
        """Check if an element exists between the end of the active
        formatting elements and the last marker. If it does, return it, else
        return false"""

        for item in self.activeFormattingElements[::-1]:
            # Check for Marker first because if it's a Marker it doesn't have a
            # name attribute.
            if item == Marker:
                break
            elif item.name == name:
                return item
        return False

    def insertRoot(self, token):
        """Create the root element from token and attach it to the document."""
        element = self.createElement(token)
        self.openElements.append(element)
        self.document.appendChild(element)

    def insertDoctype(self, token):
        """Append a doctype built from token's name, publicId and systemId."""
        name = token["name"]
        publicId = token["publicId"]
        systemId = token["systemId"]

        doctype = self.doctypeClass(name, publicId, systemId)
        self.document.appendChild(doctype)

    def insertComment(self, token, parent=None):
        """Append a comment holding token["data"] to parent.

        parent defaults to the current (most recently opened) element.
        """
        if parent is None:
            parent = self.openElements[-1]
        parent.appendChild(self.commentClass(token["data"]))

    def createElement(self, token):
        """Create an element but don't insert it anywhere"""
        name = token["name"]
        namespace = token.get("namespace", self.defaultNamespace)
        element = self.elementClass(name, namespace)
        element.attributes = token["data"]
        return element

    def _getInsertFromTable(self):
        return self._insertFromTable

    def _setInsertFromTable(self, value):
        """Switch the function used to insert an element from the
        normal one to the misnested table one and back again"""
        self._insertFromTable = value
        if value:
            self.insertElement = self.insertElementTable
        else:
            self.insertElement = self.insertElementNormal

    insertFromTable = property(_getInsertFromTable, _setInsertFromTable)

    def insertElementNormal(self, token):
        """Create an element, append it to the current element and open it."""
        name = token["name"]
        assert isinstance(name, text_type), "Element %s not unicode" % name
        namespace = token.get("namespace", self.defaultNamespace)
        element = self.elementClass(name, namespace)
        element.attributes = token["data"]
        self.openElements[-1].appendChild(element)
        self.openElements.append(element)
        return element

    def insertElementTable(self, token):
        """Create an element and insert it into the tree"""
        if self.openElements[-1].name not in tableInsertModeElements:
            # Ordinary insertion: insertElementNormal builds its own element,
            # so nothing is created here only to be thrown away.
            return self.insertElementNormal(token)

        # We should be in the InTable mode. This means we want to do
        # special magic element rearranging
        element = self.createElement(token)
        parent, insertBefore = self.getTableMisnestedNodePosition()
        if insertBefore is None:
            parent.appendChild(element)
        else:
            parent.insertBefore(element, insertBefore)
        self.openElements.append(element)
        return element

    def insertText(self, data, parent=None):
        """Insert text data."""
        if parent is None:
            parent = self.openElements[-1]

        if (not self.insertFromTable or
                self.openElements[-1].name not in tableInsertModeElements):
            parent.insertText(data)
        else:
            # We should be in the InTable mode. This means we want to do
            # special magic element rearranging
            parent, insertBefore = self.getTableMisnestedNodePosition()
            parent.insertText(data, insertBefore)

    def getTableMisnestedNodePosition(self):
        """Get the foster parent element, and sibling to insert before
        (or None) when inserting a misnested table node"""
        # The foster parent element is the one which comes before the most
        # recently opened table element
        # XXX - this is really inelegant
        lastTable = None
        fosterParent = None
        insertBefore = None
        for elm in self.openElements[::-1]:
            if elm.name == "table":
                lastTable = elm
                break
        if lastTable:
            # XXX - we should really check that this parent is actually a
            # node here
            if lastTable.parent:
                fosterParent = lastTable.parent
                insertBefore = lastTable
            else:
                # Table has no parent: use the element opened just before it.
                fosterParent = self.openElements[
                    self.openElements.index(lastTable) - 1]
        else:
            fosterParent = self.openElements[0]
        return fosterParent, insertBefore

    def generateImpliedEndTags(self, exclude=None):
        """Pop open elements whose end tag is implied.

        Elements are popped from the top of the stack while their name is in
        _impliedEndTagNames and differs from exclude.

        This is a loop rather than recursion so that a long run of such
        elements cannot exhaust the interpreter's recursion limit.
        """
        # XXX This is not entirely what the specification says. We should
        # investigate it more closely.
        implied = self._impliedEndTagNames
        while True:
            name = self.openElements[-1].name
            if name not in implied or name == exclude:
                break
            self.openElements.pop()

    def getDocument(self):
        "Return the final tree"
        return self.document

    def getFragment(self):
        "Return the final fragment"
        # assert self.innerHTML
        fragment = self.fragmentClass()
        self.openElements[0].reparentChildren(fragment)
        return fragment

    def testSerializer(self, node):
        """Serialize the subtree of node in the format required by unit tests
        node - the node from which to start serializing"""
        raise NotImplementedError
|
227
lib/html5lib/treebuilders/dom.py
Normal file
227
lib/html5lib/treebuilders/dom.py
Normal file
|
@ -0,0 +1,227 @@
|
|||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
|
||||
from xml.dom import minidom, Node
|
||||
import weakref
|
||||
|
||||
from . import _base
|
||||
from .. import constants
|
||||
from ..constants import namespaces
|
||||
from ..utils import moduleFactoryFactory
|
||||
|
||||
|
||||
def getDomBuilder(DomImplementation):
    """Build the DOM treebuilder classes for one DOM implementation.

    DomImplementation - a module providing getDOMImplementation(), such as
        xml.dom.minidom

    Returns locals(), i.e. every name bound at the top level of this
    function (Dom, AttrList, NodeBuilder, TreeBuilder, testSerializer and
    the argument itself). Do not add further top-level locals here: they
    would become part of the returned mapping.
    """
    Dom = DomImplementation

    class AttrList(object):
        """Mapping-style view of the attributes of a wrapped DOM element."""

        def __init__(self, element):
            self.element = element

        def __iter__(self):
            return iter(list(self.element.attributes.items()))

        def __setitem__(self, name, value):
            self.element.setAttribute(name, value)

        def __len__(self):
            return len(list(self.element.attributes.items()))

        def items(self):
            return [(pair[0], pair[1])
                    for pair in list(self.element.attributes.items())]

        def keys(self):
            return list(self.element.attributes.keys())

        def __getitem__(self, name):
            return self.element.getAttribute(name)

        def __contains__(self, name):
            # Tuple keys are not supported by this lookup.
            if isinstance(name, tuple):
                raise NotImplementedError
            return self.element.hasAttribute(name)

    class NodeBuilder(_base.Node):
        """_base.Node wrapper around a DOM node held in self.element."""

        def __init__(self, element):
            _base.Node.__init__(self, element.nodeName)
            self.element = element

        @property
        def namespace(self):
            # None when the DOM node has no namespaceURI or a falsy one.
            if hasattr(self.element, "namespaceURI"):
                return self.element.namespaceURI or None
            return None

        def appendChild(self, node):
            node.parent = self
            self.element.appendChild(node.element)

        def insertText(self, data, insertBefore=None):
            textNode = self.element.ownerDocument.createTextNode(data)
            if not insertBefore:
                self.element.appendChild(textNode)
            else:
                self.element.insertBefore(textNode, insertBefore.element)

        def insertBefore(self, node, refNode):
            self.element.insertBefore(node.element, refNode.element)
            node.parent = self

        def removeChild(self, node):
            # Only touch the DOM when node really is a child of this element.
            if node.element.parentNode == self.element:
                self.element.removeChild(node.element)
            node.parent = None

        def reparentChildren(self, newParent):
            # Move the DOM children one at a time, first child first.
            while self.element.hasChildNodes():
                moved = self.element.firstChild
                self.element.removeChild(moved)
                newParent.element.appendChild(moved)
            self.childNodes = []

        def getAttributes(self):
            return AttrList(self.element)

        def setAttributes(self, attributes):
            if not attributes:
                return
            for key, value in list(attributes.items()):
                if not isinstance(key, tuple):
                    self.element.setAttribute(key, value)
                    continue
                # Tuple keys are (prefix, local name, namespace URI).
                if key[0] is not None:
                    qualifiedName = (key[0] + ":" + key[1])
                else:
                    qualifiedName = key[1]
                self.element.setAttributeNS(key[2], qualifiedName, value)

        attributes = property(getAttributes, setAttributes)

        def cloneNode(self):
            return NodeBuilder(self.element.cloneNode(False))

        def hasContent(self):
            return self.element.hasChildNodes()

        def getNameTuple(self):
            # Nodes without a namespace are reported in the HTML namespace.
            if self.namespace is None:
                return namespaces["html"], self.name
            return self.namespace, self.name

        nameTuple = property(getNameTuple)

    class TreeBuilder(_base.TreeBuilder):
        """Treebuilder producing a DOM document via the Dom implementation."""

        def documentClass(self):
            # The builder itself acts as the document node; the real DOM
            # document is kept in self.dom.
            self.dom = Dom.getDOMImplementation().createDocument(None, None, None)
            return weakref.proxy(self)

        def insertDoctype(self, token):
            name = token["name"]
            publicId = token["publicId"]
            systemId = token["systemId"]

            impl = Dom.getDOMImplementation()
            doctype = impl.createDocumentType(name, publicId, systemId)
            self.document.appendChild(NodeBuilder(doctype))
            if Dom == minidom:
                doctype.ownerDocument = self.dom

        def elementClass(self, name, namespace=None):
            if namespace is None and self.defaultNamespace is None:
                domNode = self.dom.createElement(name)
            else:
                domNode = self.dom.createElementNS(namespace, name)

            return NodeBuilder(domNode)

        def commentClass(self, data):
            return NodeBuilder(self.dom.createComment(data))

        def fragmentClass(self):
            return NodeBuilder(self.dom.createDocumentFragment())

        def appendChild(self, node):
            self.dom.appendChild(node.element)

        def testSerializer(self, element):
            return testSerializer(element)

        def getDocument(self):
            return self.dom

        def getFragment(self):
            return _base.TreeBuilder.getFragment(self).element

        def insertText(self, data, parent=None):
            if parent != self:
                _base.TreeBuilder.insertText(self, data, parent)
                return
            # HACK: allow text nodes as children of the document node
            if hasattr(self.dom, '_child_node_types'):
                if Node.TEXT_NODE not in self.dom._child_node_types:
                    self.dom._child_node_types = list(self.dom._child_node_types)
                    self.dom._child_node_types.append(Node.TEXT_NODE)
            self.dom.appendChild(self.dom.createTextNode(data))

        implementation = DomImplementation
        name = None

    def testSerializer(element):
        """Serialize the DOM subtree at element in the unit-test format."""
        element.normalize()
        lines = []

        def serializeElement(element, indent=0):
            pad = ' ' * indent
            kind = element.nodeType
            if kind == Node.DOCUMENT_TYPE_NODE:
                if not element.name:
                    lines.append("|%s<!DOCTYPE >" % (pad,))
                elif element.publicId or element.systemId:
                    publicId = element.publicId or ""
                    systemId = element.systemId or ""
                    lines.append("""|%s<!DOCTYPE %s "%s" "%s">""" %
                                 (pad, element.name, publicId, systemId))
                else:
                    lines.append("|%s<!DOCTYPE %s>" % (pad, element.name))
            elif kind == Node.DOCUMENT_NODE:
                lines.append("#document")
            elif kind == Node.DOCUMENT_FRAGMENT_NODE:
                lines.append("#document-fragment")
            elif kind == Node.COMMENT_NODE:
                lines.append("|%s<!-- %s -->" % (pad, element.nodeValue))
            elif kind == Node.TEXT_NODE:
                lines.append("|%s\"%s\"" % (pad, element.nodeValue))
            else:
                if (hasattr(element, "namespaceURI") and
                        element.namespaceURI is not None):
                    label = "%s %s" % (constants.prefixes[element.namespaceURI],
                                       element.nodeName)
                else:
                    label = element.nodeName
                lines.append("|%s<%s>" % (pad, label))
                if element.hasAttributes():
                    pairs = []
                    for i in range(len(element.attributes)):
                        attr = element.attributes.item(i)
                        ns = attr.namespaceURI
                        if ns:
                            attrName = "%s %s" % (constants.prefixes[ns], attr.localName)
                        else:
                            attrName = attr.nodeName
                        pairs.append((attrName, attr.value))

                    # Attributes are emitted sorted, one level deeper.
                    for attrName, value in sorted(pairs):
                        lines.append('|%s%s="%s"' % (' ' * (indent + 2), attrName, value))
            for child in element.childNodes:
                serializeElement(child, indent + 2)

        serializeElement(element, 0)

        return "\n".join(lines)

    return locals()


# The actual means to get a module!
getDomModule = moduleFactoryFactory(getDomBuilder)
|
337
lib/html5lib/treebuilders/etree.py
Normal file
337
lib/html5lib/treebuilders/etree.py
Normal file
|
@ -0,0 +1,337 @@
|
|||
from __future__ import absolute_import, division, unicode_literals
|
||||
from six import text_type
|
||||
|
||||
import re
|
||||
|
||||
from . import _base
|
||||
from .. import ihatexml
|
||||
from .. import constants
|
||||
from ..constants import namespaces
|
||||
from ..utils import moduleFactoryFactory
|
||||
|
||||
# Splits an ElementTree-style qualified name "{namespace}localname" into its
# namespace (group 1) and local name (group 2).
tag_regexp = re.compile(r"{([^}]*)}(.*)")
|
||||
|
||||
|
||||
def getETreeBuilder(ElementTreeImplementation, fullTree=False):
|
||||
ElementTree = ElementTreeImplementation
|
||||
ElementTreeCommentType = ElementTree.Comment("asd").tag
|
||||
|
||||
class Element(_base.Node):
|
||||
def __init__(self, name, namespace=None):
|
||||
self._name = name
|
||||
self._namespace = namespace
|
||||
self._element = ElementTree.Element(self._getETreeTag(name,
|
||||
namespace))
|
||||
if namespace is None:
|
||||
self.nameTuple = namespaces["html"], self._name
|
||||
else:
|
||||
self.nameTuple = self._namespace, self._name
|
||||
self.parent = None
|
||||
self._childNodes = []
|
||||
self._flags = []
|
||||
|
||||
def _getETreeTag(self, name, namespace):
|
||||
if namespace is None:
|
||||
etree_tag = name
|
||||
else:
|
||||
etree_tag = "{%s}%s" % (namespace, name)
|
||||
return etree_tag
|
||||
|
||||
def _setName(self, name):
|
||||
self._name = name
|
||||
self._element.tag = self._getETreeTag(self._name, self._namespace)
|
||||
|
||||
def _getName(self):
|
||||
return self._name
|
||||
|
||||
name = property(_getName, _setName)
|
||||
|
||||
def _setNamespace(self, namespace):
|
||||
self._namespace = namespace
|
||||
self._element.tag = self._getETreeTag(self._name, self._namespace)
|
||||
|
||||
def _getNamespace(self):
|
||||
return self._namespace
|
||||
|
||||
namespace = property(_getNamespace, _setNamespace)
|
||||
|
||||
def _getAttributes(self):
|
||||
return self._element.attrib
|
||||
|
||||
def _setAttributes(self, attributes):
|
||||
# Delete existing attributes first
|
||||
# XXX - there may be a better way to do this...
|
||||
for key in list(self._element.attrib.keys()):
|
||||
del self._element.attrib[key]
|
||||
for key, value in attributes.items():
|
||||
if isinstance(key, tuple):
|
||||
name = "{%s}%s" % (key[2], key[1])
|
||||
else:
|
||||
name = key
|
||||
self._element.set(name, value)
|
||||
|
||||
attributes = property(_getAttributes, _setAttributes)
|
||||
|
||||
def _getChildNodes(self):
|
||||
return self._childNodes
|
||||
|
||||
def _setChildNodes(self, value):
|
||||
del self._element[:]
|
||||
self._childNodes = []
|
||||
for element in value:
|
||||
self.insertChild(element)
|
||||
|
||||
childNodes = property(_getChildNodes, _setChildNodes)
|
||||
|
||||
def hasContent(self):
|
||||
"""Return true if the node has children or text"""
|
||||
return bool(self._element.text or len(self._element))
|
||||
|
||||
def appendChild(self, node):
|
||||
self._childNodes.append(node)
|
||||
self._element.append(node._element)
|
||||
node.parent = self
|
||||
|
||||
def insertBefore(self, node, refNode):
|
||||
index = list(self._element).index(refNode._element)
|
||||
self._element.insert(index, node._element)
|
||||
node.parent = self
|
||||
|
||||
def removeChild(self, node):
|
||||
self._element.remove(node._element)
|
||||
node.parent = None
|
||||
|
||||
def insertText(self, data, insertBefore=None):
|
||||
if not(len(self._element)):
|
||||
if not self._element.text:
|
||||
self._element.text = ""
|
||||
self._element.text += data
|
||||
elif insertBefore is None:
|
||||
# Insert the text as the tail of the last child element
|
||||
if not self._element[-1].tail:
|
||||
self._element[-1].tail = ""
|
||||
self._element[-1].tail += data
|
||||
else:
|
||||
# Insert the text before the specified node
|
||||
children = list(self._element)
|
||||
index = children.index(insertBefore._element)
|
||||
if index > 0:
|
||||
if not self._element[index - 1].tail:
|
||||
self._element[index - 1].tail = ""
|
||||
self._element[index - 1].tail += data
|
||||
else:
|
||||
if not self._element.text:
|
||||
self._element.text = ""
|
||||
self._element.text += data
|
||||
|
||||
def cloneNode(self):
|
||||
element = type(self)(self.name, self.namespace)
|
||||
for name, value in self.attributes.items():
|
||||
element.attributes[name] = value
|
||||
return element
|
||||
|
||||
def reparentChildren(self, newParent):
|
||||
if newParent.childNodes:
|
||||
newParent.childNodes[-1]._element.tail += self._element.text
|
||||
else:
|
||||
if not newParent._element.text:
|
||||
newParent._element.text = ""
|
||||
if self._element.text is not None:
|
||||
newParent._element.text += self._element.text
|
||||
self._element.text = ""
|
||||
_base.Node.reparentChildren(self, newParent)
|
||||
|
||||
class Comment(Element):
|
||||
def __init__(self, data):
|
||||
# Use the superclass constructor to set all properties on the
|
||||
# wrapper element
|
||||
self._element = ElementTree.Comment(data)
|
||||
self.parent = None
|
||||
self._childNodes = []
|
||||
self._flags = []
|
||||
|
||||
def _getData(self):
|
||||
return self._element.text
|
||||
|
||||
def _setData(self, value):
|
||||
self._element.text = value
|
||||
|
||||
data = property(_getData, _setData)
|
||||
|
||||
class DocumentType(Element):
|
||||
def __init__(self, name, publicId, systemId):
|
||||
Element.__init__(self, "<!DOCTYPE>")
|
||||
self._element.text = name
|
||||
self.publicId = publicId
|
||||
self.systemId = systemId
|
||||
|
||||
def _getPublicId(self):
|
||||
return self._element.get("publicId", "")
|
||||
|
||||
def _setPublicId(self, value):
|
||||
if value is not None:
|
||||
self._element.set("publicId", value)
|
||||
|
||||
publicId = property(_getPublicId, _setPublicId)
|
||||
|
||||
def _getSystemId(self):
|
||||
return self._element.get("systemId", "")
|
||||
|
||||
def _setSystemId(self, value):
|
||||
if value is not None:
|
||||
self._element.set("systemId", value)
|
||||
|
||||
systemId = property(_getSystemId, _setSystemId)
|
||||
|
||||
class Document(Element):
|
||||
def __init__(self):
|
||||
Element.__init__(self, "DOCUMENT_ROOT")
|
||||
|
||||
class DocumentFragment(Element):
|
||||
def __init__(self):
|
||||
Element.__init__(self, "DOCUMENT_FRAGMENT")
|
||||
|
||||
def testSerializer(element):
|
||||
rv = []
|
||||
|
||||
def serializeElement(element, indent=0):
|
||||
if not(hasattr(element, "tag")):
|
||||
element = element.getroot()
|
||||
if element.tag == "<!DOCTYPE>":
|
||||
if element.get("publicId") or element.get("systemId"):
|
||||
publicId = element.get("publicId") or ""
|
||||
systemId = element.get("systemId") or ""
|
||||
rv.append("""<!DOCTYPE %s "%s" "%s">""" %
|
||||
(element.text, publicId, systemId))
|
||||
else:
|
||||
rv.append("<!DOCTYPE %s>" % (element.text,))
|
||||
elif element.tag == "DOCUMENT_ROOT":
|
||||
rv.append("#document")
|
||||
if element.text is not None:
|
||||
rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
|
||||
if element.tail is not None:
|
||||
raise TypeError("Document node cannot have tail")
|
||||
if hasattr(element, "attrib") and len(element.attrib):
|
||||
raise TypeError("Document node cannot have attributes")
|
||||
elif element.tag == ElementTreeCommentType:
|
||||
rv.append("|%s<!-- %s -->" % (' ' * indent, element.text))
|
||||
else:
|
||||
assert isinstance(element.tag, text_type), \
|
||||
"Expected unicode, got %s, %s" % (type(element.tag), element.tag)
|
||||
nsmatch = tag_regexp.match(element.tag)
|
||||
|
||||
if nsmatch is None:
|
||||
name = element.tag
|
||||
else:
|
||||
ns, name = nsmatch.groups()
|
||||
prefix = constants.prefixes[ns]
|
||||
name = "%s %s" % (prefix, name)
|
||||
rv.append("|%s<%s>" % (' ' * indent, name))
|
||||
|
||||
if hasattr(element, "attrib"):
|
||||
attributes = []
|
||||
for name, value in element.attrib.items():
|
||||
nsmatch = tag_regexp.match(name)
|
||||
if nsmatch is not None:
|
||||
ns, name = nsmatch.groups()
|
||||
prefix = constants.prefixes[ns]
|
||||
attr_string = "%s %s" % (prefix, name)
|
||||
else:
|
||||
attr_string = name
|
||||
attributes.append((attr_string, value))
|
||||
|
||||
for name, value in sorted(attributes):
|
||||
rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
|
||||
if element.text:
|
||||
rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
|
||||
indent += 2
|
||||
for child in element:
|
||||
serializeElement(child, indent)
|
||||
if element.tail:
|
||||
rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail))
|
||||
serializeElement(element, 0)
|
||||
|
||||
return "\n".join(rv)
|
||||
|
||||
def tostring(element):
    """Serialize an element and its child nodes to a string.

    An ElementTree.ElementTree argument is replaced by its root element.
    Doctype ("<!DOCTYPE>"), document ("DOCUMENT_ROOT") and comment nodes
    get dedicated output; everything else is written as a start tag,
    text, children and end tag.  Attribute values and text are written
    as-is, without escaping.

    Raises TypeError if a DOCUMENT_ROOT element has a tail or attributes.
    """
    rv = []
    # Named so it does not shadow the builtin filter()
    infoset_filter = ihatexml.InfosetFilter()

    def serializeElement(element):
        if isinstance(element, ElementTree.ElementTree):
            element = element.getroot()

        if element.tag == "<!DOCTYPE>":
            if element.get("publicId") or element.get("systemId"):
                publicId = element.get("publicId") or ""
                systemId = element.get("systemId") or ""
                rv.append("""<!DOCTYPE %s PUBLIC "%s" "%s">""" %
                          (element.text, publicId, systemId))
            else:
                rv.append("<!DOCTYPE %s>" % (element.text,))
        elif element.tag == "DOCUMENT_ROOT":
            if element.text is not None:
                rv.append(element.text)
            if element.tail is not None:
                raise TypeError("Document node cannot have tail")
            if hasattr(element, "attrib") and len(element.attrib):
                raise TypeError("Document node cannot have attributes")

            for child in element:
                serializeElement(child)

        elif element.tag == ElementTreeCommentType:
            rv.append("<!--%s-->" % (element.text,))
        else:
            # This is assumed to be an ordinary element.  The tag name is
            # decoded once and reused so that the start tag (with or
            # without attributes) and the end tag always agree.
            tag_name = infoset_filter.fromXmlName(element.tag)
            if not element.attrib:
                rv.append("<%s>" % (tag_name,))
            else:
                attr = " ".join(["%s=\"%s\"" % (
                    infoset_filter.fromXmlName(name), value)
                    for name, value in element.attrib.items()])
                rv.append("<%s %s>" % (tag_name, attr))
            if element.text:
                rv.append(element.text)

            for child in element:
                serializeElement(child)

            rv.append("</%s>" % (tag_name,))

        if element.tail:
            rv.append(element.tail)

    serializeElement(element)

    return "".join(rv)
|
||||
|
||||
class TreeBuilder(_base.TreeBuilder):
    """Tree builder wired to the ElementTree-backed node classes."""

    documentClass = Document
    doctypeClass = DocumentType
    elementClass = Element
    commentClass = Comment
    fragmentClass = DocumentFragment
    implementation = ElementTreeImplementation

    def testSerializer(self, element):
        """Delegate to the testSerializer function of the enclosing scope."""
        return testSerializer(element)

    def getDocument(self):
        """Return the document element when fullTree is set, otherwise the
        result of looking up its "html" child (namespace-qualified when a
        default namespace is configured)."""
        if fullTree:
            return self.document._element
        if self.defaultNamespace is None:
            html_tag = "html"
        else:
            html_tag = "{%s}html" % self.defaultNamespace
        return self.document._element.find(html_tag)

    def getFragment(self):
        """Return the underlying element of the fragment built by the
        base class."""
        fragment_node = _base.TreeBuilder.getFragment(self)
        return fragment_node._element
|
||||
|
||||
return locals()
|
||||
|
||||
|
||||
# Module-level entry point built from getETreeBuilder via moduleFactoryFactory.
# NOTE(review): presumably returns an object exposing the names that
# getETreeBuilder returns through locals() -- confirm against
# moduleFactoryFactory.
getETreeModule = moduleFactoryFactory(getETreeBuilder)
|
369
lib/html5lib/treebuilders/etree_lxml.py
Normal file
369
lib/html5lib/treebuilders/etree_lxml.py
Normal file
|
@ -0,0 +1,369 @@
|
|||
"""Module for supporting the lxml.etree library. The idea here is to use as much
|
||||
of the native library as possible, without using fragile hacks like custom element
|
||||
names that break between releases. The downside of this is that we cannot represent
|
||||
all possible trees; specifically the following are known to cause problems:
|
||||
|
||||
Text or comments as siblings of the root element
|
||||
Doctypes with no name
|
||||
|
||||
When any of these things occur, we emit a DataLossWarning
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
import warnings
|
||||
import re
|
||||
import sys
|
||||
|
||||
from . import _base
|
||||
from ..constants import DataLossWarning
|
||||
from .. import constants
|
||||
from . import etree as etree_builders
|
||||
from .. import ihatexml
|
||||
|
||||
import lxml.etree as etree
|
||||
|
||||
|
||||
# Module-level flag.  TreeBuilder.getDocument reads this global (not the
# fullTree argument passed to TreeBuilder.__init__) to decide whether to
# return the whole ElementTree or only its root element.
fullTree = True
|
||||
# Splits a "{namespace}local" name into its (namespace, local) groups.
tag_regexp = re.compile("{([^}]*)}(.*)")
|
||||
|
||||
# The .tag value of an lxml comment node, taken from a throwaway comment;
# compared against element.tag below to recognise comments.
comment_type = etree.Comment("asd").tag
|
||||
|
||||
|
||||
class DocumentType(object):
    """Plain holder for a doctype's name, public id and system id."""

    def __init__(self, name, publicId, systemId):
        self.name, self.publicId, self.systemId = name, publicId, systemId
|
||||
|
||||
|
||||
class Document(object):
    """Document node that holds an element tree plus a list of child nodes."""

    def __init__(self):
        self._childNodes = []
        # Starts as None; TreeBuilder.insertRoot assigns the real tree later
        self._elementTree = None

    def appendChild(self, element):
        """Insert element's underlying node as the next sibling of the
        tree's root element."""
        root = self._elementTree.getroot()
        root.addnext(element._element)

    def _getChildNodes(self):
        """Return the list of child nodes recorded on this document."""
        return self._childNodes

    childNodes = property(_getChildNodes)
|
||||
|
||||
|
||||
def testSerializer(element):
    """Render a tree, fragment list or element as indented lines, one node
    per line, and return them joined with newlines.

    Three kinds of tag-less input are handled: objects with getroot()
    (rendered as "#document" with an optional doctype line and every
    top-level sibling of the root), plain strings (text inside a
    fragment), and any other iterable (rendered as "#document-fragment").
    """
    rv = []
    infosetFilter = ihatexml.InfosetFilter()

    def serializeElement(element, indent=0):
        if not hasattr(element, "tag"):
            if hasattr(element, "getroot"):
                # Full tree case
                rv.append("#document")
                if element.docinfo.internalDTD:
                    if not (element.docinfo.public_id or
                            element.docinfo.system_url):
                        dtd_str = "<!DOCTYPE %s>" % element.docinfo.root_name
                    else:
                        dtd_str = """<!DOCTYPE %s "%s" "%s">""" % (
                            element.docinfo.root_name,
                            element.docinfo.public_id,
                            element.docinfo.system_url)
                    rv.append("|%s%s" % (' ' * (indent + 2), dtd_str))
                # Rewind to the first top-level sibling (e.g. comments
                # before the root), then serialize every sibling in order.
                next_element = element.getroot()
                while next_element.getprevious() is not None:
                    next_element = next_element.getprevious()
                while next_element is not None:
                    serializeElement(next_element, indent + 2)
                    next_element = next_element.getnext()
            elif isinstance(element, (str, bytes)):
                # Text in a fragment
                assert isinstance(element, str) or sys.version_info.major == 2
                rv.append("|%s\"%s\"" % (' ' * indent, element))
            else:
                # Fragment case
                rv.append("#document-fragment")
                for next_element in element:
                    serializeElement(next_element, indent + 2)
        elif element.tag == comment_type:
            rv.append("|%s<!-- %s -->" % (' ' * indent, element.text))
            if hasattr(element, "tail") and element.tail:
                rv.append("|%s\"%s\"" % (' ' * indent, element.tail))
        else:
            assert isinstance(element, etree._Element)
            nsmatch = etree_builders.tag_regexp.match(element.tag)
            if nsmatch is not None:
                ns = nsmatch.group(1)
                tag = nsmatch.group(2)
                prefix = constants.prefixes[ns]
                rv.append("|%s<%s %s>" % (' ' * indent, prefix,
                                          infosetFilter.fromXmlName(tag)))
            else:
                rv.append("|%s<%s>" % (' ' * indent,
                                       infosetFilter.fromXmlName(element.tag)))

            if hasattr(element, "attrib"):
                attributes = []
                for name, value in element.attrib.items():
                    nsmatch = tag_regexp.match(name)
                    if nsmatch is not None:
                        ns, name = nsmatch.groups()
                        name = infosetFilter.fromXmlName(name)
                        prefix = constants.prefixes[ns]
                        attr_string = "%s %s" % (prefix, name)
                    else:
                        attr_string = infosetFilter.fromXmlName(name)
                    attributes.append((attr_string, value))

                # Attributes are listed in sorted order of display name
                for name, value in sorted(attributes):
                    rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))

            if element.text:
                rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
            indent += 2
            for child in element:
                serializeElement(child, indent)
            if hasattr(element, "tail") and element.tail:
                rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail))

    serializeElement(element, 0)

    return "\n".join(rv)
|
||||
|
||||
|
||||
def tostring(element):
    """Serialize an element and its child nodes to a string.

    An object without a "tag" attribute is treated as a whole tree: its
    doctype is written first (when docinfo reports an internal DTD),
    followed by its root element.  Comments are written as <!--...-->;
    any other node is written as start tag, text, children and end tag.
    Attribute values and text are written as-is, without escaping.
    """
    rv = []

    def serializeElement(element):
        if not hasattr(element, "tag"):
            if element.docinfo.internalDTD:
                if element.docinfo.doctype:
                    dtd_str = element.docinfo.doctype
                else:
                    dtd_str = "<!DOCTYPE %s>" % element.docinfo.root_name
                rv.append(dtd_str)
            serializeElement(element.getroot())

        elif element.tag == comment_type:
            rv.append("<!--%s-->" % (element.text,))

        else:
            # This is assumed to be an ordinary element
            if not element.attrib:
                rv.append("<%s>" % (element.tag,))
            else:
                attr = " ".join(["%s=\"%s\"" % (name, value)
                                 for name, value in element.attrib.items()])
                rv.append("<%s %s>" % (element.tag, attr))
            if element.text:
                rv.append(element.text)

            for child in element:
                serializeElement(child)

            rv.append("</%s>" % (element.tag,))

        # Tree objects have no tail attribute, hence the hasattr guard
        if hasattr(element, "tail") and element.tail:
            rv.append(element.tail)

    serializeElement(element)

    return "".join(rv)
|
||||
|
||||
|
||||
class TreeBuilder(_base.TreeBuilder):
    """Tree builder that produces an lxml.etree tree.

    Element and comment wrapper classes are created per instance in
    __init__ because they close over that instance's InfosetFilter, which
    coerces names, text and comment data before they reach lxml.
    """
    documentClass = Document
    doctypeClass = DocumentType
    elementClass = None  # assigned per instance in __init__
    commentClass = None  # assigned per instance in __init__
    fragmentClass = Document
    implementation = etree

    def __init__(self, namespaceHTMLElements, fullTree=False):
        """Create the per-instance node classes and initialise the base
        builder.

        fullTree is forwarded to etree_builders.getETreeModule only.
        """
        builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
        infosetFilter = self.infosetFilter = ihatexml.InfosetFilter()
        self.namespaceHTMLElements = namespaceHTMLElements

        def xmlAttributeName(key):
            # Tuple keys carry the namespace at index 2 and the local name
            # at index 1; only the local name is coerced.
            if isinstance(key, tuple):
                return "{%s}%s" % (key[2],
                                   infosetFilter.coerceAttribute(key[1]))
            return infosetFilter.coerceAttribute(key)

        class Attributes(dict):
            """dict that mirrors every item onto the wrapped lxml element's
            attrib mapping under a coerced name."""

            def __init__(self, element, value=None):
                # None instead of a shared mutable {} default
                self._element = element
                dict.__init__(self, {} if value is None else value)
                for key, item in self.items():
                    self._element._element.attrib[xmlAttributeName(key)] = item

            def __setitem__(self, key, value):
                dict.__setitem__(self, key, value)
                self._element._element.attrib[xmlAttributeName(key)] = value

        class Element(builder.Element):
            """Element wrapper that coerces names and text."""

            def __init__(self, name, namespace):
                name = infosetFilter.coerceElement(name)
                builder.Element.__init__(self, name, namespace=namespace)
                self._attributes = Attributes(self)

            def _setName(self, name):
                self._name = infosetFilter.coerceElement(name)
                self._element.tag = self._getETreeTag(
                    self._name, self._namespace)

            def _getName(self):
                return infosetFilter.fromXmlName(self._name)

            name = property(_getName, _setName)

            def _getAttributes(self):
                return self._attributes

            def _setAttributes(self, attributes):
                self._attributes = Attributes(self, attributes)

            attributes = property(_getAttributes, _setAttributes)

            def insertText(self, data, insertBefore=None):
                data = infosetFilter.coerceCharacters(data)
                builder.Element.insertText(self, data, insertBefore)

            def appendChild(self, child):
                builder.Element.appendChild(self, child)

        class Comment(builder.Comment):
            """Comment wrapper that coerces its data."""

            def __init__(self, data):
                data = infosetFilter.coerceComment(data)
                builder.Comment.__init__(self, data)

            def _setData(self, data):
                data = infosetFilter.coerceComment(data)
                self._element.text = data

            def _getData(self):
                return self._element.text

            data = property(_getData, _setData)

        self.elementClass = Element
        # Use the coercing subclass defined above; assigning builder.Comment
        # here left that subclass unused and passed comment data to lxml
        # uncoerced.
        self.commentClass = Comment
        # fragmentClass intentionally stays the class-level Document
        _base.TreeBuilder.__init__(self, namespaceHTMLElements)

    def reset(self):
        """Reset builder state; comments are buffered until insertRoot."""
        _base.TreeBuilder.reset(self)
        self.insertComment = self.insertCommentInitial
        self.initial_comments = []
        self.doctype = None

    def testSerializer(self, element):
        """Delegate to the module-level testSerializer."""
        return testSerializer(element)

    def getDocument(self):
        """Return the ElementTree, or its root element when the flag is off.

        NOTE: this reads the module-level fullTree flag, not the fullTree
        argument given to __init__.
        """
        if fullTree:
            return self.document._elementTree
        else:
            return self.document._elementTree.getroot()

    def getFragment(self):
        """Return the first open element's content as a list: its text (if
        any), its child elements, then its tail (if any)."""
        fragment = []
        element = self.openElements[0]._element
        if element.text:
            fragment.append(element.text)
        fragment.extend(list(element))
        if element.tail:
            fragment.append(element.tail)
        return fragment

    def insertDoctype(self, token):
        """Record the doctype described by token for later use by
        insertRoot; warns when the name is empty or had to be coerced."""
        name = token["name"]
        publicId = token["publicId"]
        systemId = token["systemId"]

        if not name:
            warnings.warn("lxml cannot represent empty doctype", DataLossWarning)
            self.doctype = None
        else:
            coercedName = self.infosetFilter.coerceElement(name)
            if coercedName != name:
                warnings.warn("lxml cannot represent non-xml doctype", DataLossWarning)

            doctype = self.doctypeClass(coercedName, publicId, systemId)
            self.doctype = doctype

    def insertCommentInitial(self, data, parent=None):
        """Buffer a comment token seen before the root element exists."""
        self.initial_comments.append(data)

    def insertCommentMain(self, data, parent=None):
        """Insert a comment once the root exists, warning when a document
        level comment would follow another comment."""
        if (parent == self.document and
                self.document._elementTree.getroot()[-1].tag == comment_type):
            warnings.warn("lxml cannot represent adjacent comments beyond the root elements", DataLossWarning)
        super(TreeBuilder, self).insertComment(data, parent)

    def insertRoot(self, token):
        """Create the document root"""
        # Because of the way libxml2 works, it doesn't seem to be possible to
        # alter information like the doctype after the tree has been parsed.
        # Therefore we need to use the built-in parser to create our initial
        # tree, after which we can add elements like normal
        docStr = ""
        if self.doctype:
            assert self.doctype.name
            docStr += "<!DOCTYPE %s" % self.doctype.name
            if (self.doctype.publicId is not None or
                    self.doctype.systemId is not None):
                docStr += (' PUBLIC "%s" ' %
                           (self.infosetFilter.coercePubid(self.doctype.publicId or "")))
                if self.doctype.systemId:
                    sysid = self.doctype.systemId
                    if sysid.find("'") >= 0 and sysid.find('"') >= 0:
                        warnings.warn("DOCTYPE system cannot contain single and double quotes", DataLossWarning)
                        sysid = sysid.replace("'", 'U00027')
                    # Quote with whichever quote character the id lacks
                    if sysid.find("'") >= 0:
                        docStr += '"%s"' % sysid
                    else:
                        docStr += "'%s'" % sysid
                else:
                    docStr += "''"
            docStr += ">"
            if self.doctype.name != token["name"]:
                warnings.warn("lxml cannot represent doctype with a different name to the root element", DataLossWarning)
        # Placeholder root; it is renamed to the real tag further down
        docStr += "<THIS_SHOULD_NEVER_APPEAR_PUBLICLY/>"
        root = etree.fromstring(docStr)

        # Append the initial comments, built through commentClass so their
        # data is coerced like every other comment
        for comment_token in self.initial_comments:
            comment = self.commentClass(comment_token["data"])
            root.addprevious(comment._element)

        # Create the root document and add the ElementTree to it
        self.document = self.documentClass()
        self.document._elementTree = root.getroottree()

        # Give the root element the right name
        name = token["name"]
        namespace = token.get("namespace", self.defaultNamespace)
        if namespace is None:
            etree_tag = name
        else:
            etree_tag = "{%s}%s" % (namespace, name)
        root.tag = etree_tag

        # Add the root element to the internal child/open data structures
        root_element = self.elementClass(name, namespace)
        root_element._element = root
        self.document._childNodes.append(root_element)
        self.openElements.append(root_element)

        # Reset to the default insert comment function
        self.insertComment = self.insertCommentMain
|
Loading…
Add table
Add a link
Reference in a new issue