mirror of
https://github.com/Tautulli/Tautulli.git
synced 2025-07-06 05:01:14 -07:00
Update html5lib-1.1
This commit is contained in:
parent
3a116486e7
commit
586fd15464
142 changed files with 90234 additions and 2393 deletions
|
@ -1,56 +1,68 @@
|
|||
"""A collection of modules for building different kinds of tree from
|
||||
HTML documents.
|
||||
"""A collection of modules for building different kinds of trees from HTML
|
||||
documents.
|
||||
|
||||
To create a treebuilder for a new type of tree, you need to do
|
||||
implement several things:
|
||||
|
||||
1) A set of classes for various types of elements: Document, Doctype,
|
||||
Comment, Element. These must implement the interface of
|
||||
_base.treebuilders.Node (although comment nodes have a different
|
||||
signature for their constructor, see treebuilders.etree.Comment)
|
||||
Textual content may also be implemented as another node type, or not, as
|
||||
your tree implementation requires.
|
||||
1. A set of classes for various types of elements: Document, Doctype, Comment,
|
||||
Element. These must implement the interface of ``base.treebuilders.Node``
|
||||
(although comment nodes have a different signature for their constructor,
|
||||
see ``treebuilders.etree.Comment``) Textual content may also be implemented
|
||||
as another node type, or not, as your tree implementation requires.
|
||||
|
||||
2) A treebuilder object (called TreeBuilder by convention) that
|
||||
inherits from treebuilders._base.TreeBuilder. This has 4 required attributes:
|
||||
documentClass - the class to use for the bottommost node of a document
|
||||
elementClass - the class to use for HTML Elements
|
||||
commentClass - the class to use for comments
|
||||
doctypeClass - the class to use for doctypes
|
||||
It also has one required method:
|
||||
getDocument - Returns the root node of the complete document tree
|
||||
2. A treebuilder object (called ``TreeBuilder`` by convention) that inherits
|
||||
from ``treebuilders.base.TreeBuilder``. This has 4 required attributes:
|
||||
|
||||
* ``documentClass`` - the class to use for the bottommost node of a document
|
||||
* ``elementClass`` - the class to use for HTML Elements
|
||||
* ``commentClass`` - the class to use for comments
|
||||
* ``doctypeClass`` - the class to use for doctypes
|
||||
|
||||
It also has one required method:
|
||||
|
||||
* ``getDocument`` - Returns the root node of the complete document tree
|
||||
|
||||
3. If you wish to run the unit tests, you must also create a ``testSerializer``
|
||||
method on your treebuilder which accepts a node and returns a string
|
||||
containing Node and its children serialized according to the format used in
|
||||
the unittests
|
||||
|
||||
3) If you wish to run the unit tests, you must also create a
|
||||
testSerializer method on your treebuilder which accepts a node and
|
||||
returns a string containing Node and its children serialized according
|
||||
to the format used in the unittests
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from ..utils import default_etree
|
||||
from .._utils import default_etree
|
||||
|
||||
treeBuilderCache = {}
|
||||
|
||||
|
||||
def getTreeBuilder(treeType, implementation=None, **kwargs):
|
||||
"""Get a TreeBuilder class for various types of tree with built-in support
|
||||
"""Get a TreeBuilder class for various types of trees with built-in support
|
||||
|
||||
treeType - the name of the tree type required (case-insensitive). Supported
|
||||
values are:
|
||||
:arg treeType: the name of the tree type required (case-insensitive). Supported
|
||||
values are:
|
||||
|
||||
"dom" - A generic builder for DOM implementations, defaulting to
|
||||
a xml.dom.minidom based implementation.
|
||||
"etree" - A generic builder for tree implementations exposing an
|
||||
ElementTree-like interface, defaulting to
|
||||
xml.etree.cElementTree if available and
|
||||
xml.etree.ElementTree if not.
|
||||
"lxml" - A etree-based builder for lxml.etree, handling
|
||||
limitations of lxml's implementation.
|
||||
* "dom" - A generic builder for DOM implementations, defaulting to a
|
||||
xml.dom.minidom based implementation.
|
||||
* "etree" - A generic builder for tree implementations exposing an
|
||||
ElementTree-like interface, defaulting to xml.etree.cElementTree if
|
||||
available and xml.etree.ElementTree if not.
|
||||
* "lxml" - A etree-based builder for lxml.etree, handling limitations
|
||||
of lxml's implementation.
|
||||
|
||||
implementation - (Currently applies to the "etree" and "dom" tree types). A
|
||||
module implementing the tree type e.g.
|
||||
xml.etree.ElementTree or xml.etree.cElementTree."""
|
||||
:arg implementation: (Currently applies to the "etree" and "dom" tree
|
||||
types). A module implementing the tree type e.g. xml.etree.ElementTree
|
||||
or xml.etree.cElementTree.
|
||||
|
||||
:arg kwargs: Any additional options to pass to the TreeBuilder when
|
||||
creating it.
|
||||
|
||||
Example:
|
||||
|
||||
>>> from html5lib.treebuilders import getTreeBuilder
|
||||
>>> builder = getTreeBuilder('etree')
|
||||
|
||||
"""
|
||||
|
||||
treeType = treeType.lower()
|
||||
if treeType not in treeBuilderCache:
|
||||
|
|
|
@ -10,9 +10,9 @@ Marker = None
|
|||
|
||||
listElementsMap = {
|
||||
None: (frozenset(scopingElements), False),
|
||||
"button": (frozenset(scopingElements | set([(namespaces["html"], "button")])), False),
|
||||
"list": (frozenset(scopingElements | set([(namespaces["html"], "ol"),
|
||||
(namespaces["html"], "ul")])), False),
|
||||
"button": (frozenset(scopingElements | {(namespaces["html"], "button")}), False),
|
||||
"list": (frozenset(scopingElements | {(namespaces["html"], "ol"),
|
||||
(namespaces["html"], "ul")}), False),
|
||||
"table": (frozenset([(namespaces["html"], "html"),
|
||||
(namespaces["html"], "table")]), False),
|
||||
"select": (frozenset([(namespaces["html"], "optgroup"),
|
||||
|
@ -21,22 +21,25 @@ listElementsMap = {
|
|||
|
||||
|
||||
class Node(object):
|
||||
"""Represents an item in the tree"""
|
||||
def __init__(self, name):
|
||||
"""Node representing an item in the tree.
|
||||
name - The tag name associated with the node
|
||||
parent - The parent of the current node (or None for the document node)
|
||||
value - The value of the current node (applies to text nodes and
|
||||
comments
|
||||
attributes - a dict holding name, value pairs for attributes of the node
|
||||
childNodes - a list of child nodes of the current node. This must
|
||||
include all elements but not necessarily other node types
|
||||
_flags - A list of miscellaneous flags that can be set on the node
|
||||
"""Creates a Node
|
||||
|
||||
:arg name: The tag name associated with the node
|
||||
|
||||
"""
|
||||
# The tag name associated with the node
|
||||
self.name = name
|
||||
# The parent of the current node (or None for the document node)
|
||||
self.parent = None
|
||||
# The value of the current node (applies to text nodes and comments)
|
||||
self.value = None
|
||||
# A dict holding name -> value pairs for attributes of the node
|
||||
self.attributes = {}
|
||||
# A list of child nodes of the current node. This must include all
|
||||
# elements but not necessarily other node types.
|
||||
self.childNodes = []
|
||||
# A list of miscellaneous flags that can be set on the node.
|
||||
self._flags = []
|
||||
|
||||
def __str__(self):
|
||||
|
@ -53,23 +56,41 @@ class Node(object):
|
|||
|
||||
def appendChild(self, node):
|
||||
"""Insert node as a child of the current node
|
||||
|
||||
:arg node: the node to insert
|
||||
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def insertText(self, data, insertBefore=None):
|
||||
"""Insert data as text in the current node, positioned before the
|
||||
start of node insertBefore or to the end of the node's text.
|
||||
|
||||
:arg data: the data to insert
|
||||
|
||||
:arg insertBefore: True if you want to insert the text before the node
|
||||
and False if you want to insert it after the node
|
||||
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def insertBefore(self, node, refNode):
|
||||
"""Insert node as a child of the current node, before refNode in the
|
||||
list of child nodes. Raises ValueError if refNode is not a child of
|
||||
the current node"""
|
||||
the current node
|
||||
|
||||
:arg node: the node to insert
|
||||
|
||||
:arg refNode: the child node to insert the node before
|
||||
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def removeChild(self, node):
|
||||
"""Remove node from the children of the current node
|
||||
|
||||
:arg node: the child node to remove
|
||||
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
|
@ -77,6 +98,9 @@ class Node(object):
|
|||
"""Move all the children of the current node to newParent.
|
||||
This is needed so that trees that don't store text as nodes move the
|
||||
text in the correct way
|
||||
|
||||
:arg newParent: the node to move all this node's children to
|
||||
|
||||
"""
|
||||
# XXX - should this method be made more general?
|
||||
for child in self.childNodes:
|
||||
|
@ -121,11 +145,14 @@ class ActiveFormattingElements(list):
|
|||
|
||||
class TreeBuilder(object):
|
||||
"""Base treebuilder implementation
|
||||
documentClass - the class to use for the bottommost node of a document
|
||||
elementClass - the class to use for HTML Elements
|
||||
commentClass - the class to use for comments
|
||||
doctypeClass - the class to use for doctypes
|
||||
|
||||
* documentClass - the class to use for the bottommost node of a document
|
||||
* elementClass - the class to use for HTML Elements
|
||||
* commentClass - the class to use for comments
|
||||
* doctypeClass - the class to use for doctypes
|
||||
|
||||
"""
|
||||
# pylint:disable=not-callable
|
||||
|
||||
# Document class
|
||||
documentClass = None
|
||||
|
@ -143,6 +170,11 @@ class TreeBuilder(object):
|
|||
fragmentClass = None
|
||||
|
||||
def __init__(self, namespaceHTMLElements):
|
||||
"""Create a TreeBuilder
|
||||
|
||||
:arg namespaceHTMLElements: whether or not to namespace HTML elements
|
||||
|
||||
"""
|
||||
if namespaceHTMLElements:
|
||||
self.defaultNamespace = "http://www.w3.org/1999/xhtml"
|
||||
else:
|
||||
|
@ -166,12 +198,17 @@ class TreeBuilder(object):
|
|||
# If we pass a node in we match that. if we pass a string
|
||||
# match any node with that name
|
||||
exactNode = hasattr(target, "nameTuple")
|
||||
if not exactNode:
|
||||
if isinstance(target, text_type):
|
||||
target = (namespaces["html"], target)
|
||||
assert isinstance(target, tuple)
|
||||
|
||||
listElements, invert = listElementsMap[variant]
|
||||
|
||||
for node in reversed(self.openElements):
|
||||
if (node.name == target and not exactNode or
|
||||
node == target and exactNode):
|
||||
if exactNode and node == target:
|
||||
return True
|
||||
elif not exactNode and node.nameTuple == target:
|
||||
return True
|
||||
elif (invert ^ (node.nameTuple in listElements)):
|
||||
return False
|
||||
|
@ -353,19 +390,19 @@ class TreeBuilder(object):
|
|||
def generateImpliedEndTags(self, exclude=None):
|
||||
name = self.openElements[-1].name
|
||||
# XXX td, th and tr are not actually needed
|
||||
if (name in frozenset(("dd", "dt", "li", "option", "optgroup", "p", "rp", "rt"))
|
||||
and name != exclude):
|
||||
if (name in frozenset(("dd", "dt", "li", "option", "optgroup", "p", "rp", "rt")) and
|
||||
name != exclude):
|
||||
self.openElements.pop()
|
||||
# XXX This is not entirely what the specification says. We should
|
||||
# investigate it more closely.
|
||||
self.generateImpliedEndTags(exclude)
|
||||
|
||||
def getDocument(self):
|
||||
"Return the final tree"
|
||||
"""Return the final tree"""
|
||||
return self.document
|
||||
|
||||
def getFragment(self):
|
||||
"Return the final fragment"
|
||||
"""Return the final fragment"""
|
||||
# assert self.innerHTML
|
||||
fragment = self.fragmentClass()
|
||||
self.openElements[0].reparentChildren(fragment)
|
||||
|
@ -373,5 +410,8 @@ class TreeBuilder(object):
|
|||
|
||||
def testSerializer(self, node):
|
||||
"""Serialize the subtree of node in the format required by unit tests
|
||||
node - the node from which to start serializing"""
|
||||
|
||||
:arg node: the node from which to start serializing
|
||||
|
||||
"""
|
||||
raise NotImplementedError
|
|
@ -1,54 +1,65 @@
|
|||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
|
||||
try:
|
||||
from collections.abc import MutableMapping
|
||||
except ImportError: # Python 2.7
|
||||
from collections import MutableMapping
|
||||
from xml.dom import minidom, Node
|
||||
import weakref
|
||||
|
||||
from . import _base
|
||||
from . import base
|
||||
from .. import constants
|
||||
from ..constants import namespaces
|
||||
from ..utils import moduleFactoryFactory
|
||||
from .._utils import moduleFactoryFactory
|
||||
|
||||
|
||||
def getDomBuilder(DomImplementation):
|
||||
Dom = DomImplementation
|
||||
|
||||
class AttrList(object):
|
||||
class AttrList(MutableMapping):
|
||||
def __init__(self, element):
|
||||
self.element = element
|
||||
|
||||
def __iter__(self):
|
||||
return list(self.element.attributes.items()).__iter__()
|
||||
return iter(self.element.attributes.keys())
|
||||
|
||||
def __setitem__(self, name, value):
|
||||
self.element.setAttribute(name, value)
|
||||
|
||||
def __len__(self):
|
||||
return len(list(self.element.attributes.items()))
|
||||
|
||||
def items(self):
|
||||
return [(item[0], item[1]) for item in
|
||||
list(self.element.attributes.items())]
|
||||
|
||||
def keys(self):
|
||||
return list(self.element.attributes.keys())
|
||||
|
||||
def __getitem__(self, name):
|
||||
return self.element.getAttribute(name)
|
||||
|
||||
def __contains__(self, name):
|
||||
if isinstance(name, tuple):
|
||||
raise NotImplementedError
|
||||
else:
|
||||
return self.element.hasAttribute(name)
|
||||
attr = self.element.ownerDocument.createAttribute(name)
|
||||
attr.value = value
|
||||
self.element.attributes[name] = attr
|
||||
|
||||
class NodeBuilder(_base.Node):
|
||||
def __len__(self):
|
||||
return len(self.element.attributes)
|
||||
|
||||
def items(self):
|
||||
return list(self.element.attributes.items())
|
||||
|
||||
def values(self):
|
||||
return list(self.element.attributes.values())
|
||||
|
||||
def __getitem__(self, name):
|
||||
if isinstance(name, tuple):
|
||||
raise NotImplementedError
|
||||
else:
|
||||
return self.element.attributes[name].value
|
||||
|
||||
def __delitem__(self, name):
|
||||
if isinstance(name, tuple):
|
||||
raise NotImplementedError
|
||||
else:
|
||||
del self.element.attributes[name]
|
||||
|
||||
class NodeBuilder(base.Node):
|
||||
def __init__(self, element):
|
||||
_base.Node.__init__(self, element.nodeName)
|
||||
base.Node.__init__(self, element.nodeName)
|
||||
self.element = element
|
||||
|
||||
namespace = property(lambda self: hasattr(self.element, "namespaceURI")
|
||||
and self.element.namespaceURI or None)
|
||||
namespace = property(lambda self: hasattr(self.element, "namespaceURI") and
|
||||
self.element.namespaceURI or None)
|
||||
|
||||
def appendChild(self, node):
|
||||
node.parent = self
|
||||
|
@ -109,7 +120,7 @@ def getDomBuilder(DomImplementation):
|
|||
|
||||
nameTuple = property(getNameTuple)
|
||||
|
||||
class TreeBuilder(_base.TreeBuilder):
|
||||
class TreeBuilder(base.TreeBuilder): # pylint:disable=unused-variable
|
||||
def documentClass(self):
|
||||
self.dom = Dom.getDOMImplementation().createDocument(None, None, None)
|
||||
return weakref.proxy(self)
|
||||
|
@ -149,16 +160,17 @@ def getDomBuilder(DomImplementation):
|
|||
return self.dom
|
||||
|
||||
def getFragment(self):
|
||||
return _base.TreeBuilder.getFragment(self).element
|
||||
return base.TreeBuilder.getFragment(self).element
|
||||
|
||||
def insertText(self, data, parent=None):
|
||||
data = data
|
||||
if parent != self:
|
||||
_base.TreeBuilder.insertText(self, data, parent)
|
||||
base.TreeBuilder.insertText(self, data, parent)
|
||||
else:
|
||||
# HACK: allow text nodes as children of the document node
|
||||
if hasattr(self.dom, '_child_node_types'):
|
||||
if not Node.TEXT_NODE in self.dom._child_node_types:
|
||||
# pylint:disable=protected-access
|
||||
if Node.TEXT_NODE not in self.dom._child_node_types:
|
||||
self.dom._child_node_types = list(self.dom._child_node_types)
|
||||
self.dom._child_node_types.append(Node.TEXT_NODE)
|
||||
self.dom.appendChild(self.dom.createTextNode(data))
|
||||
|
|
|
@ -1,13 +1,17 @@
|
|||
from __future__ import absolute_import, division, unicode_literals
|
||||
# pylint:disable=protected-access
|
||||
|
||||
from six import text_type
|
||||
|
||||
import re
|
||||
|
||||
from . import _base
|
||||
from .. import ihatexml
|
||||
from copy import copy
|
||||
|
||||
from . import base
|
||||
from .. import _ihatexml
|
||||
from .. import constants
|
||||
from ..constants import namespaces
|
||||
from ..utils import moduleFactoryFactory
|
||||
from .._utils import moduleFactoryFactory
|
||||
|
||||
tag_regexp = re.compile("{([^}]*)}(.*)")
|
||||
|
||||
|
@ -16,7 +20,7 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False):
|
|||
ElementTree = ElementTreeImplementation
|
||||
ElementTreeCommentType = ElementTree.Comment("asd").tag
|
||||
|
||||
class Element(_base.Node):
|
||||
class Element(base.Node):
|
||||
def __init__(self, name, namespace=None):
|
||||
self._name = name
|
||||
self._namespace = namespace
|
||||
|
@ -59,16 +63,17 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False):
|
|||
return self._element.attrib
|
||||
|
||||
def _setAttributes(self, attributes):
|
||||
# Delete existing attributes first
|
||||
# XXX - there may be a better way to do this...
|
||||
for key in list(self._element.attrib.keys()):
|
||||
del self._element.attrib[key]
|
||||
for key, value in attributes.items():
|
||||
if isinstance(key, tuple):
|
||||
name = "{%s}%s" % (key[2], key[1])
|
||||
else:
|
||||
name = key
|
||||
self._element.set(name, value)
|
||||
el_attrib = self._element.attrib
|
||||
el_attrib.clear()
|
||||
if attributes:
|
||||
# calling .items _always_ allocates, and the above truthy check is cheaper than the
|
||||
# allocation on average
|
||||
for key, value in attributes.items():
|
||||
if isinstance(key, tuple):
|
||||
name = "{%s}%s" % (key[2], key[1])
|
||||
else:
|
||||
name = key
|
||||
el_attrib[name] = value
|
||||
|
||||
attributes = property(_getAttributes, _setAttributes)
|
||||
|
||||
|
@ -98,6 +103,7 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False):
|
|||
node.parent = self
|
||||
|
||||
def removeChild(self, node):
|
||||
self._childNodes.remove(node)
|
||||
self._element.remove(node._element)
|
||||
node.parent = None
|
||||
|
||||
|
@ -126,8 +132,8 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False):
|
|||
|
||||
def cloneNode(self):
|
||||
element = type(self)(self.name, self.namespace)
|
||||
for name, value in self.attributes.items():
|
||||
element.attributes[name] = value
|
||||
if self._element.attrib:
|
||||
element._element.attrib = copy(self._element.attrib)
|
||||
return element
|
||||
|
||||
def reparentChildren(self, newParent):
|
||||
|
@ -139,7 +145,7 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False):
|
|||
if self._element.text is not None:
|
||||
newParent._element.text += self._element.text
|
||||
self._element.text = ""
|
||||
_base.Node.reparentChildren(self, newParent)
|
||||
base.Node.reparentChildren(self, newParent)
|
||||
|
||||
class Comment(Element):
|
||||
def __init__(self, data):
|
||||
|
@ -253,10 +259,10 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False):
|
|||
|
||||
return "\n".join(rv)
|
||||
|
||||
def tostring(element):
|
||||
def tostring(element): # pylint:disable=unused-variable
|
||||
"""Serialize an element and its child nodes to a string"""
|
||||
rv = []
|
||||
filter = ihatexml.InfosetFilter()
|
||||
filter = _ihatexml.InfosetFilter()
|
||||
|
||||
def serializeElement(element):
|
||||
if isinstance(element, ElementTree.ElementTree):
|
||||
|
@ -307,7 +313,7 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False):
|
|||
|
||||
return "".join(rv)
|
||||
|
||||
class TreeBuilder(_base.TreeBuilder):
|
||||
class TreeBuilder(base.TreeBuilder): # pylint:disable=unused-variable
|
||||
documentClass = Document
|
||||
doctypeClass = DocumentType
|
||||
elementClass = Element
|
||||
|
@ -329,7 +335,7 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False):
|
|||
return self.document._element.find("html")
|
||||
|
||||
def getFragment(self):
|
||||
return _base.TreeBuilder.getFragment(self)._element
|
||||
return base.TreeBuilder.getFragment(self)._element
|
||||
|
||||
return locals()
|
||||
|
||||
|
|
|
@ -10,18 +10,25 @@ When any of these things occur, we emit a DataLossWarning
|
|||
"""
|
||||
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
# pylint:disable=protected-access
|
||||
|
||||
import warnings
|
||||
import re
|
||||
import sys
|
||||
|
||||
from . import _base
|
||||
try:
|
||||
from collections.abc import MutableMapping
|
||||
except ImportError:
|
||||
from collections import MutableMapping
|
||||
|
||||
from . import base
|
||||
from ..constants import DataLossWarning
|
||||
from .. import constants
|
||||
from . import etree as etree_builders
|
||||
from .. import ihatexml
|
||||
from .. import _ihatexml
|
||||
|
||||
import lxml.etree as etree
|
||||
from six import PY3, binary_type
|
||||
|
||||
|
||||
fullTree = True
|
||||
|
@ -43,7 +50,11 @@ class Document(object):
|
|||
self._childNodes = []
|
||||
|
||||
def appendChild(self, element):
|
||||
self._elementTree.getroot().addnext(element._element)
|
||||
last = self._elementTree.getroot()
|
||||
for last in self._elementTree.getroot().itersiblings():
|
||||
pass
|
||||
|
||||
last.addnext(element._element)
|
||||
|
||||
def _getChildNodes(self):
|
||||
return self._childNodes
|
||||
|
@ -53,8 +64,7 @@ class Document(object):
|
|||
|
||||
def testSerializer(element):
|
||||
rv = []
|
||||
finalText = None
|
||||
infosetFilter = ihatexml.InfosetFilter()
|
||||
infosetFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True)
|
||||
|
||||
def serializeElement(element, indent=0):
|
||||
if not hasattr(element, "tag"):
|
||||
|
@ -79,7 +89,7 @@ def testSerializer(element):
|
|||
next_element = next_element.getnext()
|
||||
elif isinstance(element, str) or isinstance(element, bytes):
|
||||
# Text in a fragment
|
||||
assert isinstance(element, str) or sys.version_info.major == 2
|
||||
assert isinstance(element, str) or sys.version_info[0] == 2
|
||||
rv.append("|%s\"%s\"" % (' ' * indent, element))
|
||||
else:
|
||||
# Fragment case
|
||||
|
@ -128,16 +138,12 @@ def testSerializer(element):
|
|||
rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail))
|
||||
serializeElement(element, 0)
|
||||
|
||||
if finalText is not None:
|
||||
rv.append("|%s\"%s\"" % (' ' * 2, finalText))
|
||||
|
||||
return "\n".join(rv)
|
||||
|
||||
|
||||
def tostring(element):
|
||||
"""Serialize an element and its child nodes to a string"""
|
||||
rv = []
|
||||
finalText = None
|
||||
|
||||
def serializeElement(element):
|
||||
if not hasattr(element, "tag"):
|
||||
|
@ -173,13 +179,10 @@ def tostring(element):
|
|||
|
||||
serializeElement(element)
|
||||
|
||||
if finalText is not None:
|
||||
rv.append("%s\"" % (' ' * 2, finalText))
|
||||
|
||||
return "".join(rv)
|
||||
|
||||
|
||||
class TreeBuilder(_base.TreeBuilder):
|
||||
class TreeBuilder(base.TreeBuilder):
|
||||
documentClass = Document
|
||||
doctypeClass = DocumentType
|
||||
elementClass = None
|
||||
|
@ -189,27 +192,40 @@ class TreeBuilder(_base.TreeBuilder):
|
|||
|
||||
def __init__(self, namespaceHTMLElements, fullTree=False):
|
||||
builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
|
||||
infosetFilter = self.infosetFilter = ihatexml.InfosetFilter()
|
||||
infosetFilter = self.infosetFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True)
|
||||
self.namespaceHTMLElements = namespaceHTMLElements
|
||||
|
||||
class Attributes(dict):
|
||||
def __init__(self, element, value={}):
|
||||
class Attributes(MutableMapping):
|
||||
def __init__(self, element):
|
||||
self._element = element
|
||||
dict.__init__(self, value)
|
||||
for key, value in self.items():
|
||||
if isinstance(key, tuple):
|
||||
name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1]))
|
||||
else:
|
||||
name = infosetFilter.coerceAttribute(key)
|
||||
self._element._element.attrib[name] = value
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
dict.__setitem__(self, key, value)
|
||||
def _coerceKey(self, key):
|
||||
if isinstance(key, tuple):
|
||||
name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1]))
|
||||
else:
|
||||
name = infosetFilter.coerceAttribute(key)
|
||||
self._element._element.attrib[name] = value
|
||||
return name
|
||||
|
||||
def __getitem__(self, key):
|
||||
value = self._element._element.attrib[self._coerceKey(key)]
|
||||
if not PY3 and isinstance(value, binary_type):
|
||||
value = value.decode("ascii")
|
||||
return value
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
self._element._element.attrib[self._coerceKey(key)] = value
|
||||
|
||||
def __delitem__(self, key):
|
||||
del self._element._element.attrib[self._coerceKey(key)]
|
||||
|
||||
def __iter__(self):
|
||||
return iter(self._element._element.attrib)
|
||||
|
||||
def __len__(self):
|
||||
return len(self._element._element.attrib)
|
||||
|
||||
def clear(self):
|
||||
return self._element._element.attrib.clear()
|
||||
|
||||
class Element(builder.Element):
|
||||
def __init__(self, name, namespace):
|
||||
|
@ -230,8 +246,10 @@ class TreeBuilder(_base.TreeBuilder):
|
|||
def _getAttributes(self):
|
||||
return self._attributes
|
||||
|
||||
def _setAttributes(self, attributes):
|
||||
self._attributes = Attributes(self, attributes)
|
||||
def _setAttributes(self, value):
|
||||
attributes = self.attributes
|
||||
attributes.clear()
|
||||
attributes.update(value)
|
||||
|
||||
attributes = property(_getAttributes, _setAttributes)
|
||||
|
||||
|
@ -239,8 +257,11 @@ class TreeBuilder(_base.TreeBuilder):
|
|||
data = infosetFilter.coerceCharacters(data)
|
||||
builder.Element.insertText(self, data, insertBefore)
|
||||
|
||||
def appendChild(self, child):
|
||||
builder.Element.appendChild(self, child)
|
||||
def cloneNode(self):
|
||||
element = type(self)(self.name, self.namespace)
|
||||
if self._element.attrib:
|
||||
element._element.attrib.update(self._element.attrib)
|
||||
return element
|
||||
|
||||
class Comment(builder.Comment):
|
||||
def __init__(self, data):
|
||||
|
@ -257,12 +278,12 @@ class TreeBuilder(_base.TreeBuilder):
|
|||
data = property(_getData, _setData)
|
||||
|
||||
self.elementClass = Element
|
||||
self.commentClass = builder.Comment
|
||||
self.commentClass = Comment
|
||||
# self.fragmentClass = builder.DocumentFragment
|
||||
_base.TreeBuilder.__init__(self, namespaceHTMLElements)
|
||||
base.TreeBuilder.__init__(self, namespaceHTMLElements)
|
||||
|
||||
def reset(self):
|
||||
_base.TreeBuilder.reset(self)
|
||||
base.TreeBuilder.reset(self)
|
||||
self.insertComment = self.insertCommentInitial
|
||||
self.initial_comments = []
|
||||
self.doctype = None
|
||||
|
@ -303,19 +324,20 @@ class TreeBuilder(_base.TreeBuilder):
|
|||
self.doctype = doctype
|
||||
|
||||
def insertCommentInitial(self, data, parent=None):
|
||||
assert parent is None or parent is self.document
|
||||
assert self.document._elementTree is None
|
||||
self.initial_comments.append(data)
|
||||
|
||||
def insertCommentMain(self, data, parent=None):
|
||||
if (parent == self.document and
|
||||
self.document._elementTree.getroot()[-1].tag == comment_type):
|
||||
warnings.warn("lxml cannot represent adjacent comments beyond the root elements", DataLossWarning)
|
||||
warnings.warn("lxml cannot represent adjacent comments beyond the root elements", DataLossWarning)
|
||||
super(TreeBuilder, self).insertComment(data, parent)
|
||||
|
||||
def insertRoot(self, token):
|
||||
"""Create the document root"""
|
||||
# Because of the way libxml2 works, it doesn't seem to be possible to
|
||||
# alter information like the doctype after the tree has been parsed.
|
||||
# Therefore we need to use the built-in parser to create our iniial
|
||||
# Therefore we need to use the built-in parser to create our initial
|
||||
# tree, after which we can add elements like normal
|
||||
docStr = ""
|
||||
if self.doctype:
|
||||
|
@ -344,7 +366,8 @@ class TreeBuilder(_base.TreeBuilder):
|
|||
|
||||
# Append the initial comments:
|
||||
for comment_token in self.initial_comments:
|
||||
root.addprevious(etree.Comment(comment_token["data"]))
|
||||
comment = self.commentClass(comment_token["data"])
|
||||
root.addprevious(comment._element)
|
||||
|
||||
# Create the root document and add the ElementTree to it
|
||||
self.document = self.documentClass()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue