diff --git a/lib/xmltodict.py b/lib/xmltodict.py
index d6dbcd7a..ca760aa6 100644
--- a/lib/xmltodict.py
+++ b/lib/xmltodict.py
@@ -15,7 +15,12 @@ except ImportError: # pragma no cover
except ImportError:
from io import StringIO
-from collections import OrderedDict
+_dict = dict
+import platform
+if tuple(map(int, platform.python_version_tuple()[:2])) < (3, 7):
+ from collections import OrderedDict as _dict
+
+from inspect import isgenerator
try: # pragma no cover
_basestring = basestring
@@ -27,7 +32,7 @@ except NameError: # pragma no cover
_unicode = str
__author__ = 'Martin Blech'
-__version__ = '0.12.0'
+__version__ = '0.13.0'
__license__ = 'MIT'
@@ -45,11 +50,12 @@ class _DictSAXHandler(object):
force_cdata=False,
cdata_separator='',
postprocessor=None,
- dict_constructor=OrderedDict,
+ dict_constructor=_dict,
strip_whitespace=True,
namespace_separator=':',
namespaces=None,
- force_list=None):
+ force_list=None,
+ comment_key='#comment'):
self.path = []
self.stack = []
self.data = []
@@ -66,17 +72,21 @@ class _DictSAXHandler(object):
self.strip_whitespace = strip_whitespace
self.namespace_separator = namespace_separator
self.namespaces = namespaces
- self.namespace_declarations = OrderedDict()
+ self.namespace_declarations = dict_constructor()
self.force_list = force_list
+ self.comment_key = comment_key
def _build_name(self, full_name):
- if not self.namespaces:
+ if self.namespaces is None:
return full_name
i = full_name.rfind(self.namespace_separator)
if i == -1:
return full_name
namespace, name = full_name[:i], full_name[i+1:]
- short_namespace = self.namespaces.get(namespace, namespace)
+ try:
+ short_namespace = self.namespaces[namespace]
+ except KeyError:
+ short_namespace = namespace
if not short_namespace:
return name
else:
@@ -95,7 +105,7 @@ class _DictSAXHandler(object):
attrs = self._attrs_to_dict(attrs)
if attrs and self.namespace_declarations:
attrs['xmlns'] = self.namespace_declarations
- self.namespace_declarations = OrderedDict()
+ self.namespace_declarations = self.dict_constructor()
self.path.append((name, attrs or None))
if len(self.path) > self.item_depth:
self.stack.append((self.item, self.data))
@@ -126,7 +136,7 @@ class _DictSAXHandler(object):
should_continue = self.item_callback(self.path, item)
if not should_continue:
raise ParsingInterrupted()
- if len(self.stack):
+ if self.stack:
data = (None if not self.data
else self.cdata_separator.join(self.data))
item = self.item
@@ -152,6 +162,11 @@ class _DictSAXHandler(object):
else:
self.data.append(data)
+ def comments(self, data):
+ if self.strip_whitespace:
+ data = data.strip()
+ self.item = self.push_data(self.item, self.comment_key, data)
+
def push_data(self, item, key, data):
if self.postprocessor is not None:
result = self.postprocessor(self.path, key, data)
@@ -185,10 +200,10 @@ class _DictSAXHandler(object):
def parse(xml_input, encoding=None, expat=expat, process_namespaces=False,
- namespace_separator=':', disable_entities=True, **kwargs):
+ namespace_separator=':', disable_entities=True, process_comments=False, **kwargs):
"""Parse the given XML input and convert it into a dictionary.
- `xml_input` can either be a `string` or a file-like object.
+ `xml_input` can either be a `string`, a file-like object, or a generator of strings.
If `xml_attribs` is `True`, element attributes are put in the dictionary
among regular child elements, using `@` as a prefix to avoid collisions. If
@@ -243,21 +258,21 @@ def parse(xml_input, encoding=None, expat=expat, process_namespaces=False,
... return key, value
>>> xmltodict.parse('<a><b>1</b><b>2</b><b>x</b></a>',
... postprocessor=postprocessor)
- OrderedDict([(u'a', OrderedDict([(u'b:int', [1, 2]), (u'b', u'x')]))])
+ {'a': {'b:int': [1, 2], 'b': 'x'}}
You can pass an alternate version of `expat` (such as `defusedexpat`) by
using the `expat` parameter. E.g:
>>> import defusedexpat
>>> xmltodict.parse('<a>hello</a>', expat=defusedexpat.pyexpat)
- OrderedDict([(u'a', u'hello')])
+ {'a': 'hello'}
You can use the force_list argument to force lists to be created even
when there is only a single child of a given level of hierarchy. The
force_list argument is a tuple of keys. If the key for a given level
of hierarchy is in the force_list argument, that level of hierarchy
will have a list as a child (even if there is only one sub-element).
- The index_keys operation takes precendence over this. This is applied
+ The index_keys operation takes precedence over this. This is applied
after any user-supplied postprocessor has already run.
For example, given this input:
@@ -287,6 +302,36 @@ def parse(xml_input, encoding=None, expat=expat, process_namespaces=False,
`force_list` can also be a callable that receives `path`, `key` and
`value`. This is helpful in cases where the logic that decides whether
a list should be forced is more complex.
+
+
+ If `process_comments` is `True`, each comment is added under `comment_key`
+ (default=`'#comment'`) to the tag that contains the comment.
+
+ For example, given this input:
+ <a>
+ <b>
+ <!-- b comment -->
+ <c>
+ <!-- c comment -->
+ 1
+ </c>
+ <d>2</d>
+ </b>
+ </a>
+
+ If called with process_comments=True, it will produce
+ this dictionary:
+ 'a': {
+ 'b': {
+ '#comment': 'b comment',
+ 'c': {
+
+ '#comment': 'c comment',
+ '#text': '1',
+ },
+ 'd': '2',
+ },
+ }
"""
handler = _DictSAXHandler(namespace_separator=namespace_separator,
**kwargs)
@@ -309,6 +354,8 @@ def parse(xml_input, encoding=None, expat=expat, process_namespaces=False,
parser.StartElementHandler = handler.startElement
parser.EndElementHandler = handler.endElement
parser.CharacterDataHandler = handler.characters
+ if process_comments:
+ parser.CommentHandler = handler.comments
parser.buffer_text = True
if disable_entities:
try:
@@ -323,6 +370,10 @@ def parse(xml_input, encoding=None, expat=expat, process_namespaces=False,
parser.ExternalEntityRefHandler = lambda *x: 1
if hasattr(xml_input, 'read'):
parser.ParseFile(xml_input)
+ elif isgenerator(xml_input):
+ for chunk in xml_input:
+ parser.Parse(chunk,False)
+ parser.Parse(b'',True)
else:
parser.Parse(xml_input, True)
return handler.item
@@ -353,7 +404,8 @@ def _emit(key, value, content_handler,
indent='\t',
namespace_separator=':',
namespaces=None,
- full_document=True):
+ full_document=True,
+ expand_iter=None):
key = _process_namespace(key, namespaces, namespace_separator, attr_prefix)
if preprocessor is not None:
result = preprocessor(key, value)
@@ -368,18 +420,21 @@ def _emit(key, value, content_handler,
if full_document and depth == 0 and index > 0:
raise ValueError('document with multiple roots')
if v is None:
- v = OrderedDict()
+ v = _dict()
elif isinstance(v, bool):
if v:
v = _unicode('true')
else:
v = _unicode('false')
elif not isinstance(v, dict):
- v = _unicode(v)
+ if expand_iter and hasattr(v, '__iter__') and not isinstance(v, _basestring):
+ v = _dict(((expand_iter, v),))
+ else:
+ v = _unicode(v)
if isinstance(v, _basestring):
- v = OrderedDict(((cdata_key, v),))
+ v = _dict(((cdata_key, v),))
cdata = None
- attrs = OrderedDict()
+ attrs = _dict()
children = []
for ik, iv in v.items():
if ik == cdata_key:
@@ -407,7 +462,8 @@ def _emit(key, value, content_handler,
_emit(child_key, child_value, content_handler,
attr_prefix, cdata_key, depth+1, preprocessor,
pretty, newl, indent, namespaces=namespaces,
- namespace_separator=namespace_separator)
+ namespace_separator=namespace_separator,
+ expand_iter=expand_iter)
if cdata is not None:
content_handler.characters(cdata)
if pretty and children:
diff --git a/requirements.txt b/requirements.txt
index 6fb9cf89..4af4f688 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -48,7 +48,7 @@ tzlocal==4.2
urllib3==1.26.9
webencodings==0.5.1
websocket-client==1.3.2
-xmltodict==0.12.0
+xmltodict==0.13.0
zipp==3.8.0
# configobj==5.1.0